#
# ConfidenceLevel.R    11/04/2025    Ralf Tautenhahn (ralf.tautenhahn@thermofisher.com) 
# 
#
#  Annotation Confidence Level 
#
#  Based on Schymanski et al., https://pubs.acs.org/doi/full/10.1021/es5002105
# 
#  R Libraries required: rjson, XML, RSQLite
#  Optional  : OpenBabel https://github.com/openbabel/openbabel/releases
#
#
# Requested Tables and Columns    
#
# Compounds: Formula, Annot Source, mzCloud Best Match, mzVault Best Match, MS2; ChemSpider Results: Number of References; mzVault Results: Best Match, mzVault Library; Predicted Compositions: Delta Mass in ppm; mzCloud Results: Best Match
# Compounds: Name, Formula, Annot Source, mzCloud Best Match, mzVault Best Match, MS2
#

# Version of this script
VERSION <- "1.52"

#
#  Version history
#
#  v1.40 04/15/2025 RC1
#  v1.41 04/26/2025 RC2 - check that MaxNumberOfCandidates is set to >= 10 in the Predict Elemental Composition node
#                       - MS2 requirement for L3 and L2: also accept MS2 for other ions (not just for preferred ion)
#
#  v1.42  05/05/2025 Compatibility with Compound Discoverer 3.4
#  v1.43  05/12/2025 Read compound names directly from database to avoid problem with special characters in compound names
#  v1.44  05/19/2025 changed names of result columns to "ConfLevel" and "ConfLevel_Flags"
#  v1.45  05/20/2025 added Confidence Level Summary table
#  v1.46  05/20/2025 compatible with older result files for comparison
#  v1.47  05/28/2025 bugfix comparison
#  v1.48  08/06/2025 Added Redundant Annotation Flag column 
#  v1.49  08/20/2025 Added "L1/L2 duplicate-identical evidence" flags
#  v1.50  08/25/2025 bugfix
#  v1.51  08/27/2025 bugfix
#  v1.52  11/04/2025 make sure correct thresholds are used for L1 (PPM and Min. Match Factor Difference thresholds are currently the same for L1 and L2)


# If set to TRUE, additional columns with check marks for each level are returned
DEBUG <- FALSE

# Names of in-silico spectral libraries.
# NOTE(review): the original comment states that regular expressions are OK,
# but the use visible in this file compares the library name with startsWith(),
# i.e. as a literal prefix - confirm before putting regex syntax in here.
IN_SILICO_LIBRARIES <- c("LipidBlast", "PFAS_CFM_specLibrary_Duke")

# Names of the result columns and the texts written into them
resultColName <- "ConfLevel"
resultColFlagsName <- "ConfLevel_Flags"
resultColRAFName <- "Redundant Annotation Flag"
RAFtext <- "putative analog of "
MultipleL1HitsText <- "L1 duplicate - identical evidence"
MultipleL2HitsText <- "L2 duplicate - identical evidence"

# Path of the OpenBabel executable (only needed when STRUCFORMAT is not "CD")
OPENBABEL <- "C:\\Program Files\\OpenBabel-3.1.1\\obabel.exe"

# How structures are compared: "InChIKey", "InChI", or "CD"
STRUCFORMAT <- "CD"

# Finds tables by name in the parsed node JSON.
#   JSON_in : parsed JSON; JSON_in$Tables is a list of entries with a TableName
#   name    : table name(s) to look for
# Returns the position(s) within JSON_in$Tables whose TableName is in `name`
# (an empty integer vector if nothing matches).
getTableIdx <- function(JSON_in, name) {
  isRequested <- sapply(JSON_in$Tables, function(tbl) tbl$TableName) %in% name
  which(isRequested)
}

# Finds a table by name in the parsed node JSON and reads its data file.
#   JSON_in : parsed JSON; each entry of JSON_in$Tables has a TableName and a DataFile
#   name    : name of the requested table
# Returns the table as a data.frame (column names are kept as they are in the
# file, strings are not converted to factors).
# Stops with an error that names the requested table if it is missing, or if
# more than one entry matches (a vector index inside `[[` would otherwise
# fail with an unrelated, hard-to-read error).
getTable <- function(JSON_in, name) {

  tableNames <- sapply(JSON_in$Tables, function(tbl) tbl$TableName)
  hits <- which(tableNames %in% name)

  if (length(hits) < 1)
    stop("Table not found: ", paste(name, collapse = ", "), call. = FALSE)

  if (length(hits) > 1)
    stop("Table name is not unique in the node input: ",
         paste(name, collapse = ", "), call. = FALSE)

  read.table(JSON_in$Tables[[hits]]$DataFile,
             header = TRUE, check.names = FALSE, stringsAsFactors = FALSE)
}

# Reads the complete WorkflowInputFiles table from a cdresult (SQLite) file.
#   cdresult.file : path of the cdresult file
# Returns the table as a data.frame; stops if the table has no rows.
getWorkFlowInputFiles <- function(cdresult.file) {
  library(RSQLite)

  query <- paste('SELECT *
                  FROM WorkflowInputFiles ')

  drv <- dbDriver("SQLite")
  con <- dbConnect(drv, cdresult.file)
  # close the connection on every exit path, including a failing query
  on.exit(dbDisconnect(con), add = TRUE)

  qres <- dbGetQuery(con, query)

  if (nrow(qres) < 1) stop("Error: Cannot read WorkFlowInputFilesTable.")

  qres
}

# Reads the workflow (XML) stored in a cdresult (SQLite) file for a given workflow ID.
#   cdresult.file : path of the cdresult file
#   workflowID    : value of Workflows.WorkflowID to look up
# Returns the first row of the query result (the WorkflowXML value);
# stops if no workflow with this ID exists.
getWorkFlowXML <- function(cdresult.file, workflowID) {
  library(RSQLite)

  query <- paste('SELECT WorkflowXML
                  FROM Workflows
                 WHERE WorkflowID =', workflowID)

  drv <- dbDriver("SQLite")
  con <- dbConnect(drv, cdresult.file)
  # close the connection on every exit path, including a failing query
  on.exit(dbDisconnect(con), add = TRUE)

  qres <- dbGetQuery(con, query)

  if (nrow(qres) < 1) stop("Error: Cannot read WorkFlows table.")

  qres[1,]
}

# Looks for a specific processing node and one of its parameters in the workflow
# (XML converted to a list) and returns the "DisplayValue" attribute of that parameter.
#   xml_data           : list representation of the workflow; must contain $WorkflowTree
#   ProcessingNodeName : value of the node's "ProcessingNodeName" attribute
#   ParameterName      : value of the parameter's "Name" attribute
# Stops with an error if the node or the parameter cannot be found.
getParameterXML <- function(xml_data, ProcessingNodeName, ParameterName) {
  # "ProcessingNodeName" attribute of every entry of the workflow tree
  pnodes <- sapply(xml_data$WorkflowTree, function(x) x$.attrs["ProcessingNodeName"])
  idx <- which(pnodes == ProcessingNodeName)
  if (length(idx) < 1) stop("No ", ProcessingNodeName, " node in the workflow?")
  # NOTE(review): if several nodes carry the same name, idx has more than one
  # element and `[[` would index recursively - confirm node names are unique
  pnode <- xml_data$WorkflowTree[[idx]]
  
  # attributes of each parameter entry; the first element of ProcessingNodeParameters
  # is skipped ([-1]) and entries that are not lists yield NULL
  param <- lapply(pnode$ProcessingNodeParameters[-1], function(x) { if (mode(x) == "list") x$.attrs else NULL } ) 

  # position of the parameter whose "Name" attribute equals ParameterName
  paramidx <- which(sapply(param, function(x) x[["Name"]]) == ParameterName)
  if (length(paramidx) < 1) 
    stop("No ", ParameterName, " parameter in the ", ProcessingNodeName, " node ?")
  
  # same [-1] offset as above, so paramidx refers to the same entry
  return(pnode$ProcessingNodeParameters[-1][[paramidx]]$.attrs[["DisplayValue"]])
}

# Queries the ID and Name columns of ConsolidatedUnknownCompoundItems
# (the Compounds table) through an open connection to the cdresult file.
#   sqlcon : open database connection
# Returns the query result as delivered by dbGetQuery().
getCompoundNames <- function(sqlcon) {
  sqlText <- "SELECT ID, Name
                  FROM ConsolidatedUnknownCompoundItems"
  dbGetQuery(sqlcon, sqlText)
}

# Queries the mass list search results that are linked to one compound.
#   sqlcon : open database connection to the cdresult file
#   cid    : ID of the compound (ConsolidatedUnknownCompoundItemsID)
# Returns the joined rows of the link table and MassListSearchItems.
getMassListSearchResults <- function(sqlcon, cid) {

  # Notes on the returned columns:
  #   MolStructure: GZipStream, decode with rawToChar(memDecompress(blob[[1]], type="gzip"))
  #   CompoundMatchStatus: 4 = Full Match, 3 = Partial Match
  sqlText <- paste0("SELECT * 
                  FROM ConsolidatedUnknownCompoundItemsMassListSearchItems, MassListSearchItems
                  WHERE ConsolidatedUnknownCompoundItemsMassListSearchItems.ConsolidatedUnknownCompoundItemsID = ",
                    cid,
                    " AND MassListSearchItems.ID = MassListSearchItemsID")

  dbGetQuery(sqlcon, sqlText)
}

# Queries the BioCyc results that are linked to one compound.
#   sqlcon : open database connection to the cdresult file
#   cid    : ID of the compound (ConsolidatedUnknownCompoundItemsID)
# Returns the joined rows of the link table and BioCycResultItems.
getBioCycResults <- function(sqlcon, cid) {

  sqlText <- paste0("SELECT *  
                FROM ConsolidatedUnknownCompoundItemsBioCycResultItems, BioCycResultItems 
                WHERE ConsolidatedUnknownCompoundItemsBioCycResultItems.ConsolidatedUnknownCompoundItemsID = ",
                    cid,
                    " AND BioCycResultItems.ID = BioCycResultItemsID")

  dbGetQuery(sqlcon, sqlText)
}

# Queries the Metabolika results that are linked to one compound.
#   sqlcon : open database connection to the cdresult file
#   cid    : ID of the compound (ConsolidatedUnknownCompoundItemsID)
# Returns the joined rows of the link table and MetabolikaSearchResultItem.
getMetabolikaResults <- function(sqlcon, cid) {

  sqlText <- paste0("SELECT *  
                FROM ConsolidatedUnknownCompoundItemsMetabolikaSearchResultItem, MetabolikaSearchResultItem
                WHERE ConsolidatedUnknownCompoundItemsMetabolikaSearchResultItem.ConsolidatedUnknownCompoundItemsID = ",
                    cid,
                    " AND MetabolikaSearchResultItem.ID = MetabolikaSearchResultItemID")

  dbGetQuery(sqlcon, sqlText)
}

# Reads the MolStructure value of one row (selected by ID) from one of the
# search result tables and returns it as text.
#   sqlcon : open database connection to the cdresult file
#   table  : name of the table to query
#   cid    : value of the ID column of the requested row
# The stored value is gzip-compressed; it is decompressed and Windows line
# endings are converted to "\n". An empty stored value yields "".
getStructure <- function(sqlcon, table=c("ConsolidatedUnknownCompoundItems", "MzCloudSearchResultItems", "MzCloud2SearchResultItems", "MzVaultSearchResultItems" ), cid) {

  sqlText <- paste("SELECT MolStructure ", "FROM ", table, " WHERE ID = ", cid, sep="")

  # first cell of the result holds the compressed structure
  compressed <- dbGetQuery(sqlcon, sqlText)[1,1][[1]]

  if (length(compressed) == 0) {
    return("")
  }

  molText <- rawToChar(memDecompress(compressed, type="gzip"))
  gsub("\r\n", "\n", molText)
}

# Converts a structure in MOL file format to an InChI string or an InChIKey
# by running OpenBabel on temporary files.
#   molf      : structure as MOL text; "" yields NA
#   resformat : "InChI" (case-insensitive) for InChI output, anything else for InChIKey
#   OPENBABEL : path of the OpenBabel executable
# Returns the lines of the OpenBabel output file, or "" if that file is empty.
calcInChI <- function(molf, resformat="InChI" , OPENBABEL) {

  if (molf == "") {
    return(NA)
  }

  # some MOL texts use ";" instead of line breaks
  usesSemicolons <- length(grep(";", molf)) > 0
  if (usesSemicolons) {
    molf <- gsub(";", "\n", molf)
  }

  # write the structure to a temporary .mol file
  molPath <- paste(tempfile(), ".mol", sep="")
  molCon <- file(molPath, "w")
  if (usesSemicolons) {
    cat("\n", molf, "\n", file=molCon)
  } else {
    cat(molf, "\n", file=molCon)
  }
  close(molCon)

  outPath <- paste(tempfile(), ".inchi", sep="")

  # build the OpenBabel call for the requested output format
  cmd <- if (toupper(resformat) == "INCHI") {
    paste(shQuote(OPENBABEL), " -imol ", molPath, " -o inchi", " -O", outPath, " -xT/connect", sep="")
  } else {
    paste(shQuote(OPENBABEL), " -imol ", molPath, " -o inchikey", " -O", outPath, sep="")
  }
  rc <- system(cmd, ignore.stderr=TRUE)

  result <- readLines(outPath)
  if (length(result) < 1) {
    result <- ""
  }

  # remove both temporary files
  unlink(molPath)
  unlink(outPath)

  result
}

# Creates n random strings, each made of 5 capital letters, a 4-digit
# zero-padded number and one more capital letter.
createRandStrings <- function(n = 5000) {
  letterColumns <- lapply(seq_len(5), function(i) sample(LETTERS, n, TRUE))
  prefix <- do.call(paste0, letterColumns)
  digits <- sprintf("%04d", sample(9999, n, TRUE))
  suffix <- sample(LETTERS, n, TRUE)
  paste0(prefix, digits, suffix)
}

# Flags structures that repeat an earlier structure in the list.
#   cl           : cluster passed to parSapply (only used if USE_PARALLEL is TRUE)
#   structures   : list of structures in MOL file format
#   resformat    : "InChI" (case-insensitive) or anything else for InChIKey
#   OPENBABEL    : path of the OpenBabel executable, passed on to calcInChI
#   USE_PARALLEL : convert the structures on the cluster or sequentially
# Returns a logical vector, TRUE where an identifier was seen before.
# InChI strings are compared after removing "H<digits>", InChIKeys by their
# first 14 characters. Structures without identifier (NA) get a random
# string, so they are not flagged as duplicates of each other.
compareStructures <- function(cl=NULL, structures, resformat="InChIKey", OPENBABEL, USE_PARALLEL = TRUE) {

  ids <- if (USE_PARALLEL) {
    parSapply(cl, structures, calcInChI, resformat=resformat, OPENBABEL=OPENBABEL, USE.NAMES=FALSE)
  } else {
    sapply(structures, calcInChI, resformat=resformat, OPENBABEL=OPENBABEL)
  }

  useInChI <- toupper(resformat) == "INCHI"

  # only for InChI: remove all hydrogens from formula, -d (Delete hydrogens
  # (make all hydrogen implicit)) does not work as expected in OpenBabel
  if (useInChI) {
    ids <- gsub("H[0-9]*", "", ids)
  }

  missingIdx <- which(is.na(ids))
  if (length(missingIdx) > 0) {
    ids[missingIdx] <- createRandStrings(length(missingIdx))
  }

  if (useInChI) {
    duplicated(ids)
  } else {
    duplicated(substr(ids, 1, 14))  # InChIKeys 1:14
  }
}

## Removes "duplicates" from a table of mzVault hits: compounds that share the same
## structure, ignoring stereo-chemistry, using either (parts of) the InChI string,
## the InChIKey, or the Compound Match status provided by CD.
##   mzV          : data.frame of hits (columns "mzVault Results ID", "Compound Match")
##   sqlcon, cl, USE_PARALLEL, OPENBABEL : passed on to getStructure / compareStructures
##   strucformat  : "CD" uses the Compound Match column, anything else compares structures
## Returns mzV without the rows flagged as duplicates.
removeDuplicateStructures <- function(mzV, sqlcon, cl, strucformat= "InChI", USE_PARALLEL=TRUE, OPENBABEL) {

  if (strucformat == "CD") {
    # every "FullMatch" row counts as duplicate, except that the first row is always kept
    isDup <- mzV[,"Compound Match"] == "FullMatch"
    #isDup <- mzV[,"Compound Match"] %in% c("FullMatch", "PartialMatch")
    isDup[1] <- FALSE
  } else {
    molBlocks <- lapply(mzV[,"mzVault Results ID"], getStructure, table="MzVaultSearchResultItems", sqlcon=sqlcon)
    isDup <- compareStructures(cl=cl, structures=molBlocks, resformat= strucformat, OPENBABEL=OPENBABEL, USE_PARALLEL=USE_PARALLEL)
  }

  if (!any(isDup)) {
    return(mzV)
  }

  mzV[!isDup,]
}

# Adds a flag to an existing string of flags.
#   x    : existing flags (a string; "" or NA means "no flags yet")
#   flag : the flag to add (a single string)
#   sep  : separator placed between the existing flags and the new flag
# Returns `flag` alone if there are no flags yet, otherwise x, sep and flag
# pasted together. Works element-wise if x is a vector.
#
# The previous test `length(x) < 1` was inverted: a flag string always has
# length 1, so the new flag replaced the existing flags instead of being
# appended to them.
addFlag <- function(x, flag, sep=",") {

  if (length(x) < 1)
    return(flag)

  noFlagsYet <- is.na(x) | !nzchar(x)

  y <- paste(x, flag, sep = sep)
  y[noFlagsYet] <- flag

  y
}

# Queries the SoftwareVersion stored in the SchemaInfo table for the entry of
# kind 'CompoundDiscovererResult', i.e. the CD version that was used to
# process this result file.
#   sqlcon : open database connection to the cdresult file
# Returns the query result as delivered by dbGetQuery().
getCDversion <- function(sqlcon) {
  dbGetQuery(sqlcon, "SELECT SoftwareVersion FROM SchemaInfo WHERE Kind='CompoundDiscovererResult';")
}

# ----- read parameters from CD ----

# Command line arguments handed over by CD; the 6th one is the name of the JSON file
args <- commandArgs()
inputFile <- args[6]

# Parse the JSON file (it lists the exported table files and the node parameters)
library(rjson)
CD_json_in <- fromJSON(file=inputFile)


## Node parameters must be present
if (!("NodeParameters" %in% names(CD_json_in)))
  stop("No parameters ?")

# Use parallel computing
USE_PARALLEL <- as.logical(CD_json_in$NodeParameters["Use Parallel Computing"])

## Level 4 :
#       4a: ONE valid molecular formula with accuracy <= 3ppm
#       4b: more than one candidates for formula with accuracy <= 3ppm

L4.PPM <- as.numeric(CD_json_in$NodeParameters["PPM (Level 4)"])


## Level 3 : mzCloud | NIST | ChemSpider match | MassList | BioCyC | Metabolika  && MS2

# One mass accuracy threshold is shared by levels 1 to 3
L1to3.PPM <- as.numeric(CD_json_in$NodeParameters["PPM"])

L3.PPM <- L1to3.PPM
L3.MinLocalLibraryScore <- as.numeric(CD_json_in$NodeParameters["Match Factor Threshold (Level 3b)"])
L3.MinSpectralLibraryScore <- L3.MinLocalLibraryScore

L3.MinMzLogicScore <- 30

#  L3.MinChemSpiderReferences=50 (removed for now)


## Level 2

L2.PPM <- L1to3.PPM

L2.MinLocalLibraryScore <- as.numeric(CD_json_in$NodeParameters["Match Factor Threshold (Level 1-2)"])
L2.MinSpectralLibraryScore <- L2.MinLocalLibraryScore

# if there are multiple hits with difference of L2.MinDiffLibraryScore or less -> downgrade to Level 3a
L2.MinDiffLibraryScore <- as.numeric(CD_json_in$NodeParameters["Min. Match Factor Difference (Level 1-2)"])

# if TRUE, compare mzVault and mzCloud results, DOWNGRADE to 3a if structures are different and diff score < L2.MinDiffLibraryScore
CHECK_mzCLOUD_vs_mzVault <- as.logical(CD_json_in$NodeParameters["Check mzCloud vs. mzVault results"])

## Level 1 (score thresholds are read from the same node parameters as for Level 2)

L1.PPM <- L1to3.PPM
L1.MinLocalLibraryScore <- as.numeric(CD_json_in$NodeParameters["Match Factor Threshold (Level 1-2)"])
L1.MinDiffLibraryScore <- as.numeric(CD_json_in$NodeParameters["Min. Match Factor Difference (Level 1-2)"])

# Retention time threshold for Level 1 is directly taken from the settings that were used in the Search mzVault node


# ----- load CD result tables -----
library(RSQLite)
library(XML)

cdresult.file <- CD_json_in$ResultFilePath

# open the result file (SQLite); the connection is used for the database queries below
drv <- dbDriver("SQLite")
sqlcon <- dbConnect(drv, cdresult.file)

# CD version that produced the result file; only the leading "x.y" part is
# used to choose between table names. The value is taken explicitly from the
# first cell of the query result instead of relying on an implicit
# data.frame-to-character conversion.
CD_version <- getCDversion(sqlcon)
if (nrow(CD_version) < 1 || is.na(CD_version[1, 1])) {
  warning("Cannot read the CD version from the result file.", immediate. = TRUE)
  CD_version_main <- ""
} else {
  CD_version_main <- substr(as.character(CD_version[1, 1]), 1, 3)
}

# Compounds table; the Name column (if exported) is replaced by the names read
# directly from the database, to avoid problems with special characters in CSV
Compounds <- getTable(CD_json_in, "Compounds")
Compounds <- Compounds[, colnames(Compounds) != "Name", drop = FALSE]
CompoundNames <- getCompoundNames(sqlcon)
Compounds <- merge(Compounds, CompoundNames, by.x="Compounds ID", by.y="ID")


PredictedCompositions <- getTable(CD_json_in, "Predicted Compositions")
Compounds_PredictedCompositions <- getTable(CD_json_in, "ConsolidatedUnknownCompoundItem-PredictedCompositionItem")

ChemSpiderResults <- getTable(CD_json_in, "ChemSpider Results")
Compounds_ChemSpiderResults <- getTable(CD_json_in, "ConsolidatedUnknownCompoundItem-ChemSpiderResultItem")

mzVaultResults <- getTable(CD_json_in, "mzVault Results")
Compounds_mzVaultResults <- getTable(CD_json_in, "ConsolidatedUnknownCompoundItem-MzVaultSearchResultItem")

mzCloudResults <- getTable(CD_json_in, "mzCloud Results")

# the name of the compound <-> mzCloud link table depends on the CD version
if (CD_version_main == "3.3") {
  Compounds_mzCloudResults <- getTable(CD_json_in, "ConsolidatedUnknownCompoundItem-MzCloudSearchResultItem")
} else {
  Compounds_mzCloudResults <- getTable(CD_json_in, "ConsolidatedUnknownCompoundItem-MzCloud2SearchResultItem")
}

COMPARE_RESULTS <- TRUE

## load previous result, if available
##
## Previous results are files named CL-<number>.rdat next to the result file;
## the one with the highest number is loaded. COMPARE tells whether the loaded
## result can be compared with the current Compounds table.

# defaults, also defined when the comparison is switched off
prevResNr <- 0
COMPARE <- FALSE

if (COMPARE_RESULTS) {

  resultDir <- dirname(CD_json_in$ResultFilePath)

  # anchored pattern: only files that are named exactly CL-<number>.rdat
  fl <- list.files(path=resultDir, pattern="^CL-[0-9]+\\.rdat$")

  if (length(fl) > 0) {
    nr <- as.numeric(sub("^CL-([0-9]+)\\.rdat$", "\\1", fl))
    prevResNr <- max(nr)

    # load the file that was actually found instead of rebuilding its name
    # from the number (which fails for leading zeros or very large numbers)
    load(file=file.path(resultDir, fl[which.max(nr)]))

    # also use older results, which had a different name for the result column
    if (ncol(prevRes) >= 2 && colnames(prevRes)[2] %in% c("IDConfLevel", "resultColName"))
      colnames(prevRes)[2] <- resultColName

    # check if results are comparable
    COMPARE <- (nrow(prevRes) == nrow(Compounds)) && (resultColName %in% colnames(prevRes))
  }
}


# ----- read workflow from result file, read and check some parameters in the workflow -----

# the workflow XML is looked up via the WorkflowID of the first input file entry
WFIF <- getWorkFlowInputFiles(cdresult.file)
WF <- getWorkFlowXML(cdresult.file, WFIF[1,"WorkflowID"])
xml <- xmlParse(WF)
xml_data <- xmlToList(xml)

# ----- read mzVault Search node parameters, get RT tolerance -----
if (CD_version_main == "3.3") {
  mzVault.RTtol <- as.numeric(getParameterXML(xml_data, "MzVaultSearchNode", "RetentionTimeTolerance"))
} else {

  # 3 possible parameter sets for CD >= 3.4

  mzVault.RetentionTimeTolerance <- vapply(
    c("RetentionTimeTolerance", "RetentionTimeToleranceB", "RetentionTimeToleranceC"),
    function(x) as.numeric(getParameterXML(xml_data, "MzVaultSearchNode", x)),
    numeric(1), USE.NAMES = FALSE)

  mzVault.UseRetentionTimeTolerance <- vapply(
    c("UseRetentionTimeTolerance", "UseRetentionTimeToleranceB", "UseRetentionTimeToleranceC"),
    function(x) as.logical(getParameterXML(xml_data, "MzVaultSearchNode", x)),
    logical(1), USE.NAMES = FALSE)

  # largest tolerance of the parameter sets that use one, 0.2 if none does
  mzVault.RTtol <- if (any(mzVault.UseRetentionTimeTolerance)) max(mzVault.RetentionTimeTolerance[mzVault.UseRetentionTimeTolerance]) else 0.2
}

# Number of decimal places of the RT tolerance (used as rounding precision).
# Counted after the decimal point; the previous nchar(x) - 2 gave -1 for a
# whole-number tolerance such as 1 (round() would then round to tens) and one
# digit too many for tolerances >= 10.
mzVault.RTtol.digits <- local({
  tolText <- format(mzVault.RTtol, scientific = FALSE, decimal.mark = ".")
  if (grepl(".", tolText, fixed = TRUE)) nchar(sub("^[^.]*\\.", "", tolText)) else 0
})

# read Predict Elemental composition node parameters
if (as.numeric(getParameterXML(xml_data, "CompositionPredictorNode", "MaxNumberOfCandidates")) < 10)
  warning("MaxNumberOfCandidates is set to < 10 in the Predict Elemental Composition node.", immediate.=TRUE)

# name of the mzCloud result table in the database depends on the CD version
MzCloudSearchResultItems <- if (CD_version_main == "3.3") "MzCloudSearchResultItems" else "MzCloud2SearchResultItems"

# ----- initialize -----

# for debugging only: save the workspace when running on the developer's laptop
# (recognized by the hash of the node name). The check is skipped when the cli
# package is not installed, so that this debugging aid cannot stop the script
# on machines without cli.
if (requireNamespace("cli", quietly = TRUE) &&
    cli:::hash_sha256(Sys.info()['nodename']) == "89efe629ea1f3f516dcfa28bfb7af895409c533571c929a65daacf2e9fa8dd95")  #do this only when running on Ralf's laptop
  save.image(file="C:\\TEMP\\CD node Rimage.dat")
# load("C:\\TEMP\\CD node Rimage.dat")

# OpenBabel is only needed when structures are not compared via the CD match status
if ( (STRUCFORMAT != "CD") && (file.access(OPENBABEL,1) != 0))
  stop(OPENBABEL, " cannot be executed!\n")

# Create result columns
CL <- CL_Flags <- RAF <- vector("character", nrow(Compounds) )

# only used for debugging information
B1.LocalLibraryMatch <- B2.LocalLibraryMatch <- B2.mzCloudMatch <-
  B3.ChemSpiderMatch <-  B3.mzCloudMatch <- B3.LocalLibraryMatch <-
  B4.FormulaMatch <-   vector("logical", nrow(Compounds) )


# some housekeeping first, remove leading and trailing spaces
Compounds[,"Annot Source Predicted Compositions"] <- trimws(Compounds[,"Annot Source Predicted Compositions"])
Compounds[,"Annot Source mzCloud Search"]  <- trimws(Compounds[,"Annot Source mzCloud Search"])
Compounds_ChemSpiderResults[, "Compound Match"] <- trimws(Compounds_ChemSpiderResults[, "Compound Match"])


library(parallel)

# detectCores() may return NA; fall back to a single worker in that case
nCores <- detectCores()
if (is.na(nCores) || nCores < 1) nCores <- 1
cl <- makeCluster(nCores)

# ----- Loop through all compounds ------

for (ci in 1:nrow(Compounds)) {
  
  CL[ci] <- "5"    # per default assign confidence level 5,  no other checks, assuming HRAM data
  
  cid <- Compounds[ci,"Compounds ID"]  # Compounds[ci,"Name"]
  
  
  # ------------ LEVEL 4 -------------- 
  
  
  if (Compounds[ci,"Annot Source Predicted Compositions"] == "Full match" ||  Compounds[ci,"Annot Source Predicted Compositions"] == "Not the top hit" )  {
    
    PredictedCompositionsIdx <- which(Compounds_PredictedCompositions[,'Compounds ID'] %in% cid)
    if (length(PredictedCompositionsIdx) == 0) {
      next # no formula -> nothing else to do here, L5 it is
    } else {
      PredictedCompositionsIDs <- Compounds_PredictedCompositions[PredictedCompositionsIdx , "Predicted Compositions ID"]
      formulas <- PredictedCompositions[PredictedCompositions[,"Predicted Compositions ID" ] %in%  PredictedCompositionsIDs , ]
      L4.ppm.match <- abs(formulas[,"Delta Mass in ppm"]) <= L4.PPM
      
      if (any(L4.ppm.match)) {
        B4.FormulaMatch[ci] <- TRUE
        CL[ci] <- "4b"
      }
      
      if (length(which(L4.ppm.match)) == 1) {
        B4.FormulaMatch[ci] <- TRUE
        CL[ci] <- "4a"
      }
    }
    
  } else next # no formula -> nothing else to do here, L5 it is
  
  
  ## get all mzVault Hits ##
  
  mzVHits <- mzVHitsRT <- mzVHitsNoRT <- data.frame()
  mzV.RTmatches <- NULL
  
  mzVIdx <- which(Compounds_mzVaultResults[,'Compounds ID'] %in% cid)
  if (length(mzVIdx) > 0) {
    
    mzVIDs <- Compounds_mzVaultResults[mzVIdx , "mzVault Results ID"]
    mzVHitsData <- Compounds_mzVaultResults[mzVIdx , ]
    mzVHits <- cbind(mzVaultResults[mzVaultResults[, "mzVault Results ID" ] %in%  mzVIDs , ],   mzVHitsData)
    
    #Remove hits with invalid mass
    mzVHits <- mzVHits[mzVHits[,"Compound Match"] == "FullMatch" | mzVHits[,"Compound Match"] == "PartialMatch", ]
    
    # mzVault hits with RT
    if ( ("Delta RT in min" %in% colnames(mzVHits))  && any(!is.na(mzVHits[,"Delta RT in min"])) ) {
      mzV.RTmatches <- round(mzVHits[,"Delta RT in min"], mzVault.RTtol.digits) <= mzVault.RTtol
      
      if (length(na.omit(mzV.RTmatches)) > 0) {
        mzVHitsRT <- mzVHits[which(mzV.RTmatches),]
        mzVHitsRT <- mzVHitsRT[order(mzVHitsRT[,"Match"], decreasing = TRUE),]
      }
    }
    
    # mzVault hits without RT
    mzVHitsNoRT <- mzVHits
    if (length(mzV.RTmatches) > 0)
      if (length(na.omit(mzV.RTmatches)) > 0) 
        mzVHitsNoRT <- mzVHits[-which(mzV.RTmatches),]
    
    mzVHitsNoRT <- mzVHitsNoRT[order(mzVHitsNoRT[,"Match"], decreasing = TRUE),]
  }
  
  
  
  
  
  # ------------ LEVEL 3 -------------- 
  
  
  
  L3.Formula.match <- any (abs(formulas[,"Delta Mass in ppm"]) <= L3.PPM )
  L3.mzCloud.score <- if (is.na(Compounds[ci,"mzCloud Best Match"])) FALSE else Compounds[ci,"mzCloud Best Match"] >= L3.MinSpectralLibraryScore
  L3.mzCloud.match <- any( (Compounds[ci,"Annot Source mzCloud Search"] == "Full match")  &&  L3.mzCloud.score )
  
  
  ## check for any ChemSpider Hits ##
  
  ChemSpiderIdx <- which(Compounds_ChemSpiderResults[,'Compounds ID'] %in% cid)
  if (length(ChemSpiderIdx) == 0) {
    L3.ChemSpider.match <- FALSE
    L3.ChemSpider.mzLogic <- FALSE
  } else {
    ChemSpiderIDs <- Compounds_ChemSpiderResults[ChemSpiderIdx , "ChemSpider Results CSID"]
    ChemSpiderCompoundMatch <- Compounds_ChemSpiderResults[ChemSpiderIdx , "Compound Match"]
    if ("mzLogic Score" %in% colnames(Compounds_ChemSpiderResults)) {
      ChemSpiderCompoundMatch <- Compounds_ChemSpiderResults[ChemSpiderIdx , c("Compound Match","mzLogic Score")] 
      ChemSpiderHits <- cbind(ChemSpiderResults[ChemSpiderResults[,"ChemSpider Results CSID" ] %in%  ChemSpiderIDs, ], ChemSpiderCompoundMatch) } else 
      {
        ChemSpiderCompoundMatch <- Compounds_ChemSpiderResults[ChemSpiderIdx , "Compound Match"]
        ChemSpiderHits <- cbind(ChemSpiderResults[ChemSpiderResults[,"ChemSpider Results CSID" ] %in%  ChemSpiderIDs, ], "Compound Match" = ChemSpiderCompoundMatch)
      }
    
    
    fullMatchIdx <- which(ChemSpiderHits[,"Compound Match"] == "FullMatch")
    if (length(fullMatchIdx) >= 1) {
      # L3.ChemSpider.match <- (nrow(ChemSpiderHits) == 1  || any(ChemSpiderHits[fullMatchIdx,"Number of References"] >= L3.MinChemSpiderReferences))
      L3.ChemSpider.match <- TRUE
      if ("mzLogic Score" %in% colnames(ChemSpiderHits))
        L3.ChemSpider.mzLogic <-  any(ChemSpiderHits[fullMatchIdx,"mzLogic Score"] >= L3.MinMzLogicScore, na.rm = TRUE ) else
          L3.ChemSpider.mzLogic <- FALSE
    } else {
      L3.ChemSpider.match <- FALSE  # no Full Match
      L3.ChemSpider.mzLogic <- FALSE
    }
  }
  
  
  ## check for any Mass List Search results ##
  
  L3.MassListSearch.match <- L3.MassListSearch.mzLogic <- FALSE  
  if ("Annot Source MassList Search" %in% colnames(Compounds)) {
    
    MassListSearchResults <- getMassListSearchResults(sqlcon, cid)
    
    massListHidx <- MassListSearchResults[,"CompoundMatchStatus"] %in% c(3,4)
    
    if (any(massListHidx )) {
      L3.MassListSearch.match <- TRUE
      if ("NormalizedMzLogicScore" %in% colnames(MassListSearchResults))
        L3.MassListSearch.mzLogic <- any(MassListSearchResults[massListHidx, "NormalizedMzLogicScore"] >= L3.MinMzLogicScore, na.rm = TRUE )
    }
  } 
  
  
  ## check for any BioCyc results ##
  
  L3.BioCyc.match <- L3.BioCyc.mzLogic <- FALSE  
  if ("Annot Source BioCyc Search" %in% colnames(Compounds)) {
    
    BioCycResults <- getBioCycResults(sqlcon, cid)
    
    BioCycHidx <- BioCycResults[,"CompoundMatchStatus"] %in% c(3,4)
    
    if (any(BioCycHidx)) {
      L3.BioCyc.match <- TRUE
      if ("NormalizedMzLogicScore" %in% colnames(BioCycResults))
        L3.BioCyc.mzLogic <- any(BioCycResults[BioCycHidx, "NormalizedMzLogicScore"] >= L3.MinMzLogicScore, na.rm = TRUE )
    }
  } 
  
  ## check for any Metabolika results ##
  
  L3.Metabolika.match <- L3.Metabolika.mzLogic <- FALSE  
  if ("Annot Source Metabolika Search" %in% colnames(Compounds)) {
    
    MetabolikaResults <- getMetabolikaResults(sqlcon, cid)
    
    MetabolikaHidx <- MetabolikaResults[,"CompoundMatchStatus"] %in% c(3,4)
    
    if (any(MetabolikaHidx)) {
      L3.Metabolika.match <- TRUE
      if ("NormalizedMzLogicScore" %in% colnames(MetabolikaResults))
        L3.Metabolika.mzLogic <- any(MetabolikaResults[MetabolikaHidx, "NormalizedMzLogicScore"] >= L3.MinMzLogicScore, na.rm = TRUE )
    }
  } 
  
  
  ## check for any mzVault Hits ##
  
  if (nrow(mzVHits) > 0) {
    #PPM filter
    L3.mzVHits <- mzVHits[abs(as.numeric(mzVHits[,"Delta Mass in ppm"])) <= L3.PPM ,]
    
    #Score filter
    L3.mzVHits <- mzVHits[mzVHits[,"Match"] >= L3.MinLocalLibraryScore,]
    
    L3.LocalLibraryMatch <- nrow(L3.mzVHits) > 0
    
  } else  L3.LocalLibraryMatch <- FALSE
  
  ##
  
  if (L3.Formula.match  
      && (L3.mzCloud.match || L3.ChemSpider.match ||  L3.MassListSearch.match || L3.BioCyc.match || L3.Metabolika.match ||  L3.LocalLibraryMatch) 
      && (Compounds[ci,"MS2"] == "PreferredDDA" || Compounds[ci,"MS2"] == "OtherDDA")  
      && (L3.ChemSpider.mzLogic || L3.MassListSearch.mzLogic || L3.BioCyc.mzLogic || L3.Metabolika.mzLogic) )
    CL[ci] <- "3b"  
  #else  CL[ci] <- "4a"
  
  
  B3.ChemSpiderMatch[ci] <- L3.ChemSpider.match
  B3.mzCloudMatch[ci] <- L3.mzCloud.match
  B3.LocalLibraryMatch[ci] <-L3.LocalLibraryMatch 
  
  
  
  # ------------ LEVEL 2 -------------- 
  
  
  L2.Formula.match <- any (abs(formulas[,"Delta Mass in ppm"]) <= L2.PPM)  
  L2.mzCloud.score <- if (is.na(Compounds[ci,"mzCloud Best Match"])) FALSE else Compounds[ci,"mzCloud Best Match"] >= L2.MinSpectralLibraryScore
  L2.mzCloud.match <- any((Compounds[ci,"Annot Source mzCloud Search"] == "Full match"| Compounds[ci,"Annot Source mzCloud Search"] == "Partial match"  | Compounds[ci,"Annot Source mzCloud Search"] == "Not the top hit" )  && L2.mzCloud.score )
  
  ## check for any mzVault Hits ##
  
  if (nrow(mzVHits) > 0) {
    #PPM filter
    L2.mzVHits <- mzVHits[abs(as.numeric(mzVHits[,"Delta Mass in ppm"])) <= L2.PPM ,]
    
    #Score filter
    L2.mzVHits <- mzVHits[mzVHits[,"Match"] >= L2.MinLocalLibraryScore,]
    
    L2.LocalLibraryMatch <- nrow(L2.mzVHits) > 0
    
  } else  L2.LocalLibraryMatch <- FALSE
  
  ##    
  
  
  if (L2.Formula.match && (L2.mzCloud.match || L2.LocalLibraryMatch) && (Compounds[ci,"MS2"] == "PreferredDDA" || Compounds[ci,"MS2"] == "OtherDDA") )
    CL[ci] <- "2"  
  
  B2.LocalLibraryMatch[ci] <- L2.LocalLibraryMatch
  B2.mzCloudMatch[ci] <- L2.mzCloud.match
  
  
  ## check to see if we need to downgrade from Level 2 to Level 3a
  
  ## I. check to see if there is no distinguished mzCloud top hit
  
  if ((CL[ci] == "2") && L2.mzCloud.match) {
    
    mzCloudIdx <- which(Compounds_mzCloudResults[,'Compounds ID'] %in% cid)
    if (length(mzCloudIdx) > 1) {
      
      mzCloudHits <- Compounds_mzCloudResults[mzCloudIdx, ]
      
      mzCloudHitsF <- mzCloudHits <- mzCloudHits[   (mzCloudHits[,"Type"] == "Identity") 
                                                    &  (abs(mzCloudHits[,"Delta Mass in ppm"]) <= L2.PPM) 
                                                    & (mzCloudHits[,"Match"] >= L2.MinSpectralLibraryScore)    
                                                    & ((mzCloudHits[,"Compound Match"] == "FullMatch") 
                                                       | (mzCloudHits[,"Compound Match"] == "PartialMatch")), ]
      
      if (nrow(mzCloudHits) > 1) {
        
        # Do we have a significant difference in score between the top 2 mzCloud hits ?
        topNHitMzCloud <- FALSE
        if (nrow(mzCloudHits) >= 2) {
          mzCloudHits <- mzCloudHits[order(mzCloudHits[,"Match"], decreasing = TRUE),]
          topNHitMzCloud <- abs(diff(mzCloudHits[1:2,"Match"])) > L2.MinDiffLibraryScore 
        }
        
        ## remove "duplicates" : compounds that share the same structure, ignoring stereo-chemistry
        if (STRUCFORMAT != "CD") {
          mzCloudHitsStructures <- lapply(mzCloudHits[,"mzCloud Results ID"], getStructure, table=MzCloudSearchResultItems, sqlcon=sqlcon)
          dups <- compareStructures(cl=cl, structures=mzCloudHitsStructures, resformat=STRUCFORMAT, OPENBABEL=OPENBABEL, USE_PARALLEL=USE_PARALLEL) 
        } else 
        {
          dups <- mzCloudHits[,"Compound Match"] == "FullMatch"
          if (any(dups)) {
            fh <- which(dups)[1]
            dups[fh] <- FALSE
          }
          
        }
        
        if  (any(dups)) #(length(which(dups)) > 1) 
          mzCloudHits <- mzCloudHits[!dups,]
        
        if (nrow(mzCloudHits) > 1) {
          mzCloudHits <- mzCloudHits[order(mzCloudHits[,"Match"], decreasing = TRUE),]
          if ( abs(diff(mzCloudHits[1:2,"Match"])) < L2.MinDiffLibraryScore )
            CL[ci] <- "3a"  
        }
        
        # special case, 1 partial match and one full match 
        if ( (nrow(mzCloudHits) == 1) && (nrow(mzCloudHitsF)==2) && STRUCFORMAT == "CD" && !topNHitMzCloud) {
          if ( (all(mzCloudHitsF[,"Compound Match"] == c("PartialMatch", "FullMatch")) || all(mzCloudHitsF[,"Compound Match"] == c("FullMatch", "PartialMatch")) )  && ( abs(diff(mzCloudHitsF[1:2,"Match"])) < L2.MinDiffLibraryScore )  )
            CL[ci] <- "3a"
        }
        
      } 
    }
  }
  
  ## II. check to see if there is no distinguished mzVault top hit, also check for use of in-silico libraries
  ## TODO after CD3.4 release: check for LipidSearch results in the same way
  
  if ((CL[ci] == "2") && L2.LocalLibraryMatch) {
    
    if (nrow(mzVHitsRT) > 1) {
      
      L2.mzVHitsRT <- mzVHitsRT[abs(as.numeric(mzVHitsRT[,"Delta Mass in ppm"])) <= L2.PPM & (mzVHitsRT[,"Match"] >= L2.MinLocalLibraryScore),] 
      
      if (nrow(L2.mzVHitsRT) > 1) {
        L2.mzVHitsRT <- removeDuplicateStructures(L2.mzVHitsRT, sqlcon=sqlcon, cl=cl, strucformat= STRUCFORMAT, USE_PARALLEL=USE_PARALLEL, OPENBABEL=OPENBABEL)
        
        if (nrow(L2.mzVHitsRT) > 1)
          if ( abs(diff(L2.mzVHitsRT[1:2,"Match"])) < L2.MinDiffLibraryScore )
            CL[ci] <- "3a"
      }  
    }
    
    if (nrow(mzVHitsNoRT) > 0) {
      
      L2.mzVHitsNoRT <- mzVHitsNoRT[abs(as.numeric(mzVHitsNoRT[,"Delta Mass in ppm"])) <= L2.PPM & (mzVHitsNoRT[,"Match"] >= L2.MinLocalLibraryScore),] 
      
      for (pi in 1:length(IN_SILICO_LIBRARIES)) {
        ip <- startsWith(L2.mzVHitsNoRT[,"mzVault Library"], IN_SILICO_LIBRARIES[pi] )
        if (any(ip)) L2.mzVHitsNoRT <- L2.mzVHitsNoRT[!ip,]
      }
      
      if (nrow(L2.mzVHitsNoRT) == 0) # we only had mzVault hits (no RT) from in-silico libraries -> downgrade to 3b
        CL[ci] <- "3b"
      
      
      if (nrow(L2.mzVHitsNoRT) > 1) {
        L2.mzVHitsNoRT <- removeDuplicateStructures(L2.mzVHitsNoRT, sqlcon=sqlcon, cl=cl, strucformat= STRUCFORMAT, USE_PARALLEL=USE_PARALLEL, OPENBABEL=OPENBABEL)
        
        if (nrow(L2.mzVHitsNoRT) > 1)
          if ( abs(diff(L2.mzVHitsNoRT[1:2,"Match"])) < L2.MinDiffLibraryScore )
            CL[ci] <- "3a"
      }
      
    }
    
  }  
  
  ##
  ## Optional: check mzVault vs mzCloud results 
  ##
  ##           -  different structure, Δscore < L2.MinDiffLibraryScore → Level 3a
  ##
  
  if (CHECK_mzCLOUD_vs_mzVault) {
    
    if ((CL[ci] == "2" || CL[ci] == "3a") && L2.mzCloud.match && L2.LocalLibraryMatch ) {
      
      mzCloudIdx <- which(Compounds_mzCloudResults[,'Compounds ID'] %in% cid)
      
      if (length(mzCloudIdx) > 0) {
        
        mzCloudHits <- Compounds_mzCloudResults[mzCloudIdx, ]
        
        mzCloudHits <- mzCloudHits[   (mzCloudHits[,"Type"] == "Identity") 
                                      &  (abs(mzCloudHits[,"Delta Mass in ppm"]) <= L2.PPM) 
                                      & (mzCloudHits[,"Match"] >= L2.MinSpectralLibraryScore)    
                                      & ((mzCloudHits[,"Compound Match"] == "FullMatch") 
                                         | (mzCloudHits[,"Compound Match"] == "PartialMatch")), ]
        
      }
      
      L2.mzVHitsNoRT <- mzVHitsNoRT[abs(as.numeric(mzVHitsNoRT[,"Delta Mass in ppm"])) <= L2.PPM & (mzVHitsNoRT[,"Match"] >= L2.MinLocalLibraryScore),] 
      
      if (( nrow(L2.mzVHitsNoRT) > 0) && (nrow(mzCloudHits) > 0))  {
        mzCloudHitsStructures <- lapply(mzCloudHits[,"mzCloud Results ID"], getStructure, table=MzCloudSearchResultItems, sqlcon=sqlcon)
        mzVStructures <- lapply(L2.mzVHitsNoRT[,"mzVault Results ID"], getStructure, table="MzVaultSearchResultItems", sqlcon=sqlcon)
        
        allStruc <- c(mzCloudHitsStructures, mzVStructures)
        allHits <- data.frame(type=rep("mzCloud", nrow(mzCloudHits)), score=mzCloudHits[,"Match"], cpdmatch=mzCloudHits[,"Compound Match"] )  
        allHits <- allHitsF <- rbind(allHits, data.frame(type=rep("mzVault", nrow(L2.mzVHitsNoRT)), score=L2.mzVHitsNoRT[,"Match"],cpdmatch=L2.mzVHitsNoRT[,"Compound Match"]  ))
        
        # Do we have a significant difference in score between the top 2 hits ?
        topNHit <- FALSE
        if (nrow(allHits) >= 2) {
          allHits <- allHits[order(allHits[,"score"], decreasing = TRUE),]
          topNHit <- abs(diff(allHits[1:2,"score"])) > L2.MinDiffLibraryScore 
          CL[ci] <- "2"
        }
        
        if (STRUCFORMAT != "CD") 
          dups <- compareStructures(cl=cl, structures=allStruc, resformat=STRUCFORMAT, OPENBABEL=OPENBABEL, USE_PARALLEL=USE_PARALLEL) else 
          {
            dups <- allHits[,"cpdmatch"] == "FullMatch"
            if (any(dups)) {
              fh <- which(dups)[1]
              dups[fh] <- FALSE
            }
          }
        
        if (any(dups))
          allHits <- allHits[!dups,]
        
        if ( (nrow(allHits) > 1) && (any(allHits[,"cpdmatch"] == "FullMatch")) && !topNHit ) {
          allHits <- allHits[order(allHits[,"score"], decreasing = TRUE),]
          if ( abs(diff(allHits[1:2,"score"])) < L2.MinDiffLibraryScore ) {
            CL[ci] <- "3a"
            # CL_Flags[ci] <- addFlag(CL_Flags[ci], "was_L2_mzC_vs_mzV")
          }  
        }
        
        # special case, 1 partial match and one full match 
        if ( (nrow(allHits) == 1) && (nrow(allHitsF)==2) && STRUCFORMAT == "CD" )  {
          if ( (all(allHitsF[,"cpdmatch"] == c("PartialMatch", "FullMatch")) || all(allHitsF[,"cpdmatch"] == c("FullMatch", "PartialMatch")) )  && ( abs(diff(allHitsF[1:2,"score"])) < L2.MinDiffLibraryScore )  )
            CL[ci] <- "3a"
        }
        
      }
    }
  }
  
  
  
  # ------------ LEVEL 1 -------------- 
  
  
  L1.Formula.match <- any (abs(formulas[,"Delta Mass in ppm"]) <= L1.PPM )
  
  mzVIdx <- which(Compounds_mzVaultResults[,'Compounds ID'] %in% cid)
  
  ## check for any mzVault Hits with RT ##
  
  if (nrow(mzVHitsRT) > 0) {
    #PPM filter
    # L1.mzVHitsRT <- mzVHitsRT[abs(as.numeric(mzVHitsRT[,"Delta Mass in ppm"])) <= L2.PPM ,]
    
    #Score filter
    L1.mzVHitsRTF <- L1.mzVHitsRT <- mzVHitsRT[abs(as.numeric(mzVHitsRT[,"Delta Mass in ppm"])) <= L1.PPM &  mzVHitsRT[,"Match"] >= L1.MinLocalLibraryScore,]

    L1.LocalLibraryMatch <- nrow(L1.mzVHitsRT) > 0
    
  } else  L1.LocalLibraryMatch <- FALSE
  
  
  if ( L1.Formula.match  && L1.LocalLibraryMatch ) {
    CL[ci] <- "1"
    
    # downgrade to 3a if there are multiple matches with different structures
    if (nrow(L1.mzVHitsRT) > 1) {
      L1.mzVHitsRT <- removeDuplicateStructures(L1.mzVHitsRT, sqlcon=sqlcon, cl=cl, strucformat= STRUCFORMAT, USE_PARALLEL=USE_PARALLEL, OPENBABEL=OPENBABEL)
      
      if (nrow(L1.mzVHitsRT) > 1)
        if (abs(diff(L1.mzVHitsRT[1:2,"Match"])) < L1.MinDiffLibraryScore ) {
          CL[ci] <- "3a"
          # CL_Flags[ci] <- addFlag(CL_Flags[ci], "was_L1")  
        } 
    }  
    
    # special case, 1 partial match and one full match 
    if ( (nrow(L1.mzVHitsRT) == 1) && (nrow(L1.mzVHitsRTF)==2) && STRUCFORMAT == "CD" )  {
      if ( (all(L1.mzVHitsRTF[,"Compound Match"] == c("PartialMatch", "FullMatch")) || all(L1.mzVHitsRTF[,"Compound Match"] == c("FullMatch", "PartialMatch")) )  && ( abs(diff(L1.mzVHitsRTF[1:2,"Match"])) < L1.MinDiffLibraryScore) )
        CL[ci] <- "3a"
    }
    
    if (nrow(L1.mzVHitsRT) > 0) {
      # Check for L1 if name used for cpd annotation was actually coming from mzVaultHitsRT top hit -> if not warning flag
      if ( L1.mzVHitsRT[which.max(L1.mzVHitsRT[,"Match"]),"Name"]  != Compounds[ci,"Name"] )
        CL_Flags[ci] <- addFlag(CL_Flags[ci], "L1 CpdName not matching mzVault w RT top Hit")  
    }
    
  }

  B1.LocalLibraryMatch[ci] <- L1.LocalLibraryMatch
  
}  # for

# The per-compound loop is done: close the database connection and stop the
# cluster object.
dbDisconnect(sqlcon)
stopCluster(cl)



# ------ Compare with previous result ------
# For every row whose confidence level differs from the stored previous result,
# append a "prev_<old level>" flag. A warning is raised instead when the
# compound IDs at that row position do not agree between the two results.
if (COMPARE) {
  for (cii in which(CL != prevRes[, resultColName])) {
    # double check to see if results are comparable
    sameCompound <- prevRes[cii, "CID"] == Compounds[cii, "Compounds ID"]
    if (sameCompound) {
      CL_Flags[cii] <- addFlag(CL_Flags[cii], paste0("prev_", prevRes[cii, resultColName]))
    } else {
      warning("Result comparison: Indices do not match. \n")
    }
  }
}

# ----- Create Result Table ------
#
# Append the confidence level, the flags and the redundant-annotation flag to
# the Compounds table. The three columns are added under placeholder names
# ("resultColName_", "resultColFlagsName_", "resultColRAFName_") and renamed
# right below to the configured column names.
#
# Fix: the DEBUG branch used to add the flags column as "resultColFlagsName"
# (no trailing underscore), so the rename never matched and the column kept the
# placeholder-like name in DEBUG mode. Both modes now share the same base table.

data.output <- cbind(Compounds,
                     resultColName_ = CL,
                     resultColFlagsName_ = CL_Flags,
                     resultColRAFName_ = RAF)

if (DEBUG) {
  # Per-level check-mark columns. "L4.FormulaWatch" (sic) is left unchanged
  # because the DEBUG column description registered further down uses the
  # same spelling.
  data.output <- cbind(data.output,
                       "L1.LocalLibraryMatch" = B1.LocalLibraryMatch,
                       "L2.LocalLibraryMatch" = B2.LocalLibraryMatch, "L2.mzCloudMatch" = B2.mzCloudMatch,
                       "L3.LocalLibraryMatch" = B3.LocalLibraryMatch, "L3.mzCloudMatch" = B3.mzCloudMatch, "L3.ChemSpiderMatch" = B3.ChemSpiderMatch,
                       "L4.FormulaWatch" = B4.FormulaMatch)
}

names(data.output)[names(data.output) == "resultColName_"] <- resultColName
names(data.output)[names(data.output) == "resultColFlagsName_"] <- resultColFlagsName
names(data.output)[names(data.output) == "resultColRAFName_"] <- resultColRAFName


# ------ Check for redundant annotations -----
#
# Pass 1 (Level 1): for each compound name found on a Level 1 row that occurs
# on more than one row overall,
#   - rows with that name at any other level get RAFtext + name added,
#   - if more than one row has the name at Level 1, those rows get
#     MultipleL1HitsText added.
# Pass 2 (Level 2): names shared by more than one Level 2 row get
# MultipleL2HitsText added.
# All flags are added through addFlag(), called with sep=";".

rowsAtL1 <- which(data.output[, resultColName] == "1")
if (length(rowsAtL1) > 1) {
  # unique compound names
  namesAtL1 <- unique(data.output[rowsAtL1, "Name"])

  for (ni in seq_along(namesAtL1)) {
    cname <- namesAtL1[ni]
    sameName <- which(data.output[, "Name"] %in% cname)
    if (length(sameName) <= 1) next

    # which of the rows sharing this name are Level 1 ?
    atL1 <- data.output[sameName, resultColName] == "1"

    # mark other annotations with same name on other levels as potentially redundant
    if (any(atL1) && any(!atL1)) {
      otherRows <- sameName[!atL1]
      data.output[otherRows, resultColRAFName] <- sapply(data.output[otherRows, resultColRAFName], addFlag, paste0(RAFtext, cname), sep = ";")
    }

    # add flag if we have multiple rows with the same name at the same level
    if (length(which(atL1)) > 1) {
      l1Rows <- sameName[atL1]
      data.output[l1Rows, resultColRAFName] <- sapply(data.output[l1Rows, resultColRAFName], addFlag, MultipleL1HitsText, sep = ";")
    }
  }
}

rowsAtL2 <- which(data.output[, resultColName] == "2")
if (length(rowsAtL2) > 1) {
  # unique compound names
  namesAtL2 <- unique(data.output[rowsAtL2, "Name"])

  for (ni in seq_along(namesAtL2)) {
    cname <- namesAtL2[ni]
    sameName <- which(data.output[, "Name"] %in% cname)
    if (length(sameName) <= 1) next

    # which of the rows sharing this name are Level 2 ?
    atL2 <- data.output[sameName, resultColName] == "2"

    # add flag if we have multiple rows with the same name at the same level
    if (length(which(atL2)) > 1) {
      l2Rows <- sameName[atL2]
      data.output[l2Rows, resultColRAFName] <- sapply(data.output[l2Rows, resultColRAFName], addFlag, MultipleL2HitsText, sep = ";")
    }
  }
}



# ----- Clean up result table ------
#
# Drops the Name column and every "Annot Source." column from the result table,
# and the matching "Annot." entries from the column descriptions of the first
# table in CD_json_in.
#
# Fix: indexing with a negated empty vector (x[, -integer(0)] or
# lst[-integer(0)]) selects nothing, so when grep() found no match the original
# code removed ALL columns / ALL column descriptions. Removal is now skipped
# when there is nothing to remove, and drop = FALSE keeps data.output a table.

# remove the Name column to avoid problems when returning the table
if ("Name" %in% colnames(data.output))
  data.output <- data.output[, !(colnames(data.output) %in% "Name"), drop = FALSE]


# remove AnnotationMatchStatus column from the result table

colToRemove <- grep("Annot Source.", colnames(data.output))
if (length(colToRemove) > 0)
  data.output <- data.output[, -colToRemove, drop = FALSE]

# Entries without a ColumnName yield NA here, which grep() never matches.
colDescNames <- vapply(CD_json_in$Tables[[1]]$ColumnDescriptions,
                       function(x) as.character(x$ColumnName)[1],
                       character(1))
colDescToRemove <- grep("Annot.", colDescNames)
if (length(colDescToRemove) > 0)
  CD_json_in$Tables[[1]]$ColumnDescriptions <- CD_json_in$Tables[[1]]$ColumnDescriptions[-colDescToRemove]

# Add new columns to JSON structure.
#
# Each description is a list with the fields ColumnName, IsID, DataType and
# Options, appended to the column descriptions of the first table. The three
# result columns are "String" columns; in DEBUG mode seven "Boolean" check-mark
# columns follow, each with a SpecialCellRenderer option.

columnDescs <- CD_json_in$Tables[[1]]$ColumnDescriptions

for (stringCol in c(resultColName, resultColFlagsName, resultColRAFName)) {
  columnDescs[[length(columnDescs) + 1]] <- list(
    ColumnName = stringCol,
    IsID = FALSE,
    DataType = "String",
    Options = list(PositionAfter = "Formula")
  )
}

if (DEBUG) {
  debugCols <- c("L1.LocalLibraryMatch",
                 "L2.LocalLibraryMatch", "L2.mzCloudMatch",
                 "L3.LocalLibraryMatch", "L3.mzCloudMatch", "L3.ChemSpiderMatch",
                 "L4.FormulaWatch")

  for (boolCol in debugCols) {
    columnDescs[[length(columnDescs) + 1]] <- list(
      ColumnName = boolCol,
      IsID = FALSE,
      DataType = "Boolean",
      Options = list(PositionAfter = "Formula", SpecialCellRenderer = "330D9522-50CC-41B7-9D45-5E5D8F708103")
    )
  }
}

CD_json_in$Tables[[1]]$ColumnDescriptions <- columnDescs


# Write modified Compounds table to temporary folder.
Compounds.idx <- getTableIdx(CD_json_in, "Compounds")
Compounds.datafile <- CD_json_in$Tables[[ Compounds.idx  ]]$DataFile

# Output file name = input file name with the trailing ".txt" replaced by
# ".out.txt".
# Fix: the original gsub(".txt", ...) used an unescaped, unanchored pattern, so
# "." matched any character and every occurrence in the path was rewritten
# (e.g. a directory named "Atxt" would be altered as well). Only the literal
# extension at the end of the path is replaced now.
resultout <- sub("\\.txt$", ".out.txt", Compounds.datafile)

# If the data file does not end in ".txt", append the suffix instead so the
# input file is never overwritten.
if (identical(resultout, Compounds.datafile))
  resultout <- paste0(Compounds.datafile, ".out.txt")

write.table(data.output, file = resultout, sep='\t', row.names = FALSE)

# write summary table
# One row per confidence level with its count, followed by a final row that
# only carries the script version in the Notes column. Written tab-separated
# to "<cdresult.file>_summary.txt".
levelCounts <- table(CL)
SCL <- cbind(ID = 1:nrow(levelCounts),
             CL = rownames(levelCounts),
             SCL = levelCounts,
             Notes = "")

versionRow <- c(ID = nrow(SCL) + 1, CL = "", SCL = "", Notes = paste("version", VERSION))
SCL <- rbind(SCL, versionRow, deparse.level = 0)

summaryTableFile <- paste0(cdresult.file, "_summary.txt")
write.table(SCL,
            file = summaryTableFile,
            sep = '\t',
            row.names = FALSE,
            col.names = c("ID", "Confidence Level", "Count", "Notes"))


# Remove all the other tables in the JSON structure so that only the first
# table remains (the code above treats Tables[[1]] as the Compounds table).
#
# Fix: the original loop iterated over seq(length(Tables), 2, -1), which stops
# with "wrong sign in 'by' argument" when the structure holds a single table.
# Keeping the first element directly gives the same result for two or more
# tables and is a no-op for one.
if (length(CD_json_in$Tables) > 1)
  CD_json_in$Tables <- CD_json_in$Tables[1]

# Create entry in node_args.json for the new summary table
#
# The entry is appended after the tables already present and points at
# summaryTableFile. Its four column descriptions use the same names, in the
# same order, as the header written to the summary file
# ("ID", "Confidence Level", "Count", "Notes").
STI <- length(CD_json_in$Tables) + 1

summaryColumnSpecs <- list(
  c(name = "ID",               id = "Other", type = "Int"),
  c(name = "Confidence Level", id = "",      type = "String"),
  c(name = "Count",            id = "",      type = "Int"),
  c(name = "Notes",            id = "",      type = "String")
)

# Add column descriptions for each column
summaryColumns <- lapply(summaryColumnSpecs, function(spec) {
  list(ColumnName = spec[["name"]],
       ID = spec[["id"]],
       DataType = spec[["type"]],
       Options = list())
})

CD_json_in$Tables[[STI]] <- list(
  TableName = 'Confidence Level Summary',
  DataFile = summaryTableFile,
  DataFormat = 'CSV',
  Options = list(),
  ColumnDescriptions = summaryColumns
)

# Write out node_response.json file - use same file as node_args.json but change the pathway input file to the new one
#
# The Compounds entry is pointed at the freshly written result file, the whole
# structure is serialized, and the text is written to the path given in
# ExpectedResponsePath.

CD_json_in$Tables[[Compounds.idx]]$DataFile <- resultout
jsonOutFile <- CD_json_in$ExpectedResponsePath

responseJSON <- toJSON(CD_json_in, indent = 1, method = "C")

# Empty Options lists are serialized as "[", blank line, "]"; a regular
# expression rewrites each of them to "{ }".
responseJSON2 <- gsub("\\[\n\n[[:blank:]]+\\]", "{ }", responseJSON)

# writeLines() opens and closes the file itself when given a path.
writeLines(responseJSON2, jsonOutFile)

# save results in separate file that can be used later for comparison
#
# Stores the compound IDs together with their confidence levels (column named
# after resultColName) plus the script version, in "CL-<prevResNr + 1>.rdat"
# inside resultDir.

if (COMPARE_RESULTS) {

  prevRes <- cbind(CID = Compounds[, "Compounds ID"], "resultColName_" = CL)
  isLevelCol <- colnames(prevRes) == "resultColName_"
  colnames(prevRes)[isLevelCol] <- resultColName

  PREV_VERSION <- VERSION

  rdatFile <- paste0(resultDir, "/", "CL-", prevResNr + 1, ".rdat")
  save(prevRes, PREV_VERSION, file = rdatFile)
}





