#
# SIL_Area.R    07/24/2023    Ralf Tautenhahn
#
# Stable Isotope Labeling: Calculate relative peak area for each isotopologue, export as table
#
# v1.1
#
# 1.1 : - added Formula column
#       - Support for Checked column (if any Compounds are checked in the Compounds table, export only those)
#       - also writes out tables with the Exchange Rate in the same format
#       - adds column Filename with the actual filename for each Study File ID
#
#
# 1.0 : Initial release.
#
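# Requested Tables and Columns (as read by the code below):
#
#   Compounds                  : Compounds ID, Name, Formula, Checked,
#                                Calc MW, RT in min
#   Labeled Compounds per File : Labeled Compounds per File ID, Study File ID,
#                                Molecular Weight, RT in min, Area, Status,
#                                all "Exchange Rate in Percent" columns
#   ConsolidatedUnknownCompoundItem-LabeledCompoundInstanceItem :
#                                Compounds ID, Labeled Compounds per File ID
#   Input Files                : Study File ID, File Name
#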

# If ONE_FILE_PER_COMPOUND below is set to TRUE, script will create one xlsx output file per compound
# If set to FALSE, script will create one large xlsx output file for ALL compounds 
#
#  
ONE_FILE_PER_COMPOUND = FALSE
OUTPUT_DIR_NAME = "SIL_Area"


# Return the position(s) of the table(s) called `name` within JSON_in$Tables.
#
# JSON_in : parsed JSON list holding a `Tables` list; every entry carries a
#           `TableName` field.
# name    : table name(s) to look for.
#
# Returns an integer vector of matching positions (empty if nothing matches).
getTableIdx <- function(JSON_in, name) {
  exportedNames <- sapply(JSON_in$Tables, function(tbl) tbl$TableName)
  isWanted <- !is.na(match(exportedNames, name))
  which(isWanted)
}
  
# Read the exported table called `name` into a data frame.
#
# JSON_in : parsed JSON list holding a `Tables` list; every entry carries a
#           `TableName` and the path of its exported `DataFile`.
# name    : name of the table to load.
#
# Returns the table as a data frame; column names are kept exactly as written
# in the file (check.names = FALSE) and strings stay character.
# Stops with an error naming the table if it was not exported. If several
# entries share the name, the first one is used and a warning is issued.
getTable <- function(JSON_in, name) {

  # An entry without a TableName yields NA instead of changing the result type.
  TableNames <- vapply(JSON_in$Tables,
                       function(tbl) as.character(tbl$TableName)[1],
                       character(1))
  hits <- which(TableNames %in% name)

  if (length(hits) == 0) {
    stop("Table not found: ", paste(name, collapse = ", "), call. = FALSE)
  }

  if (length(hits) > 1) {
    # `[[` with a vector index would index recursively, so pick one entry.
    warning("Table '", paste(name, collapse = ", "),
            "' found ", length(hits), " times; using the first one.",
            call. = FALSE)
  }

  read.table(JSON_in$Tables[[hits[1]]]$DataFile,
             header = TRUE, check.names = FALSE, stringsAsFactors = FALSE)
}

# Spreadsheet-style lookup: for every element of `x`, find the first row of
# `data` whose `key` column equals it and return that row's `value` column.
#
# x       : vector of keys to look up.
# data    : data frame (or list) holding the lookup table.
# key     : name of the column searched for `x`.
# value   : name of the column whose entries are returned.
# nomatch : value returned for keys that are not found (default NA).
#
# Returns a vector of the same length as `x`.
vlookup <- function(x, data, key, value, nomatch = NA) {
  hits <- match(x, data[[key]])
  found <- data[[value]][hits]

  # `nomatch` is a fallback value, not a row index: handing it to match()
  # would return an arbitrary row's value or change the result length.
  if (length(nomatch) == 1L && !is.na(nomatch)) {
    found[is.na(hits)] <- nomatch
  }

  found
}


# Read arguments from CD.
args <- commandArgs()

# 6th argument is the name of the JSON file
inputFile <- args[6]

# args[6] is NA when fewer than six arguments were passed; fail with a clear
# message instead of an unrelated error from the JSON parser.
if (is.na(inputFile) || !file.exists(inputFile)) {
  stop("JSON input file not found; its path is expected as the 6th ",
       "command-line argument.", call. = FALSE)
}

# Open JSON file, find exported files, read into tables
library(rjson)
CD_json_in <- fromJSON(file = inputFile)

# Names of all exported tables.
TableNames <- sapply(CD_json_in$Tables, function(x) x$TableName)

# Tables used by this script; the third one links Compounds IDs to
# Labeled Compounds per File IDs.
Compounds <- getTable(CD_json_in, "Compounds")
LabeledCompoundsPerFile <- getTable(CD_json_in, "Labeled Compounds per File")
Compounds_LabeledCompoundsPerFile <- getTable(
  CD_json_in, "ConsolidatedUnknownCompoundItem-LabeledCompoundInstanceItem")
InputFiles <- getTable(CD_json_in, "Input Files")


library(stringr)
library(writexl)

# Work in the directory of the result file.
cdresult.file <- CD_json_in$ResultFilePath
cdresult.dir <- dirname(cdresult.file)
setwd(cdresult.dir)

# Per-compound files go into their own sub-directory. Create it only if it is
# missing so that re-running the script does not raise a warning.
if (ONE_FILE_PER_COMPOUND) {
  if (!dir.exists(OUTPUT_DIR_NAME)) {
    dir.create(OUTPUT_DIR_NAME)
  }
  setwd(OUTPUT_DIR_NAME)
}

# Per-compound tables collected in the loop and combined afterwards:
#   AList  - relative peak area per isotopologue
#   ERList - exchange rate per isotopologue
AList <- ERList <- list()

# The Checked column is optional. NA entries count as "not checked" so that
# any() below can never evaluate to NA.
if ("Checked" %in% colnames(Compounds)) {
  checked <- as.logical(Compounds[, "Checked"])
  checked[is.na(checked)] <- FALSE
} else {
  checked <- rep(FALSE, nrow(Compounds))
}

## loop through all Compounds, or only checked
## (seq_len() gives an empty loop for an empty Compounds table)
iCpds <- if (any(checked)) which(checked) else seq_len(nrow(Compounds))

# Columns of Labeled Compounds per File that are copied into both tables.
infoCols <- c("Molecular Weight", "RT in min", "Area", "Status", "Study File ID")

for (ci in iCpds) {

  cid <- Compounds[ci, "Compounds ID"]

  # rows of the link table that belong to this compound
  LabeledCompoundsIdx <- which(
    Compounds_LabeledCompoundsPerFile[, "Compounds ID"] %in% cid)

  # Compound with no data in Labeled Compounds Per File
  if (length(LabeledCompoundsIdx) == 0) {
    next
  }

  LabeledCompoundsPerFileIDs <- Compounds_LabeledCompoundsPerFile[
    LabeledCompoundsIdx, "Labeled Compounds per File ID"]
  LabeledCompoundsPerFileData <- LabeledCompoundsPerFile[
    LabeledCompoundsPerFile[, "Labeled Compounds per File ID"] %in%
      LabeledCompoundsPerFileIDs, ]

  # drop = FALSE keeps a data frame even if only one column matches
  ERdf <- LabeledCompoundsPerFileData[
    , grep("Exchange Rate in Percent.", colnames(LabeledCompoundsPerFileData)),
    drop = FALSE]

  # TRUE for every exchange-rate column holding at least one non-NA value
  dataInCol <- colSums(is.na(ERdf)) < nrow(ERdf)

  # Only compounds with data in more than one exchange-rate column are exported.
  if (sum(dataInCol) <= 1) {
    next
  }

  # relative area = exchange rate [%] * peak area / 100, row by row
  Area <- LabeledCompoundsPerFileData[, "Area"]
  res <- round(sweep(ERdf, MARGIN = 1, Area, `*`) / 100)

  colnames(res) <- str_replace(colnames(res), "Exchange Rate in Percent", "Relative Area")

  # lookup file name using Study File ID
  filenames <- basename(vlookup(LabeledCompoundsPerFileData[, "Study File ID"],
                                InputFiles, "Study File ID", "File Name"))

  Adf <- cbind(Name = Compounds[ci, "Name"], Formula = Compounds[ci, "Formula"],
               LabeledCompoundsPerFileData[, infoCols],
               Filename = filenames, res)
  Edf <- cbind(Name = Compounds[ci, "Name"], Formula = Compounds[ci, "Formula"],
               LabeledCompoundsPerFileData[, infoCols],
               Filename = filenames, ERdf)

  # The combined tables always keep the full column set; otherwise rbind()
  # fails as soon as two compounds differ in their populated columns.
  AList[[length(AList) + 1]] <- Adf
  ERList[[length(ERList) + 1]] <- Edf

  if (ONE_FILE_PER_COMPOUND) {
    # drop columns with only NA (per-compound file only); the isotopologue
    # columns are the trailing columns of Adf
    nInfo <- ncol(Adf) - ncol(res)
    keepCol <- c(rep(TRUE, nInfo), unname(dataInCol))

    outBase <- paste0(str_pad(ci, 5, pad = "0"), "__",
                      "MW_", round(Compounds[ci, "Calc MW"], 2), "_",
                      "RT_", round(Compounds[ci, "RT in min"], 2), "_",
                      str_trunc(Compounds[ci, "Name"], 20))
    # compound names may contain characters that are not valid in file names
    outBase <- gsub("[\\\\/:*?\"<>|[:cntrl:]]", "_", outBase, perl = TRUE)

    write_xlsx(Adf[, keepCol, drop = FALSE], paste0(outBase, ".xlsx"))
  }

}

# Nothing was collected (no compound had labeled data): stop with a clear
# message instead of failing inside write_xlsx() on an empty result.
if (length(AList) == 0) {
  stop("No compound with labeled data found; nothing to export.", call. = FALSE)
}

# Replace NA by 0 in the columns `cols` of `tbl` only, leaving all other
# columns (names, formulas, status, ...) untouched. Returns the modified table.
zeroFill <- function(tbl, cols) {
  for (j in cols) {
    filled <- tbl[[j]]
    filled[is.na(filled)] <- 0
    tbl[[j]] <- filled
  }
  tbl
}

# Stack the per-compound tables into one table each.
AT <- do.call(rbind, AList)
ERT <- do.call(rbind, ERList)


# Relative areas: csv and xlsx with NA kept, then xlsx with NA replaced by 0
# in the "Relative Area" columns.
write.csv(AT, paste0(cdresult.file, "_SIL_Relative_NA_Area.csv"))
write_xlsx(AT, paste0(cdresult.file, "_SIL_Relative_Area.xlsx"))
cidx <- grep("Relative Area.", colnames(AT))
AT <- zeroFill(AT, cidx)
write_xlsx(AT, paste0(cdresult.file, "_SIL_Relative_Area_0Fill.xlsx"))

# Exchange rates: same three files, zero-filling the "Exchange Rate" columns.
write.csv(ERT, paste0(cdresult.file, "_SIL_Exchange_Rate_NA_Area.csv"))
write_xlsx(ERT, paste0(cdresult.file, "_SIL_Exchange_Rate.xlsx"))
cidx <- grep("Exchange Rate.", colnames(ERT))
ERT <- zeroFill(ERT, cidx)
write_xlsx(ERT, paste0(cdresult.file, "_SIL_Exchange_Rate_0Fill.xlsx"))


