From 4b9a1527f2e768792af30b94b83a4a77ccce2774 Mon Sep 17 00:00:00 2001 From: Thomas OBADIA <tobadia@pasteur.fr> Date: Wed, 9 Feb 2022 13:19:21 +0100 Subject: [PATCH] Mandatory switch to Unix line endings for all 3 .R files before doing any more changes --- FUNCTIONS.R | 3416 +++++++++++++++++++++++------------------------ RUN_PvSeroTAT.R | 178 +-- SHINY_APP.R | 1788 ++++++++++++------------- 3 files changed, 2691 insertions(+), 2691 deletions(-) diff --git a/FUNCTIONS.R b/FUNCTIONS.R index 8c4a358..6e54907 100644 --- a/FUNCTIONS.R +++ b/FUNCTIONS.R @@ -1,1708 +1,1708 @@ -###################################################################### -###################################################################### -## -## R code functions for Shiny App -## Developed by Ivo Mueller's Institut Pasteur and WEHI units -## -## Date: February 24th, 2021 -## -## Function 1: converting MFI to RAU -## Code developed by Connie Li Wai Suen, with additions by Narimane Nekkab and Shazia Ruybal -## -## Function 2: data processing function for RAU -## Code written by Narimane Nekkab -## -## Function 3: antigen naming corrector -## Code written by Narimane Nekkab -## -## Function 4: from RAU to classifying sero+ and sero- cases using the Random Forest algorithm and the top 8 antigens -## Code developed by Michael White and Narimane Nekkab -## -## Function 5: from RAU to classifying sero+ and sero- cases using the Support Vector Machine algorithm and the top 8 antigens -## Code developed by Narimane Nekkab with aid of Tom Woudenberg -## -###################################################################### -###################################################################### - -options(scipen = 999) - -################# Function 1: core MFI - RAU conversion code - -runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEMPLATE_CSV){ - - ############################################################################################## - ## ## - ## R code written by Connie Li Wai Suen. ## - ## Date: 28 May 2018 ## - ## ## - ## Description: ## - ## This R script fits a 5-parameter logistic standard curve ## - ## to the dilutions of the positive controls for each protein and ## - ## converts the median fluorescence intensity (MFI) values into relative antibody units. ## - ## ## - ## Input: ## - ## - Luminex-200 or Magpix output file (required) ## - ## - Plate layout with bleedcode information (optional) ## - ## ## - ## Output: ## - ## - csv file of relative antibody unit for each protein, ## - ## its reciprocal, the minimum and maximum standard, ## - ## and the minimum and maximum dilution for that protein. ## - ## ## - ############################################################################################## - ############################################################################################## - - ########################################################################################################## - #### READ DATA - ########################################################################################################## - - # ## The first 41 rows are not relevant and are not imported into R. - - if(MFI_CSV){ - - # Updated reading - L <- data.frame(read.csv(fname1, - header=T, - stringsAsFactors=FALSE, - colClasses=c(rep("character",3+as.integer(MFI_N_ANTIGENS))), - na.string=c(""))) - - # Identify first row - ROW_N = match("Location", L[,1]) - - # Select data - L = L[ROW_N:(ROW_N+96),1:(3+as.integer(MFI_N_ANTIGENS))] - # the first row will be the header - colnames(L) = L[1, ] - # removing the first row. - L = L[-1, ] - ## Exclude column that corresponds to "Total events" - L <- L[, !(colnames(L) %in% c("Total Events","TotalEvents"))] - - # Antigen names clean-up - colnames(L) = gsub("\\..*", "", colnames(L)) - # Remove any values in (parentheses) - colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) - # Remove spaces - colnames(L) = gsub(" ","", colnames(L)) - - dim(L) - - # MFI values as numeric - L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events"))], as.numeric) - - ## Load the counts to check for run quality control - C <- data.frame(read.csv(fname1, - header=T, - stringsAsFactors=FALSE, - colClasses=c(rep("character",3+as.integer(MFI_N_ANTIGENS))), - na.string=c(""))) - - # Row - ROW_C = 239 - # Select data - C = C[ROW_C:(ROW_C+nrow(L)),1:(3+as.integer(MFI_N_ANTIGENS))] - # the first row will be the header - colnames(C) = C[1, ] - # removing the first row. - C = C[-1, ] - ## Exclude column that corresponds to "Total events" - C <- C[, !(colnames(C) %in% c("Total Events","TotalEvents"))] - - # Antigen names clean-up - colnames(C) = gsub("\\..*", "", colnames(C)) - # Remove any values in (parentheses) - colnames(C) = gsub("\\s*\\([^\\)]+\\)","", colnames(C)) - # Remove spaces - colnames(C) = gsub(" ","", colnames(C)) - dim(C) - - ## Save the MFI values for the blank sample(s) for run quality control - B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) - dim(B) - - ## Save the MFI values for the standards for run quality control - S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) - dim(S) #there should be 10 rows - - }else{ - - # Read - L_full <- as.data.frame(read_excel(fname1)) - - # Identify rows - median_row_number <- which(L_full$xPONENT == "Median") - count_row_number <- which(L_full$xPONENT == "Count") - endcount_row_number <- which(L_full$xPONENT == "Avg Net MFI") - - # Load Excel file - L <- as.data.frame(read_excel(fname1, skip = median_row_number+1)) - - ## Find all blank rows (i.e. rows that are all NA). - ## Then keep rows preceding the first blank row. - blank.row.number <- which(rowSums(is.na(L)) == length(names(L)))[1] - if(is.na(blank.row.number)){ - L = L - }else{ - L <- L[1:(blank.row.number-1),] - } - - ## Exclude column that corresponds to "Total events" - L <- L[, !(colnames(L) %in% c("Total Events","TotalEvents"))] - - # Antigen names clean-up - colnames(L) = gsub("\\..*", "", colnames(L)) - # Remove any values in (parentheses) - colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) - # Remove spaces - colnames(L) = gsub(" ","", colnames(L)) - - dim(L) - - # Change "NaN" to 0s - L <- L %>% mutate_all(funs(gsub("NaN", 0, .))) - - # MFI values as numeric - L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events"))], as.numeric) - - # Skip - ROW_C = 239 - - ## Load the counts to check for run quality control - C <- as.data.frame(read_excel(fname1, skip = ROW_C-1)) - - ## Find all blank rows (i.e. rows that are all NA). - ## Then keep rows preceding the first blank row. - blank.row.number <- which(rowSums(is.na(C)) == length(names(C)))[1] - if(is.na(blank.row.number)){ - C = C - }else{ - C <- C[1:(blank.row.number-1),] - } - - # The first row will be the header - colnames(C) = C[1, ] - # Removing the first row. - C = C[-1, ] - ## Exclude column that corresponds to "Total events" - C <- C[, !(colnames(C) %in% c("Total Events","TotalEvents"))] - - # Antigen names clean-up - colnames(C) = gsub("\\..*", "", colnames(C)) - # Remove any values in (parentheses) - colnames(C) = gsub("\\s*\\([^\\)]+\\)","", colnames(C)) - # Remove spaces - colnames(C) = gsub(" ","", colnames(C)) - dim(C) - - ## Save the MFI values for the blank sample(s) for run quality control - B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) - dim(B) - - ## Save the MFI values for the standards for run quality control - S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) - dim(S) #there should be 10 rows - - } - - ########################################################################################################## - #### QUALITY CONTROL WARNINGS - ########################################################################################################## - - # Shazia's warning script - C <- C %>% - # clean_names() %>% - # dplyr::select(-c(sample, total_events)) %>% # can maybe "keep" relevant columns + protein names? - dplyr::mutate(Location=gsub(".*,", "", Location)) %>% - dplyr::mutate(Location=substr(Location, 1, nchar(Location)-1)) %>% - tidyr::pivot_longer(-Location, names_to = "protein", values_to = "count") %>% - dplyr::filter(grepl("^\\d+$",count)) %>% - dplyr::mutate(count = as.numeric(count), - warning = case_when( - count<15~1, - count>=15~0 - )) %>% - dplyr::select(Location, warning) %>% - dplyr::group_by(Location) %>% - dplyr::summarise(sum = sum(warning)) %>% - dplyr::mutate(colour = case_when( - sum>=1 ~ "repeat", - sum<1 ~ "sufficient beads" - )) %>% - dplyr::mutate(row = substr(Location, 1, nchar(Location)-1)) %>% - dplyr::mutate(col = substr(Location, 2, nchar(Location))) %>% - dplyr::mutate(row = gsub("1", "", row)) %>% - dplyr::mutate(row = as.factor(row)) %>% - dplyr::mutate(col = as.numeric(col)) - - - ########################################################################################################## - #### PLATE LAYOUT AND PROTEIN NAMES - ########################################################################################################## - - ## Plate layout - if(TEMPLATE_CSV){ - layout = read.csv(fname2); layout - layout = data.frame(lapply(layout, as.character), stringsAsFactors=FALSE) - colnames(layout) = sub("X","",colnames(layout)) - }else{ - layout = as.data.frame(read_excel(fname2)); layout - } - - ## Protein name list obtained after removing variable names: "Location" and "Sample" - proteins <- names(L[, -c(1:2)]); proteins - ## Add new variable to data frame to indicate the first letter of sample type ("B", "C", "S", "U","N") - ## "B" = blank, "C"=control, "S"=standard (dilution of the pool), "U"=sample, "N"=negative control - L$type.letter <- substr(L$Sample, start=1, stop=1) - dilution <- c(1/50, 1/100, 1/200, 1/400, 1/800, 1/1600, 1/3200, 1/6400, 1/12800, 1/25600) - dilution.scaled <- dilution*25600; dilution.scaled - dilution.plot <- c("1/50", "1/100", "1/200", "1/400", "1/800", - "1/1600", "1/3200", "1/6400", "1/12800", "1/25600") - - ########################################################################################################## - #### LOG-LOG MODEL - ########################################################################################################## - - results.df.wide <- NULL - model_list <- NULL - - for (i in proteins){ - results.df <- NULL - ## Taking the mean of duplicates for each standard - ## and storing in object std in the following order: S1, S2, S3, ..., S9, S10. - std <- NULL - b <- c <- d <- e <- NULL - for (r in 1:nrow(L)){ - if (L$type.letter[r]=="S"){ - std <- c(std, as.numeric(L[r,i])) - } - } - - ## Log-log model to obtain a more linear relationship - ## and therefore make it easier to interpolate around the lower asymptote. - log.std <- log(as.numeric(std)); log.std - - ## Five-parameter logistic function is given by the expression: - ## f(x) = c + \frac{d-c}{(1+\exp(b(\log(x)-\log(e))))^f} - model1 <- drm(log.std ~ dilution, fct=LL.5(names=c("b", "c", "d", "e", "f"))) - summary(model1) - - ## Save output of model for each protein - model_list[[i]] <- model1 - - # Sys.sleep(0.1) ## Suspends execution for 0.1 second to prevent RStudio errors when plotting within the loop. - # plot(model1, main=i) - - ## F(x) = ((A-D)/(1+((x/C)^B))) + D ## where A=minimum asymptote, B=Hill slope, C=ED50, D=Maximum asymptote - ## x = C*(((A-D)/(F(x)-D))-1)^(1/B) = e*(((c-d)/(log(mfi.X)-d))-1)^(1/b) - b <- coef(summary(model1))[1]; b ## slope - c <- coef(summary(model1))[2]; c ## lower asymptote - d <- coef(summary(model1))[3]; d ## upper asymptote - e <- coef(summary(model1))[4]; e ## ED50 - f <- coef(summary(model1))[5]; f ## asymmetry parameter (f=1 for 4PL curves) - - - ########################################################################################################## - #### MFI TO RAU CONVERSION - ########################################################################################################## - - for (r in 1:nrow(L)){ - results <- NULL - if (L$type.letter[r]=="U"){ - mfi.X <- as.numeric(L[r, i]) - y <- log(mfi.X) - - if (y > max(log.std)) { - dil.X <- max(dilution) - } else { - dil.X <- e*(( ((d-c)/(y-c))^(1/f) - 1 )^(1/b) ) - } - dil.X <- ifelse(dil.X > 0.02, 0.02, dil.X) - dil.X <- ifelse((is.na(dil.X) & y>log.std[2]), 0.02, dil.X) ## Setting observations with very high MFI to 1/50. - dil.X <- ifelse(dil.X < 1/51200, 1/51200, dil.X) - dil.X <- ifelse((is.na(dil.X) & y<max(log.std)), 1/51200, dil.X) ## Setting observations with very low MFI to 1/51200. - location.X <- L[r, "Location"] - sample.X <- L[r, "Sample"] - results <- cbind(Location=location.X, Sample=sample.X, MFI=mfi.X, Dilution=dil.X, DilutionReciprocal=1/dil.X, MinStd=min(std), MaxDilution=min(dilution), MaxStd=max(std), MinDilution=max(dilution)) - results.colnames <- c("Location", "Sample", paste0(i, "_", c("MFI", "Dilution", "DilutionReciprocal", "MinStd", "MaxDilution", "MaxStd", "MinDilution"))) - colnames(results) <- results.colnames - } - results.df <- rbind(results.df, results) - } - - if (is.null(results.df.wide)){ - results.df.wide <- results.df - } else { results.df.wide <- merge(results.df.wide, results.df, by=c("Location", "Sample")) } - - } - - ########################################################################################################## - #### MODEL RESULTS AND PLOTS - ########################################################################################################## - - ## Save all model results - model_results <- NULL - for (i in names(model_list)){ - title <- as.character(i) - model_results[[i]] <- plot(model_list[[i]], main = title) - } - - ########################################################################################################## - #### MERGE DATA - ########################################################################################################## - - results.df.wide <- as.data.frame(results.df.wide) - results.location <- matrix(unlist(strsplit(as.character(results.df.wide$Location), ",")), ncol=2, byrow=T)[,2] - results.location <- substr(results.location, 1, nchar(results.location)-1) - results.df.wide <- cbind(Location.2=results.location, results.df.wide) - - ## Matching bleedcode from plate layout to corresponding sample. - location.1 <- matrix(unlist(strsplit(L$Location, ",")), ncol=2, byrow=T)[,2] - location.1 <- substr(location.1, 1, nchar(location.1)-1) - location.2 <- data.frame(Location.2=location.1, alpha=gsub("[[:digit:]]", "", location.1), numeric=gsub("[^[:digit:]]", "", location.1), Bleedcode=NA, stringsAsFactors = FALSE) - for (i in location.2[, "Location.2"]){ - location.2[location.2$Location.2==i, "Bleedcode"] <- layout[layout$Plate==location.2[location.2$Location.2==i, "alpha"], colnames(layout)==location.2[location.2$Location.2==i, "numeric"]] - } - - ## Using join() from plyr package to add bleedcode information to results.df.wide. (default or given folder location and unique name) - results.df.wide <- join(results.df.wide, location.2[,c("Location.2", "Bleedcode")], by="Location.2", type="left") - ## Move bleedcode to first column - results.df.wide <- results.df.wide[, c("Bleedcode", colnames(results.df.wide)[!(colnames(results.df.wide) %in% "Bleedcode")])] - - # Make all columns after 1st 4 numeric - results.df.wide[,5:ncol(results.df.wide)] = lapply(results.df.wide[,5:ncol(results.df.wide)], as.character) - results.df.wide[,5:ncol(results.df.wide)] = lapply(results.df.wide[,5:ncol(results.df.wide)], as.numeric) - - - ########################################################################################################## - #### OUTPUT - ########################################################################################################## - - #### Save just MFI and RAU for downstream analyses - col_selection <- grepl("SampleID|_MFI|\\_Dilution$", colnames(results.df.wide)) - #colnames(results.df.wide)[col_selection] - MFI_RAU_results <- results.df.wide[, col_selection] - - # Sample data - S <- S %>% mutate(dilution = dilution, dilution_plot = dilution.plot) - - # Output - #return(list(results.df.wide, Warnings, MFI_RAU_results, model_results)) # this included Narimane's warnings - return(list(results.df.wide, S, B, C, MFI_RAU_results, model_results)) -} - -################# Function 2: data processing function for RAU - -getRelativeAntibodyUnits = function(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, - MFI_CSV, MFI_N_ANTIGENS, - TEMPLATE_FILE_PATH, TEMPLATE_CSV, - ID, DATE, - EXP_DIR){ - - ############################################################################################## - ## ## - ## R code written by Connie Li Wai Suen. ## - ## Date: 28 May 2018 ## - ## ## - ## Description: ## - ## This R script fits a 5-parameter logistic standard curve ## - ## to the dilutions of the positive controls for each protein and ## - ## converts the median fluorescence intensity (MFI) values into relative antibody units. ## - ## ## - ## Input: ## - ## - Luminex-200 or Magpix output file (required) ## - ## - Plate layout with bleedcode information (optional) ## - ## ## - ## Output: ## - ## - csv file of relative antibody unit for each protein, ## - ## its reciprocal, the minimum and maximum standard, ## - ## and the minimum and maximum dilution for that protein. ## - ## ## - ############################################################################################## - ############################################################################################## - ## ## - ## R code adapated to Shiny App by Narimane Nekkab ## - ## Date: 28 January 2020 ## - ## ## - ############################################################################################## - ############################################################################################## - - ####################################### - ## CHANGE FILE NAMES IN THIS SECTION ## - ####################################### - - cat("******************Running RAU function******************\n") - - # Get Luminex-MagPix full file path - fname1 = RAW_MFI_FILE_PATH - - # Extract working directly of this file - input_directory = paste0(getwd(),"/RAW_DATA/") - - # Output folder - cat(paste0("Output directory: ",EXP_DIR,"\n")) - - # If no plate layout provided, use default - fname2 = TEMPLATE_FILE_PATH - - cat("Path files have been read inside function...\n") - - ####################################################################################################################### - - cat("***RUN MAIN CODE***\n") - - results = runRelativeAntibodyUnits(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEMPLATE_CSV) - - cat("***END CODE RUN***\n") - - ###################################################################################################################### - - cat("Start plotting and saving process...\n") - - ##################################### - results.df.wide = results[[1]] - std_curve = results[[2]] - blank_MFI = results[[3]] - bead_counts = results[[4]] - MFI_RAU_results = results[[5]] - model_results = results[[6]] - - ## Plot standard curve raw MFI - plot_stdcurve <- std_curve %>% - dplyr::select(-Location) %>% - dplyr::mutate(Sample = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"), - Sample_dilution = factor(paste0(Sample,": ",dilution_plot),levels=paste0(Sample,": ",dilution_plot))) %>% - tidyr::pivot_longer(-c(Sample,dilution,dilution_plot,Sample_dilution), names_to = "protein", values_to = "MFI") %>% - # mutate(Sample = factor(Sample, c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"))) %>% - dplyr::mutate(MFI = as.numeric(MFI)) %>% - ggplot(aes(x = Sample_dilution, y = MFI, color = protein, group = protein)) + - geom_point() + - geom_line() + - scale_y_log10(breaks = c(0, 10, 100, 1000, 10000)) + - labs(x = "standard curve", - y = "log(MFI)") + - facet_wrap(~protein) + - theme_bw() + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) - - ## Plot plate counts - plot_counts <- bead_counts %>% - ggplot(mapping = aes(x = col, y = fct_rev(row), fill = colour), fill = summary)+ - geom_tile(aes(height = 0.90, width = 0.90)) + - scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))+ - scale_fill_manual(values = c("sufficient beads" = "#91bfdb", "repeat" = "#d73027"))+ - theme_linedraw()+ - labs(x = "columns", y = "rows", fill = "") - - ## Plot blank sample MFI for each protein - if there is more than one blank sample label as "Blank1", "Blank2" etc - plot_blank <- blank_MFI %>% - dplyr::select(-Location) %>% - dplyr::mutate(Sample = paste0(Sample,1:n())) %>% - tidyr::pivot_longer(-Sample, names_to = "protein", values_to = "MFI") %>% - ggplot(aes(x = factor(protein), y = as.numeric(MFI), fill = Sample)) + - geom_bar(stat = "identity", position = "dodge") + - geom_hline(yintercept = 50, linetype = "dashed", color = "grey") + - labs(x = "protein", - y = "MFI") + - theme_linedraw() + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) - - ## Plot model curves - plots_model <- lapply(seq_along(model_results), function(x){ - ggplot(data = model_results[[x]], - aes(x = dilution, y = `1`)) + - geom_line() + - scale_x_log10(breaks = c(1e-5, 1e-4, 1e-3, 1e-2, 0.03), - labels = c("0.00001", "0.0001", "0.001", "0.01", "0.03")) + - labs(x = "antibody dilution", - y = "standard curve", - title = names(model_results[x])) + - theme_bw() - }) - - all_model_plots <- gridExtra::grid.arrange(grobs = plots_model, nrow = 3) - all_model_plots - - # Save .RData for future loading - save(results.df.wide, std_curve, blank_MFI, MFI_RAU_results, model_results, - file = paste0(EXP_DIR, "ALL_RESULTS.RData")) - - # Save .rds of standard curve for future QC - and record the experiment name - # saveRDS(std_curve, file = paste0(EXP_DIR, "STD_CURVES.rds")) - - # Save PDF of std curve plots - ggsave(plot_stdcurve, - filename = paste0(EXP_DIR,"STD_CURVES_PLOT.pdf"), - height = 8, - width = 12, - units = "in") - - # Save PDF of bead count plot - ggsave(plot_counts, - filename = paste0(EXP_DIR,"PLATE_BEADS_COUNT_PLOT.pdf"), - height = 8, - width = 12, - units = "in") - - # Save PDF ofblank sample QC plot - ggsave(plot_blank, - filename = paste0(EXP_DIR,"BLANK_SAMPLE_PLOT.pdf"), - height = 4, - width = 6, - units = "in") - - # Save PDF of model plots - ggsave(all_model_plots, - filename = paste0(EXP_DIR,"MODEL_PLOTS.pdf"), - height = 8.27, - width = 11.69, - units = "in") - - ## Write to file - write.csv(results.df.wide, - file = paste0(EXP_DIR, "RAU_RESULTS.csv"), - row.names = F) - # write.csv(MFI_RAU_results, - # file = paste0(EXP_DIR, "MFI_RAU_RESULTS.csv"), - # row.names = F) - - cat("******************End RAU function******************\n") - - return(results) - -} - -################# Function 3: antigen naming corrector - -getAntigenNames = function(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV){ - - ############################################################################################## - ## ## - ## R code written by Narimane Nekkab ## - ## Date: 3 March 2020 ## - ## ## - ## Description: ## - ## R code to match the top 8 antigen names since multiple naming conventions exists ## - ## ## - ## Input: ## - ## - Default naming guide containing the W names, L names, and PVX names ## - ## ## - ## Output: ## - ## - The matched W names ## - ## ## - ############################################################################################## - ############################################################################################## - - # Load naming guide for top 8 antigens - if(ANTIGEN_CSV){ - NAME_GUIDE = read.csv(ANTIGEN_FILE_PATH, stringsAsFactors = F) - }else{ - NAME_GUIDE = read_excel(ANTIGEN_FILE_PATH, stringsAsFactors = F) - } - - # Get RAU_NAMES in order (ONLY TOP 8) - RAU_NAMES = vector("character", nrow(NAME_GUIDE)) - for(i in 1:nrow(NAME_GUIDE)){ - RAU_NAMES[i] = ifelse(identical(colnames(RAU_Dilution)[agrep(NAME_GUIDE$Antigen_Name[i], colnames(RAU_Dilution), ignore.case = T, value = FALSE, max.distance = list(cost=1, substitutions=0,deletions=1, insertions=0), fixed = T, useBytes = T)], character(0)), NA, - colnames(RAU_Dilution)[agrep(NAME_GUIDE$Antigen_Name[i], colnames(RAU_Dilution), ignore.case = T, value = FALSE, max.distance = list(cost=1, substitutions=0,deletions=1, insertions=0), fixed = T, useBytes = T)]) - } - - # Match - NAME_GUIDE_MATCH = cbind(NAME_GUIDE, RAU_NAMES) - # Remove NAs - NAME_GUIDE_MATCH = NAME_GUIDE_MATCH[complete.cases(NAME_GUIDE_MATCH),] - # Keep only one match - NAME_GUIDE_MATCH = unique(NAME_GUIDE_MATCH[,c("Protein_Code","RAU_NAMES")]) - # Make into character - NAME_GUIDE_MATCH$RAU_NAMES = as.character(NAME_GUIDE_MATCH$RAU_NAMES) - - # Subset columns by name - RAU_Dilution_Subset = RAU_Dilution[,which(colnames(RAU_Dilution) %in% NAME_GUIDE_MATCH$RAU_NAMES)] - - # Order names to match data order - NAME_GUIDE_MATCH = NAME_GUIDE_MATCH[match(colnames(RAU_Dilution_Subset), NAME_GUIDE_MATCH$RAU_NAMES),] - # Assign correct W names - colnames(RAU_Dilution_Subset) = paste0(NAME_GUIDE_MATCH$Protein_Code, "_", colnames(RAU_Dilution_Subset)) - - return(RAU_Dilution_Subset) -} - -################# Function 4: Random forest serology analysis - -getSeropositiveResults_RF = function(PATHWAY_1, - RAU_user_id_columns, RAU_user_RAU_columns, - RAU_RESULTS, - ANTIGEN_FILE_PATH, ANTIGEN_CSV, - CHECK_NAME, CHECK_ID, ID, DATE, - EXP_DIR, - MODEL_W16, - MODEL_W16_3_TARGETS, - MODEL_W16_EQUAL_TARGET, - MODEL_W16_HIGH_SP_TARGET, - MODEL_W16_HIGH_SE_TARGET, - MODEL_W16_OTHER_SE, - MODEL_W16_OTHER_SP, - MODEL_W47, - MODEL_W47_3_TARGETS, - MODEL_W47_EQUAL_TARGET, - MODEL_W47_HIGH_SP_TARGET, - MODEL_W47_HIGH_SE_TARGET, - MODEL_W47_OTHER_SE, - MODEL_W47_OTHER_SP){ - - ############################################################################################## - ## ## - ## R code written by Michael White and adapted by Narimane Nekkab ## - ## Date: 3 March 2020 ## - ## ## - ## Description: ## - ## This R script determines the classification of sero+ and sero- cases from RAU data. ## - ## Classification performance is based on the current default model that uses ## - ## Brazilian, Thai, and Solomon Islands, and control data of the top 8 antigens ## - ## with a known senstivity and specificy. ## - ## It is based on this soon to be published work: ## - ## https://www.biorxiv.org/content/10.1101/481168v1 ## - ## ## - ## Input: ## - ## - Relative antibody unit data processed by the App or user uploaded RAU data ## - ## ## - ## Output: ## - ## - The id data, the RAU of the top 8 antigens, PvSero+ classification, and treatment ## - ## ## - ############################################################################################## - ############################################################################################## - - cat("******************Start classifier function******************\n") - - # Get RAU data - RAU_RESULTS = RAU_RESULTS - - # Output folder - cat(paste0("Output directory: ",EXP_DIR,"\n")) - - cat("Data has been read inside function...\n") - - cat("RUN FUNCTION\n") - - ################################################################################################## - ####################### RAU processed by app or user submitted RAU_RESULTS ####################### - - # Determine in data is user-submitted or pre-processed by app if uploaded - if(PATHWAY_1){ - - # Subset dilution results - RAU_Dilution = RAU_RESULTS[,grepl( "_Dilution", names(RAU_RESULTS))] - RAU_Dilution = RAU_Dilution[,!grepl("DilutionReciprocal", names(RAU_Dilution))] - # Antigen names clean-up - colnames(RAU_Dilution) = str_replace(colnames(RAU_Dilution), "_Dilution", "") - colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) - # Remove any values in (parentheses) - colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) - - }else{ - - # Subset user-uploaded RAU data - RAU_Dilution = RAU_RESULTS[,(as.integer(RAU_user_id_columns)+1):(as.integer(RAU_user_id_columns)+as.integer(RAU_user_RAU_columns))] - # Antigen names clean-up - colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) - # Remove any values in (parentheses) - colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) - - } - - # Correct for names and subset only top 8 antigens - cat("Match antigen names\n") - RAU_Dilution_Subset = getAntigenNames(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV) - - # Check that data is in a data.frame format and numeric - RAU_Dilution_Subset = as.data.frame(RAU_Dilution_Subset) - RAU_Dilution_Subset = sapply(RAU_Dilution_Subset, as.numeric) - - # Save full names - RAU_RFOREST_NAMES = colnames(RAU_Dilution_Subset) - - # Rename keeping only W names - colnames(RAU_Dilution_Subset) = substr(colnames(RAU_Dilution_Subset), 1, 3) - - ################################################################### - ####################### RANDOM FOREST MODEL ####################### - - # Select model chosen: W16 - if(MODEL_W16){ - # Reorder to match Random Forest model - W_Name_Order = c("W50", "W01", "W16", "W02", "W30", "W08", "W58", "W39") - RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] - colnames(RAU_Dilution_Subset) = W_Name_Order - - # TAKE LOG OF RAU - RAU_Dilution_Subset = log(RAU_Dilution_Subset) - - # Load Random Forest DEFAULT model fit - cat("Load model\n") - load(paste0(getwd(), "/MODEL/RFOREST_MODEL_W16.Rdata")) - # Number of trees - N_tree = 2500 - - ################ PREDICT - cat("Predict\n") - RFOREST_MODEL_PREDICTION = predict(RFOREST_MODEL, newdata=RAU_Dilution_Subset, predict.all=TRUE, type="prob") - ################ PREDICT - - # Tally votes - RFOREST_MODEL_VOTES = rowSums(RFOREST_MODEL_PREDICTION$individual=="old")/N_tree - - # Merge results back to RAU - colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] - } - - # Select model chosen: W47 - if(MODEL_W47){ - # Reorder to match Random Forest model - W_Name_Order = c("W50", "W01", "W47", "W02", "W30", "W08", "W58", "W39") - RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] - colnames(RAU_Dilution_Subset) = W_Name_Order - - # TAKE LOG OF RAU - RAU_Dilution_Subset = log(RAU_Dilution_Subset) - - # Load Random Forest DEFAULT model fit - cat("Load model\n") - load(paste0(getwd(), "/MODEL/RFOREST_MODEL_W47.Rdata")) - # Number of trees - N_tree = 2500 - - ################ PREDICT - cat("Predict\n") - RFOREST_MODEL_PREDICTION = predict(RFOREST_MODEL, newdata=RAU_Dilution_Subset, predict.all=TRUE, type="prob") - ################ PREDICT - - # Tally votes - RFOREST_MODEL_VOTES = rowSums(RFOREST_MODEL_PREDICTION$individual=="old")/N_tree - - # Merge results back to RAU - colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] - } - - ################################################################## - ##################### TARGET CUTOFFS BY MODEL #################### - - # Get ROC curves per model and targets - if(MODEL_W16){ - RF_ROC=read.csv(paste0(getwd(), "/MODEL/RF_W16_ROC_TABLE.csv")) - } - if(MODEL_W47){ - RF_ROC=read.csv(paste0(getwd(), "/MODEL/RF_W47_ROC_TABLE.csv")) - } - - ################### - ### Get cutoffs ### - - # Equal - CUTOFF_1 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SP - RF_ROC$SE))] - - # Identify SP90 target - CUTOFF_2 = RF_ROC$THRESHOLD[min(which(RF_ROC$SP > 0.90))] - - # Identify SE90 target - CUTOFF_3 = RF_ROC$THRESHOLD[max(which(RF_ROC$SE > 0.90))] - - # Other - if(MODEL_W16_OTHER_SE){ - if(MODEL_W16){ - CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] - SP_Other = RF_ROC$SP[which.min(abs(RF_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] - SE_Other = MODEL_W16_HIGH_SE_VALUE - }else{ - CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] - SP_Other = RF_ROC$SP[which.min(abs(RF_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] - SE_Other = MODEL_W47_HIGH_SE_VALUE - } - } - if(MODEL_W16_OTHER_SP){ - if(MODEL_W16){ - CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] - SP_Other = MODEL_W16_HIGH_SP_VALUE - SE_Other = RF_ROC$SE[which.min(abs(RF_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] - }else{ - CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] - SP_Other = MODEL_W47_HIGH_SP_VALUE - SE_Other = RF_ROC$SE[which.min(abs(RF_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] - } - } - - # ALL 3 TARGETS OUTPUT - if(MODEL_W16_3_TARGETS | MODEL_W47_3_TARGETS){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # EQUAL TARGET OUTPUT - if(MODEL_W16_EQUAL_TARGET | MODEL_W47_EQUAL_TARGET){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # HIGH SP TARGET OUTPUT - if(MODEL_W16_HIGH_SP_TARGET | MODEL_W47_HIGH_SP_TARGET){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # HIGH SE TARGET OUTPUT - if(MODEL_W16_HIGH_SE_TARGET | MODEL_W47_HIGH_SE_TARGET){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # OTHER TARGETS - if(MODEL_W16_OTHER_SP | MODEL_W47_OTHER_SP){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - } - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - RFOREST_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - } - } - } - } - - - ################################################## - ##################### OUTPUTS #################### - - cat("Start saving proces...\n") - - #################################################################################### - ############################ SAVE SEROPOSITIVTY RESULTS ############################ - - cat("SAVE SEROPOSITIVITY RESULTS\n") - write.csv(RFOREST_VOTES_SEROPOS, file=paste0(EXP_DIR,"/PVSEROTAT_RF_CLASS_RESULTS.csv"), row.names=F) - - cat("******************End Sero function******************\n") - - return(RFOREST_VOTES_SEROPOS) - -} - -################# Function 5: SVM serology analysis - -getSeropositiveResults_SVM = function(PATHWAY_1, - RAU_user_id_columns, RAU_user_RAU_columns, - RAU_RESULTS, - ANTIGEN_FILE_PATH, ANTIGEN_CSV, - CHECK_NAME, CHECK_ID, ID, DATE, - EXP_DIR, - MODEL_W16, - MODEL_W16_3_TARGETS, - MODEL_W16_EQUAL_TARGET, - MODEL_W16_HIGH_SP_TARGET, - MODEL_W16_HIGH_SE_TARGET, - MODEL_W16_OTHER_SE, - MODEL_W16_OTHER_SP, - MODEL_W47, - MODEL_W47_3_TARGETS, - MODEL_W47_EQUAL_TARGET, - MODEL_W47_HIGH_SP_TARGET, - MODEL_W47_HIGH_SE_TARGET, - MODEL_W47_OTHER_SE, - MODEL_W47_OTHER_SP){ - - ############################################################################################## - ## ## - ## R code written by Michael White and adapted by Narimane Nekkab ## - ## Date: 3 March 2020 ## - ## ## - ## Description: ## - ## This R script determines the classification of sero+ and sero- cases from RAU data. ## - ## Classification performance is based on the current default model that uses ## - ## Brazilian, Thai, and Solomon Islands, and control data of the top 8 antigens ## - ## with a known senstivity and specificy. ## - ## It is based on this soon to be published work: ## - ## https://www.biorxiv.org/content/10.1101/481168v1 ## - ## ## - ## Input: ## - ## - Relative antibody unit data processed by the App or user uploaded RAU data ## - ## ## - ## Output: ## - ## - The id data, the RAU of the top 8 antigens, PvSero+ classification, and treatment ## - ## ## - ############################################################################################## - ############################################################################################## - - cat("******************Start classifier function******************\n") - - # Get RAU data - RAU_RESULTS = RAU_RESULTS - - cat("Data has been read inside function...\n") - - cat("RUN FUNCTION\n") - - ################################################################################################## - ####################### RAU processed by app or user submitted RAU_RESULTS ####################### - - # Determine in data is user-submitted or pre-processed by app if uploaded - if(PATHWAY_1){ - - # Subset dilution results - RAU_Dilution = RAU_RESULTS[,grepl( "_Dilution", names(RAU_RESULTS))] - RAU_Dilution = RAU_Dilution[,!grepl("DilutionReciprocal", names(RAU_Dilution))] - # Antigen names clean-up - colnames(RAU_Dilution) = str_replace(colnames(RAU_Dilution), "_Dilution", "") - colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) - # Remove any values in (parentheses) - colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) - - }else{ - - # Subset user-uploaded RAU data - RAU_Dilution = RAU_RESULTS[,(as.integer(RAU_user_id_columns)+1):(as.integer(RAU_user_id_columns)+as.integer(RAU_user_RAU_columns))] - # Antigen names clean-up - colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) - # Remove any values in (parentheses) - colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) - - } - - # Correct for names and subset only top 8 antigens - cat("Match antigen names\n") - RAU_Dilution_Subset = getAntigenNames(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV) - - # Check that data is in a data.frame format and numeric - RAU_Dilution_Subset = as.data.frame(RAU_Dilution_Subset) - RAU_Dilution_Subset = sapply(RAU_Dilution_Subset, as.numeric) - - # Save full names - RAU_RFOREST_NAMES = colnames(RAU_Dilution_Subset) - - # Rename keeping only W names - colnames(RAU_Dilution_Subset) = substr(colnames(RAU_Dilution_Subset), 1, 3) - - ################################################################### - ####################### RANDOM FOREST MODEL ####################### - - # Select model chosen: W16 - if(MODEL_W16){ - # Reorder to match Random Forest model - W_Name_Order = c("W50", "W01", "W16", "W02", "W30", "W08", "W58", "W39") - RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] - colnames(RAU_Dilution_Subset) = W_Name_Order - - # TAKE LOG OF RAU - RAU_Dilution_Subset = log(RAU_Dilution_Subset) - - # Load Random Forest DEFAULT model fit - cat("Load model\n") - load(paste0(getwd(), "/MODEL/SVM_MODEL_W16.Rdata")) - # Number of trees - N_tree = 2500 - - ################ PREDICT - cat("Predict\n") - SVM_MODEL_PREDICTION = predict(SVM_8, newdata=RAU_Dilution_Subset, decision.values = TRUE) - ################ PREDICT - - # Tally votes - SVM_MODEL_VOTES = attr(SVM_MODEL_PREDICTION, "decision.values")[,1] - - # Merge results back to RAU - colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] - } - - # Select model chosen: W47 - if(MODEL_W47){ - # Reorder to match Random Forest model - W_Name_Order = c("W50", "W01", "W47", "W02", "W30", "W08", "W58", "W39") - RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] - colnames(RAU_Dilution_Subset) = W_Name_Order - - # TAKE LOG OF RAU - RAU_Dilution_Subset = log(RAU_Dilution_Subset) - - # Load Random Forest DEFAULT model fit - cat("Load model\n") - load(paste0(getwd(), "/MODEL/SVM_MODEL_W47.Rdata")) - # Number of trees - N_tree = 2500 - - ################ PREDICT - cat("Predict\n") - SVM_MODEL_PREDICTION = predict(SVM_8, newdata=RAU_Dilution_Subset, decision.values = TRUE) - ################ PREDICT - - # Tally votes - SVM_MODEL_VOTES = attr(SVM_MODEL_PREDICTION, "decision.values")[,1] - - # Merge results back to RAU - colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] - } - - ################################################################## - ##################### TARGET CUTOFFS BY MODEL #################### - - # Get ROC curves per model and targets - if(MODEL_W16){ - SVM_ROC=read.csv(paste0(getwd(), "/MODEL/SVM_W16_ROC_TABLE.csv")) - } - if(MODEL_W47){ - SVM_ROC=read.csv(paste0(getwd(), "/MODEL/SVM_W47_ROC_TABLE.csv")) - } - - ################### - ### Get cutoffs ### - - # Equal - CUTOFF_1 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SP - SVM_ROC$SE))] - - # Identify SP90 target - CUTOFF_2 = SVM_ROC$THRESHOLD[min(which(SVM_ROC$SP > 0.90))] - - # Identify SE90 target - CUTOFF_3 = SVM_ROC$THRESHOLD[max(which(SVM_ROC$SE > 0.90))] - - # Other - if(MODEL_W16_OTHER_SE){ - if(MODEL_W16){ - CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] - SP_Other = SVM_ROC$SP[which.min(abs(SVM_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] - SE_Other = MODEL_W16_HIGH_SE_VALUE - }else{ - CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] - SP_Other = SVM_ROC$SP[which.min(abs(SVM_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] - SE_Other = MODEL_W47_HIGH_SE_VALUE - } - } - if(MODEL_W16_OTHER_SP){ - if(MODEL_W16){ - CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] - SP_Other = MODEL_W16_HIGH_SP_VALUE - SE_Other = SVM_ROC$SE[which.min(abs(SVM_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] - }else{ - CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] - SP_Other = MODEL_W47_HIGH_SP_VALUE - SE_Other = SVM_ROC$SE[which.min(abs(SVM_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] - } - } - - # ALL 3 TARGETS OUTPUT - if(MODEL_W16_3_TARGETS | MODEL_W47_3_TARGETS){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # EQUAL TARGET OUTPUT - if(MODEL_W16_EQUAL_TARGET | MODEL_W47_EQUAL_TARGET){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - } - - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # HIGH SP TARGET OUTPUT - if(MODEL_W16_HIGH_SP_TARGET | MODEL_W47_HIGH_SP_TARGET){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - } - - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # HIGH SE TARGET OUTPUT - if(MODEL_W16_HIGH_SE_TARGET | MODEL_W47_HIGH_SE_TARGET){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) - } - } - } - } - - # OTHER TARGETS - if(MODEL_W16_OTHER_SP | MODEL_W47_OTHER_SP){ - - #W16 - if(MODEL_W16){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - } - } - } - - #W47 - if(MODEL_W47){ - - # Create binary seropositivity value - if(PATHWAY_1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - - }else{ - if(RAU_user_id_columns == 1){ - col_name = colnames(RAU_RESULTS)[1] - SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) - } - if(RAU_user_id_columns > 1){ - SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) - } - } - } - } - - - ################################################## - ##################### OUTPUTS #################### - - cat("Start saving proces...\n") - - #################################################################################### - ############################ SAVE SEROPOSITIVTY RESULTS ############################ - - cat("SAVE SEROPOSITIVITY RESULTS\n") - write.csv(SVM_SEROPOS, file=paste0(EXP_DIR,"/PVSEROTAT_SVM_CLASS_RESULTS.csv"), row.names=F) - - cat("******************End Sero function******************\n") - - return(SVM_SEROPOS) - - -} +###################################################################### +###################################################################### +## +## R code functions for Shiny App +## Developed by Ivo Mueller's Institut Pasteur and WEHI units +## +## Date: February 24th, 2021 +## +## Function 1: converting MFI to RAU +## Code developed by Connie Li Wai Suen, with additions by Narimane Nekkab and Shazia Ruybal +## +## Function 2: data processing function for RAU +## Code written by Narimane Nekkab +## +## Function 3: antigen naming corrector +## Code written by Narimane Nekkab +## +## Function 4: from RAU to classifying sero+ and sero- cases using the Random Forest algorithm and the top 8 antigens +## Code developed by Michael White and Narimane Nekkab +## +## Function 5: from RAU to classifying sero+ and sero- cases using the Support Vector Machine algorithm and the top 8 antigens +## Code developed by Narimane Nekkab with aid of Tom Woudenberg +## +###################################################################### +###################################################################### + +options(scipen = 999) + +################# Function 1: core MFI - RAU conversion code + +runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEMPLATE_CSV){ + + ############################################################################################## + ## ## + ## R code written by Connie Li Wai Suen. ## + ## Date: 28 May 2018 ## + ## ## + ## Description: ## + ## This R script fits a 5-parameter logistic standard curve ## + ## to the dilutions of the positive controls for each protein and ## + ## converts the median fluorescence intensity (MFI) values into relative antibody units. ## + ## ## + ## Input: ## + ## - Luminex-200 or Magpix output file (required) ## + ## - Plate layout with bleedcode information (optional) ## + ## ## + ## Output: ## + ## - csv file of relative antibody unit for each protein, ## + ## its reciprocal, the minimum and maximum standard, ## + ## and the minimum and maximum dilution for that protein. ## + ## ## + ############################################################################################## + ############################################################################################## + + ########################################################################################################## + #### READ DATA + ########################################################################################################## + + # ## The first 41 rows are not relevant and are not imported into R. + + if(MFI_CSV){ + + # Updated reading + L <- data.frame(read.csv(fname1, + header=T, + stringsAsFactors=FALSE, + colClasses=c(rep("character",3+as.integer(MFI_N_ANTIGENS))), + na.string=c(""))) + + # Identify first row + ROW_N = match("Location", L[,1]) + + # Select data + L = L[ROW_N:(ROW_N+96),1:(3+as.integer(MFI_N_ANTIGENS))] + # the first row will be the header + colnames(L) = L[1, ] + # removing the first row. + L = L[-1, ] + ## Exclude column that corresponds to "Total events" + L <- L[, !(colnames(L) %in% c("Total Events","TotalEvents"))] + + # Antigen names clean-up + colnames(L) = gsub("\\..*", "", colnames(L)) + # Remove any values in (parentheses) + colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) + # Remove spaces + colnames(L) = gsub(" ","", colnames(L)) + + dim(L) + + # MFI values as numeric + L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events"))], as.numeric) + + ## Load the counts to check for run quality control + C <- data.frame(read.csv(fname1, + header=T, + stringsAsFactors=FALSE, + colClasses=c(rep("character",3+as.integer(MFI_N_ANTIGENS))), + na.string=c(""))) + + # Row + ROW_C = 239 + # Select data + C = C[ROW_C:(ROW_C+nrow(L)),1:(3+as.integer(MFI_N_ANTIGENS))] + # the first row will be the header + colnames(C) = C[1, ] + # removing the first row. + C = C[-1, ] + ## Exclude column that corresponds to "Total events" + C <- C[, !(colnames(C) %in% c("Total Events","TotalEvents"))] + + # Antigen names clean-up + colnames(C) = gsub("\\..*", "", colnames(C)) + # Remove any values in (parentheses) + colnames(C) = gsub("\\s*\\([^\\)]+\\)","", colnames(C)) + # Remove spaces + colnames(C) = gsub(" ","", colnames(C)) + dim(C) + + ## Save the MFI values for the blank sample(s) for run quality control + B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) + dim(B) + + ## Save the MFI values for the standards for run quality control + S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) + dim(S) #there should be 10 rows + + }else{ + + # Read + L_full <- as.data.frame(read_excel(fname1)) + + # Identify rows + median_row_number <- which(L_full$xPONENT == "Median") + count_row_number <- which(L_full$xPONENT == "Count") + endcount_row_number <- which(L_full$xPONENT == "Avg Net MFI") + + # Load Excel file + L <- as.data.frame(read_excel(fname1, skip = median_row_number+1)) + + ## Find all blank rows (i.e. rows that are all NA). + ## Then keep rows preceding the first blank row. + blank.row.number <- which(rowSums(is.na(L)) == length(names(L)))[1] + if(is.na(blank.row.number)){ + L = L + }else{ + L <- L[1:(blank.row.number-1),] + } + + ## Exclude column that corresponds to "Total events" + L <- L[, !(colnames(L) %in% c("Total Events","TotalEvents"))] + + # Antigen names clean-up + colnames(L) = gsub("\\..*", "", colnames(L)) + # Remove any values in (parentheses) + colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) + # Remove spaces + colnames(L) = gsub(" ","", colnames(L)) + + dim(L) + + # Change "NaN" to 0s + L <- L %>% mutate_all(funs(gsub("NaN", 0, .))) + + # MFI values as numeric + L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events"))], as.numeric) + + # Skip + ROW_C = 239 + + ## Load the counts to check for run quality control + C <- as.data.frame(read_excel(fname1, skip = ROW_C-1)) + + ## Find all blank rows (i.e. rows that are all NA). + ## Then keep rows preceding the first blank row. + blank.row.number <- which(rowSums(is.na(C)) == length(names(C)))[1] + if(is.na(blank.row.number)){ + C = C + }else{ + C <- C[1:(blank.row.number-1),] + } + + # The first row will be the header + colnames(C) = C[1, ] + # Removing the first row. + C = C[-1, ] + ## Exclude column that corresponds to "Total events" + C <- C[, !(colnames(C) %in% c("Total Events","TotalEvents"))] + + # Antigen names clean-up + colnames(C) = gsub("\\..*", "", colnames(C)) + # Remove any values in (parentheses) + colnames(C) = gsub("\\s*\\([^\\)]+\\)","", colnames(C)) + # Remove spaces + colnames(C) = gsub(" ","", colnames(C)) + dim(C) + + ## Save the MFI values for the blank sample(s) for run quality control + B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) + dim(B) + + ## Save the MFI values for the standards for run quality control + S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) + dim(S) #there should be 10 rows + + } + + ########################################################################################################## + #### QUALITY CONTROL WARNINGS + ########################################################################################################## + + # Shazia's warning script + C <- C %>% + # clean_names() %>% + # dplyr::select(-c(sample, total_events)) %>% # can maybe "keep" relevant columns + protein names? + dplyr::mutate(Location=gsub(".*,", "", Location)) %>% + dplyr::mutate(Location=substr(Location, 1, nchar(Location)-1)) %>% + tidyr::pivot_longer(-Location, names_to = "protein", values_to = "count") %>% + dplyr::filter(grepl("^\\d+$",count)) %>% + dplyr::mutate(count = as.numeric(count), + warning = case_when( + count<15~1, + count>=15~0 + )) %>% + dplyr::select(Location, warning) %>% + dplyr::group_by(Location) %>% + dplyr::summarise(sum = sum(warning)) %>% + dplyr::mutate(colour = case_when( + sum>=1 ~ "repeat", + sum<1 ~ "sufficient beads" + )) %>% + dplyr::mutate(row = substr(Location, 1, nchar(Location)-1)) %>% + dplyr::mutate(col = substr(Location, 2, nchar(Location))) %>% + dplyr::mutate(row = gsub("1", "", row)) %>% + dplyr::mutate(row = as.factor(row)) %>% + dplyr::mutate(col = as.numeric(col)) + + + ########################################################################################################## + #### PLATE LAYOUT AND PROTEIN NAMES + ########################################################################################################## + + ## Plate layout + if(TEMPLATE_CSV){ + layout = read.csv(fname2); layout + layout = data.frame(lapply(layout, as.character), stringsAsFactors=FALSE) + colnames(layout) = sub("X","",colnames(layout)) + }else{ + layout = as.data.frame(read_excel(fname2)); layout + } + + ## Protein name list obtained after removing variable names: "Location" and "Sample" + proteins <- names(L[, -c(1:2)]); proteins + ## Add new variable to data frame to indicate the first letter of sample type ("B", "C", "S", "U","N") + ## "B" = blank, "C"=control, "S"=standard (dilution of the pool), "U"=sample, "N"=negative control + L$type.letter <- substr(L$Sample, start=1, stop=1) + dilution <- c(1/50, 1/100, 1/200, 1/400, 1/800, 1/1600, 1/3200, 1/6400, 1/12800, 1/25600) + dilution.scaled <- dilution*25600; dilution.scaled + dilution.plot <- c("1/50", "1/100", "1/200", "1/400", "1/800", + "1/1600", "1/3200", "1/6400", "1/12800", "1/25600") + + ########################################################################################################## + #### LOG-LOG MODEL + ########################################################################################################## + + results.df.wide <- NULL + model_list <- NULL + + for (i in proteins){ + results.df <- NULL + ## Taking the mean of duplicates for each standard + ## and storing in object std in the following order: S1, S2, S3, ..., S9, S10. + std <- NULL + b <- c <- d <- e <- NULL + for (r in 1:nrow(L)){ + if (L$type.letter[r]=="S"){ + std <- c(std, as.numeric(L[r,i])) + } + } + + ## Log-log model to obtain a more linear relationship + ## and therefore make it easier to interpolate around the lower asymptote. + log.std <- log(as.numeric(std)); log.std + + ## Five-parameter logistic function is given by the expression: + ## f(x) = c + \frac{d-c}{(1+\exp(b(\log(x)-\log(e))))^f} + model1 <- drm(log.std ~ dilution, fct=LL.5(names=c("b", "c", "d", "e", "f"))) + summary(model1) + + ## Save output of model for each protein + model_list[[i]] <- model1 + + # Sys.sleep(0.1) ## Suspends execution for 0.1 second to prevent RStudio errors when plotting within the loop. + # plot(model1, main=i) + + ## F(x) = ((A-D)/(1+((x/C)^B))) + D ## where A=minimum asymptote, B=Hill slope, C=ED50, D=Maximum asymptote + ## x = C*(((A-D)/(F(x)-D))-1)^(1/B) = e*(((c-d)/(log(mfi.X)-d))-1)^(1/b) + b <- coef(summary(model1))[1]; b ## slope + c <- coef(summary(model1))[2]; c ## lower asymptote + d <- coef(summary(model1))[3]; d ## upper asymptote + e <- coef(summary(model1))[4]; e ## ED50 + f <- coef(summary(model1))[5]; f ## asymmetry parameter (f=1 for 4PL curves) + + + ########################################################################################################## + #### MFI TO RAU CONVERSION + ########################################################################################################## + + for (r in 1:nrow(L)){ + results <- NULL + if (L$type.letter[r]=="U"){ + mfi.X <- as.numeric(L[r, i]) + y <- log(mfi.X) + + if (y > max(log.std)) { + dil.X <- max(dilution) + } else { + dil.X <- e*(( ((d-c)/(y-c))^(1/f) - 1 )^(1/b) ) + } + dil.X <- ifelse(dil.X > 0.02, 0.02, dil.X) + dil.X <- ifelse((is.na(dil.X) & y>log.std[2]), 0.02, dil.X) ## Setting observations with very high MFI to 1/50. + dil.X <- ifelse(dil.X < 1/51200, 1/51200, dil.X) + dil.X <- ifelse((is.na(dil.X) & y<max(log.std)), 1/51200, dil.X) ## Setting observations with very low MFI to 1/51200. + location.X <- L[r, "Location"] + sample.X <- L[r, "Sample"] + results <- cbind(Location=location.X, Sample=sample.X, MFI=mfi.X, Dilution=dil.X, DilutionReciprocal=1/dil.X, MinStd=min(std), MaxDilution=min(dilution), MaxStd=max(std), MinDilution=max(dilution)) + results.colnames <- c("Location", "Sample", paste0(i, "_", c("MFI", "Dilution", "DilutionReciprocal", "MinStd", "MaxDilution", "MaxStd", "MinDilution"))) + colnames(results) <- results.colnames + } + results.df <- rbind(results.df, results) + } + + if (is.null(results.df.wide)){ + results.df.wide <- results.df + } else { results.df.wide <- merge(results.df.wide, results.df, by=c("Location", "Sample")) } + + } + + ########################################################################################################## + #### MODEL RESULTS AND PLOTS + ########################################################################################################## + + ## Save all model results + model_results <- NULL + for (i in names(model_list)){ + title <- as.character(i) + model_results[[i]] <- plot(model_list[[i]], main = title) + } + + ########################################################################################################## + #### MERGE DATA + ########################################################################################################## + + results.df.wide <- as.data.frame(results.df.wide) + results.location <- matrix(unlist(strsplit(as.character(results.df.wide$Location), ",")), ncol=2, byrow=T)[,2] + results.location <- substr(results.location, 1, nchar(results.location)-1) + results.df.wide <- cbind(Location.2=results.location, results.df.wide) + + ## Matching bleedcode from plate layout to corresponding sample. + location.1 <- matrix(unlist(strsplit(L$Location, ",")), ncol=2, byrow=T)[,2] + location.1 <- substr(location.1, 1, nchar(location.1)-1) + location.2 <- data.frame(Location.2=location.1, alpha=gsub("[[:digit:]]", "", location.1), numeric=gsub("[^[:digit:]]", "", location.1), Bleedcode=NA, stringsAsFactors = FALSE) + for (i in location.2[, "Location.2"]){ + location.2[location.2$Location.2==i, "Bleedcode"] <- layout[layout$Plate==location.2[location.2$Location.2==i, "alpha"], colnames(layout)==location.2[location.2$Location.2==i, "numeric"]] + } + + ## Using join() from plyr package to add bleedcode information to results.df.wide. (default or given folder location and unique name) + results.df.wide <- join(results.df.wide, location.2[,c("Location.2", "Bleedcode")], by="Location.2", type="left") + ## Move bleedcode to first column + results.df.wide <- results.df.wide[, c("Bleedcode", colnames(results.df.wide)[!(colnames(results.df.wide) %in% "Bleedcode")])] + + # Make all columns after 1st 4 numeric + results.df.wide[,5:ncol(results.df.wide)] = lapply(results.df.wide[,5:ncol(results.df.wide)], as.character) + results.df.wide[,5:ncol(results.df.wide)] = lapply(results.df.wide[,5:ncol(results.df.wide)], as.numeric) + + + ########################################################################################################## + #### OUTPUT + ########################################################################################################## + + #### Save just MFI and RAU for downstream analyses + col_selection <- grepl("SampleID|_MFI|\\_Dilution$", colnames(results.df.wide)) + #colnames(results.df.wide)[col_selection] + MFI_RAU_results <- results.df.wide[, col_selection] + + # Sample data + S <- S %>% mutate(dilution = dilution, dilution_plot = dilution.plot) + + # Output + #return(list(results.df.wide, Warnings, MFI_RAU_results, model_results)) # this included Narimane's warnings + return(list(results.df.wide, S, B, C, MFI_RAU_results, model_results)) +} + +################# Function 2: data processing function for RAU + +getRelativeAntibodyUnits = function(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, + MFI_CSV, MFI_N_ANTIGENS, + TEMPLATE_FILE_PATH, TEMPLATE_CSV, + ID, DATE, + EXP_DIR){ + + ############################################################################################## + ## ## + ## R code written by Connie Li Wai Suen. ## + ## Date: 28 May 2018 ## + ## ## + ## Description: ## + ## This R script fits a 5-parameter logistic standard curve ## + ## to the dilutions of the positive controls for each protein and ## + ## converts the median fluorescence intensity (MFI) values into relative antibody units. ## + ## ## + ## Input: ## + ## - Luminex-200 or Magpix output file (required) ## + ## - Plate layout with bleedcode information (optional) ## + ## ## + ## Output: ## + ## - csv file of relative antibody unit for each protein, ## + ## its reciprocal, the minimum and maximum standard, ## + ## and the minimum and maximum dilution for that protein. ## + ## ## + ############################################################################################## + ############################################################################################## + ## ## + ## R code adapated to Shiny App by Narimane Nekkab ## + ## Date: 28 January 2020 ## + ## ## + ############################################################################################## + ############################################################################################## + + ####################################### + ## CHANGE FILE NAMES IN THIS SECTION ## + ####################################### + + cat("******************Running RAU function******************\n") + + # Get Luminex-MagPix full file path + fname1 = RAW_MFI_FILE_PATH + + # Extract working directly of this file + input_directory = paste0(getwd(),"/RAW_DATA/") + + # Output folder + cat(paste0("Output directory: ",EXP_DIR,"\n")) + + # If no plate layout provided, use default + fname2 = TEMPLATE_FILE_PATH + + cat("Path files have been read inside function...\n") + + ####################################################################################################################### + + cat("***RUN MAIN CODE***\n") + + results = runRelativeAntibodyUnits(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEMPLATE_CSV) + + cat("***END CODE RUN***\n") + + ###################################################################################################################### + + cat("Start plotting and saving process...\n") + + ##################################### + results.df.wide = results[[1]] + std_curve = results[[2]] + blank_MFI = results[[3]] + bead_counts = results[[4]] + MFI_RAU_results = results[[5]] + model_results = results[[6]] + + ## Plot standard curve raw MFI + plot_stdcurve <- std_curve %>% + dplyr::select(-Location) %>% + dplyr::mutate(Sample = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"), + Sample_dilution = factor(paste0(Sample,": ",dilution_plot),levels=paste0(Sample,": ",dilution_plot))) %>% + tidyr::pivot_longer(-c(Sample,dilution,dilution_plot,Sample_dilution), names_to = "protein", values_to = "MFI") %>% + # mutate(Sample = factor(Sample, c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"))) %>% + dplyr::mutate(MFI = as.numeric(MFI)) %>% + ggplot(aes(x = Sample_dilution, y = MFI, color = protein, group = protein)) + + geom_point() + + geom_line() + + scale_y_log10(breaks = c(0, 10, 100, 1000, 10000)) + + labs(x = "standard curve", + y = "log(MFI)") + + facet_wrap(~protein) + + theme_bw() + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + ## Plot plate counts + plot_counts <- bead_counts %>% + ggplot(mapping = aes(x = col, y = fct_rev(row), fill = colour), fill = summary)+ + geom_tile(aes(height = 0.90, width = 0.90)) + + scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))+ + scale_fill_manual(values = c("sufficient beads" = "#91bfdb", "repeat" = "#d73027"))+ + theme_linedraw()+ + labs(x = "columns", y = "rows", fill = "") + + ## Plot blank sample MFI for each protein - if there is more than one blank sample label as "Blank1", "Blank2" etc + plot_blank <- blank_MFI %>% + dplyr::select(-Location) %>% + dplyr::mutate(Sample = paste0(Sample,1:n())) %>% + tidyr::pivot_longer(-Sample, names_to = "protein", values_to = "MFI") %>% + ggplot(aes(x = factor(protein), y = as.numeric(MFI), fill = Sample)) + + geom_bar(stat = "identity", position = "dodge") + + geom_hline(yintercept = 50, linetype = "dashed", color = "grey") + + labs(x = "protein", + y = "MFI") + + theme_linedraw() + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + ## Plot model curves + plots_model <- lapply(seq_along(model_results), function(x){ + ggplot(data = model_results[[x]], + aes(x = dilution, y = `1`)) + + geom_line() + + scale_x_log10(breaks = c(1e-5, 1e-4, 1e-3, 1e-2, 0.03), + labels = c("0.00001", "0.0001", "0.001", "0.01", "0.03")) + + labs(x = "antibody dilution", + y = "standard curve", + title = names(model_results[x])) + + theme_bw() + }) + + all_model_plots <- gridExtra::grid.arrange(grobs = plots_model, nrow = 3) + all_model_plots + + # Save .RData for future loading + save(results.df.wide, std_curve, blank_MFI, MFI_RAU_results, model_results, + file = paste0(EXP_DIR, "ALL_RESULTS.RData")) + + # Save .rds of standard curve for future QC - and record the experiment name + # saveRDS(std_curve, file = paste0(EXP_DIR, "STD_CURVES.rds")) + + # Save PDF of std curve plots + ggsave(plot_stdcurve, + filename = paste0(EXP_DIR,"STD_CURVES_PLOT.pdf"), + height = 8, + width = 12, + units = "in") + + # Save PDF of bead count plot + ggsave(plot_counts, + filename = paste0(EXP_DIR,"PLATE_BEADS_COUNT_PLOT.pdf"), + height = 8, + width = 12, + units = "in") + + # Save PDF ofblank sample QC plot + ggsave(plot_blank, + filename = paste0(EXP_DIR,"BLANK_SAMPLE_PLOT.pdf"), + height = 4, + width = 6, + units = "in") + + # Save PDF of model plots + ggsave(all_model_plots, + filename = paste0(EXP_DIR,"MODEL_PLOTS.pdf"), + height = 8.27, + width = 11.69, + units = "in") + + ## Write to file + write.csv(results.df.wide, + file = paste0(EXP_DIR, "RAU_RESULTS.csv"), + row.names = F) + # write.csv(MFI_RAU_results, + # file = paste0(EXP_DIR, "MFI_RAU_RESULTS.csv"), + # row.names = F) + + cat("******************End RAU function******************\n") + + return(results) + +} + +################# Function 3: antigen naming corrector + +getAntigenNames = function(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV){ + + ############################################################################################## + ## ## + ## R code written by Narimane Nekkab ## + ## Date: 3 March 2020 ## + ## ## + ## Description: ## + ## R code to match the top 8 antigen names since multiple naming conventions exists ## + ## ## + ## Input: ## + ## - Default naming guide containing the W names, L names, and PVX names ## + ## ## + ## Output: ## + ## - The matched W names ## + ## ## + ############################################################################################## + ############################################################################################## + + # Load naming guide for top 8 antigens + if(ANTIGEN_CSV){ + NAME_GUIDE = read.csv(ANTIGEN_FILE_PATH, stringsAsFactors = F) + }else{ + NAME_GUIDE = read_excel(ANTIGEN_FILE_PATH, stringsAsFactors = F) + } + + # Get RAU_NAMES in order (ONLY TOP 8) + RAU_NAMES = vector("character", nrow(NAME_GUIDE)) + for(i in 1:nrow(NAME_GUIDE)){ + RAU_NAMES[i] = ifelse(identical(colnames(RAU_Dilution)[agrep(NAME_GUIDE$Antigen_Name[i], colnames(RAU_Dilution), ignore.case = T, value = FALSE, max.distance = list(cost=1, substitutions=0,deletions=1, insertions=0), fixed = T, useBytes = T)], character(0)), NA, + colnames(RAU_Dilution)[agrep(NAME_GUIDE$Antigen_Name[i], colnames(RAU_Dilution), ignore.case = T, value = FALSE, max.distance = list(cost=1, substitutions=0,deletions=1, insertions=0), fixed = T, useBytes = T)]) + } + + # Match + NAME_GUIDE_MATCH = cbind(NAME_GUIDE, RAU_NAMES) + # Remove NAs + NAME_GUIDE_MATCH = NAME_GUIDE_MATCH[complete.cases(NAME_GUIDE_MATCH),] + # Keep only one match + NAME_GUIDE_MATCH = unique(NAME_GUIDE_MATCH[,c("Protein_Code","RAU_NAMES")]) + # Make into character + NAME_GUIDE_MATCH$RAU_NAMES = as.character(NAME_GUIDE_MATCH$RAU_NAMES) + + # Subset columns by name + RAU_Dilution_Subset = RAU_Dilution[,which(colnames(RAU_Dilution) %in% NAME_GUIDE_MATCH$RAU_NAMES)] + + # Order names to match data order + NAME_GUIDE_MATCH = NAME_GUIDE_MATCH[match(colnames(RAU_Dilution_Subset), NAME_GUIDE_MATCH$RAU_NAMES),] + # Assign correct W names + colnames(RAU_Dilution_Subset) = paste0(NAME_GUIDE_MATCH$Protein_Code, "_", colnames(RAU_Dilution_Subset)) + + return(RAU_Dilution_Subset) +} + +################# Function 4: Random forest serology analysis + +getSeropositiveResults_RF = function(PATHWAY_1, + RAU_user_id_columns, RAU_user_RAU_columns, + RAU_RESULTS, + ANTIGEN_FILE_PATH, ANTIGEN_CSV, + CHECK_NAME, CHECK_ID, ID, DATE, + EXP_DIR, + MODEL_W16, + MODEL_W16_3_TARGETS, + MODEL_W16_EQUAL_TARGET, + MODEL_W16_HIGH_SP_TARGET, + MODEL_W16_HIGH_SE_TARGET, + MODEL_W16_OTHER_SE, + MODEL_W16_OTHER_SP, + MODEL_W47, + MODEL_W47_3_TARGETS, + MODEL_W47_EQUAL_TARGET, + MODEL_W47_HIGH_SP_TARGET, + MODEL_W47_HIGH_SE_TARGET, + MODEL_W47_OTHER_SE, + MODEL_W47_OTHER_SP){ + + ############################################################################################## + ## ## + ## R code written by Michael White and adapted by Narimane Nekkab ## + ## Date: 3 March 2020 ## + ## ## + ## Description: ## + ## This R script determines the classification of sero+ and sero- cases from RAU data. ## + ## Classification performance is based on the current default model that uses ## + ## Brazilian, Thai, and Solomon Islands, and control data of the top 8 antigens ## + ## with a known senstivity and specificy. ## + ## It is based on this soon to be published work: ## + ## https://www.biorxiv.org/content/10.1101/481168v1 ## + ## ## + ## Input: ## + ## - Relative antibody unit data processed by the App or user uploaded RAU data ## + ## ## + ## Output: ## + ## - The id data, the RAU of the top 8 antigens, PvSero+ classification, and treatment ## + ## ## + ############################################################################################## + ############################################################################################## + + cat("******************Start classifier function******************\n") + + # Get RAU data + RAU_RESULTS = RAU_RESULTS + + # Output folder + cat(paste0("Output directory: ",EXP_DIR,"\n")) + + cat("Data has been read inside function...\n") + + cat("RUN FUNCTION\n") + + ################################################################################################## + ####################### RAU processed by app or user submitted RAU_RESULTS ####################### + + # Determine in data is user-submitted or pre-processed by app if uploaded + if(PATHWAY_1){ + + # Subset dilution results + RAU_Dilution = RAU_RESULTS[,grepl( "_Dilution", names(RAU_RESULTS))] + RAU_Dilution = RAU_Dilution[,!grepl("DilutionReciprocal", names(RAU_Dilution))] + # Antigen names clean-up + colnames(RAU_Dilution) = str_replace(colnames(RAU_Dilution), "_Dilution", "") + colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) + # Remove any values in (parentheses) + colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) + + }else{ + + # Subset user-uploaded RAU data + RAU_Dilution = RAU_RESULTS[,(as.integer(RAU_user_id_columns)+1):(as.integer(RAU_user_id_columns)+as.integer(RAU_user_RAU_columns))] + # Antigen names clean-up + colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) + # Remove any values in (parentheses) + colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) + + } + + # Correct for names and subset only top 8 antigens + cat("Match antigen names\n") + RAU_Dilution_Subset = getAntigenNames(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV) + + # Check that data is in a data.frame format and numeric + RAU_Dilution_Subset = as.data.frame(RAU_Dilution_Subset) + RAU_Dilution_Subset = sapply(RAU_Dilution_Subset, as.numeric) + + # Save full names + RAU_RFOREST_NAMES = colnames(RAU_Dilution_Subset) + + # Rename keeping only W names + colnames(RAU_Dilution_Subset) = substr(colnames(RAU_Dilution_Subset), 1, 3) + + ################################################################### + ####################### RANDOM FOREST MODEL ####################### + + # Select model chosen: W16 + if(MODEL_W16){ + # Reorder to match Random Forest model + W_Name_Order = c("W50", "W01", "W16", "W02", "W30", "W08", "W58", "W39") + RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] + colnames(RAU_Dilution_Subset) = W_Name_Order + + # TAKE LOG OF RAU + RAU_Dilution_Subset = log(RAU_Dilution_Subset) + + # Load Random Forest DEFAULT model fit + cat("Load model\n") + load(paste0(getwd(), "/MODEL/RFOREST_MODEL_W16.Rdata")) + # Number of trees + N_tree = 2500 + + ################ PREDICT + cat("Predict\n") + RFOREST_MODEL_PREDICTION = predict(RFOREST_MODEL, newdata=RAU_Dilution_Subset, predict.all=TRUE, type="prob") + ################ PREDICT + + # Tally votes + RFOREST_MODEL_VOTES = rowSums(RFOREST_MODEL_PREDICTION$individual=="old")/N_tree + + # Merge results back to RAU + colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] + } + + # Select model chosen: W47 + if(MODEL_W47){ + # Reorder to match Random Forest model + W_Name_Order = c("W50", "W01", "W47", "W02", "W30", "W08", "W58", "W39") + RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] + colnames(RAU_Dilution_Subset) = W_Name_Order + + # TAKE LOG OF RAU + RAU_Dilution_Subset = log(RAU_Dilution_Subset) + + # Load Random Forest DEFAULT model fit + cat("Load model\n") + load(paste0(getwd(), "/MODEL/RFOREST_MODEL_W47.Rdata")) + # Number of trees + N_tree = 2500 + + ################ PREDICT + cat("Predict\n") + RFOREST_MODEL_PREDICTION = predict(RFOREST_MODEL, newdata=RAU_Dilution_Subset, predict.all=TRUE, type="prob") + ################ PREDICT + + # Tally votes + RFOREST_MODEL_VOTES = rowSums(RFOREST_MODEL_PREDICTION$individual=="old")/N_tree + + # Merge results back to RAU + colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] + } + + ################################################################## + ##################### TARGET CUTOFFS BY MODEL #################### + + # Get ROC curves per model and targets + if(MODEL_W16){ + RF_ROC=read.csv(paste0(getwd(), "/MODEL/RF_W16_ROC_TABLE.csv")) + } + if(MODEL_W47){ + RF_ROC=read.csv(paste0(getwd(), "/MODEL/RF_W47_ROC_TABLE.csv")) + } + + ################### + ### Get cutoffs ### + + # Equal + CUTOFF_1 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SP - RF_ROC$SE))] + + # Identify SP90 target + CUTOFF_2 = RF_ROC$THRESHOLD[min(which(RF_ROC$SP > 0.90))] + + # Identify SE90 target + CUTOFF_3 = RF_ROC$THRESHOLD[max(which(RF_ROC$SE > 0.90))] + + # Other + if(MODEL_W16_OTHER_SE){ + if(MODEL_W16){ + CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] + SP_Other = RF_ROC$SP[which.min(abs(RF_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] + SE_Other = MODEL_W16_HIGH_SE_VALUE + }else{ + CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] + SP_Other = RF_ROC$SP[which.min(abs(RF_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] + SE_Other = MODEL_W47_HIGH_SE_VALUE + } + } + if(MODEL_W16_OTHER_SP){ + if(MODEL_W16){ + CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] + SP_Other = MODEL_W16_HIGH_SP_VALUE + SE_Other = RF_ROC$SE[which.min(abs(RF_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] + }else{ + CUTOFF_4 = RF_ROC$THRESHOLD[which.min(abs(RF_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] + SP_Other = MODEL_W47_HIGH_SP_VALUE + SE_Other = RF_ROC$SE[which.min(abs(RF_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] + } + } + + # ALL 3 TARGETS OUTPUT + if(MODEL_W16_3_TARGETS | MODEL_W47_3_TARGETS){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # EQUAL TARGET OUTPUT + if(MODEL_W16_EQUAL_TARGET | MODEL_W47_EQUAL_TARGET){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # HIGH SP TARGET OUTPUT + if(MODEL_W16_HIGH_SP_TARGET | MODEL_W47_HIGH_SP_TARGET){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # HIGH SE TARGET OUTPUT + if(MODEL_W16_HIGH_SE_TARGET | MODEL_W47_HIGH_SE_TARGET){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(RFOREST_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # OTHER TARGETS + if(MODEL_W16_OTHER_SP | MODEL_W47_OTHER_SP){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + } + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + colnames(RFOREST_VOTES_SEROPOS) = c(col_name, colnames(RFOREST_VOTES_SEROPOS[2:ncol(RFOREST_VOTES_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + RFOREST_VOTES_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + RFOREST_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(RFOREST_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + } + } + } + } + + + ################################################## + ##################### OUTPUTS #################### + + cat("Start saving proces...\n") + + #################################################################################### + ############################ SAVE SEROPOSITIVTY RESULTS ############################ + + cat("SAVE SEROPOSITIVITY RESULTS\n") + write.csv(RFOREST_VOTES_SEROPOS, file=paste0(EXP_DIR,"/PVSEROTAT_RF_CLASS_RESULTS.csv"), row.names=F) + + cat("******************End Sero function******************\n") + + return(RFOREST_VOTES_SEROPOS) + +} + +################# Function 5: SVM serology analysis + +getSeropositiveResults_SVM = function(PATHWAY_1, + RAU_user_id_columns, RAU_user_RAU_columns, + RAU_RESULTS, + ANTIGEN_FILE_PATH, ANTIGEN_CSV, + CHECK_NAME, CHECK_ID, ID, DATE, + EXP_DIR, + MODEL_W16, + MODEL_W16_3_TARGETS, + MODEL_W16_EQUAL_TARGET, + MODEL_W16_HIGH_SP_TARGET, + MODEL_W16_HIGH_SE_TARGET, + MODEL_W16_OTHER_SE, + MODEL_W16_OTHER_SP, + MODEL_W47, + MODEL_W47_3_TARGETS, + MODEL_W47_EQUAL_TARGET, + MODEL_W47_HIGH_SP_TARGET, + MODEL_W47_HIGH_SE_TARGET, + MODEL_W47_OTHER_SE, + MODEL_W47_OTHER_SP){ + + ############################################################################################## + ## ## + ## R code written by Michael White and adapted by Narimane Nekkab ## + ## Date: 3 March 2020 ## + ## ## + ## Description: ## + ## This R script determines the classification of sero+ and sero- cases from RAU data. ## + ## Classification performance is based on the current default model that uses ## + ## Brazilian, Thai, and Solomon Islands, and control data of the top 8 antigens ## + ## with a known senstivity and specificy. ## + ## It is based on this soon to be published work: ## + ## https://www.biorxiv.org/content/10.1101/481168v1 ## + ## ## + ## Input: ## + ## - Relative antibody unit data processed by the App or user uploaded RAU data ## + ## ## + ## Output: ## + ## - The id data, the RAU of the top 8 antigens, PvSero+ classification, and treatment ## + ## ## + ############################################################################################## + ############################################################################################## + + cat("******************Start classifier function******************\n") + + # Get RAU data + RAU_RESULTS = RAU_RESULTS + + cat("Data has been read inside function...\n") + + cat("RUN FUNCTION\n") + + ################################################################################################## + ####################### RAU processed by app or user submitted RAU_RESULTS ####################### + + # Determine in data is user-submitted or pre-processed by app if uploaded + if(PATHWAY_1){ + + # Subset dilution results + RAU_Dilution = RAU_RESULTS[,grepl( "_Dilution", names(RAU_RESULTS))] + RAU_Dilution = RAU_Dilution[,!grepl("DilutionReciprocal", names(RAU_Dilution))] + # Antigen names clean-up + colnames(RAU_Dilution) = str_replace(colnames(RAU_Dilution), "_Dilution", "") + colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) + # Remove any values in (parentheses) + colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) + + }else{ + + # Subset user-uploaded RAU data + RAU_Dilution = RAU_RESULTS[,(as.integer(RAU_user_id_columns)+1):(as.integer(RAU_user_id_columns)+as.integer(RAU_user_RAU_columns))] + # Antigen names clean-up + colnames(RAU_Dilution) = gsub("\\..*", "", colnames(RAU_Dilution)) + # Remove any values in (parentheses) + colnames(RAU_Dilution) = gsub("\\s*\\([^\\)]+\\)","", colnames(RAU_Dilution)) + + } + + # Correct for names and subset only top 8 antigens + cat("Match antigen names\n") + RAU_Dilution_Subset = getAntigenNames(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV) + + # Check that data is in a data.frame format and numeric + RAU_Dilution_Subset = as.data.frame(RAU_Dilution_Subset) + RAU_Dilution_Subset = sapply(RAU_Dilution_Subset, as.numeric) + + # Save full names + RAU_RFOREST_NAMES = colnames(RAU_Dilution_Subset) + + # Rename keeping only W names + colnames(RAU_Dilution_Subset) = substr(colnames(RAU_Dilution_Subset), 1, 3) + + ################################################################### + ####################### RANDOM FOREST MODEL ####################### + + # Select model chosen: W16 + if(MODEL_W16){ + # Reorder to match Random Forest model + W_Name_Order = c("W50", "W01", "W16", "W02", "W30", "W08", "W58", "W39") + RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] + colnames(RAU_Dilution_Subset) = W_Name_Order + + # TAKE LOG OF RAU + RAU_Dilution_Subset = log(RAU_Dilution_Subset) + + # Load Random Forest DEFAULT model fit + cat("Load model\n") + load(paste0(getwd(), "/MODEL/SVM_MODEL_W16.Rdata")) + # Number of trees + N_tree = 2500 + + ################ PREDICT + cat("Predict\n") + SVM_MODEL_PREDICTION = predict(SVM_8, newdata=RAU_Dilution_Subset, decision.values = TRUE) + ################ PREDICT + + # Tally votes + SVM_MODEL_VOTES = attr(SVM_MODEL_PREDICTION, "decision.values")[,1] + + # Merge results back to RAU + colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] + } + + # Select model chosen: W47 + if(MODEL_W47){ + # Reorder to match Random Forest model + W_Name_Order = c("W50", "W01", "W47", "W02", "W30", "W08", "W58", "W39") + RAU_Dilution_Subset = RAU_Dilution_Subset[,match(W_Name_Order, colnames(RAU_Dilution_Subset))] + colnames(RAU_Dilution_Subset) = W_Name_Order + + # TAKE LOG OF RAU + RAU_Dilution_Subset = log(RAU_Dilution_Subset) + + # Load Random Forest DEFAULT model fit + cat("Load model\n") + load(paste0(getwd(), "/MODEL/SVM_MODEL_W47.Rdata")) + # Number of trees + N_tree = 2500 + + ################ PREDICT + cat("Predict\n") + SVM_MODEL_PREDICTION = predict(SVM_8, newdata=RAU_Dilution_Subset, decision.values = TRUE) + ################ PREDICT + + # Tally votes + SVM_MODEL_VOTES = attr(SVM_MODEL_PREDICTION, "decision.values")[,1] + + # Merge results back to RAU + colnames(RAU_Dilution_Subset) = RAU_RFOREST_NAMES[match(colnames(RAU_Dilution_Subset), substr(RAU_RFOREST_NAMES, 1, 3))] + } + + ################################################################## + ##################### TARGET CUTOFFS BY MODEL #################### + + # Get ROC curves per model and targets + if(MODEL_W16){ + SVM_ROC=read.csv(paste0(getwd(), "/MODEL/SVM_W16_ROC_TABLE.csv")) + } + if(MODEL_W47){ + SVM_ROC=read.csv(paste0(getwd(), "/MODEL/SVM_W47_ROC_TABLE.csv")) + } + + ################### + ### Get cutoffs ### + + # Equal + CUTOFF_1 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SP - SVM_ROC$SE))] + + # Identify SP90 target + CUTOFF_2 = SVM_ROC$THRESHOLD[min(which(SVM_ROC$SP > 0.90))] + + # Identify SE90 target + CUTOFF_3 = SVM_ROC$THRESHOLD[max(which(SVM_ROC$SE > 0.90))] + + # Other + if(MODEL_W16_OTHER_SE){ + if(MODEL_W16){ + CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] + SP_Other = SVM_ROC$SP[which.min(abs(SVM_ROC$SE - MODEL_W16_HIGH_SE_VALUE))] + SE_Other = MODEL_W16_HIGH_SE_VALUE + }else{ + CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] + SP_Other = SVM_ROC$SP[which.min(abs(SVM_ROC$SE - MODEL_W47_HIGH_SE_VALUE))] + SE_Other = MODEL_W47_HIGH_SE_VALUE + } + } + if(MODEL_W16_OTHER_SP){ + if(MODEL_W16){ + CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] + SP_Other = MODEL_W16_HIGH_SP_VALUE + SE_Other = SVM_ROC$SE[which.min(abs(SVM_ROC$SP - MODEL_W16_HIGH_SP_VALUE))] + }else{ + CUTOFF_4 = SVM_ROC$THRESHOLD[which.min(abs(SVM_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] + SP_Other = MODEL_W47_HIGH_SP_VALUE + SE_Other = SVM_ROC$SE[which.min(abs(SVM_ROC$SP - MODEL_W47_HIGH_SP_VALUE))] + } + } + + # ALL 3 TARGETS OUTPUT + if(MODEL_W16_3_TARGETS | MODEL_W47_3_TARGETS){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # EQUAL TARGET OUTPUT + if(MODEL_W16_EQUAL_TARGET | MODEL_W47_EQUAL_TARGET){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + } + + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # HIGH SP TARGET OUTPUT + if(MODEL_W16_HIGH_SP_TARGET | MODEL_W47_HIGH_SP_TARGET){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + } + + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # HIGH SE TARGET OUTPUT + if(MODEL_W16_HIGH_SE_TARGET | MODEL_W47_HIGH_SE_TARGET){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + } + } + } + } + + # OTHER TARGETS + if(MODEL_W16_OTHER_SP | MODEL_W47_OTHER_SP){ + + #W16 + if(MODEL_W16){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + } + } + } + + #W47 + if(MODEL_W47){ + + # Create binary seropositivity value + if(PATHWAY_1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + + }else{ + if(RAU_user_id_columns == 1){ + col_name = colnames(RAU_RESULTS)[1] + SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) + } + if(RAU_user_id_columns > 1){ + SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) + } + } + } + } + + + ################################################## + ##################### OUTPUTS #################### + + cat("Start saving proces...\n") + + #################################################################################### + ############################ SAVE SEROPOSITIVTY RESULTS ############################ + + cat("SAVE SEROPOSITIVITY RESULTS\n") + write.csv(SVM_SEROPOS, file=paste0(EXP_DIR,"/PVSEROTAT_SVM_CLASS_RESULTS.csv"), row.names=F) + + cat("******************End Sero function******************\n") + + return(SVM_SEROPOS) + + +} diff --git a/RUN_PvSeroTAT.R b/RUN_PvSeroTAT.R index 4423b0d..f89df83 100644 --- a/RUN_PvSeroTAT.R +++ b/RUN_PvSeroTAT.R @@ -1,89 +1,89 @@ -######################################################### -######################################################### -### ### -### PvSeroTAT ### -### R Shiny App Script ### -### WEHI and Institut Pasteur ### -### ### -######################################################### -######################################################### - -######################### -### CHECK ENVIRONMENT ### -######################### - -# Check that current working directory is where all the shiny app files are located -getwd() -# If the working directory is incorrect, either -# 1) Close R Studio and re-open this file (that should be in the correct directory) -# 2) Set the working directory i.e. setwd(path) - - -###################### -### CHECK PACKAGES ### -###################### - -# List of required packaged to run app -required_packages = c("readxl", "drc", "dplyr","tidyr","plyr","shiny","ggplot2", - "shinythemes","stringr","ROCR","gplots","caTools","randomForest", - "forcats","kernlab") -# Check if any new packages need to be installed -new_packages = required_packages[!(required_packages %in% installed.packages()[,"Package"])] -# If any are missing, they will be installed -if(length(new_packages)) install.packages(new_packages) - -# Note: this section only needs to be read in once so you can comment these lines after the first run - - -########### -### SVM ### -########### - -# Please contact developers how to use this model and how to interpret results - - -##################### -### LOAD PACKAGES ### -##################### - -library(readxl) -library(drc) -library(dplyr) -library(tidyr) -library(plyr) -library(shiny) -library(ggplot2) -library(shinythemes) -library(stringr) -library(ROCR) -library(gplots) -library(caTools) -library(randomForest) -library(forcats) -library(kernlab) - - -######################### -### RUN PVSEROTAT APP ### -######################### - -# This line will run the PvSeroTat Shiny App by opening and running the script SHINY_APP.R -# Either run this script manually or click on Run App (on the upper right ^) -# Do not modify SHINY_APP.R unless asked to by the developer. -# Once this line is run, the app window will be opened. - -runApp('SHINY_APP.R') - -# Enter all of the necessary information on the app and hit the submit button. -# The progress of the analysis will be shown here on R in the console below. -# If any errors arise, they will be shown here on R in the console below. - -# Once you opn the window and run one analysis, you can modify what is necessary on the app -# and run the analysis as many times as you would like. -# The output folders of the results will be indicated. - - - - - - +######################################################### +######################################################### +### ### +### PvSeroTAT ### +### R Shiny App Script ### +### WEHI and Institut Pasteur ### +### ### +######################################################### +######################################################### + +######################### +### CHECK ENVIRONMENT ### +######################### + +# Check that current working directory is where all the shiny app files are located +getwd() +# If the working directory is incorrect, either +# 1) Close R Studio and re-open this file (that should be in the correct directory) +# 2) Set the working directory i.e. setwd(path) + + +###################### +### CHECK PACKAGES ### +###################### + +# List of required packaged to run app +required_packages = c("readxl", "drc", "dplyr","tidyr","plyr","shiny","ggplot2", + "shinythemes","stringr","ROCR","gplots","caTools","randomForest", + "forcats","kernlab") +# Check if any new packages need to be installed +new_packages = required_packages[!(required_packages %in% installed.packages()[,"Package"])] +# If any are missing, they will be installed +if(length(new_packages)) install.packages(new_packages) + +# Note: this section only needs to be read in once so you can comment these lines after the first run + + +########### +### SVM ### +########### + +# Please contact developers how to use this model and how to interpret results + + +##################### +### LOAD PACKAGES ### +##################### + +library(readxl) +library(drc) +library(dplyr) +library(tidyr) +library(plyr) +library(shiny) +library(ggplot2) +library(shinythemes) +library(stringr) +library(ROCR) +library(gplots) +library(caTools) +library(randomForest) +library(forcats) +library(kernlab) + + +######################### +### RUN PVSEROTAT APP ### +######################### + +# This line will run the PvSeroTat Shiny App by opening and running the script SHINY_APP.R +# Either run this script manually or click on Run App (on the upper right ^) +# Do not modify SHINY_APP.R unless asked to by the developer. +# Once this line is run, the app window will be opened. + +runApp('SHINY_APP.R') + +# Enter all of the necessary information on the app and hit the submit button. +# The progress of the analysis will be shown here on R in the console below. +# If any errors arise, they will be shown here on R in the console below. + +# Once you opn the window and run one analysis, you can modify what is necessary on the app +# and run the analysis as many times as you would like. +# The output folders of the results will be indicated. + + + + + + diff --git a/SHINY_APP.R b/SHINY_APP.R index 69deec7..0a3a99c 100644 --- a/SHINY_APP.R +++ b/SHINY_APP.R @@ -1,894 +1,894 @@ -######################################################### -######################################################### -### ### -### PvSeroTAT ### -### R Shiny App Script ### -### WEHI and Institut Pasteur ### -### ### -######################################################### -######################################################### - - -#################################################################### -############################ DEFINE APP ############################ -#################################################################### - -ui <- fluidPage( - - # Theme - theme = shinytheme("cosmo"), - - # Options - tags$head( - tags$style(HTML("hr {border-top: 1px solid #000000; margin: 10px 0 10px 0;}"), - HTML("h3 { margin: 10px 0 10px 0; }"), - HTML("h4 { margin: 10px 0 10px 0; }"), - HTML("h5 { margin: 20px 0 10px 0; color: #5e5e5e; font-weight:bold; }"), - HTML(".checkbox { margin-bottom: 10px; }"), - HTML(".help-block { margin: 10px 0 10px 0; }"), - HTML(".shiny-input-container { margin: 0 0 5px 0; }")) - ), - - - - ############################ MAIN PANEL ############################ - - fluidRow( - column(12, - align = "left", - br(), - mainPanel( - # Image - img(src = "IP_WEHI_logo_2020.png", height = 88, width = 468) - )), - column(12, - align = "left", - # App title ---- - h1(strong(" Pv SeroTAT Tool")), - hr())), - - - ############################ HEADER ########################### - - tabsetPanel( - tabPanel("About", - br(), - p("The Pv SeroTAT Tool has been developed by Ivo Mueller's research groups at the Institut Pasteur and Walter and Eliza Hall Institut of Medical Research. The tool is a patented diagnostic test based on validated serological markers of recent",tags$em("Plasmodium vivax")," infections and hypnozoite carriage. The tool uses machine learning classification algorithms (Random Forest (RF) and Support Vector Machine (SVM)) to predict sero-positivity with varying performance."), - p("Validation of this tool using Random Forest can be found in the", a("Nature Medicine Publication", href="https://www.nature.com/articles/s41591-020-0841-4")), - p("Please contact on how to cite this tool in publications."), - p(em("Follow steps 1 - 6 to process Luminex MFI or Relative Antibody Unit (RAU) data and run the diagnostic test.")), - hr()), - tabPanel("Biomarkers of exposure", - br(), - p("The tools has been validated with predicting sero-positivity with 8 biomarkers of exposure. They include:"), - tags$li("W01 | L01 | PVX_099980 | MSP1-19"), - tags$li("W02 | L02 | PVX_099980"), - tags$li("W08 | L12 | PVX_112670"), - tags$li("W30 | L34 | PVX_097625 | MSP8"), - tags$li("W39 | L54 | PVX_097720 | MSP3a"), - tags$li("W50 | PVX_094255 | RBP2b"), - tags$li("W58 | KMZ83376.1d | EBPII"), - tags$li("W47 | PVX_087885 | RAMA from CellFree Science", strong("OR"), "W16 | L23 | PVX_087885 from Japan"), - hr()), - tabPanel("Contacts", - br(), - p(a("Michael White", href="mailto:michael.white@pasteur.fr"),"|", - a("Rhea Longley", href="mailto:longley.r@wehi.edu.au"),"|", - a("Thomas Obadia", href="mailto:thomas.obadia@pasteur.fr"),"|", - a("Ramin Mazhari", href="mailto:mazhari@wehi.edu.au"),"|", - a("Narimane Nekkab", href="mailto:narimane.nekkab@pastuer.fr"),"|", - a("Shazia Ruybal", href="mailto:ruybal.s@wehi.edu.au")), - p("Please contact on how to cite in publications."), - hr())), - - ############################ MODEL VERSION ############################ - - tabsetPanel( - tabPanel("Random Forest Classifier", - ######################## RANDOM FOREST ######################## - radioButtons("model_version_RF", - label = h4(strong("1. Choose model")), - choices = list("With antigen W16 / L23 / CellFree Sciences" = 1, "With antigen W47 / PVX_087885 / Japan" = 0), - selected = 1), - helpText("Note 1: The Random Forest classification algorithm has been previously validated to determining sero-positivity. We have studied its performance extensively and is a reliable method."), - helpText("Note 2: The two RAMA proteins are the same but were produced by different labs. They will give slighly different predictions. - Check with Rhea Longley or Ramin Mazhari if unsure which was used. - If proteins were coupled before 2016, there is a chance the W47 protein was used.")) - - # , - # # SVM - # tabPanel("Support-Vector Machine Classifier", - # ######################## SUPPORT-VECTOR MACHINE ####################### - # radioButtons("model_version_SVM", - # label = h4(strong("1. Choose model")), - # choices = list("With antigen W16 / L23 / CellFree Sciences" = 1, "With antigen W47 / PVX_087885 / Japan" = 0), - # selected = 1), - # helpText("Note 1: The Support-Vector Machine classification algorithm has not been validated or studied extensively. However, it may have better performance. Please contact Michael White on how to interpret results."), - # helpText("Note 2: The two RAMA proteins are the same but were produced by different labs. They will give slighly different predictions. - # Check with Rhea Longley or Ramin Mazhari if unsure which was used. - # If proteins were coupled before 2016, there is a chance the W47 protein was used.")) - - ), - hr(), - - ############################ DIAGNOSTIC TARGETS ############################ - ############################## RANDOM FOREST ############################### - - # Model W16 - conditionalPanel("input.model_version_RF == 1", - radioButtons("diagnostic_tests_W16", - label = h4(strong("2. Choose diagnostic target")), - choices = list("All 3 default targets" = 0, - "79% sensitivity and 79% specificity" = 1, - "63% sensitivity and 90% specificity" = 2, - "90% sensitivity and 59% specificity" = 3, - "Other" = 4), - selected = 0)), - # Model W47 - conditionalPanel("input.model_version_RF == 0", - radioButtons("diagnostic_tests_W47", - label = h4(strong("2. Choose diagnostic target")), - choices = list("All 3 default targets" = 0, - "80% sensitivity and 80% specificity" = 1, - "64% sensitivity and 90% specificity" = 2, - "90% sensitivity and 60% specificity" = 3, - "Other" = 4), - selected = 0)), - - #### Other - conditionalPanel("input.diagnostic_tests_W16 == 4 | input.diagnostic_tests_W47 == 4", - radioButtons("other_target", - label = h4(strong("2.1 Choose target")), - choices = list("Sensitivity" = 0, - "Specificity" = 1)), - conditionalPanel("input.other_target == 0", - textInput("sensitivity_value", - p("Value between 0 to 1"), - value = NULL)), - conditionalPanel("input.other_target == 1", - textInput("specificity_value", - p("Value between 0 to 1"), - value = NULL))), - - helpText("Note: different prediction cutoffs can be used to determine sero-positivity. We chose 3 default targets along the ROC curve that we recommend; however, other targets can be selected."), - - - hr(), - - # ############################ DIAGNOSTIC TARGETS ############################ - # ########################## SUPPORT VECTOR MACHINE ########################## - # - # # Model W16 - # conditionalPanel("input.model_version_SVM == 1", - # radioButtons("diagnostic_tests_W16", - # label = h4(strong("2. Choose diagnostic target")), - # choices = list("All 3 default targets" = 0, - # "87% sensitivity and 87% specificity" = 1, - # "78% sensitivity and 90% specificity" = 2, - # "90% sensitivity and 86% specificity" = 3, - # "Other" = 4), - # selected = 0)), - # # Model W47 - # conditionalPanel("input.model_version_SVM == 0", - # radioButtons("diagnostic_tests_W47", - # label = h4(strong("2. Choose diagnostic target")), - # choices = list("All 3 default targets" = 0, - # "86% sensitivity and 86% specificity" = 1, - # "78% sensitivity and 90% specificity" = 2, - # "90% sensitivity and 85% specificity" = 3, - # "Other" = 4), - # selected = 0)), - # - # #### Other - # conditionalPanel("input.diagnostic_tests_W16 == 4 | input.diagnostic_tests_W47 == 4", - # radioButtons("other_target", - # label = h4(strong("2.1 Choose target")), - # choices = list("Sensitivity" = 0, - # "Specificity" = 1)), - # conditionalPanel("input.other_target == 0", - # textInput("sensitivity_value", - # p("Value between 0 to 1"), - # value = NULL)), - # conditionalPanel("input.other_target == 1", - # textInput("specificity_value", - # p("Value between 0 to 1"), - # value = NULL))), - # - # helpText("Note: different prediction cutoffs can be used to determine sero-positivity. We chose 3 default targets along the ROC curve that we recommend; however, other targets can be selected."), - # - - ############################ DATA UPLOAD ############################ - - radioButtons("radio_data", - label = h4(strong("3. Choose data to process")), - choices = list("Median Fluorescent Intensity (MFI) data" = 1, "Relative Antibody Units (RAU) data" = 0), - selected = 1), - helpText("Note: if MFI data is loaded, it will be converted into RAU and then processed."), - - hr(), - h4(strong("4. Data upload")), - - tabsetPanel( - tabPanel("Load MFI or RAU data files", - # br(), - ######################## MFI ######################## - conditionalPanel("input.radio_data == 1", - fileInput("MFI_file", - h5("4.1 Load raw MFI file (required)"), - accept = c(".csv", ".xlsx")), - helpText("Note: a (.csv) or (.xlsx) file of raw outputs of the Luminex-MagPix machine. - If you encounter errors when loading a (.csv) file, convert it to (.xlsx) in Excel and load the (.xlsx) file instead.")), - conditionalPanel("input.radio_data == 1", - textInput("MFI_num_antigens", - h5("4.2 Number of antigens processed"), - value = NULL), - helpText("Note: please indicate the number of antigens processed by the MagPix in this file. - This is required only if you have loaded a (.csv) file.")), - conditionalPanel("input.radio_data == 1", - fileInput("template", - h5("4.3 Load plate template (required)"), - accept = c(".csv", ".xlsx")), - helpText("Note: for each plate, upload a (.csv) or (.xlsx) file indicating the serial dilutions, - positions of blanks, and sample numbers. Refer to the default template (12 column, 7 row, - two-fold diluation from 1/50 to 1/25600)")), - ######################## RAU ######################## - conditionalPanel("input.radio_data == 0", - fileInput("RAU_file", - h5("4.1 Load RAU file (required)"), - accept = c(".csv", ".xlsx")), - helpText("Note: a (.csv) or (.xlsx) file with ID variables and antigen protein names in the column header")), - conditionalPanel("input.radio_data == 0", - radioButtons("radio_RAU_upload_type", - label = h5("4.2 File type"), - choices = list("RAU data file exported by this app" = 1, "RAU data file from other sources" = 0), - selected = 1), - helpText("Note: it is important to indicate if the data file comes from an outside source to be able to correctly read it in. - If data is from an outside source, file must contains ID columns and RAU data columns (untransformed) with the header label indicating - the antigens names that correspond to the default list (in RESOURCES folder) followed by '_Dilution' i.e. W50_Dilution, L01_Dilution, MSP3a_Dilution")), - conditionalPanel("input.radio_RAU_upload_type == 0", - textInput("RAU_user_id_columns", - h5("4.3 Number of ID columns (from the left)"), - value = NULL), - helpText("Note: input the total number of columns starting from the left that contain ID info (not including the RAU columns)"), - textInput("RAU_user_RAU_columns", - h5("4.4 Number of RAU columns (after the ID)"), - value = NULL), - helpText("Note: input the total number of columns after the ID columns corresponding to RAU. - This should correpond to the number of antigens assessed. - Please do not include more data than needed in the file."))), - ############################ DATA INFO ############################ - # tabPanel("Plate information", - # # br(), - # fluidRow( - # column(4, - # textInput("id", - # h5("4.4 Plate ID number (recommended)"), - # value = NULL), - # helpText("Note: plate ID for output file name")), - # column(4, - # dateInput("date", - # h5("4.5 Date input (recommended)")), - # helpText("Note: date for output file name; default is today's date")))), - - ############################ ANTIGEN INFO ############################ - tabPanel("Antigen names", - # br(), - fileInput("name", - h5("4.4 Load antigen naming reference file (optional)")), - helpText("Note: a (.csv) or (.xlsx) file indicating the reference W names under - column name 'Protein_Code' and your corresponding name under column 'Antigen_Name'. - A default reference file will be used if not provided."))), - - ############################ EXPERIMENT SUB-DIRECTORY ############################ - - hr(), - h4(strong("5. Data output")), - h5("5.1 Run only Relative Antibody Unit conversion without serological classification?"), - checkboxInput("RAU_only", "Yes", value = F), - helpText("Note: RAU conversion results in the output folder RESULTS will be created. - Serological classification with RF or SVM will not be outputed. - An option if all 8 antigens were not processed."), - h5("5.2 Change default experiment sub-directory folder name?"), - checkboxInput("keep_experiment_folder_name", "Yes", value = F), - helpText("Note: a default experiment sub-directory in the output folder RESULTS will be created. - If you wish to change the experiment folder name, give the new name."), - conditionalPanel("input.keep_experiment_folder_name == 1", - textInput("new_experiment_folder_name","5.3 Add folder name", value = NULL)), - - - ############################ SUBMIT ############################ - - hr(), - h4(strong("6. Run Analysis")), - h5(""), - actionButton("run","Submit"), - h5(""), - - - ############################ STATUS ############################ - - hr(), - h4(strong("Status:")), - fluidRow( - column(12, - textOutput("progress_RAU_warn"), - tags$head(tags$style("#progress_RAU_warn{color: red; - font-size: 16px; - }", - "#run{background-color:#34b3ff}")), - br()), - column(12, - textOutput("progress_RAU_done"), - tags$head(tags$style("#progress_RAU_done{color: green; - font-size: 16px; - }", - "#run{background-color:#34b3ff}")), - br()), - column(12, - textOutput("progress_SERO_done"), - tags$head(tags$style("#progress_SERO_done{color: green; - font-size: 16px; - }", - "#run{background-color:#34b3ff}")), - br())) - ) - -#################################################################### -############################ DEFINE APP ############################ -#################################################################### - -server <- function(input, output, session) { - - ############################ CONDITIONAL ON SUBMISSION ############################ - observeEvent(input$run, { - - ### START PROGRESS ### - # Create a Progress object - progress <- shiny::Progress$new() - # Make sure it closes when we exit this reactive, even if there's an error - on.exit(progress$close()) - # Create - progress$set(message = "PvSeroTAT Analysis", value = 0) - - ############################ START ############################ - - ###STEP 1### - progress$inc(1/9, detail = paste("Step ", 1)) - cat("############################ STEP 1: START ############################\n") - - # Step 1a - cat("Files has been submitted for analysis\n") - - # Step 1b - cat("Load functions\n") - source("FUNCTIONS.R") - - # Step 1c - cat(paste0("Working directory: ", print(getwd()),"\n")) - - ############################ MODEL PARAMS ############################ - - #RF - if(isTRUE(print(input$model_version_RF) == 1)){ - - # Model - MODEL_W16 = TRUE - MODEL_W47 = FALSE - - # Default - MODEL_W16_3_TARGETS = FALSE - MODEL_W16_EQUAL_TARGET = FALSE - MODEL_W16_HIGH_SP_TARGET = FALSE - MODEL_W16_HIGH_SE_TARGET = FALSE - MODEL_W16_OTHER_SE = FALSE - MODEL_W16_OTHER_SP = FALSE - # Default - MODEL_W47_3_TARGETS = FALSE - MODEL_W47_EQUAL_TARGET = FALSE - MODEL_W47_HIGH_SP_TARGET = FALSE - MODEL_W47_HIGH_SE_TARGET = FALSE - MODEL_W47_OTHER_SE = FALSE - MODEL_W47_OTHER_SP = FALSE - - # Which test target - if(isTRUE(print(input$diagnostic_tests_W16) == 0)){ - MODEL_W16_3_TARGETS = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W16) == 1)){ - MODEL_W16_EQUAL_TARGET = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W16) == 2)){ - MODEL_W16_HIGH_SP_TARGET = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W16) == 3)){ - MODEL_W16_HIGH_SE_TARGET = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W16) == 4)){ - if(isTRUE(print(input$other_target) == 0)){ - MODEL_W16_OTHER_SE = TRUE - MODEL_W16_HIGH_SE_VALUE = input$sensitivity_value - }else{ - MODEL_W16_OTHER_SP = TRUE - MODEL_W16_HIGH_SP_VALUE = input$specificity_value - } - } - }else{ - - # Model - MODEL_W47 = TRUE - MODEL_W16 = FALSE - - # Default - MODEL_W47_3_TARGETS = FALSE - MODEL_W47_EQUAL_TARGET = FALSE - MODEL_W47_HIGH_SP_TARGET = FALSE - MODEL_W47_HIGH_SE_TARGET = FALSE - MODEL_W47_OTHER_SE = FALSE - MODEL_W47_OTHER_SP = FALSE - # Default - MODEL_W16_3_TARGETS = FALSE - MODEL_W16_EQUAL_TARGET = FALSE - MODEL_W16_HIGH_SP_TARGET = FALSE - MODEL_W16_HIGH_SE_TARGET = FALSE - MODEL_W16_OTHER_SE = FALSE - MODEL_W16_OTHER_SP = FALSE - - # Which test target - if(isTRUE(print(input$diagnostic_tests_W47) == 0)){ - MODEL_W47_3_TARGETS = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W47) == 1)){ - MODEL_W47_EQUAL_TARGET = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W47) == 2)){ - MODEL_W47_HIGH_SP_TARGET = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W47) == 3)){ - MODEL_W47_HIGH_SE_TARGET = TRUE - } - if(isTRUE(print(input$diagnostic_tests_W47) == 4)){ - if(isTRUE(print(input$other_target) == 0)){ - MODEL_W47_OTHER_SE = TRUE - MODEL_W47_HIGH_SE_VALUE = input$sensitivity_value - }else{ - MODEL_W47_OTHER_SP = TRUE - MODEL_W47_HIGH_SP_VALUE = input$specificity_value - } - } - } - - # #SVM - # if(isTRUE(print(input$model_version_SVM) == 1)){ - # - # # Model - # MODEL_W16 = TRUE - # MODEL_W47 = FALSE - # - # # Default - # MODEL_W16_3_TARGETS = FALSE - # MODEL_W16_EQUAL_TARGET = FALSE - # MODEL_W16_HIGH_SP_TARGET = FALSE - # MODEL_W16_HIGH_SE_TARGET = FALSE - # MODEL_W16_OTHER_SE = FALSE - # MODEL_W16_OTHER_SP = FALSE - # # Default - # MODEL_W47_3_TARGETS = FALSE - # MODEL_W47_EQUAL_TARGET = FALSE - # MODEL_W47_HIGH_SP_TARGET = FALSE - # MODEL_W47_HIGH_SE_TARGET = FALSE - # MODEL_W47_OTHER_SE = FALSE - # MODEL_W47_OTHER_SP = FALSE - # - # # Which test target - # if(isTRUE(print(input$diagnostic_tests_W16) == 0)){ - # MODEL_W16_3_TARGETS = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W16) == 1)){ - # MODEL_W16_EQUAL_TARGET = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W16) == 2)){ - # MODEL_W16_HIGH_SP_TARGET = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W16) == 3)){ - # MODEL_W16_HIGH_SE_TARGET = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W16) == 4)){ - # if(isTRUE(print(input$other_target) == 0)){ - # MODEL_W16_OTHER_SE = TRUE - # MODEL_W16_HIGH_SE_VALUE = input$sensitivity_value - # }else{ - # MODEL_W16_OTHER_SP = TRUE - # MODEL_W16_HIGH_SP_VALUE = input$specificity_value - # } - # } - # }else{ - # - # # Model - # MODEL_W47 = TRUE - # MODEL_W16 = FALSE - # - # # Default - # MODEL_W47_3_TARGETS = FALSE - # MODEL_W47_EQUAL_TARGET = FALSE - # MODEL_W47_HIGH_SP_TARGET = FALSE - # MODEL_W47_HIGH_SE_TARGET = FALSE - # MODEL_W47_OTHER_SE = FALSE - # MODEL_W47_OTHER_SP = FALSE - # # Default - # MODEL_W16_3_TARGETS = FALSE - # MODEL_W16_EQUAL_TARGET = FALSE - # MODEL_W16_HIGH_SP_TARGET = FALSE - # MODEL_W16_HIGH_SE_TARGET = FALSE - # MODEL_W16_OTHER_SE = FALSE - # MODEL_W16_OTHER_SP = FALSE - # - # # Which test target - # if(isTRUE(print(input$diagnostic_tests_W47) == 0)){ - # MODEL_W47_3_TARGETS = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W47) == 1)){ - # MODEL_W47_EQUAL_TARGET = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W47) == 2)){ - # MODEL_W47_HIGH_SP_TARGET = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W47) == 3)){ - # MODEL_W47_HIGH_SE_TARGET = TRUE - # } - # if(isTRUE(print(input$diagnostic_tests_W47) == 4)){ - # if(isTRUE(print(input$other_target) == 0)){ - # MODEL_W47_OTHER_SE = TRUE - # MODEL_W47_HIGH_SE_VALUE = input$sensitivity_value - # }else{ - # MODEL_W47_OTHER_SP = TRUE - # MODEL_W47_HIGH_SP_VALUE = input$specificity_value - # } - # } - # } - - cat("############################ STEP 1: END ############################\n") - - ############################ DATA UPLOAD ############################ - - ###STEP 2### - progress$inc(2/9, detail = paste("Step", 2)) - cat("############################ STEP 2: START ############################\n") - - # Step 2: check if raw MFI file was uploaded or RAU file - if(isTRUE(print(input$radio_data) == 1)){ - - # Set analysis pathways (1 = RAU default, 2 = RAU user submitted) - PATHWAY_1 = TRUE - - # Print - cat(paste0("Load MFI file: ", print(input$MFI_file$name),"\n")) - cat(paste0("MFI file path: ", print(input$MFI_file$datapath),"\n")) - - # Step 2a: set file path - RAW_MFI_FILE_PATH = input$MFI_file$datapath - - # Step 2b: file name - RAW_MFI_FILE_NAME = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(input$MFI_file$name)) - cat(paste0("MFI file Name: ", print(RAW_MFI_FILE_NAME),"\n")) - - # Step 2c: CSV or XLSX uploaded? - MFI_CSV = ifelse(sub('.*\\.', '', print(input$MFI_file$name)) == "CSV" | - sub('.*\\.', '', print(input$MFI_file$name)) == "csv", TRUE, FALSE) - cat(paste0("MFI file type is CSV?: ", print(MFI_CSV),"\n")) - - # Step 2d: if a CSV file was uploaded, how many columns to index? - MFI_N_ANTIGENS = input$MFI_num_antigens - - }else{ - - # Print - cat(paste0("Load RAU file: ", print(input$RAU_file$name),"\n")) - cat(paste0("RAU file path: ", print(input$RAU_file$datapath),"\n")) - - # Step 2a: set file path - RAW_RAU_FILE_PATH = input$RAU_file$datapath - - # Step 2b: file name - RAW_RAU_FILE_NAME = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(input$RAU_file$name)) - cat(paste0("RAU file Name: ", print(RAW_RAU_FILE_NAME),"\n")) - - # Step 2c: CSV or XLSX uploaded? - RAU_CSV = ifelse(sub('.*\\.', '', print(input$RAU_file$name)) == "CSV" | - sub('.*\\.', '', print(input$RAU_file$name)) == "csv", TRUE, FALSE) - cat(paste0("RAU file type is CSV?: ", print(RAU_CSV),"\n")) - - # Step 2d: is it a user uploaded file? - if(isTRUE(print(input$radio_RAU_upload_type) == 0)){ - # Pathway - PATHWAY_1 = FALSE - # If user uploaded data, set ID and RAU columns for indexing - RAU_user_id_columns = input$RAU_user_id_columns - cat(paste0("Number of ID columns: ", print(RAU_user_id_columns),"\n")) - RAU_user_RAU_columns = input$RAU_user_RAU_columns - cat(paste0("Number of RAU columns: ", print(RAU_user_RAU_columns),"\n")) - }else{ - # Pathways - PATHWAY_1 = TRUE - # If not user uploaded, then ignore. - RAU_user_id_columns = NA - RAU_user_RAU_columns = NA - } - } - - cat("############################ STEP 2: END ############################\n") - - ###STEP 3### - progress$inc(3/9, detail = paste("Step", 3)) - cat("############################ STEP 3: START ############################\n") - - # Step 3: check if user submitted plate template file was loaded - if(is.null(input$template)){ - cat(paste0("No plate template file uploaded; default plate template layout file used\n")) - # Step 3a: File name - TEMPLATE_FILE_NAME = "/RESOURCES/DEFAULT_PLATE_TEMPLATE.xlsx" - # Step 3b: File path - TEMPLATE_FILE_PATH = paste0(getwd(),TEMPLATE_FILE_NAME) - # Step 3c: CSV or XLSX uploaded? - TEMPLATE_CSV = FALSE - }else{ - cat(paste0("Load plate template file: ", print(input$template$name),"\n")) - # Step 3a: File name - TEMPLATE_FILE_NAME = input$template$name - # Step 3b: File path - TEMPLATE_FILE_PATH = input$template$datapath - cat("User uploaded plate template file read\n") - # Step 3c: CSV or XLSX uploaded? - TEMPLATE_CSV = ifelse(sub('.*\\.', '', print(input$template$name)) == "CSV" | - sub('.*\\.', '', print(input$template$name)) == "csv", TRUE, FALSE) - cat(paste0("User loaded template file type is CSV?: ", print(TEMPLATE_CSV),"\n")) - } - - cat("############################ STEP 3: END ############################\n") - - ###STEP 4### - progress$inc(4/9, detail = paste("Step", 4)) - cat("############################ STEP 4: START ############################\n") - - # Step 4: check if user submitted plate template file was loaded - if(is.null(input$name)){ - cat(paste0("No antigen name reference file uploaded; default antigen naming file used\n")) - # Step 4a: File name - if(isTRUE(print(input$model_version_RF) == 1)){ - ANTIGEN_FILE_NAME = "/RESOURCES/ANTIGEN_NAMES_TOP_8_W16.csv" - }else{ - ANTIGEN_FILE_NAME = "/RESOURCES/ANTIGEN_NAMES_TOP_8_W47.csv" - } - # Step 4a: File path - ANTIGEN_FILE_PATH = paste0(getwd(),ANTIGEN_FILE_NAME) - # Step 4c: CSV or XLSX uploaded? - ANTIGEN_CSV = TRUE - }else{ - cat(paste0("Load antigen name reference file: ", print(input$name$name),"\n")) - # Step 4a: File name - ANTIGEN_FILE_NAME = input$name$name - # Step 4a: File path - ANTIGEN_FILE_PATH = input$name$datapath - cat("User uploaded antigen name reference file read\n") - # Step 4c: CSV or XLSX uploaded? - ANTIGEN_CSV = ifelse(sub('.*\\.', '', print(input$name$name)) == "CSV" | - sub('.*\\.', '', print(input$name$name)) == "csv", TRUE, FALSE) - cat(paste0("User loaded antigen name file type is CSV?: ", print(ANTIGEN_CSV),"\n")) - } - - cat("############################ STEP 4: END ############################\n") - - ############################ DATA INFO ############################ - - ###STEP 5### - progress$inc(5/9, detail = paste("Step", 5)) - cat("############################ STEP 5: START ############################\n") - - # Step 5: check if user submitted plate ID - CHECK_ID = isTRUE(print(input$id) == "") - if(isTRUE(print(input$id) == "")){ - cat(paste0("No plate ID given (recommended); default = MFI or RAU file name\n")) - if(isTRUE(print(input$radio_data) == 1)){ - ID = RAW_MFI_FILE_NAME - }else{ - ID = RAW_RAU_FILE_NAME - } - }else{ - ID = input$id - cat(paste0("Plate ID: ", print(input$id),"\n")) - } - - cat("############################ STEP 5: END ############################\n") - - ###STEP 6### - progress$inc(6/9, detail = paste("Step", 6)) - cat("############################ STEP 6: START ############################\n") - - # Step 6: load date - if(is.null(input$date)){ - cat(paste0("No date given (recommended); default = today's date\n")) - DATE = print(input$date) - }else{ - DATE = input$date - cat(paste0("File date: ", print(input$date),"\n")) - } - - cat("############################ STEP 6: END ############################\n") - - - ###STEP 7### - progress$inc(7/9, detail = paste("Step", 7)) - cat("############################ STEP 7: START ############################\n") - - # Step 7a: Get experiment sub-directory name - if(isTRUE(print(input$new_experiment_folder_name) == "")){ - cat(paste0("No experiment folder name given; default = MFI or RAU file name used\n")) - if(isTRUE(print(input$radio_data) == 1)){ - EXP_DIR = paste0(getwd(),"/RESULTS/",RAW_MFI_FILE_NAME,"/") - }else{ - EXP_DIR = paste0(getwd(),"/RESULTS/",RAW_RAU_FILE_NAME,"/") - } - }else{ - EXP_DIR_NAME = input$new_experiment_folder_name - EXP_DIR = paste0(getwd(),"/RESULTS/",EXP_DIR_NAME,"/") - } - - # Step 7b: Create experiment sub-directory name - dir.create(EXP_DIR) - cat(paste0("Create experiment directory: ", print(EXP_DIR),"\n")) - - cat("############################ STEP 7: END ############################\n") - - - ############################ RELATIVE ANTIBODY UNITS ############################ - - ###STEP 8### - progress$inc(8/9, detail = "Step 8 (RAU)") - cat("############################ STEP 8: START ############################\n") - - # If MFI file loaded, run RAU function; else, skip and load RAU file - if(isTRUE(print(input$radio_data) == 1)){ - - # Step 8a: create variable to either export RAU files or not - if(isTRUE(print(input$checkbox_RAU) == 1)){ - cat(paste0("Export RAU outputs\n")) - RAU_OUTPUT = TRUE - }else{ - cat(paste0("No RAU outputs\n")) - RAU_OUTPUT = FALSE - } - - # Step 8b: check if user submitted RAU folder name - if(isTRUE(print(input$new_experiment_folder_name) == "")){ - cat(paste0("No output folder given; default = RESULTS\n")) - RAU_OUTPUT_FOLDER = paste0(getwd(), "/RESULTS") - }else{ - RAU_OUTPUT_FOLDER = input$new_experiment_folder_name - cat(paste0("RAU output folder name: ", print(input$new_experiment_folder_name),"\n")) - } - - # Step 8c: check if user submitted RAU file name - if(isTRUE(print(input$output_RAU_file) == "")){ - cat(paste0("No output file name given; default = contains raw file name and/or plate ID and/or date (e.g. RAU_Plate_ID_DATE.csv\n")) - if(isTRUE(print(input$radio_data) == 1)){ - RAU_OUTPUT_FILE_NAME = RAW_MFI_FILE_NAME - }else{ - RAU_OUTPUT_FILE_NAME = RAW_RAU_FILE_NAME - } - }else{ - RAU_OUTPUT_FILE_NAME = input$output_RAU_file - cat(paste0("Output file name: ", print(input$output_RAU_file),"\n")) - } - - ############################ RUN RAU ANALYSIS ############################ - - #### STEP 8d - cat("LAUNCH RAU FUNCTION\n") - RESULTS = getRelativeAntibodyUnits(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, - MFI_CSV, MFI_N_ANTIGENS, - TEMPLATE_FILE_PATH, TEMPLATE_CSV, - ID, DATE, - EXP_DIR) - RAU_RESULTS = RESULTS[[1]] - - }else{ - - # Step 8: load submitted RAU file - if(RAU_CSV){ - RAU_RESULTS = read.csv(RAW_RAU_FILE_PATH) - }else{ - RAU_RESULTS = as.data.frame(read_excel(RAW_RAU_FILE_PATH)) - } - - } - - cat("############################ STEP 8: END ############################\n") - - ############################ SEROPOSITIVE OUTPUTS ############################ - - ###STEP 9### - progress$inc(9/9, detail = "Step 9 (SERO)") - cat("############################ STEP 9: START ############################\n") - - ############################ RUN RANDOM FOREST ANALYSIS ############################ - - if(isTRUE(print(input$RAU_only) == 0 & print(input$model_version_RF) == 1 | print(input$RAU_only) == 0 & print(input$model_version_RF) == 0)){ - cat("LAUNCH RANDOM FOREST CLASSIFIER FUNCTION\n") - RFOREST_VOTES_SEROPOS = getSeropositiveResults_RF(PATHWAY_1, - RAU_user_id_columns, RAU_user_RAU_columns, RAU_RESULTS, - ANTIGEN_FILE_PATH, ANTIGEN_CSV, - CHECK_NAME, CHECK_ID, ID, DATE, - EXP_DIR, - MODEL_W16, - MODEL_W16_3_TARGETS, - MODEL_W16_EQUAL_TARGET, - MODEL_W16_HIGH_SP_TARGET, - MODEL_W16_HIGH_SE_TARGET, - MODEL_W16_OTHER_SE, - MODEL_W16_OTHER_SP, - MODEL_W47, - MODEL_W47_3_TARGETS,MODEL_W47_EQUAL_TARGET, - MODEL_W47_HIGH_SP_TARGET, - MODEL_W47_HIGH_SE_TARGET, - MODEL_W47_OTHER_SE, - MODEL_W47_OTHER_SP) - - cat("DONE\n") - } - - ############################ RUN SVM ANALYSIS ############################ - - # if(isTRUE(print(input$model_version_SVM) == 1 | print(input$model_version_SVM) == 0)){ - # cat("LAUNCH SVM CLASSIFIER FUNCTION\n") - # RFOREST_VOTES_SEROPOS = getSeropositiveResults_SVM(PATHWAY_1, - # RAU_user_id_columns, RAU_user_RAU_columns, RAU_RESULTS, - # ANTIGEN_FILE_PATH, ANTIGEN_CSV, - # SERO_OUTPUT_FILE_NAME, SERO_OUTPUT_FOLDER, - # CHECK_NAME, CHECK_ID, ID, DATE, - # MODEL_W16, - # MODEL_W16_3_TARGETS, - # MODEL_W16_EQUAL_TARGET, - # MODEL_W16_HIGH_SP_TARGET, - # MODEL_W16_HIGH_SE_TARGET, - # MODEL_W16_OTHER_SE, - # MODEL_W16_OTHER_SP, - # MODEL_W47, - # MODEL_W47_3_TARGETS,MODEL_W47_EQUAL_TARGET, - # MODEL_W47_HIGH_SP_TARGET, - # MODEL_W47_HIGH_SE_TARGET, - # MODEL_W47_OTHER_SE, - # MODEL_W47_OTHER_SP) - # - # cat("DONE\n") - # } - - ###INDICATE RAU ANALYSIS HAS FINISHED### - if(isTRUE(print(input$radio_data) == 1)){ - output$progress_RAU_done <- renderText({ - paste0("Relative Antibody Unit conversion has finished, check output folder (",EXP_DIR,")") - }) - }else{ - output$progress_RAU_done <- renderText({ - paste0("") - }) - } - ###INDICATE SERO ANALYSIS HAS FINISHED### - if(isTRUE(print(input$RAU_only) == 0)){ - output$progress_SERO_done <- renderText({ - paste0("PvSeroTAT analysis has finished, check output folder (",EXP_DIR,")") - }) - } - - cat("############################ STEP 9: END ############################\n") - - cat("############################ ANALYSIS END ############################\n") - - }) - -} - - - -### END -shinyApp(ui = ui, server = server) - - +######################################################### +######################################################### +### ### +### PvSeroTAT ### +### R Shiny App Script ### +### WEHI and Institut Pasteur ### +### ### +######################################################### +######################################################### + + +#################################################################### +############################ DEFINE APP ############################ +#################################################################### + +ui <- fluidPage( + + # Theme + theme = shinytheme("cosmo"), + + # Options + tags$head( + tags$style(HTML("hr {border-top: 1px solid #000000; margin: 10px 0 10px 0;}"), + HTML("h3 { margin: 10px 0 10px 0; }"), + HTML("h4 { margin: 10px 0 10px 0; }"), + HTML("h5 { margin: 20px 0 10px 0; color: #5e5e5e; font-weight:bold; }"), + HTML(".checkbox { margin-bottom: 10px; }"), + HTML(".help-block { margin: 10px 0 10px 0; }"), + HTML(".shiny-input-container { margin: 0 0 5px 0; }")) + ), + + + + ############################ MAIN PANEL ############################ + + fluidRow( + column(12, + align = "left", + br(), + mainPanel( + # Image + img(src = "IP_WEHI_logo_2020.png", height = 88, width = 468) + )), + column(12, + align = "left", + # App title ---- + h1(strong(" Pv SeroTAT Tool")), + hr())), + + + ############################ HEADER ########################### + + tabsetPanel( + tabPanel("About", + br(), + p("The Pv SeroTAT Tool has been developed by Ivo Mueller's research groups at the Institut Pasteur and Walter and Eliza Hall Institut of Medical Research. The tool is a patented diagnostic test based on validated serological markers of recent",tags$em("Plasmodium vivax")," infections and hypnozoite carriage. The tool uses machine learning classification algorithms (Random Forest (RF) and Support Vector Machine (SVM)) to predict sero-positivity with varying performance."), + p("Validation of this tool using Random Forest can be found in the", a("Nature Medicine Publication", href="https://www.nature.com/articles/s41591-020-0841-4")), + p("Please contact on how to cite this tool in publications."), + p(em("Follow steps 1 - 6 to process Luminex MFI or Relative Antibody Unit (RAU) data and run the diagnostic test.")), + hr()), + tabPanel("Biomarkers of exposure", + br(), + p("The tools has been validated with predicting sero-positivity with 8 biomarkers of exposure. They include:"), + tags$li("W01 | L01 | PVX_099980 | MSP1-19"), + tags$li("W02 | L02 | PVX_099980"), + tags$li("W08 | L12 | PVX_112670"), + tags$li("W30 | L34 | PVX_097625 | MSP8"), + tags$li("W39 | L54 | PVX_097720 | MSP3a"), + tags$li("W50 | PVX_094255 | RBP2b"), + tags$li("W58 | KMZ83376.1d | EBPII"), + tags$li("W47 | PVX_087885 | RAMA from CellFree Science", strong("OR"), "W16 | L23 | PVX_087885 from Japan"), + hr()), + tabPanel("Contacts", + br(), + p(a("Michael White", href="mailto:michael.white@pasteur.fr"),"|", + a("Rhea Longley", href="mailto:longley.r@wehi.edu.au"),"|", + a("Thomas Obadia", href="mailto:thomas.obadia@pasteur.fr"),"|", + a("Ramin Mazhari", href="mailto:mazhari@wehi.edu.au"),"|", + a("Narimane Nekkab", href="mailto:narimane.nekkab@pastuer.fr"),"|", + a("Shazia Ruybal", href="mailto:ruybal.s@wehi.edu.au")), + p("Please contact on how to cite in publications."), + hr())), + + ############################ MODEL VERSION ############################ + + tabsetPanel( + tabPanel("Random Forest Classifier", + ######################## RANDOM FOREST ######################## + radioButtons("model_version_RF", + label = h4(strong("1. Choose model")), + choices = list("With antigen W16 / L23 / CellFree Sciences" = 1, "With antigen W47 / PVX_087885 / Japan" = 0), + selected = 1), + helpText("Note 1: The Random Forest classification algorithm has been previously validated to determining sero-positivity. We have studied its performance extensively and is a reliable method."), + helpText("Note 2: The two RAMA proteins are the same but were produced by different labs. They will give slighly different predictions. + Check with Rhea Longley or Ramin Mazhari if unsure which was used. + If proteins were coupled before 2016, there is a chance the W47 protein was used.")) + + # , + # # SVM + # tabPanel("Support-Vector Machine Classifier", + # ######################## SUPPORT-VECTOR MACHINE ####################### + # radioButtons("model_version_SVM", + # label = h4(strong("1. Choose model")), + # choices = list("With antigen W16 / L23 / CellFree Sciences" = 1, "With antigen W47 / PVX_087885 / Japan" = 0), + # selected = 1), + # helpText("Note 1: The Support-Vector Machine classification algorithm has not been validated or studied extensively. However, it may have better performance. Please contact Michael White on how to interpret results."), + # helpText("Note 2: The two RAMA proteins are the same but were produced by different labs. They will give slighly different predictions. + # Check with Rhea Longley or Ramin Mazhari if unsure which was used. + # If proteins were coupled before 2016, there is a chance the W47 protein was used.")) + + ), + hr(), + + ############################ DIAGNOSTIC TARGETS ############################ + ############################## RANDOM FOREST ############################### + + # Model W16 + conditionalPanel("input.model_version_RF == 1", + radioButtons("diagnostic_tests_W16", + label = h4(strong("2. Choose diagnostic target")), + choices = list("All 3 default targets" = 0, + "79% sensitivity and 79% specificity" = 1, + "63% sensitivity and 90% specificity" = 2, + "90% sensitivity and 59% specificity" = 3, + "Other" = 4), + selected = 0)), + # Model W47 + conditionalPanel("input.model_version_RF == 0", + radioButtons("diagnostic_tests_W47", + label = h4(strong("2. Choose diagnostic target")), + choices = list("All 3 default targets" = 0, + "80% sensitivity and 80% specificity" = 1, + "64% sensitivity and 90% specificity" = 2, + "90% sensitivity and 60% specificity" = 3, + "Other" = 4), + selected = 0)), + + #### Other + conditionalPanel("input.diagnostic_tests_W16 == 4 | input.diagnostic_tests_W47 == 4", + radioButtons("other_target", + label = h4(strong("2.1 Choose target")), + choices = list("Sensitivity" = 0, + "Specificity" = 1)), + conditionalPanel("input.other_target == 0", + textInput("sensitivity_value", + p("Value between 0 to 1"), + value = NULL)), + conditionalPanel("input.other_target == 1", + textInput("specificity_value", + p("Value between 0 to 1"), + value = NULL))), + + helpText("Note: different prediction cutoffs can be used to determine sero-positivity. We chose 3 default targets along the ROC curve that we recommend; however, other targets can be selected."), + + + hr(), + + # ############################ DIAGNOSTIC TARGETS ############################ + # ########################## SUPPORT VECTOR MACHINE ########################## + # + # # Model W16 + # conditionalPanel("input.model_version_SVM == 1", + # radioButtons("diagnostic_tests_W16", + # label = h4(strong("2. Choose diagnostic target")), + # choices = list("All 3 default targets" = 0, + # "87% sensitivity and 87% specificity" = 1, + # "78% sensitivity and 90% specificity" = 2, + # "90% sensitivity and 86% specificity" = 3, + # "Other" = 4), + # selected = 0)), + # # Model W47 + # conditionalPanel("input.model_version_SVM == 0", + # radioButtons("diagnostic_tests_W47", + # label = h4(strong("2. Choose diagnostic target")), + # choices = list("All 3 default targets" = 0, + # "86% sensitivity and 86% specificity" = 1, + # "78% sensitivity and 90% specificity" = 2, + # "90% sensitivity and 85% specificity" = 3, + # "Other" = 4), + # selected = 0)), + # + # #### Other + # conditionalPanel("input.diagnostic_tests_W16 == 4 | input.diagnostic_tests_W47 == 4", + # radioButtons("other_target", + # label = h4(strong("2.1 Choose target")), + # choices = list("Sensitivity" = 0, + # "Specificity" = 1)), + # conditionalPanel("input.other_target == 0", + # textInput("sensitivity_value", + # p("Value between 0 to 1"), + # value = NULL)), + # conditionalPanel("input.other_target == 1", + # textInput("specificity_value", + # p("Value between 0 to 1"), + # value = NULL))), + # + # helpText("Note: different prediction cutoffs can be used to determine sero-positivity. We chose 3 default targets along the ROC curve that we recommend; however, other targets can be selected."), + # + + ############################ DATA UPLOAD ############################ + + radioButtons("radio_data", + label = h4(strong("3. Choose data to process")), + choices = list("Median Fluorescent Intensity (MFI) data" = 1, "Relative Antibody Units (RAU) data" = 0), + selected = 1), + helpText("Note: if MFI data is loaded, it will be converted into RAU and then processed."), + + hr(), + h4(strong("4. Data upload")), + + tabsetPanel( + tabPanel("Load MFI or RAU data files", + # br(), + ######################## MFI ######################## + conditionalPanel("input.radio_data == 1", + fileInput("MFI_file", + h5("4.1 Load raw MFI file (required)"), + accept = c(".csv", ".xlsx")), + helpText("Note: a (.csv) or (.xlsx) file of raw outputs of the Luminex-MagPix machine. + If you encounter errors when loading a (.csv) file, convert it to (.xlsx) in Excel and load the (.xlsx) file instead.")), + conditionalPanel("input.radio_data == 1", + textInput("MFI_num_antigens", + h5("4.2 Number of antigens processed"), + value = NULL), + helpText("Note: please indicate the number of antigens processed by the MagPix in this file. + This is required only if you have loaded a (.csv) file.")), + conditionalPanel("input.radio_data == 1", + fileInput("template", + h5("4.3 Load plate template (required)"), + accept = c(".csv", ".xlsx")), + helpText("Note: for each plate, upload a (.csv) or (.xlsx) file indicating the serial dilutions, + positions of blanks, and sample numbers. Refer to the default template (12 column, 7 row, + two-fold diluation from 1/50 to 1/25600)")), + ######################## RAU ######################## + conditionalPanel("input.radio_data == 0", + fileInput("RAU_file", + h5("4.1 Load RAU file (required)"), + accept = c(".csv", ".xlsx")), + helpText("Note: a (.csv) or (.xlsx) file with ID variables and antigen protein names in the column header")), + conditionalPanel("input.radio_data == 0", + radioButtons("radio_RAU_upload_type", + label = h5("4.2 File type"), + choices = list("RAU data file exported by this app" = 1, "RAU data file from other sources" = 0), + selected = 1), + helpText("Note: it is important to indicate if the data file comes from an outside source to be able to correctly read it in. + If data is from an outside source, file must contains ID columns and RAU data columns (untransformed) with the header label indicating + the antigens names that correspond to the default list (in RESOURCES folder) followed by '_Dilution' i.e. W50_Dilution, L01_Dilution, MSP3a_Dilution")), + conditionalPanel("input.radio_RAU_upload_type == 0", + textInput("RAU_user_id_columns", + h5("4.3 Number of ID columns (from the left)"), + value = NULL), + helpText("Note: input the total number of columns starting from the left that contain ID info (not including the RAU columns)"), + textInput("RAU_user_RAU_columns", + h5("4.4 Number of RAU columns (after the ID)"), + value = NULL), + helpText("Note: input the total number of columns after the ID columns corresponding to RAU. + This should correpond to the number of antigens assessed. + Please do not include more data than needed in the file."))), + ############################ DATA INFO ############################ + # tabPanel("Plate information", + # # br(), + # fluidRow( + # column(4, + # textInput("id", + # h5("4.4 Plate ID number (recommended)"), + # value = NULL), + # helpText("Note: plate ID for output file name")), + # column(4, + # dateInput("date", + # h5("4.5 Date input (recommended)")), + # helpText("Note: date for output file name; default is today's date")))), + + ############################ ANTIGEN INFO ############################ + tabPanel("Antigen names", + # br(), + fileInput("name", + h5("4.4 Load antigen naming reference file (optional)")), + helpText("Note: a (.csv) or (.xlsx) file indicating the reference W names under + column name 'Protein_Code' and your corresponding name under column 'Antigen_Name'. + A default reference file will be used if not provided."))), + + ############################ EXPERIMENT SUB-DIRECTORY ############################ + + hr(), + h4(strong("5. Data output")), + h5("5.1 Run only Relative Antibody Unit conversion without serological classification?"), + checkboxInput("RAU_only", "Yes", value = F), + helpText("Note: RAU conversion results in the output folder RESULTS will be created. + Serological classification with RF or SVM will not be outputed. + An option if all 8 antigens were not processed."), + h5("5.2 Change default experiment sub-directory folder name?"), + checkboxInput("keep_experiment_folder_name", "Yes", value = F), + helpText("Note: a default experiment sub-directory in the output folder RESULTS will be created. + If you wish to change the experiment folder name, give the new name."), + conditionalPanel("input.keep_experiment_folder_name == 1", + textInput("new_experiment_folder_name","5.3 Add folder name", value = NULL)), + + + ############################ SUBMIT ############################ + + hr(), + h4(strong("6. Run Analysis")), + h5(""), + actionButton("run","Submit"), + h5(""), + + + ############################ STATUS ############################ + + hr(), + h4(strong("Status:")), + fluidRow( + column(12, + textOutput("progress_RAU_warn"), + tags$head(tags$style("#progress_RAU_warn{color: red; + font-size: 16px; + }", + "#run{background-color:#34b3ff}")), + br()), + column(12, + textOutput("progress_RAU_done"), + tags$head(tags$style("#progress_RAU_done{color: green; + font-size: 16px; + }", + "#run{background-color:#34b3ff}")), + br()), + column(12, + textOutput("progress_SERO_done"), + tags$head(tags$style("#progress_SERO_done{color: green; + font-size: 16px; + }", + "#run{background-color:#34b3ff}")), + br())) + ) + +#################################################################### +############################ DEFINE APP ############################ +#################################################################### + +server <- function(input, output, session) { + + ############################ CONDITIONAL ON SUBMISSION ############################ + observeEvent(input$run, { + + ### START PROGRESS ### + # Create a Progress object + progress <- shiny::Progress$new() + # Make sure it closes when we exit this reactive, even if there's an error + on.exit(progress$close()) + # Create + progress$set(message = "PvSeroTAT Analysis", value = 0) + + ############################ START ############################ + + ###STEP 1### + progress$inc(1/9, detail = paste("Step ", 1)) + cat("############################ STEP 1: START ############################\n") + + # Step 1a + cat("Files has been submitted for analysis\n") + + # Step 1b + cat("Load functions\n") + source("FUNCTIONS.R") + + # Step 1c + cat(paste0("Working directory: ", print(getwd()),"\n")) + + ############################ MODEL PARAMS ############################ + + #RF + if(isTRUE(print(input$model_version_RF) == 1)){ + + # Model + MODEL_W16 = TRUE + MODEL_W47 = FALSE + + # Default + MODEL_W16_3_TARGETS = FALSE + MODEL_W16_EQUAL_TARGET = FALSE + MODEL_W16_HIGH_SP_TARGET = FALSE + MODEL_W16_HIGH_SE_TARGET = FALSE + MODEL_W16_OTHER_SE = FALSE + MODEL_W16_OTHER_SP = FALSE + # Default + MODEL_W47_3_TARGETS = FALSE + MODEL_W47_EQUAL_TARGET = FALSE + MODEL_W47_HIGH_SP_TARGET = FALSE + MODEL_W47_HIGH_SE_TARGET = FALSE + MODEL_W47_OTHER_SE = FALSE + MODEL_W47_OTHER_SP = FALSE + + # Which test target + if(isTRUE(print(input$diagnostic_tests_W16) == 0)){ + MODEL_W16_3_TARGETS = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W16) == 1)){ + MODEL_W16_EQUAL_TARGET = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W16) == 2)){ + MODEL_W16_HIGH_SP_TARGET = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W16) == 3)){ + MODEL_W16_HIGH_SE_TARGET = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W16) == 4)){ + if(isTRUE(print(input$other_target) == 0)){ + MODEL_W16_OTHER_SE = TRUE + MODEL_W16_HIGH_SE_VALUE = input$sensitivity_value + }else{ + MODEL_W16_OTHER_SP = TRUE + MODEL_W16_HIGH_SP_VALUE = input$specificity_value + } + } + }else{ + + # Model + MODEL_W47 = TRUE + MODEL_W16 = FALSE + + # Default + MODEL_W47_3_TARGETS = FALSE + MODEL_W47_EQUAL_TARGET = FALSE + MODEL_W47_HIGH_SP_TARGET = FALSE + MODEL_W47_HIGH_SE_TARGET = FALSE + MODEL_W47_OTHER_SE = FALSE + MODEL_W47_OTHER_SP = FALSE + # Default + MODEL_W16_3_TARGETS = FALSE + MODEL_W16_EQUAL_TARGET = FALSE + MODEL_W16_HIGH_SP_TARGET = FALSE + MODEL_W16_HIGH_SE_TARGET = FALSE + MODEL_W16_OTHER_SE = FALSE + MODEL_W16_OTHER_SP = FALSE + + # Which test target + if(isTRUE(print(input$diagnostic_tests_W47) == 0)){ + MODEL_W47_3_TARGETS = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W47) == 1)){ + MODEL_W47_EQUAL_TARGET = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W47) == 2)){ + MODEL_W47_HIGH_SP_TARGET = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W47) == 3)){ + MODEL_W47_HIGH_SE_TARGET = TRUE + } + if(isTRUE(print(input$diagnostic_tests_W47) == 4)){ + if(isTRUE(print(input$other_target) == 0)){ + MODEL_W47_OTHER_SE = TRUE + MODEL_W47_HIGH_SE_VALUE = input$sensitivity_value + }else{ + MODEL_W47_OTHER_SP = TRUE + MODEL_W47_HIGH_SP_VALUE = input$specificity_value + } + } + } + + # #SVM + # if(isTRUE(print(input$model_version_SVM) == 1)){ + # + # # Model + # MODEL_W16 = TRUE + # MODEL_W47 = FALSE + # + # # Default + # MODEL_W16_3_TARGETS = FALSE + # MODEL_W16_EQUAL_TARGET = FALSE + # MODEL_W16_HIGH_SP_TARGET = FALSE + # MODEL_W16_HIGH_SE_TARGET = FALSE + # MODEL_W16_OTHER_SE = FALSE + # MODEL_W16_OTHER_SP = FALSE + # # Default + # MODEL_W47_3_TARGETS = FALSE + # MODEL_W47_EQUAL_TARGET = FALSE + # MODEL_W47_HIGH_SP_TARGET = FALSE + # MODEL_W47_HIGH_SE_TARGET = FALSE + # MODEL_W47_OTHER_SE = FALSE + # MODEL_W47_OTHER_SP = FALSE + # + # # Which test target + # if(isTRUE(print(input$diagnostic_tests_W16) == 0)){ + # MODEL_W16_3_TARGETS = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W16) == 1)){ + # MODEL_W16_EQUAL_TARGET = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W16) == 2)){ + # MODEL_W16_HIGH_SP_TARGET = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W16) == 3)){ + # MODEL_W16_HIGH_SE_TARGET = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W16) == 4)){ + # if(isTRUE(print(input$other_target) == 0)){ + # MODEL_W16_OTHER_SE = TRUE + # MODEL_W16_HIGH_SE_VALUE = input$sensitivity_value + # }else{ + # MODEL_W16_OTHER_SP = TRUE + # MODEL_W16_HIGH_SP_VALUE = input$specificity_value + # } + # } + # }else{ + # + # # Model + # MODEL_W47 = TRUE + # MODEL_W16 = FALSE + # + # # Default + # MODEL_W47_3_TARGETS = FALSE + # MODEL_W47_EQUAL_TARGET = FALSE + # MODEL_W47_HIGH_SP_TARGET = FALSE + # MODEL_W47_HIGH_SE_TARGET = FALSE + # MODEL_W47_OTHER_SE = FALSE + # MODEL_W47_OTHER_SP = FALSE + # # Default + # MODEL_W16_3_TARGETS = FALSE + # MODEL_W16_EQUAL_TARGET = FALSE + # MODEL_W16_HIGH_SP_TARGET = FALSE + # MODEL_W16_HIGH_SE_TARGET = FALSE + # MODEL_W16_OTHER_SE = FALSE + # MODEL_W16_OTHER_SP = FALSE + # + # # Which test target + # if(isTRUE(print(input$diagnostic_tests_W47) == 0)){ + # MODEL_W47_3_TARGETS = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W47) == 1)){ + # MODEL_W47_EQUAL_TARGET = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W47) == 2)){ + # MODEL_W47_HIGH_SP_TARGET = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W47) == 3)){ + # MODEL_W47_HIGH_SE_TARGET = TRUE + # } + # if(isTRUE(print(input$diagnostic_tests_W47) == 4)){ + # if(isTRUE(print(input$other_target) == 0)){ + # MODEL_W47_OTHER_SE = TRUE + # MODEL_W47_HIGH_SE_VALUE = input$sensitivity_value + # }else{ + # MODEL_W47_OTHER_SP = TRUE + # MODEL_W47_HIGH_SP_VALUE = input$specificity_value + # } + # } + # } + + cat("############################ STEP 1: END ############################\n") + + ############################ DATA UPLOAD ############################ + + ###STEP 2### + progress$inc(2/9, detail = paste("Step", 2)) + cat("############################ STEP 2: START ############################\n") + + # Step 2: check if raw MFI file was uploaded or RAU file + if(isTRUE(print(input$radio_data) == 1)){ + + # Set analysis pathways (1 = RAU default, 2 = RAU user submitted) + PATHWAY_1 = TRUE + + # Print + cat(paste0("Load MFI file: ", print(input$MFI_file$name),"\n")) + cat(paste0("MFI file path: ", print(input$MFI_file$datapath),"\n")) + + # Step 2a: set file path + RAW_MFI_FILE_PATH = input$MFI_file$datapath + + # Step 2b: file name + RAW_MFI_FILE_NAME = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(input$MFI_file$name)) + cat(paste0("MFI file Name: ", print(RAW_MFI_FILE_NAME),"\n")) + + # Step 2c: CSV or XLSX uploaded? + MFI_CSV = ifelse(sub('.*\\.', '', print(input$MFI_file$name)) == "CSV" | + sub('.*\\.', '', print(input$MFI_file$name)) == "csv", TRUE, FALSE) + cat(paste0("MFI file type is CSV?: ", print(MFI_CSV),"\n")) + + # Step 2d: if a CSV file was uploaded, how many columns to index? + MFI_N_ANTIGENS = input$MFI_num_antigens + + }else{ + + # Print + cat(paste0("Load RAU file: ", print(input$RAU_file$name),"\n")) + cat(paste0("RAU file path: ", print(input$RAU_file$datapath),"\n")) + + # Step 2a: set file path + RAW_RAU_FILE_PATH = input$RAU_file$datapath + + # Step 2b: file name + RAW_RAU_FILE_NAME = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(input$RAU_file$name)) + cat(paste0("RAU file Name: ", print(RAW_RAU_FILE_NAME),"\n")) + + # Step 2c: CSV or XLSX uploaded? + RAU_CSV = ifelse(sub('.*\\.', '', print(input$RAU_file$name)) == "CSV" | + sub('.*\\.', '', print(input$RAU_file$name)) == "csv", TRUE, FALSE) + cat(paste0("RAU file type is CSV?: ", print(RAU_CSV),"\n")) + + # Step 2d: is it a user uploaded file? + if(isTRUE(print(input$radio_RAU_upload_type) == 0)){ + # Pathway + PATHWAY_1 = FALSE + # If user uploaded data, set ID and RAU columns for indexing + RAU_user_id_columns = input$RAU_user_id_columns + cat(paste0("Number of ID columns: ", print(RAU_user_id_columns),"\n")) + RAU_user_RAU_columns = input$RAU_user_RAU_columns + cat(paste0("Number of RAU columns: ", print(RAU_user_RAU_columns),"\n")) + }else{ + # Pathways + PATHWAY_1 = TRUE + # If not user uploaded, then ignore. + RAU_user_id_columns = NA + RAU_user_RAU_columns = NA + } + } + + cat("############################ STEP 2: END ############################\n") + + ###STEP 3### + progress$inc(3/9, detail = paste("Step", 3)) + cat("############################ STEP 3: START ############################\n") + + # Step 3: check if user submitted plate template file was loaded + if(is.null(input$template)){ + cat(paste0("No plate template file uploaded; default plate template layout file used\n")) + # Step 3a: File name + TEMPLATE_FILE_NAME = "/RESOURCES/DEFAULT_PLATE_TEMPLATE.xlsx" + # Step 3b: File path + TEMPLATE_FILE_PATH = paste0(getwd(),TEMPLATE_FILE_NAME) + # Step 3c: CSV or XLSX uploaded? + TEMPLATE_CSV = FALSE + }else{ + cat(paste0("Load plate template file: ", print(input$template$name),"\n")) + # Step 3a: File name + TEMPLATE_FILE_NAME = input$template$name + # Step 3b: File path + TEMPLATE_FILE_PATH = input$template$datapath + cat("User uploaded plate template file read\n") + # Step 3c: CSV or XLSX uploaded? + TEMPLATE_CSV = ifelse(sub('.*\\.', '', print(input$template$name)) == "CSV" | + sub('.*\\.', '', print(input$template$name)) == "csv", TRUE, FALSE) + cat(paste0("User loaded template file type is CSV?: ", print(TEMPLATE_CSV),"\n")) + } + + cat("############################ STEP 3: END ############################\n") + + ###STEP 4### + progress$inc(4/9, detail = paste("Step", 4)) + cat("############################ STEP 4: START ############################\n") + + # Step 4: check if user submitted plate template file was loaded + if(is.null(input$name)){ + cat(paste0("No antigen name reference file uploaded; default antigen naming file used\n")) + # Step 4a: File name + if(isTRUE(print(input$model_version_RF) == 1)){ + ANTIGEN_FILE_NAME = "/RESOURCES/ANTIGEN_NAMES_TOP_8_W16.csv" + }else{ + ANTIGEN_FILE_NAME = "/RESOURCES/ANTIGEN_NAMES_TOP_8_W47.csv" + } + # Step 4a: File path + ANTIGEN_FILE_PATH = paste0(getwd(),ANTIGEN_FILE_NAME) + # Step 4c: CSV or XLSX uploaded? + ANTIGEN_CSV = TRUE + }else{ + cat(paste0("Load antigen name reference file: ", print(input$name$name),"\n")) + # Step 4a: File name + ANTIGEN_FILE_NAME = input$name$name + # Step 4a: File path + ANTIGEN_FILE_PATH = input$name$datapath + cat("User uploaded antigen name reference file read\n") + # Step 4c: CSV or XLSX uploaded? + ANTIGEN_CSV = ifelse(sub('.*\\.', '', print(input$name$name)) == "CSV" | + sub('.*\\.', '', print(input$name$name)) == "csv", TRUE, FALSE) + cat(paste0("User loaded antigen name file type is CSV?: ", print(ANTIGEN_CSV),"\n")) + } + + cat("############################ STEP 4: END ############################\n") + + ############################ DATA INFO ############################ + + ###STEP 5### + progress$inc(5/9, detail = paste("Step", 5)) + cat("############################ STEP 5: START ############################\n") + + # Step 5: check if user submitted plate ID + CHECK_ID = isTRUE(print(input$id) == "") + if(isTRUE(print(input$id) == "")){ + cat(paste0("No plate ID given (recommended); default = MFI or RAU file name\n")) + if(isTRUE(print(input$radio_data) == 1)){ + ID = RAW_MFI_FILE_NAME + }else{ + ID = RAW_RAU_FILE_NAME + } + }else{ + ID = input$id + cat(paste0("Plate ID: ", print(input$id),"\n")) + } + + cat("############################ STEP 5: END ############################\n") + + ###STEP 6### + progress$inc(6/9, detail = paste("Step", 6)) + cat("############################ STEP 6: START ############################\n") + + # Step 6: load date + if(is.null(input$date)){ + cat(paste0("No date given (recommended); default = today's date\n")) + DATE = print(input$date) + }else{ + DATE = input$date + cat(paste0("File date: ", print(input$date),"\n")) + } + + cat("############################ STEP 6: END ############################\n") + + + ###STEP 7### + progress$inc(7/9, detail = paste("Step", 7)) + cat("############################ STEP 7: START ############################\n") + + # Step 7a: Get experiment sub-directory name + if(isTRUE(print(input$new_experiment_folder_name) == "")){ + cat(paste0("No experiment folder name given; default = MFI or RAU file name used\n")) + if(isTRUE(print(input$radio_data) == 1)){ + EXP_DIR = paste0(getwd(),"/RESULTS/",RAW_MFI_FILE_NAME,"/") + }else{ + EXP_DIR = paste0(getwd(),"/RESULTS/",RAW_RAU_FILE_NAME,"/") + } + }else{ + EXP_DIR_NAME = input$new_experiment_folder_name + EXP_DIR = paste0(getwd(),"/RESULTS/",EXP_DIR_NAME,"/") + } + + # Step 7b: Create experiment sub-directory name + dir.create(EXP_DIR) + cat(paste0("Create experiment directory: ", print(EXP_DIR),"\n")) + + cat("############################ STEP 7: END ############################\n") + + + ############################ RELATIVE ANTIBODY UNITS ############################ + + ###STEP 8### + progress$inc(8/9, detail = "Step 8 (RAU)") + cat("############################ STEP 8: START ############################\n") + + # If MFI file loaded, run RAU function; else, skip and load RAU file + if(isTRUE(print(input$radio_data) == 1)){ + + # Step 8a: create variable to either export RAU files or not + if(isTRUE(print(input$checkbox_RAU) == 1)){ + cat(paste0("Export RAU outputs\n")) + RAU_OUTPUT = TRUE + }else{ + cat(paste0("No RAU outputs\n")) + RAU_OUTPUT = FALSE + } + + # Step 8b: check if user submitted RAU folder name + if(isTRUE(print(input$new_experiment_folder_name) == "")){ + cat(paste0("No output folder given; default = RESULTS\n")) + RAU_OUTPUT_FOLDER = paste0(getwd(), "/RESULTS") + }else{ + RAU_OUTPUT_FOLDER = input$new_experiment_folder_name + cat(paste0("RAU output folder name: ", print(input$new_experiment_folder_name),"\n")) + } + + # Step 8c: check if user submitted RAU file name + if(isTRUE(print(input$output_RAU_file) == "")){ + cat(paste0("No output file name given; default = contains raw file name and/or plate ID and/or date (e.g. RAU_Plate_ID_DATE.csv\n")) + if(isTRUE(print(input$radio_data) == 1)){ + RAU_OUTPUT_FILE_NAME = RAW_MFI_FILE_NAME + }else{ + RAU_OUTPUT_FILE_NAME = RAW_RAU_FILE_NAME + } + }else{ + RAU_OUTPUT_FILE_NAME = input$output_RAU_file + cat(paste0("Output file name: ", print(input$output_RAU_file),"\n")) + } + + ############################ RUN RAU ANALYSIS ############################ + + #### STEP 8d + cat("LAUNCH RAU FUNCTION\n") + RESULTS = getRelativeAntibodyUnits(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, + MFI_CSV, MFI_N_ANTIGENS, + TEMPLATE_FILE_PATH, TEMPLATE_CSV, + ID, DATE, + EXP_DIR) + RAU_RESULTS = RESULTS[[1]] + + }else{ + + # Step 8: load submitted RAU file + if(RAU_CSV){ + RAU_RESULTS = read.csv(RAW_RAU_FILE_PATH) + }else{ + RAU_RESULTS = as.data.frame(read_excel(RAW_RAU_FILE_PATH)) + } + + } + + cat("############################ STEP 8: END ############################\n") + + ############################ SEROPOSITIVE OUTPUTS ############################ + + ###STEP 9### + progress$inc(9/9, detail = "Step 9 (SERO)") + cat("############################ STEP 9: START ############################\n") + + ############################ RUN RANDOM FOREST ANALYSIS ############################ + + if(isTRUE(print(input$RAU_only) == 0 & print(input$model_version_RF) == 1 | print(input$RAU_only) == 0 & print(input$model_version_RF) == 0)){ + cat("LAUNCH RANDOM FOREST CLASSIFIER FUNCTION\n") + RFOREST_VOTES_SEROPOS = getSeropositiveResults_RF(PATHWAY_1, + RAU_user_id_columns, RAU_user_RAU_columns, RAU_RESULTS, + ANTIGEN_FILE_PATH, ANTIGEN_CSV, + CHECK_NAME, CHECK_ID, ID, DATE, + EXP_DIR, + MODEL_W16, + MODEL_W16_3_TARGETS, + MODEL_W16_EQUAL_TARGET, + MODEL_W16_HIGH_SP_TARGET, + MODEL_W16_HIGH_SE_TARGET, + MODEL_W16_OTHER_SE, + MODEL_W16_OTHER_SP, + MODEL_W47, + MODEL_W47_3_TARGETS,MODEL_W47_EQUAL_TARGET, + MODEL_W47_HIGH_SP_TARGET, + MODEL_W47_HIGH_SE_TARGET, + MODEL_W47_OTHER_SE, + MODEL_W47_OTHER_SP) + + cat("DONE\n") + } + + ############################ RUN SVM ANALYSIS ############################ + + # if(isTRUE(print(input$model_version_SVM) == 1 | print(input$model_version_SVM) == 0)){ + # cat("LAUNCH SVM CLASSIFIER FUNCTION\n") + # RFOREST_VOTES_SEROPOS = getSeropositiveResults_SVM(PATHWAY_1, + # RAU_user_id_columns, RAU_user_RAU_columns, RAU_RESULTS, + # ANTIGEN_FILE_PATH, ANTIGEN_CSV, + # SERO_OUTPUT_FILE_NAME, SERO_OUTPUT_FOLDER, + # CHECK_NAME, CHECK_ID, ID, DATE, + # MODEL_W16, + # MODEL_W16_3_TARGETS, + # MODEL_W16_EQUAL_TARGET, + # MODEL_W16_HIGH_SP_TARGET, + # MODEL_W16_HIGH_SE_TARGET, + # MODEL_W16_OTHER_SE, + # MODEL_W16_OTHER_SP, + # MODEL_W47, + # MODEL_W47_3_TARGETS,MODEL_W47_EQUAL_TARGET, + # MODEL_W47_HIGH_SP_TARGET, + # MODEL_W47_HIGH_SE_TARGET, + # MODEL_W47_OTHER_SE, + # MODEL_W47_OTHER_SP) + # + # cat("DONE\n") + # } + + ###INDICATE RAU ANALYSIS HAS FINISHED### + if(isTRUE(print(input$radio_data) == 1)){ + output$progress_RAU_done <- renderText({ + paste0("Relative Antibody Unit conversion has finished, check output folder (",EXP_DIR,")") + }) + }else{ + output$progress_RAU_done <- renderText({ + paste0("") + }) + } + ###INDICATE SERO ANALYSIS HAS FINISHED### + if(isTRUE(print(input$RAU_only) == 0)){ + output$progress_SERO_done <- renderText({ + paste0("PvSeroTAT analysis has finished, check output folder (",EXP_DIR,")") + }) + } + + cat("############################ STEP 9: END ############################\n") + + cat("############################ ANALYSIS END ############################\n") + + }) + +} + + + +### END +shinyApp(ui = ui, server = server) + + -- GitLab