diff --git a/Example data files/Example_3_BIOPLEX_MFI-WITH-BEAD-COUNTS_XLSX.xlsx b/Example data files/Example_3_BIOPLEX_MFI-WITH-BEAD-COUNTS_XLSX.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6ef56a6d0c5f4e685b57c2bd8fdedb99cbf12ad6 Binary files /dev/null and b/Example data files/Example_3_BIOPLEX_MFI-WITH-BEAD-COUNTS_XLSX.xlsx differ diff --git a/Example data files/Example_3_plate_layout_XLSX.xlsx b/Example data files/Example_3_plate_layout_XLSX.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ee9bfeac6a710506c8f7b05ebc31005fdc5c3556 Binary files /dev/null and b/Example data files/Example_3_plate_layout_XLSX.xlsx differ diff --git a/Example data files/Example_4_BIOPLEX_MFI-WITHOUT-BEAD-COUNTS_XLSX.xlsx b/Example data files/Example_4_BIOPLEX_MFI-WITHOUT-BEAD-COUNTS_XLSX.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b21b3433f3f2fa83ab48f91a673f7d2f41596951 Binary files /dev/null and b/Example data files/Example_4_BIOPLEX_MFI-WITHOUT-BEAD-COUNTS_XLSX.xlsx differ diff --git a/Example data files/Example_4_plate_layout_XLSX.xlsx b/Example data files/Example_4_plate_layout_XLSX.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..02b3c156612d7905d343205b67bda64ee790e038 Binary files /dev/null and b/Example data files/Example_4_plate_layout_XLSX.xlsx differ diff --git a/FUNCTIONS.R b/FUNCTIONS.R index 31371cb023d2477cdf032a6e7962a7f2e5eff203..e3bdc23e7562a437fcf43dc14556a103a356eb7a 100644 --- a/FUNCTIONS.R +++ b/FUNCTIONS.R @@ -41,7 +41,7 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM ## converts the median fluorescence intensity (MFI) values into relative antibody units. ## ## ## ## Input: ## - ## - Luminex-200 or Magpix output file (required) ## + ## - Luminex-200 or Magpix output file or BioPlex raw MFI output file(required) ## ## - Plate layout with bleedcode information (optional) ## ## ## ## Output: ## @@ -56,7 +56,9 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM #### READ DATA ########################################################################################################## - # ## The first 41 rows are not relevant and are not imported into R. + ## The first rows are headers not relevant and are quickly discarded into R. + ## The number of rows to discard depends on platform used to generate the input. + if(MFI_CSV){ @@ -101,7 +103,7 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM dim(L) # MFI values as numeric - L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events"))], as.numeric) + L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))], as.numeric) ## Load the counts to check for run quality control C = L_full[(count_row_number+1):(count_row_number+1+nrow(L)),1:(3+as.integer(MFI_N_ANTIGENS))] @@ -133,70 +135,138 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM # Read L_full <- as.data.frame(read_excel(fname1)) - # Identify rows - median_row_number <- which(L_full$xPONENT == "Median") - count_row_number <- which(L_full$xPONENT == "Count") - endcount_row_number <- which(L_full$xPONENT == "Avg Net MFI") - - # Load Excel file - L <- as.data.frame(read_excel(fname1, skip = median_row_number+1, col_types = "text")) + # Guess the platform from the header structure + # This should be improved in the future (made into a radio button instead ?) + PLATFORM <- ifelse(any(grepl("MAGPIX", colnames(L_full))), "Magpix", + ifelse(any(grepl("Reader Serial Number", L_full[, 1])), "Bioplex", "Unknown")) - ## Find all blank rows (i.e. rows that are all NA). - ## Then keep rows preceding the first blank row. - blank.row.number <- which(rowSums(is.na(L)) == length(names(L)))[1] - if(is.na(blank.row.number)){ - L = L - }else{ - L <- L[1:(blank.row.number-1),] + ## Magpix data input detected + if (PLATFORM == "Magpix") { + cat("Guessed MFI input file from a Magpix platform") + + # Identify rows + median_row_number <- which(L_full$xPONENT == "Median") + count_row_number <- which(L_full$xPONENT == "Count") + endcount_row_number <- which(L_full$xPONENT == "Avg Net MFI") + + # Load Excel file + L <- as.data.frame(read_excel(fname1, skip = median_row_number+1, col_types = "text")) + + ## Find all blank rows (i.e. rows that are all NA). + ## Then keep rows preceding the first blank row. + blank.row.number <- which(rowSums(is.na(L)) == length(names(L)))[1] + if(is.na(blank.row.number)){ + L = L + }else{ + L <- L[1:(blank.row.number-1),] + } + + ## Exclude column that corresponds to "Total events" + L <- L[, !(colnames(L) %in% c("Total Events","TotalEvents"))] + + # Antigen names clean-up + colnames(L) = gsub("\\..*", "", colnames(L)) + # Remove any values in (parentheses) + colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) + # Remove spaces + colnames(L) = gsub(" ","", colnames(L)) + + dim(L) + + # Change "NaN" to 0s + L <- L %>% mutate_all(funs(gsub("NaN", 0, .))) + + # MFI values as numeric + L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))], as.numeric) + + C <- as.data.frame(read_excel(fname1, skip = count_row_number+1, col_types = "text")) + + ## Find all blank rows (i.e. rows that are all NA). + ## Then keep rows preceding the first blank row. + blank.row.number <- which(rowSums(is.na(C)) == length(names(C)))[1] + if(is.na(blank.row.number)){ + C = C + }else{ + C <- C[1:(blank.row.number-1),] + } + + ## Exclude column that corresponds to "Total events" + C <- C[, !(colnames(C) %in% c("Total Events","TotalEvents"))] + + # Antigen names clean-up + colnames(C) = gsub("\\..*", "", colnames(C)) + # Remove any values in (parentheses) + colnames(C) = gsub("\\s*\\([^\\)]+\\)","", colnames(C)) + # Remove spaces + colnames(C) = gsub(" ","", colnames(C)) + dim(C) + + ## Save the MFI values for the blank sample(s) for run quality control + B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) + dim(B) + + ## Save the MFI values for the standards for run quality control + S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) + dim(S) #there should be 10 rows } - ## Exclude column that corresponds to "Total events" - L <- L[, !(colnames(L) %in% c("Total Events","TotalEvents"))] - - # Antigen names clean-up - colnames(L) = gsub("\\..*", "", colnames(L)) - # Remove any values in (parentheses) - colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) - # Remove spaces - colnames(L) = gsub(" ","", colnames(L)) - - dim(L) - - # Change "NaN" to 0s - L <- L %>% mutate_all(funs(gsub("NaN", 0, .))) - - # MFI values as numeric - L[,-which(colnames(L) %in% c("Location","Sample","Total Events","TotalEvents"))] = lapply(L[,-which(colnames(L) %in% c("Location","Sample","Total Events"))], as.numeric) - - C <- as.data.frame(read_excel(fname1, skip = count_row_number+1, col_types = "text")) - - ## Find all blank rows (i.e. rows that are all NA). - ## Then keep rows preceding the first blank row. - blank.row.number <- which(rowSums(is.na(C)) == length(names(C)))[1] - if(is.na(blank.row.number)){ - C = C - }else{ - C <- C[1:(blank.row.number-1),] + ## BioPlex data input detected + else if (PLATFORM == "Bioplex") { + cat("Guessed MFI input file from a BioPlex platform") + + # In BioPlex outputs, MFI and bead counts are in the same cell with format MFI (counts). + # Note that the bead counts may not be present, in that case only the MFI is reported. + + # Identify rows (-1 because we match the header, contrary as one row before in Magpix) + median_row_number <- which(L_full[, 1] == "Well") - 1 + + # Load again Excel file reading only after the 1st row of interest + L <- as.data.frame(read_excel(fname1, skip = median_row_number+1, col_types = "text")) %>% + dplyr::rename(Location = Well, + Sample = Type) %>% + # Pad the Location column to match the syntax used by Luminex outputs, + # so that future regex will work regardless + dplyr::mutate(Location = paste0(1:n(), "(1,", Location, ")")) + + # Remove columns that are not MFI readings or sample code / location + L <- L[, !(colnames(L) %in% c("Region", "Gate", "Total", "% Agg Beads", "Sampling Errors"))] + + # Antigen names clean-up + colnames(L) = gsub("\\..*", "", colnames(L)) + # Remove any values in (parentheses) + colnames(L) = gsub("\\s*\\([^\\)]+\\)","", colnames(L)) + # Remove spaces + colnames(L) = gsub(" ","", colnames(L)) + + dim(L) + + # Change "NaN" to 0s + L <- L %>% mutate_all(funs(gsub("NaN", 0, .))) + + # Bead counts, when available, will be extracted from the table + C <- L %>% + dplyr::mutate(dplyr::across(!c(Location, Sample), gsub, pattern = "^(\\d+(?:\\.\\d+)?)(\\s*\\((\\d+(?:\\.\\d+)?)\\))?$", replacement = "\\3")) %>% + # Set to NA in case bead counts were not available + dplyr::mutate(dplyr::across(!c(Location, Sample), gsub, pattern = "^$", replacement = NA)) + + dim(C) + + # Remove bead counts from L when they have been isolated in C + L <- L %>% + dplyr::mutate(dplyr::across(!c(Location, Sample), gsub, pattern = "^(\\d+(?:\\.\\d+)?)(\\s*\\((\\d+(?:\\.\\d+)?)\\))?$", replacement = "\\1")) + + ## Save the MFI values for the blank sample(s) for run quality control + B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) + dim(B) + + ## Save the MFI values for the standards for run quality control + S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) + dim(S) #there should be 10 rows } - ## Exclude column that corresponds to "Total events" - C <- C[, !(colnames(C) %in% c("Total Events","TotalEvents"))] - - # Antigen names clean-up - colnames(C) = gsub("\\..*", "", colnames(C)) - # Remove any values in (parentheses) - colnames(C) = gsub("\\s*\\([^\\)]+\\)","", colnames(C)) - # Remove spaces - colnames(C) = gsub(" ","", colnames(C)) - dim(C) - - ## Save the MFI values for the blank sample(s) for run quality control - B <- L %>% filter(grepl(c("^B"), Sample, ignore.case = T)) - dim(B) - - ## Save the MFI values for the standards for run quality control - S <- L %>% filter(grepl("^S", Sample, ignore.case = T)) - dim(S) #there should be 10 rows + else { + stop("Execution halted: the platform used to generate the input could not be determined.") + } } @@ -214,9 +284,9 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM dplyr::filter(grepl("^\\d+$",count)) %>% dplyr::mutate(count = as.numeric(count), warning = case_when( - count<15~1, - count>=15~0 - )) %>% + count<15~1, + count>=15~0 + )) %>% dplyr::select(Location, warning) %>% dplyr::group_by(Location) %>% dplyr::summarise(sum = sum(warning)) %>% @@ -246,8 +316,8 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM ## Protein name list obtained after removing variable names: "Location" and "Sample" proteins <- names(L[, -c(1:2)]); proteins - ## Add new variable to data frame to indicate the first letter of sample type ("B", "C", "S", "U","N") - ## "B" = blank, "C"=control, "S"=standard (dilution of the pool), "U"=sample, "N"=negative control + ## Add new variable to data frame to indicate the first letter of sample type ("B", "C", "S", "U" or "X","N") + ## "B" = blank, "C"=control, "S"=standard (dilution of the pool), "U" or "X"=sample, "N"=negative control L$type.letter <- substr(L$Sample, start=1, stop=1) dilution <- c(1/50, 1/100, 1/200, 1/400, 1/800, 1/1600, 1/3200, 1/6400, 1/12800, 1/25600) dilution.scaled <- dilution*25600; dilution.scaled @@ -303,7 +373,7 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM for (r in 1:nrow(L)){ results <- NULL - if (L$type.letter[r]=="U"){ + if (L$type.letter[r] %in% c("U", "X")){ mfi.X <- as.numeric(L[r, i]) y <- log(mfi.X) @@ -367,7 +437,7 @@ runRelativeAntibodyUnits = function(fname1, fname2, MFI_CSV, MFI_N_ANTIGENS, TEM # Make all columns after 1st 4 numeric results.df.wide[,5:ncol(results.df.wide)] = lapply(results.df.wide[,5:ncol(results.df.wide)], as.character) results.df.wide[,5:ncol(results.df.wide)] = lapply(results.df.wide[,5:ncol(results.df.wide)], as.numeric) - + ########################################################################################################## #### OUTPUT @@ -428,7 +498,7 @@ getRelativeAntibodyUnits = function(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, cat("******************Running RAU function******************\n") - # Get Luminex-MagPix full file path + # Get Luminex-MagPix-BioPlex full file path fname1 = RAW_MFI_FILE_PATH # Extract working directly of this file @@ -461,46 +531,53 @@ getRelativeAntibodyUnits = function(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, bead_counts = results[[4]] MFI_RAU_results = results[[5]] model_results = results[[6]] + plot_stdcurve <- plot_counts <- plot_blank <- plots_model <- all_model_plots <- NULL ## Plot standard curve raw MFI - plot_stdcurve <- std_curve %>% - dplyr::select(-Location) %>% - dplyr::mutate(Sample = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"), - Sample_dilution = factor(paste0(Sample,": ",dilution_plot),levels=paste0(Sample,": ",dilution_plot))) %>% - tidyr::pivot_longer(-c(Sample,dilution,dilution_plot,Sample_dilution), names_to = "protein", values_to = "MFI") %>% - # mutate(Sample = factor(Sample, c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"))) %>% - dplyr::mutate(MFI = as.numeric(MFI)) %>% - ggplot(aes(x = Sample_dilution, y = MFI, color = protein, group = protein)) + - geom_point() + - geom_line() + - scale_y_log10(breaks = c(0, 10, 100, 1000, 10000)) + - labs(x = "standard curve", - y = "log(MFI)") + - facet_wrap(~protein) + - theme_bw() + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) + if (nrow(std_curve) > 0) { + plot_stdcurve <- std_curve %>% + dplyr::select(-Location) %>% + dplyr::mutate(Sample = c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"), + Sample_dilution = factor(paste0(Sample,": ",dilution_plot),levels=paste0(Sample,": ",dilution_plot))) %>% + tidyr::pivot_longer(-c(Sample,dilution,dilution_plot,Sample_dilution), names_to = "protein", values_to = "MFI") %>% + # mutate(Sample = factor(Sample, c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10"))) %>% + dplyr::mutate(MFI = as.numeric(MFI)) %>% + ggplot(aes(x = Sample_dilution, y = MFI, color = protein, group = protein)) + + geom_point() + + geom_line() + + scale_y_log10(breaks = c(0, 10, 100, 1000, 10000)) + + labs(x = "standard curve", + y = "log(MFI)") + + facet_wrap(~protein) + + theme_bw() + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + } ## Plot plate counts - plot_counts <- bead_counts %>% - ggplot(mapping = aes(x = col, y = fct_rev(row), fill = colour), fill = summary)+ - geom_tile(aes(height = 0.90, width = 0.90)) + - scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))+ - scale_fill_manual(values = c("sufficient beads" = "#91bfdb", "repeat" = "#d73027"))+ - theme_linedraw()+ - labs(x = "columns", y = "rows", fill = "") + if (nrow(bead_counts) > 0) { + plot_counts <- bead_counts %>% + ggplot(mapping = aes(x = col, y = fct_rev(row), fill = colour), fill = summary)+ + geom_tile(aes(height = 0.90, width = 0.90)) + + scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))+ + scale_fill_manual(values = c("sufficient beads" = "#91bfdb", "repeat" = "#d73027"))+ + theme_linedraw()+ + labs(x = "columns", y = "rows", fill = "") + } ## Plot blank sample MFI for each protein - if there is more than one blank sample label as "Blank1", "Blank2" etc - plot_blank <- blank_MFI %>% - dplyr::select(-Location) %>% - dplyr::mutate(Sample = paste0(Sample,1:n())) %>% - tidyr::pivot_longer(-Sample, names_to = "protein", values_to = "MFI") %>% - ggplot(aes(x = factor(protein), y = as.numeric(MFI), fill = Sample)) + - geom_bar(stat = "identity", position = "dodge") + - geom_hline(yintercept = 50, linetype = "dashed", color = "grey") + - labs(x = "protein", - y = "MFI") + - theme_linedraw() + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) + if (nrow(blank_MFI) > 0) { + plot_blank <- blank_MFI %>% + dplyr::select(-Location) %>% + dplyr::mutate(Sample = paste0(Sample,1:n())) %>% + tidyr::pivot_longer(-Sample, names_to = "protein", values_to = "MFI") %>% + ggplot(aes(x = factor(protein), y = as.numeric(MFI), fill = Sample)) + + geom_bar(stat = "identity", position = "dodge") + + geom_hline(yintercept = 50, linetype = "dashed", color = "grey") + + labs(x = "protein", + y = "MFI") + + theme_linedraw() + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + } ## Plot model curves plots_model <- lapply(seq_along(model_results), function(x){ @@ -526,32 +603,40 @@ getRelativeAntibodyUnits = function(RAW_MFI_FILE_NAME, RAW_MFI_FILE_PATH, # saveRDS(std_curve, file = paste0(EXP_DIR, "STD_CURVES.rds")) # Save PDF of std curve plots - ggsave(plot_stdcurve, - filename = paste0(EXP_DIR,"STD_CURVES_PLOT.pdf"), - height = 8, - width = 12, - units = "in") + if (!is.null(plot_stdcurve)) { + ggsave(plot_stdcurve, + filename = paste0(EXP_DIR,"STD_CURVES_PLOT.pdf"), + height = 8, + width = 12, + units = "in") + } # Save PDF of bead count plot - ggsave(plot_counts, - filename = paste0(EXP_DIR,"PLATE_BEADS_COUNT_PLOT.pdf"), - height = 8, - width = 12, - units = "in") + if(!is.null(plot_counts)) { + ggsave(plot_counts, + filename = paste0(EXP_DIR,"PLATE_BEADS_COUNT_PLOT.pdf"), + height = 8, + width = 12, + units = "in") + } # Save PDF ofblank sample QC plot - ggsave(plot_blank, - filename = paste0(EXP_DIR,"BLANK_SAMPLE_PLOT.pdf"), - height = 4, - width = 6, - units = "in") + if (!is.null(plot_blank)) { + ggsave(plot_blank, + filename = paste0(EXP_DIR,"BLANK_SAMPLE_PLOT.pdf"), + height = 4, + width = 6, + units = "in") + } # Save PDF of model plots - ggsave(all_model_plots, - filename = paste0(EXP_DIR,"MODEL_PLOTS.pdf"), - height = 8.27, - width = 11.69, - units = "in") + if (!is.null(all_model_plots)) { + ggsave(all_model_plots, + filename = paste0(EXP_DIR,"MODEL_PLOTS.pdf"), + height = 8.27, + width = 11.69, + units = "in") + } ## Write to file write.csv(results.df.wide, @@ -625,25 +710,25 @@ getAntigenNames = function(RAU_Dilution, ANTIGEN_FILE_PATH, ANTIGEN_CSV){ ################# Function 4: Random forest serology analysis getSeropositiveResults_RF = function(PATHWAY_1, - RAU_user_id_columns, RAU_user_RAU_columns, - RAU_RESULTS, - ANTIGEN_FILE_PATH, ANTIGEN_CSV, - CHECK_NAME, CHECK_ID, ID, DATE, - EXP_DIR, - MODEL_W16, - MODEL_W16_3_TARGETS, - MODEL_W16_EQUAL_TARGET, - MODEL_W16_HIGH_SP_TARGET, - MODEL_W16_HIGH_SE_TARGET, - MODEL_W16_OTHER_SE, - MODEL_W16_OTHER_SP, - MODEL_W47, - MODEL_W47_3_TARGETS, - MODEL_W47_EQUAL_TARGET, - MODEL_W47_HIGH_SP_TARGET, - MODEL_W47_HIGH_SE_TARGET, - MODEL_W47_OTHER_SE, - MODEL_W47_OTHER_SP){ + RAU_user_id_columns, RAU_user_RAU_columns, + RAU_RESULTS, + ANTIGEN_FILE_PATH, ANTIGEN_CSV, + CHECK_NAME, CHECK_ID, ID, DATE, + EXP_DIR, + MODEL_W16, + MODEL_W16_3_TARGETS, + MODEL_W16_EQUAL_TARGET, + MODEL_W16_HIGH_SP_TARGET, + MODEL_W16_HIGH_SE_TARGET, + MODEL_W16_OTHER_SE, + MODEL_W16_OTHER_SP, + MODEL_W47, + MODEL_W47_3_TARGETS, + MODEL_W47_EQUAL_TARGET, + MODEL_W47_HIGH_SP_TARGET, + MODEL_W47_HIGH_SE_TARGET, + MODEL_W47_OTHER_SE, + MODEL_W47_OTHER_SP){ ############################################################################################## ## ## @@ -1164,25 +1249,25 @@ getSeropositiveResults_RF = function(PATHWAY_1, ################# Function 5: SVM serology analysis getSeropositiveResults_SVM = function(PATHWAY_1, - RAU_user_id_columns, RAU_user_RAU_columns, - RAU_RESULTS, - ANTIGEN_FILE_PATH, ANTIGEN_CSV, - CHECK_NAME, CHECK_ID, ID, DATE, - EXP_DIR, - MODEL_W16, - MODEL_W16_3_TARGETS, - MODEL_W16_EQUAL_TARGET, - MODEL_W16_HIGH_SP_TARGET, - MODEL_W16_HIGH_SE_TARGET, - MODEL_W16_OTHER_SE, - MODEL_W16_OTHER_SP, - MODEL_W47, - MODEL_W47_3_TARGETS, - MODEL_W47_EQUAL_TARGET, - MODEL_W47_HIGH_SP_TARGET, - MODEL_W47_HIGH_SE_TARGET, - MODEL_W47_OTHER_SE, - MODEL_W47_OTHER_SP){ + RAU_user_id_columns, RAU_user_RAU_columns, + RAU_RESULTS, + ANTIGEN_FILE_PATH, ANTIGEN_CSV, + CHECK_NAME, CHECK_ID, ID, DATE, + EXP_DIR, + MODEL_W16, + MODEL_W16_3_TARGETS, + MODEL_W16_EQUAL_TARGET, + MODEL_W16_HIGH_SP_TARGET, + MODEL_W16_HIGH_SE_TARGET, + MODEL_W16_OTHER_SE, + MODEL_W16_OTHER_SP, + MODEL_W47, + MODEL_W47_3_TARGETS, + MODEL_W47_EQUAL_TARGET, + MODEL_W47_HIGH_SP_TARGET, + MODEL_W47_HIGH_SE_TARGET, + MODEL_W47_OTHER_SE, + MODEL_W47_OTHER_SP){ ############################################################################################## ## ## @@ -1370,30 +1455,30 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) } } @@ -1405,30 +1490,30 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative"), + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative"), + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) } } } @@ -1443,24 +1528,24 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_79SE_79SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) } } @@ -1472,24 +1557,24 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_80SE_80SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_1, "Positive", "Negative")), stringsAsFactors = F) } } } @@ -1504,24 +1589,24 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_63SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) } } @@ -1533,24 +1618,24 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_64SE_90SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_2, "Positive", "Negative")), stringsAsFactors = F) } } } @@ -1565,24 +1650,24 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_59SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) } } @@ -1594,24 +1679,24 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + SEROPOSITIVE_90SE_60SP=ifelse(SVM_MODEL_VOTES <= CUTOFF_3, "Positive", "Negative")), stringsAsFactors = F) } } } @@ -1626,27 +1711,27 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) } } } @@ -1657,27 +1742,27 @@ getSeropositiveResults_SVM = function(PATHWAY_1, # Create binary seropositivity value if(PATHWAY_1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:4], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) }else{ if(RAU_user_id_columns == 1){ col_name = colnames(RAU_RESULTS)[1] SVM_SEROPOS = as.data.frame(cbind(as.character(RAU_RESULTS[,colnames(RAU_RESULTS)[1]]), - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) colnames(SVM_SEROPOS) = c(col_name, colnames(SVM_SEROPOS[2:ncol(SVM_SEROPOS)])) } if(RAU_user_id_columns > 1){ SVM_SEROPOS = as.data.frame(cbind(RAU_RESULTS[,1:as.integer(RAU_user_id_columns)], - exp(RAU_Dilution_Subset), - SVM_MODEL_VOTES, - assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), - stringsAsFactors = F) + exp(RAU_Dilution_Subset), + SVM_MODEL_VOTES, + assign(paste0("SEROPOSITIVE_",as.character(round(as.numeric(SE_Other)*100,digits = 0)), "SE_",as.character(round(as.numeric(SP_Other)*100,digits = 0)),"SP"),ifelse(SVM_MODEL_VOTES <= CUTOFF_4, "Positive", "Negative"))), + stringsAsFactors = F) } } } diff --git a/SHINY_APP.R b/SHINY_APP.R index 0a3a99c8298c637e6bf3e0bb6bf8ae85ee3240a5..53f5381fa8a2b221c32ee2c73b4cdf169e8a7b71 100644 --- a/SHINY_APP.R +++ b/SHINY_APP.R @@ -56,7 +56,7 @@ ui <- fluidPage( p("The Pv SeroTAT Tool has been developed by Ivo Mueller's research groups at the Institut Pasteur and Walter and Eliza Hall Institut of Medical Research. The tool is a patented diagnostic test based on validated serological markers of recent",tags$em("Plasmodium vivax")," infections and hypnozoite carriage. The tool uses machine learning classification algorithms (Random Forest (RF) and Support Vector Machine (SVM)) to predict sero-positivity with varying performance."), p("Validation of this tool using Random Forest can be found in the", a("Nature Medicine Publication", href="https://www.nature.com/articles/s41591-020-0841-4")), p("Please contact on how to cite this tool in publications."), - p(em("Follow steps 1 - 6 to process Luminex MFI or Relative Antibody Unit (RAU) data and run the diagnostic test.")), + p(em("Follow steps 1 - 6 to process Luminex/BioPlex MFI or Relative Antibody Unit (RAU) data and run the diagnostic test.")), hr()), tabPanel("Biomarkers of exposure", br(), @@ -216,7 +216,7 @@ ui <- fluidPage( fileInput("MFI_file", h5("4.1 Load raw MFI file (required)"), accept = c(".csv", ".xlsx")), - helpText("Note: a (.csv) or (.xlsx) file of raw outputs of the Luminex-MagPix machine. + helpText("Note: a (.csv) or (.xlsx) file of raw outputs of the Luminex-MagPix or BioPlex machine. If you encounter errors when loading a (.csv) file, convert it to (.xlsx) in Excel and load the (.xlsx) file instead.")), conditionalPanel("input.radio_data == 1", textInput("MFI_num_antigens",