Commit 3e119e70 authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

interm

parent 8fd104bf
This diff is collapsed.
......@@ -260,6 +260,7 @@ h2 <- unname(unlist(read.table(paste0(path.in, file.name1), nrows = 1, skip = 1,
# h3: the single row read after skipping the first 12 lines of the input file, flattened to an unnamed vector
h3 <- unname(unlist(read.table(paste0(path.in, file.name1), nrows = 1, skip = 12, sep=";", dec=",", stringsAsFactors = FALSE)))
# df.nano: everything after the first 13 lines of the same file, read without a header row (";" separator, "," decimal mark)
df.nano <- read.table(file = paste0(path.in, file.name1), skip = 13, header=FALSE, sep=";", dec=",") # data frame clinical + nanostring data (from LPS and SEB); dim: 76 855
################ Data modification
# h: use h3 where it is not NA, otherwise fall back to the label "<h2>_<h1>"
h <- ifelse(is.na(h3), paste(h2, h1, sep="_"), h3)
......@@ -277,7 +278,13 @@ seb <- log2(df.nano[, i_seb]) # log2 of the SEB columns
# colnames(seb) <- sapply(strsplit(colnames(seb), "_"), function(x) sprintf("%s-%s", paste0(x[-1], collapse = "_"), x[1]))
dat <- data.frame(Y, lps, seb)
# export df.nano lps seb cvr Y dat
# The complete data table is saved only when slurm.loop.nb is 1;
# any other loop only exports a message pointing to the loop1 directory.
if (slurm.loop.nb != 1) {
  fun_export_data(
    path = path.out,
    data = paste0("SEE loop1 DIRECTORY TO GET THE complete.data.table.RData CONTAINING THE df.nano FILE", optional.text),
    output = log.file
  )
} else {
  # df.nano : the whole data table
  # dat : the data.frame(Y, lps, seb) table built above
  save(df.nano, dat, file = paste0(path.out, "complete.data.table.RData"))
}
# here should be ok
......@@ -287,7 +294,7 @@ dat <- data.frame(Y, lps, seb)
# Two sets of patients were defined in this analysis from the `r nrow(df.nano)` patients in the total cohort: a discovery set of `r length(train)` patients and a validation set of `r length(valid)` patients.
# The data needs to be the training cohort (also called discovery cohort)
class.prop <- table(df.nano$response_ASDAS_R_NR)/sum(table(df.nano$response_ASDAS_R_NR))
class.prop <- table(df.nano$response_ASDAS_R_NR, useNA = "no")/sum(table(df.nano$response_ASDAS_R_NR, useNA = "no"))
sample.prop <- class.prop[match(df.nano$response_ASDAS_R_NR, names(class.prop))]
if(analysis.kind == "longit"){
train <- which(df.nano$cohort_id != "cohortR") # 67 rows of df.nano used for training
......@@ -514,7 +521,7 @@ if(any((analysis.kind == "longit" & slurm.loop.nb == 1) | (analysis.kind == "val
ann_colors = list("ASDAS R/NR" = c(R = "steelblue", NR = "tomato"))
heatmap.plot <- pheatmap(t(scale(subdat)), silent = TRUE, annotation_col = annot.rows, cluster_cols = FALSE, show_colnames = FALSE, border_color = NA, color = colorRampPalette(c("red", "black", "green"))(499), annotation_colors = ann_colors, fontsize_row = label.size, fontsize_col = label.size)
tempo <- dev.set(pdf.nb) # assign to avoid the message
print(heatmap.plot)
# Call ggplot2::ggplot() and then print the heatmap on the current device.
# The previous text lacked the `::` operator, so R searched for a function
# named `ggplot2ggplot`, which does not exist, and stopped with an error.
ggplot2::ggplot() ; print(heatmap.plot)
backup.name <- c(backup.name, "heatmap.plot")
# ```
#
......@@ -607,6 +614,9 @@ pred3.genes.crp.rf <- predict(mod3.genes.crp.rf, newdata = dat.valid3.genes.crp)
# Prediction tables: the $data element of each random-forest prediction object
data.pred1.genes.rf <- pred1.genes.rf$data
data.pred2.crp.rf <- pred2.crp.rf$data
data.pred3.genes.crp.rf <- pred3.genes.crp.rf$data
# AUC of each model. performance() and the auc measure are namespace-qualified,
# like the mlr:: calls below, so that a same-named object from another attached
# package cannot mask them.
auc1.genes.rf <- mlr::performance(pred1.genes.rf, measures = mlr::auc)
auc2.crp.rf <- mlr::performance(pred2.crp.rf, measures = mlr::auc)
auc3.genes.crp.rf <- mlr::performance(pred3.genes.crp.rf, measures = mlr::auc)
# Confusion matrix of each model
confmat1.genes.rf <- mlr::calculateConfusionMatrix(pred1.genes.rf)
confmat2.crp.rf <- mlr::calculateConfusionMatrix(pred2.crp.rf)
confmat3.genes.crp.rf <- mlr::calculateConfusionMatrix(pred3.genes.crp.rf)
......@@ -617,7 +627,7 @@ fun_export_data(path = path.out, data = paste0("...model 2.crp..."), output = lo
fun_export_data(path = path.out, data = confmat2.crp.rf$result, output = log.file, rownames.kept = TRUE)
fun_export_data(path = path.out, data = paste0("...model 3.genes.crp..."), output = log.file, sep = 1)
fun_export_data(path = path.out, data = confmat3.genes.crp.rf$result, output = log.file, rownames.kept = TRUE)
backup.name <- c(backup.name, "confmat1.genes.rf", "confmat2.crp.rf", "confmat3.genes.crp.rf", "data.pred1.genes.rf", "data.pred2.crp.rf", "data.pred3.genes.crp.rf")
backup.name <- c(backup.name, "data.pred1.genes.rf", "data.pred2.crp.rf", "data.pred3.genes.crp.rf", "auc1.genes.rf", "auc2.crp.rf", "auc3.genes.crp.rf", "confmat1.genes.rf", "confmat2.crp.rf", "confmat3.genes.crp.rf")
# cat("\n...model 1.genes...\n")
# print(confmat1.genes.rf$result)
......@@ -696,6 +706,9 @@ pred3.genes.crp.logreg <- predict(mod3.genes.crp.logreg, newdata = dat.valid3.ge
# Prediction tables: the $data element of each logistic-regression prediction object
data.pred1.genes.logreg <- pred1.genes.logreg$data
data.pred2.crp.logreg <- pred2.crp.logreg$data
data.pred3.genes.crp.logreg <- pred3.genes.crp.logreg$data
# AUC of each model. performance() and the auc measure are namespace-qualified,
# like the mlr:: calls below, so that a same-named object from another attached
# package cannot mask them.
auc1.genes.logreg <- mlr::performance(pred1.genes.logreg, measures = mlr::auc)
auc2.crp.logreg <- mlr::performance(pred2.crp.logreg, measures = mlr::auc)
auc3.genes.crp.logreg <- mlr::performance(pred3.genes.crp.logreg, measures = mlr::auc)
# Confusion matrix of each model
confmat1.genes.logreg <- mlr::calculateConfusionMatrix(pred1.genes.logreg)
confmat2.crp.logreg <- mlr::calculateConfusionMatrix(pred2.crp.logreg)
confmat3.genes.crp.logreg <- mlr::calculateConfusionMatrix(pred3.genes.crp.logreg)
......@@ -707,7 +720,7 @@ fun_export_data(path = path.out, data = paste0("...model 2.crp..."), output = lo
fun_export_data(path = path.out, data = confmat2.crp.logreg$result, output = log.file, rownames.kept = TRUE)
fun_export_data(path = path.out, data = paste0("...model 3.genes.crp..."), output = log.file, sep = 1)
fun_export_data(path = path.out, data = confmat3.genes.crp.logreg$result, output = log.file, rownames.kept = TRUE)
backup.name <- c(backup.name, "confmat1.genes.logreg", "confmat2.crp.logreg", "confmat3.genes.crp.logreg", "data.pred1.genes.logreg", "data.pred2.crp.logreg", "data.pred3.genes.crp.logreg")
backup.name <- c(backup.name, "data.pred1.genes.logreg", "data.pred2.crp.logreg", "data.pred3.genes.crp.logreg", "auc1.genes.logreg", "auc2.crp.logreg", "auc3.genes.crp.logreg", "confmat1.genes.logreg", "confmat2.crp.logreg", "confmat3.genes.crp.logreg")
# cat("\n...model 1.genes...\n")
# print(confmat1.genes.logreg$result)
......@@ -772,10 +785,12 @@ pred3.genes.crp.rpart <- predict(mod3.genes.crp.rpart, newdata = dat.valid3.gene
# Prediction tables: the $data element of each rpart prediction object
data.pred1.genes.rpart <- pred1.genes.rpart$data
data.pred2.crp.rpart <- pred2.crp.rpart$data
data.pred3.genes.crp.rpart <- pred3.genes.crp.rpart$data
# AUC of each model. performance() and the auc measure are namespace-qualified,
# like the mlr:: calls below, so that a same-named object from another attached
# package cannot mask them.
auc1.genes.rpart <- mlr::performance(pred1.genes.rpart, measures = mlr::auc)
auc2.crp.rpart <- mlr::performance(pred2.crp.rpart, measures = mlr::auc)
auc3.genes.crp.rpart <- mlr::performance(pred3.genes.crp.rpart, measures = mlr::auc)
# Confusion matrix of each model
confmat1.genes.rpart <- mlr::calculateConfusionMatrix(pred1.genes.rpart)
confmat2.crp.rpart <- mlr::calculateConfusionMatrix(pred2.crp.rpart)
confmat3.genes.crp.rpart <- mlr::calculateConfusionMatrix(pred3.genes.crp.rpart)
fun_export_data(path = path.out, data = paste0("CONFUSION MATRICES: "), output = log.file)
fun_export_data(path = path.out, data = paste0("...model 1.genes..."), output = log.file, sep = 1)
fun_export_data(path = path.out, data = confmat1.genes.rpart$result, output = log.file, rownames.kept = TRUE)
......@@ -783,7 +798,7 @@ fun_export_data(path = path.out, data = paste0("...model 2.crp..."), output = lo
fun_export_data(path = path.out, data = confmat2.crp.rpart$result, output = log.file, rownames.kept = TRUE)
fun_export_data(path = path.out, data = paste0("...model 3.genes.crp..."), output = log.file, sep = 1)
fun_export_data(path = path.out, data = confmat3.genes.crp.rpart$result, output = log.file, rownames.kept = TRUE)
backup.name <- c(backup.name, "confmat1.genes.rpart", "confmat2.crp.rpart", "confmat3.genes.crp.rpart", "data.pred1.genes.rpart", "data.pred2.crp.rpart", "data.pred3.genes.crp.rpart")
backup.name <- c(backup.name, "data.pred1.genes.rpart", "data.pred2.crp.rpart", "data.pred3.genes.crp.rpart", "auc1.genes.rpart", "auc2.crp.rpart", "auc3.genes.crp.rpart", "confmat1.genes.rpart", "confmat2.crp.rpart", "confmat3.genes.crp.rpart")
# cat("\n...model 1.genes...\n")
# print(confmat1.genes.rpart$result)
......@@ -842,15 +857,15 @@ gridExtra::grid.arrange(roc1.genes.rpart, roc2.crp.rpart, roc3.genes.crp.rpart,
#
# ```{r boplot_topfeatures, fig.width= 14, fig.height=20}
boxdat <- data.frame(Y,
Cohort = df.nano$cohort_id,
CV = df.nano$training_validation,
Cohort.name = df.nano$cohort_id,
RF.COHORTE = df.nano$training_validation,
dat[, mod.gene.names])
boxdat_melt <- melt(boxdat,
id.vars = c("Y", "Cohort", "CV"),
boxdat_melt <- reshape2::melt(boxdat,
id.vars = c("Y", "Cohort.name", "RF.COHORTE"),
variable.name = "Gene")
ggbox <- ggplot(data = boxdat_melt, aes(x=Y, y=value, colour=Y, shape = CV, linetype = CV)) +
ggbox <- ggplot(data = boxdat_melt, aes(x=Y, y=value, colour=Y, shape = RF.COHORTE, linetype = RF.COHORTE)) +
geom_beeswarm(dodge.width = 0.75, alpha = 0.7) +
geom_boxplot(outlier.shape = NA, fill = NA) +
facet_wrap(~ Gene, ncol = 5) +
......@@ -883,7 +898,7 @@ r25 <- mlr::resample(learner = lrn.rf.filter.25,
show.info = FALSE, models = TRUE)
sfeats <- sapply(r25$models, getFilteredFeatures)
df_top_features <- table(sfeats) %>%
df_top_features <- table(sfeats, useNA = "no") %>%
as.data.frame() %>%
dplyr::arrange(desc(Freq)) %>%
dplyr::mutate(Freq = Freq / ml.bootstrap.nb)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment