Data_Management.R 35.9 KB
Newer Older
svolant's avatar
svolant committed
1
#@ This file contains all the functions needed to
svolant's avatar
svolant committed
2
#@ load, check, filter and transform the data
svolant's avatar
svolant committed
3

svolant's avatar
svolant committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Run `expr` in a forked child process (parallel::mcparallel) and expose its
# progress to Shiny through a small handle object.
#
# expr: the expression to evaluate; it is forced inside the forked process.
#
# Returns a list of three closures sharing the same `state`/`result`:
#   completed(): TRUE once the state is anything other than "running".
#   result():    value of the task on success; re-raises the stored condition
#                on error; silently aborts the calling context (req(FALSE))
#                while running; raises a validation message once cancelled.
#   cancel():    if still running, marks the task cancelled, stops polling and
#                sends SIGTERM to the child.
create_forked_task <- function(expr) {
  # `state` is turned into a reactive binding so that readers of
  # completed()/result() are invalidated when it changes.
  makeReactiveBinding("state")
  state <- factor("running",
                  levels = c("running", "success", "error", "cancel"),
                  ordered = TRUE
  )
  
  # Holds the task value (on success) or the error condition (on error).
  result <- NULL
  
  # Launch the task in a forked process. This always returns
  # immediately, and we get back a handle we can use to monitor
  # or kill the job.
  task_handle <- parallel::mcparallel({
    force(expr)
  })
  
  # Poll every 100 milliseconds until the job completes
  o <- observe({
    # Non-blocking collect: a NULL result is treated as "not finished yet".
    res <- parallel::mccollect(task_handle, wait = FALSE)
    if (is.null(res)) {
      invalidateLater(100)
    } else {
      # Job finished: stop polling before publishing the outcome.
      o$destroy()
      # Anything other than a one-element list holding a "try-error" object
      # is considered a success.
      if (!is.list(res) || length(res) != 1 || !inherits(res[[1]], "try-error")) {
        state <<- "success"
        result <<- res[[1]]
      } else {
        state <<- "error"
        # Keep the original condition so result() can re-raise it with stop().
        result <<- attr(res[[1]], "condition", exact = TRUE)
      }
    }
  })
  
  list(
    completed = function() {
      state != "running"
    },
    result = function() {
      if (state == "running") {
        # If running, abort the current context silently.
        # We've taken a reactive dependency on "state" so if
        # the state changes the context will invalidate.
        req(FALSE)
      } else if (state == "success") {
        return(result)
      } else if (state == "error") {
        stop(result)
      } else if (state == "cancel") {
        validate(need(FALSE, "The operation was cancelled"))
      }
    },
    cancel = function() {
      # Cancelling is a no-op once the task has finished or was cancelled.
      if (state == "running") {
        state <<- "cancel"
        o$destroy()
        # SIGTERM is sent to the child pid and to its negated pid
        # (presumably the child's process group -- confirm).
        tools::pskill(task_handle$pid, tools::SIGTERM)
        tools::pskill(-task_handle$pid, tools::SIGTERM)
        # Non-blocking collect after the kill (presumably to reap the
        # child -- confirm).
        parallel::mccollect(task_handle, wait = FALSE)
      }
    }
  )
}


svolant's avatar
svolant committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
## Add news to the home page
## Build the HTML snippet of one news entry: a bold "date - title" header
## line followed by the text in grey and a horizontal rule.
##   date, title, text: strings inserted as-is into the markup.
## Returns the value of HTML() applied to the two markup pieces.
addNews <- function(date ="",title="",text="")
{
  ## Header line: enlarged bold date and title separated by a dash
  header_line <- paste("<b><font size='+1'>",date,"</font></b>", " - ", "<b><font size='+1'>",title,"</font></b><br/>")
  ## Body: grey paragraph closed by a horizontal rule
  body_line <- paste("<p><font color='grey'>",text,"</font></p><hr/>")

  pieces <- list(r1 = header_line, r2 = body_line)
  return(HTML(unlist(pieces)))
}



## Function for the rdp format
## Parse one tab-separated line of an RDP classifier output.
##   annotation_rdp:  a single line of the RDP file.
##   interest:        rank labels to extract (e.g. "phylum", "genus").
##   threshold_annot: minimal confidence for a rank to be kept.
## In a line, the field before a rank label is read as the taxon name and the
## field after it as the confidence.
## Returns a character vector: the first field of the line followed by one
## entry per rank of `interest` (the taxon name, or the string "NA" when the
## rank is absent, has no usable confidence or is below the threshold).
getval <- function(annotation_rdp, interest, threshold_annot){
  fields <- unlist(strsplit(annotation_rdp,"\t"))
  taxa <- vapply(interest, function(level){
    pos <- which(fields == level)
    ## The taxon name sits just before the rank label, so a label found in
    ## the very first field cannot be used.
    if(length(pos) == 0 || pos[1] < 2) return("NA")
    ## Only the first occurrence is considered: a repeated label used to make
    ## the if() condition longer than one.
    pos <- pos[1]
    ## A missing or non-numeric confidence is treated as "below threshold"
    ## instead of crashing on an NA condition.
    confidence <- suppressWarnings(as.numeric(fields[pos+1]))
    if(is.na(confidence) || confidence < threshold_annot) return("NA")
    gsub("\"", "", fields[pos-1])
  }, character(1), USE.NAMES = FALSE)
  return(c(fields[1], taxa))
}

## Read rdp file
## Read an RDP classifier output file and build the taxonomy table.
##   filename:        path of the RDP file (one annotated sequence per line).
##   threshold_annot: minimal confidence passed to getval().
## Returns a character matrix with the columns Phylum, Class, Order, Family
## and Genus, one row per line of the file. The first field of each line is
## used as row name only when these identifiers are all different.
read_rdp <- function(filename, threshold_annot)
{
  interest <- c("phylum", "class", "order", "family", "genus")
  ## readLines() on a path opens and closes the file itself, so nothing is
  ## left open if the parsing fails.
  linn <- readLines(filename)
  ## vapply() guarantees one identifier + one value per rank for every line
  ## and copes with an empty file.
  tab <- t(vapply(linn, getval, character(length(interest) + 1L),
                  interest = interest, threshold_annot = threshold_annot,
                  USE.NAMES = FALSE))

  ## The identifier column is always removed; drop = FALSE keeps a matrix
  ## even when the file contains a single line.
  ids <- tab[,1]
  tab <- tab[,-1, drop = FALSE]
  if(!anyDuplicated(ids)) rownames(tab) <- ids
  colnames(tab) <- c("Phylum","Class","Order","Family","Genus")

  return(tab)
}



## Check the format of the counts table
Amine  GHOZLANE's avatar
Amine GHOZLANE committed
115
## Validate the counts table.
##   counts:   data.frame (or matrix) of counts, features in rows.
##   MGSTable: if TRUE, a single column named SIZE (any case) is required.
## Returns a list with
##   Error:   NULL or the message of the failed check,
##   Warning: NULL or a message when NA values have been replaced by 0,
##   counts:  the table, with NA replaced by 0 when no error was found.
CheckCountsTable <- function(counts, MGSTable=FALSE)
{
  Error = NULL
  Warning = NULL

  ## Structural checks: the first failure wins
  if(is.null(counts)){
    Error = "There is no counts table"
  } else if(ncol(counts)<=1){
    Error = "The number of columns of the counts table must be at least 2"
  } else if(nrow(counts)<=1){
    Error = "The number of rows of the counts table must be at least 2"
  } else if(FALSE%in%sapply(counts,is.numeric)){
    Error = "The counts table must contain only numeric values"
  }

  if(is.null(Error))
  {
    ## NA are counted as 0. The value checks are run on the filled copy:
    ## running them on the raw table made min() return NA and crash the if().
    filled = counts
    hasNA = any(is.na(filled))
    if(hasNA) filled[is.na(filled)] = 0

    if(0%in%colSums(filled)){Error = "At least one of the column of the counts table is 0" }
    if(min(filled)<0){Error = "The counts table must contain only positive values" }
    if(MGSTable && length(which(toupper(colnames(filled))%in%"SIZE")) != 1){Error="The counts table must contain a column named SIZE providing the length of each gene"}

    ## The replacement is only reported and returned for a valid table
    if(is.null(Error) && hasNA){
      Warning = "NA values are considered as 0 is the counts table"
      counts = filled
    }
  }

  return(list(Error=Error,Warning=Warning,counts=counts))
}

## Check the format of the taxonomy table
Amine  GHOZLANE's avatar
Amine GHOZLANE committed
143
## Validate the taxonomy table against the counts table.
##   taxo:        taxonomy table, features in rows.
##   counts:      counts table; only its row names are used here.
##   MGSTable:    if TRUE, a single column named MGS (any case) is required.
##   taxoCreated: TRUE when no taxonomy was uploaded (a warning is returned).
## Returns list(Error=, Warning=), each NULL or a message.
CheckTaxoTable <- function(taxo,counts, MGSTable=FALSE, taxoCreated=FALSE)
{
  Error = NULL
  Warning = NULL
  if(taxoCreated){Warning = "No taxonomy table has been uploaded, the analysis can only be done at the OTU/gene level"}

  ## A missing table is reported instead of crashing on ncol(NULL)
  if(is.null(taxo)){Error = "There is no taxonomy table" }
  else if(ncol(taxo)<1){Error = "The number of columns of the taxonomy table must be at least 1" }
  else if(nrow(taxo)<=1){Error = "The number of rows if the taxonomy table must be at least 2" }
  ## NOTE(review): is.numeric() is FALSE for any data.frame, so this test can
  ## only fire for a numeric matrix -- confirm whether a per-column test is wanted.
  if(TRUE%in%is.numeric(taxo) && is.null(Error) ){Error = "The taxonomy table must contain only character" }

  if(is.null(Error))
  {
    for(i in seq_len(ncol(taxo)))
    {
      column = taxo[,i]
      ## levels() is NULL for character columns (the default since R 4.0),
      ## which silently disabled this check: use the distinct values instead.
      values = if(is.factor(column)) levels(column) else unique(as.character(column))
      ## Only the literal string "NA" is rejected, as for factor levels;
      ## real missing values are left alone.
      if(length(values)>=1 && all(!is.na(values) & values=="NA")){ Error = "At least one column contains only NA"}
    }
    if(MGSTable && length(which(toupper(colnames(taxo))%in%"MGS")) != 1){
      Error="The taxonomy table must contain a column named MGS providing the MGS association of each gene"
    }
  }

  ## Annotated features without counts
  ## (fires when no annotated feature at all is found in the counts table)
  if(is.null(Error) && !any(rownames(taxo)%in%rownames(counts))){ Error = "Some annotated features are not in the count table"}

  return(list(Error=Error,Warning=Warning))
}

171
172
173
174
175
176
177
178
179


## Validate the target (sample description) table and the selected model.
##   input:   list-like object; input$InterestVar holds the selected variables
##            (also passed to GetDesign()).
##   target:  sample description table, samples as row names.
##   labeled: 0 when the sample names do not fit the counts table.
##   CT:      counts table, samples as column names.
## The checks are run in order and the first failure is reported.
## Returns list(Error=, HowTo=): NULL, or the problem and how to fix it.
CheckTargetModel <- function(input,target,labeled,CT)
{
  Error = NULL
  HowTo = NULL
  InterVar = input$InterestVar
  labels = rownames(target)
  ind = which(colnames(CT)%in%labels)

  ## At least two samples of the target must be found in the counts table
  if(is.null(Error) && length(ind)<=1){
    Error = "Less than two samples names fit with the counts table"
    HowTo = "Check the samples names in the target file. They must be in the first column and must correspond EXACTLY to the names in the count table."
  }

  ## At least one variable selected
  if(is.null(Error) && length(InterVar)==0){
    Error = "At least one variable must be selected for the model"
    HowTo = "Add at least one variable in the 'Select the variables' widget"
  }

  ## Names of samples correct ?
  if(is.null(Error) && labeled==0){
    Error = "The names of the samples in the target file do not fit the counts table"
    HowTo = "Check the samples names in the target file. They must be in the first column and must correspond EXACTLY to the names in the count table."
  }

  ## Number of columns
  if(is.null(Error) && ncol(target)<2){
    Error = "The number of columns of the target file must be at least 2"
    HowTo = "Add at least one additional variable to describe your samples"
  }

  ## Every variable must take at least two different values.
  ## The number of distinct values is computed column by column: counting on
  ## the result of apply(target,2,unique) gave 1 everywhere (and a false
  ## error) whenever all columns had the same number of distinct values.
  if(is.null(Error) && min(apply(target,2,function(x) length(unique(x)))) <=1){
    Error = "One of the variable has the same value for all the samples"
    HowTo = "Remove the variable from the target file"
  }

  ## Number of rows
  if(is.null(Error) && nrow(target)<=1){
    Error = "The number of rows if the target file must be at least 2"
    HowTo = "Add information about more than 2 samples"
  }

  ## NA values
  if(is.null(Error) && (any(is.na(target)) || any(target ==""))){
    Error = "NA's or missing values are not allowed in the target file"
    HowTo = "Remove all the samples for which one or more variables are NA or missing"
  }

  ## Full rank matrix
  if(is.null(Error) && length(InterVar)>0)
  {
    design = GetDesign(input,target)
    testRank = CheckMatrixRank(design,target)
    if(!testRank){
        Error = "The model matrix is not full rank. One or more variables or interaction terms 
        are linear combinations of the others and must be removed." 
        HowTo = "Remove variable(s) that provide the same information, i.e, if the value of a variable is totaly determine by an other variable remove one of them."
    }
  }

  return(list(Error=Error,HowTo=HowTo))
}



svolant's avatar
svolant committed
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268

## Validate a user-supplied contrast file against the fitted model.
##   contrastFile: table of contrasts, one row per model parameter.
##   dds:          object passed to resultsNames() to get the parameter names.
## Returns list(Error=, Warning=, contrastFile=). NA values in the contrast
## file are replaced by 0 and reported as a warning.
CheckContrast <- function(contrastFile,dds)
{
  Error = NULL
  Warning = NULL
  parameterNames = resultsNames(dds)

  if(is.null(contrastFile)){
    Error = "The format of the contrast file is not supported by SHAMAN"
  } else if(ncol(contrastFile)<1){
    Error = "The contrast file seems to be empty"
  } else if(nrow(contrastFile)!=length(parameterNames)){
    Error = "The contrast file does not fit with the model parameters"
  } else if(any(is.na(contrastFile))){
    ## The values are fixed in place, so this is a warning and not an error
    ## (it used to be stored in Error with a message about the counts table).
    Warning = "NA values are considered as 0 in the contrast file"
    contrastFile[is.na(contrastFile)] = 0
  }

  return(list(Error=Error,Warning=Warning,contrastFile=contrastFile))
}


svolant's avatar
svolant committed
269
270
271
272
273
274
## Check the format of the tree file (for Unifrac distance)
## Validate the tree and root it when needed.
##   tree: object loaded from the tree file.
## Returns list(Error=, Warning=, tree=). `tree` is NULL when the object is
## not a phylogenetic tree; an unrooted tree is returned rooted, with a
## warning.
CheckTreeFile <- function(tree)
{
  Error = NULL
  Warning = NULL
  if(!is.phylo(tree)){Error = "The loaded file is not a phylogenetic tree"; tree = NULL}

  ## The error guard comes first: is.rooted() must not be called on the
  ## NULL tree left by the check above.
  if(is.null(Error) && !is.rooted(tree)){
    Warning = "The tree has been rooted using midpoint method"
    roottree = try(midpoint.root(tree), TRUE)
    if (inherits(roottree, "try-error")){
      ## Fallback when midpoint.root() fails: take the two tips with the
      ## largest cophenetic distance and reroot on the edge leading to the
      ## second one.
      D <- cophenetic(tree)
      dd <- max(D)
      ii <- which(D == dd)[1]
      ## Convert the linear index of the maximum into (column, row)
      ii <- c(ceiling(ii/nrow(D)), ii%%nrow(D))
      if (ii[2] == 0) ii[2] <- nrow(D)
      spp <- rownames(D)[ii]
      nn <- which(tree$tip.label == spp[2])
      tree <- reroot(tree, nn, tree$edge.length[which(tree$edge[,2] == nn)])
    }
    else tree=roottree
  }
  return(list(Error=Error,Warning=Warning,tree=tree))
}


svolant's avatar
svolant committed
294
295

## Check Masque Input
Amine  GHOZLANE's avatar
Amine GHOZLANE committed
296
## Validate the inputs of the Masque workflow before submission.
##   input:      list-like object holding the widget values (PairedOrNot,
##               MatchFiles_button, R1files, R2files, R1primer, R2primer,
##               primerSingle, to).
##   values:     list-like object holding the session state (login_email,
##               R1fastQ, R2fastQ, curdir, masque_key, and what
##               SamplesMasque() reads).
##   check_mail: if TRUE, values$login_email must be set.
## Returns list(Error=, HowTo=): NULL, or HTML messages describing the problem
## and how to fix it.
CheckMasque <- function(input, values, check_mail=FALSE)
{
  Error = NULL
  HowTo = NULL

  ## Check password

  # if(is.null(Error) && input$password == ""){
  #   Error = "<h6><strong>Empty key field </strong></h6>"
  #   HowTo = "<h6><strong>Make sure that you have click the &laquo Get key &raquo button and that you have pasted the key sent by mail </strong></h6>"
  # }
  #
  if(is.null(Error) && is.null(values$login_email) && check_mail){
    Error = "<h6><strong>Invalid key </strong></h6>"
    HowTo = "<h6><strong>Make sure that you have click the &laquo Get key &raquo button </strong></h6>"
  }

  ## At least one fastq is detected
  # if(is.null(Error) && input$LoadFiles>0 && length(values$fastq_names_only)==0){
  #   Error = "<h6><strong>The selected directory must contain at least one file in the following format : fastq, fastq.gz, or fq.</strong></h6>" 
  #   HowTo = "<h6><strong>Change the working directory and check the format of the files</strong></h6>"
  # }

  if(is.null(Error) && input$PairedOrNot=='y' && input$MatchFiles_button>0){
    nR1 = length(values$R1fastQ)
    nR2 = length(values$R2fastQ)

    ## Same number of forward and reverse files
    ## (R2 used to be compared with itself, so this never fired)
    if(nR1 != nR2){
      Error = "<h6><strong>The number of fastq files for R1 and R2 must be the same</strong></h6>"
      HowTo = "<h6><strong>Add/Remove some files or change the suffix to identify the pairs</strong></h6>"
    }

    if(nR1>0 && nR2>0){
      ## Sample names are the file names without the R1/R2 suffix
      tmpR1 = gsub(input$R1files,x=values$R1fastQ,"")
      tmpR2 = gsub(input$R2files,x=values$R2fastQ,"")

      dup_files = c(values$R1fastQ[duplicated(tmpR1)],values$R2fastQ[duplicated(tmpR2)])
      if(length(dup_files)>0){
        ## The file names are collapsed so that Error stays a single string
        Error = paste("<h6><strong>These fastq files corresponds to the same sample names:</strong></h6>" ,paste(dup_files,collapse=", "))
        HowTo = "<h6><strong>Change the suffix to identify the pairs</strong></h6>"
      }

      if(!isValidPrimer(input$R1primer)){ Error = "<h6><strong>The primer (forward) must only contain letters from A to Z</strong></h6>" }
      if(!isValidPrimer(input$R2primer)){ Error = "<h6><strong>The primer (reverse) must only contain letters from A to Z</strong></h6>" }
    }
  }

  if(is.null(Error) && !isValidEmail(input$to)) Error = "<h6><strong>The email address is not valid</strong></h6>"

  if(is.null(Error) && !isValidPrimer(input$primerSingle)){ Error = "<h6><strong>The primer must only contain letters from A to Z</strong></h6>" }

  ## At least one sample must be detected
  if(is.null(Error)) {
    res = SamplesMasque(input,values)
    if(length(res$samples)==0) {
      Error = "<h6><strong>0 sample detected</strong></h6>"
      if(input$PairedOrNot=='y') HowTo = '<h6><strong>Make sure that you click the &laquo Match &raquo button. <br /> Change the working directory and/or verify the pairs matching.</strong></h6>'
      if(input$PairedOrNot=='n') HowTo = '<h6><strong>Make sure that your samples have the correct extension (.fastq, .fq, .fastq.gz or .fq.gz). <br /> Change the working directory.</strong></h6>'
    }
  }

  ## An error file with the key of this run is reported to the user
  if(is.null(Error)) {
    error_file = paste(values$curdir,"www","masque","error",paste('file',values$masque_key,"_error.txt",sep=""),sep= .Platform$file.sep)
    if(file.exists(error_file)){
      Error = "<h6><strong>An error happened during the workflow progress. Please check your email.</strong></h6>"
    }
  }

  return(list(Error=Error,HowTo=HowTo))
}



## Test whether each element of `x` contains something shaped like an email
## address (local part, "@", domain, dot, and at least two letters), ignoring
## case. Returns a logical vector of the same length as `x`.
isValidEmail <- function(x) {
  email_pattern <- "\\<[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\>"
  candidates <- as.character(x)
  grepl(email_pattern, candidates, ignore.case=TRUE)
}

## Test whether a primer is made of letters only: the string is split into
## single characters and each of them must match "[A-Z]".
## Returns a single TRUE/FALSE.
isValidPrimer <- function(x) {
  primer_chars <- unlist(strsplit(x,""))
  all(grepl("[A-Z]",primer_chars))
}


## List the samples of a Masque run.
##   input:  list-like; PairedOrNot ('y' for paired-end) and, for paired
##           data, the R1files/R2files patterns removed from the file names.
##   values: list-like; R1fastQ/R2fastQ (paired) or fastq_names_only.
## Returns list(samples=, samples_removed=): for paired data, the names found
## on both sides and the files without a mate; otherwise the unique file
## names and NULL.
SamplesMasque <- function(input,values)
{
  kept <- NULL
  dropped <- NULL

  if(input$PairedOrNot=='y')
  {
    ## Sample name = file name without the R1/R2 pattern
    namesR1 <- gsub(input$R1files,x=values$R1fastQ,"")
    namesR2 <- gsub(input$R2files,x=values$R2fastQ,"")

    ## A file is kept when its sample name exists on the other side
    matchedR1 <- namesR1%in%namesR2
    matchedR2 <- namesR2%in%namesR1

    kept <- unique(c(namesR1[matchedR1],namesR2[matchedR2]))
    dropped <- c(values$R1fastQ[!matchedR1],values$R2fastQ[!matchedR2])
  } else {
    kept <- unique(values$fastq_names_only)
  }

  return(list(samples=kept,samples_removed=dropped))
}



svolant's avatar
svolant committed
403
404
## Write the parameters of a Masque run as JSON into values$json_name.
##   input:  list-like holding the widget values (PairedOrNot, host, data
##           type, filtering/clustering/annotation thresholds, ...).
##   values: list-like holding json_name (output file) and login_email.
## One record is written for single-end data (PairedOrNot == 'n') and one for
## paired-end data (PairedOrNot == 'y'); the fastq and contaminant paths are
## built under tempdir().
CreateJSON <- function(input,values){

  work_dir <- tempdir()
  json_stem <- basename(file_path_sans_ext(values$json_name))
  contaminant_fasta <- paste(work_dir,paste0(json_stem,"_contaminant.fasta"),sep = .Platform$file.sep)

  ## Fields shared by both layouts. They are wrapped in functions so that
  ## nothing is read from `input` unless one of the two branches is taken.
  run_fields <- function() list("host"=input$HostName,
                                "type"=input$DataTypeMasque,
                                "mail"=values$login_email,
                                "contaminant"= contaminant_fasta)
  quality_fields <- function() list("phredthres" = input$phredthres,
                                    "mincorrect" = input$mincorrect)
  pipeline_fields <- function() list("minreadlength" = input$minreadlength,
                                     "dreptype" = input$dreptype,
                                     "maxampliconlength" = input$maxampliconlength,
                                     "minampliconlength" = input$minampliconlength,
                                     "minabundance" = input$minabundance,
                                     "clusteringthreshold" = input$clusteringthreshold,
                                     "clusteringstrand" = input$clusteringstrand,
                                     "annotationstrand" = input$annotationstrand,
                                     "aKmin" = input$annotationKingdomthreshold,
                                     "aPmin" = input$annotationPhylumthreshold[1],
                                     "aPmax" = input$annotationPhylumthreshold[2],
                                     "aCmin" = input$annotationClassthreshold[1],
                                     "aCmax" = input$annotationClassthreshold[2],
                                     "aOmin" = input$annotationOrderthreshold[1],
                                     "aOmax" = input$annotationOrderthreshold[2],
                                     "aFmin" = input$annotationFamilythreshold[1],
                                     "aFmax" = input$annotationFamilythreshold[2],
                                     "aGmin" = input$annotationGenusthreshold[1],
                                     "aGmax" = input$annotationGenusthreshold[2],
                                     "aSmin" = input$annotationSpeciethreshold)

  ## Single-end reads: one fastq directory
  if(input$PairedOrNot=='n')
  {
    fastq_dir <- paste(work_dir,"Masque_files",sep= .Platform$file.sep)

    fields <- c(list("paired"=FALSE, "path"=fastq_dir),
                run_fields(),
                quality_fields(),
                pipeline_fields())
    df <- do.call(data.frame, fields)

    df %>% jsonlite::toJSON() %>% write_lines(values$json_name)
  }
  ## Paired-end reads: one directory per mate, plus the R1 file pattern and
  ## the minimal overlap
  if(input$PairedOrNot=='y')
  {
    fastq_dir_R1 <- paste(work_dir,"Masque_files_R1",sep= .Platform$file.sep)
    fastq_dir_R2 <- paste(work_dir,"Masque_files_R2",sep= .Platform$file.sep)

    fields <- c(list("paired"=TRUE, "path_R1"=fastq_dir_R1, "path_R2"=fastq_dir_R2),
                run_fields(),
                list("pattern_R1"= input$R1files),
                quality_fields(),
                list("minoverlap" = input$minoverlap),
                pipeline_fields())
    df <- do.call(data.frame, fields)

    df %>% jsonlite::toJSON() %>% write_lines(values$json_name)
  }
}


svolant's avatar
svolant committed
486
487
488
489
490
491
492
493
494
495
496
497
498
## Get the percentage of annotated OTU
## Proportion of the features of the counts table that have an annotation.
##   counts, taxo: tables whose row names are the feature identifiers.
## Returns list(Error=, Percent=): Percent is the fraction of row names of
## `counts` found in `taxo` (0 when none is found, in which case Error holds
## a message).
PercentAnnot <- function(counts,taxo)
{
  ## Tally of annotated (TRUE) and not annotated (FALSE) features
  hits <- table(rownames(counts)%in%rownames(taxo))
  Percent <- hits["TRUE"]/sum(hits)
  ## No "TRUE" entry in the tally means that nothing is annotated
  if(is.na(Percent)) {
    Percent <- 0
  }
  Error <- if(Percent==0) "Counts table and annotation do not matched" else NULL

  return(list(Error=Error,Percent=Percent))
}


499
## Get the counts, the taxo and the target tables from the BIOM format file.
svolant's avatar
svolant committed
500
501
## Extract the counts, taxonomy and target tables from a BIOM object.
##   dataBIOM: object read from a BIOM file, queried with biom_data(),
##             observation_metadata() and sample_metadata().
## Returns a list with counts, taxo (cleaned taxonomy), taxo_biom (taxonomy as
## stored in the BIOM, NULL when there is none), target (sample metadata with
## SampleID as first column, or NULL), CheckCounts, CheckTaxo, Percent and
## CheckPercent.
GetDataFromBIOM <-function(dataBIOM)
{
  taxo = NULL
  counts = NULL
  ## Defined up front: a BIOM without observation metadata used to fail on an
  ## undefined `taxo_biom` in the returned list
  taxo_biom = NULL
  taxoCreated = FALSE

  ## Counts table
  counts = biom_data(dataBIOM)
  if(!is.null(counts))
  {
    counts = as.matrix(counts)
    ## Change of - to . is risky
    colnames(counts) = gsub("-",".",colnames(counts))
    counts = as.data.frame(counts)
  }
  CheckCounts = CheckCountsTable(counts)
  counts = CheckCounts$counts

  ## Taxonomy table
  obs = observation_metadata(dataBIOM)
  if(!is.null(obs))
  {
    if(is.data.frame(obs)) taxo = as.data.frame(obs)
    ## Otherwise the metadata is a list with one element per observation:
    ## the first 7 entries of each are kept
    if(!is.data.frame(obs)) taxo = as.data.frame(t(sapply(obs,FUN=function(x){x[1:7]})))

    OTUnames = rownames(taxo)
    ## Modif taxo table (remove p__,... and change the colnames)
    taxo_biom = taxo
    taxo = as.data.frame(sapply(taxo,gsub,pattern="^.*__",replacement=""))
    colnames(taxo) = c("Kingdom", "Phylum","Class","Order","Family","Genus","Species")
    rownames(taxo) = OTUnames
    ## Remove empty row
    taxo[taxo==""] = NA
    taxo[taxo=="Unassigned"] = NA
    taxo=taxo[rowSums(is.na(taxo))!=dim(taxo)[2], ]
  }

  ## Sample metadata
  target = sample_metadata(dataBIOM)
  if(!is.null(target))
  {
    # Convert from list to dataframe
    target=as.data.frame(target)
    # Check if numeric variable in the biom metadata: a column is converted
    # only when every non-missing value is a number (converting as soon as one
    # value parsed turned the other values into NA)
    for(i in seq_len(ncol(target))){
      raw = as.character(target[,i])
      num = suppressWarnings(as.numeric(raw))
      if(any(!is.na(num)) && !any(is.na(num) & !is.na(raw))) target[,i]=num
    }
    ## SampleID is added and moved to the first column
    target$SampleID = rownames(target)
    target = target[, c(ncol(target), seq_len(ncol(target)-1)), drop=FALSE]
  }
  ## No taxonomy in the BIOM: each feature is annotated by its own name
  if(is.null(obs) && !is.null(counts)) {taxo = data.frame(rownames(counts),row.names = rownames(counts));names(taxo)=NA; taxoCreated = TRUE}

  ## taxoCreated is passed by name: as third positional argument it was
  ## taken as MGSTable
  CheckTaxo = CheckTaxoTable(taxo,counts,taxoCreated=taxoCreated)

  #input the target file
  #CheckTargetModel(,meta,)
  ## Pourcentage of annotation
  perca = PercentAnnot(counts,taxo)

  return(list(counts=counts,taxo=taxo,taxo_biom=taxo_biom,target=target,CheckCounts=CheckCounts,CheckTaxo=CheckTaxo,Percent=perca$Percent,CheckPercent=perca$Error))
}

## Check the data
Amine  GHOZLANE's avatar
Amine GHOZLANE committed
562
## Check the counts and taxonomy tables loaded from separate files.
##   dataC:    counts table.
##   dataT:    taxonomy table.
##   MGSTable: passed to CheckCountsTable() and CheckTaxoTable().
## Returns a list with counts, taxo, taxo_biom (7-level taxonomy with the
## k__, p__, ... prefixes, one row per feature), CheckCounts, CheckTaxo,
## Percent and CheckPercent.
GetDataFromCT <-function(dataC,dataT, MGSTable)
{
  l = c("k__", "p__", "c__", "o__", "f__", "g__", "s__")

  ## Counts table
  counts = dataC
  CheckCounts = CheckCountsTable(counts, MGSTable)
  counts = CheckCounts$counts

  ## Taxonomy table
  taxo = as.data.frame(dataT)
  CheckTaxo = CheckTaxoTable(taxo,counts, MGSTable)

  # Biom taxonomy must have seven levels
  taxo_temp = as.matrix(taxo)
  if(dim(taxo_temp)[2] < 7){
    taxo_temp = cbind(taxo_temp, matrix(NA, dim(taxo_temp)[1], 7 - dim(taxo_temp)[2]))
  }else if (dim(taxo_temp)[2] > 7){
    # case with more than 7 levels: only the first 7 are kept
    # (drop=FALSE keeps a matrix for a single feature)
    taxo_temp = taxo_temp[,1:7,drop=FALSE]
    # send sweet alert
  }
  # All OTU must be referenced: features of the counts table without
  # annotation get a row of NA. The test is done on the names, so it also
  # works when there is no counts table or when both tables have the same
  # number of rows but different features.
  missing_elements = rownames(counts)[!rownames(counts) %in% rownames(taxo_temp)]
  if(length(missing_elements) > 0){
    mat_missing = matrix(NA, length(missing_elements), dim(taxo_temp)[2])
    rownames(mat_missing) = missing_elements
    taxo_temp = rbind(taxo_temp, mat_missing)
  }
  # Add the level prefixes. A missing level gives the prefix alone: dropping
  # the NA shortened the vector, which was then recycled and put the values
  # under the wrong levels.
  taxo_biom = t(sapply(as.data.frame(t(taxo_temp)), FUN=function(x){paste(l,ifelse(is.na(x),"",as.character(x)),sep="")}))

  ## Pourcentage of annotation
  tmp = PercentAnnot(counts,taxo)

  return(list(counts=counts,taxo=taxo,taxo_biom = taxo_biom, CheckCounts=CheckCounts,CheckTaxo=CheckTaxo,Percent=tmp$Percent,CheckPercent=tmp$Error))
}



## Get the counts for the selected taxonomy
GetCountsMerge <- function(input,dataInput,taxoSelect,target,design)
{
  ## Init
  counts= NULL
  CheckTarget = FALSE
  CT_noNorm = NULL
  normFactors = NULL
  FeatureSize = NULL
svolant's avatar
svolant committed
612
  CT_Norm = NULL
613
  Error = NULL
svolant's avatar
svolant committed
614
615
616
617
  
  ## Counts and taxo tables
  CT = dataInput$counts
  taxo = dataInput$taxo
svolant's avatar
svolant committed
618
  namesTaxo = colnames(taxo)
svolant's avatar
svolant committed
619
  # save(CT,target,taxo,file="testMerge.RData")
svolant's avatar
svolant committed
620
621
  
  ## Select cols in the target
svolant's avatar
svolant committed
622
  labels = rownames(target)
svolant's avatar
svolant committed
623
624
625
626
627
  ind = which(colnames(CT)%in%labels)
  
  ## Get the normalization variable (normalization can be done according to this variable)
  VarNorm = input$VarNorm
  
svolant's avatar
svolant committed
628
  
svolant's avatar
svolant committed
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
  if(length(ind)==length(labels))
  { 
    if(input$TypeTable == "MGS"){
      ## Get the feature size for the normalisation
      Size_indcol = which(toupper(colnames(CT))%in%"SIZE")
      if(length(Size_indcol)==1) FeatureSize = CT[,Size_indcol]
      else print("Size parameter is missing in the count matrix")
      # Consider only counts
      CT = CT[,ind]
      # Divide by gene length
      CT = CT / FeatureSize * 1000
      # Convert matrix as integer
      CT_int=t(apply(CT,1,as.integer))
      rownames(CT_int)=rownames(CT)
      colnames(CT_int)=colnames(CT)
      CT=CT_int
    } else CT = CT[,ind]
    
    ## Order CT according to the target
    CT = OrderCounts(counts=CT,labels=labels)$CountsOrder
    CT_noNorm = CT
    RowProd = sum(apply(CT_noNorm,1,prod))
svolant's avatar
svolant committed
651
652
653
654
655
656
657
658
659
    merged_table = merge(CT, taxo, by="row.names")
    CT = as.data.frame(merged_table[,2: (dim(CT)[2]+1)])
    taxo = as.data.frame(merged_table[,(dim(CT)[2]+2):dim(merged_table)[2]])
    
    rownames(CT) = merged_table[,1]
    rownames(taxo) = merged_table[,1]
    colnames(taxo) = namesTaxo
    #ordOTU = order(rownames(taxo))
    counts_annot = CT
660
661
662
663
664
665
666
667
    if(0%in%colSums(counts_annot)){Error = "At least one of the column of the counts table is 0" }
    else{
      ## Create the dds object
      dds <- DESeqDataSetFromMatrix(countData=CT, colData=target, design=design,ignoreRank=TRUE)
      #save(dds,file="testdds.RData")
      if(is.null(VarNorm)){
        ## Counts normalisation
        ## Normalisation with or without 0
668
669
670
        if(input$AccountForNA=="NonNull" || RowProd==0) dds = estimateSizeFactors(dds,locfunc=eval(as.name(input$locfunc)),geoMeans=GeoMeansCT(CT))
        if(input$AccountForNA=="All" && RowProd!=0) dds = estimateSizeFactors(dds,locfunc=eval(as.name(input$locfunc)))
        if(input$AccountForNA=="Weighted" && input$AccountForNA!="NonNull" ) {dds = estimateSizeFactors(dds,locfunc=eval(as.name(input$locfunc)),geoMeans=GeoMeansCT(CT)); sizeFactors(dds) = w.sizefactor(CT)}
671
        if(input$AccountForNA=="Total counts") { sizeFactors(dds) = colSums(CT)/mean(colSums(CT))}
svolant's avatar
svolant committed
672
        normFactors = sizeFactors(dds)
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
        
      } else{
        group = as.data.frame(target[,VarNorm])
        group = apply(group,1,paste, collapse = "-")
        normFactors = c()
        mod = unique(group)
        ## At least 2 samples are needed for the normalization
        if(min(table(group))>1){
          for(i in unique(group))
          {
            indgrp = which(group==i) 
            CT_tmp = CT[,indgrp]
            CT_tmp = removeNulCounts(CT_tmp) 
            target_tmp = data.frame(labels = rownames(target)[indgrp])
            dds_tmp <- DESeqDataSetFromMatrix(countData=CT_tmp, colData=target_tmp, design=~labels,ignoreRank=TRUE)
            if(input$AccountForNA=="NonNull") {dds_tmp = estimateSizeFactors(dds_tmp,locfunc=eval(as.name(input$locfunc)),geoMeans=GeoMeansCT(CT_tmp)); normFactors[indgrp] = sizeFactors(dds_tmp)}
            if(input$AccountForNA=="All") {dds_tmp = estimateSizeFactors(dds_tmp,locfunc=eval(as.name(input$locfunc))); normFactors[indgrp] = sizeFactors(dds_tmp)}
            if(input$AccountForNA=="Weighted" && input$AccountForNA!="NonNull" ) {dds_tmp = estimateSizeFactors(dds_tmp,locfunc=eval(as.name(input$locfunc)),geoMeans=GeoMeansCT(CT_tmp)); normFactors[indgrp] = w.sizefactor(CT_tmp)}
            if(input$AccountForNA=="Total counts") { normFactors[indgrp] = colSums(CT_tmp)/mean(colSums(CT_tmp))}
          }
        } else{
          if(input$AccountForNA=="NonNull" || RowProd==0) dds = estimateSizeFactors(dds,locfunc=eval(as.name(input$locfunc)),geoMeans=GeoMeansCT(CT))
          if(input$AccountForNA=="All" && RowProd!=0) dds = estimateSizeFactors(dds,locfunc=eval(as.name(input$locfunc)))
          if(input$AccountForNA=="Weighted" && input$AccountForNA!="NonNull" ) {dds = estimateSizeFactors(dds,locfunc=eval(as.name(input$locfunc)),geoMeans=GeoMeansCT(CT)); sizeFactors(dds) = w.sizefactor(CT)}
          if(input$AccountForNA=="Total counts") { sizeFactors(dds) = colSums(CT)/mean(colSums(CT))}
          normFactors = sizeFactors(dds)
        }
        
        sizeFactors(dds) = normFactors
svolant's avatar
svolant committed
702
703
      }
      
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
      ## Keep normalized OTU table
      CT_Norm = counts(dds, normalized=TRUE)
      
      # Only interesting OTU
      # merged_table = merge(CT, taxo[order(rownames(CT)),], by="row.names")
      
      #     merged_table = merge(CT, taxo, by="row.names")
      #     CT = as.data.frame(merged_table[,2: (dim(CT)[2]+1)])
      #     taxo = as.data.frame(merged_table[,(dim(CT)[2]+2):dim(merged_table)[2]])
      #     
      #     rownames(CT) = merged_table[,1]
      #     rownames(taxo) = merged_table[,1]
      #     #ordOTU = order(rownames(taxo))
      #     counts_annot = CT
      #       ordOTU = order(rownames(taxo))
      #       indOTU_annot = which(rownames(CT)%in%rownames(taxo))
      #       counts_annot = CT[indOTU_annot[ordOTU],]
      ## Aggregate matrix
      if(taxoSelect=="OTU/Gene") counts = counts_annot
      else{
        if(input$TypeTable == "MGS" && input$FileFormat!="fileBiom"){
          MGS_taxocol = which(toupper(colnames(taxo))%in%"MGS")
          taxoS = taxo[,MGS_taxocol]
          counts = aggregate(counts_annot,by=list(Taxonomy = taxoS),mean)
          rownames(counts)=counts[,1]
          counts=counts[,-1]
          counts_int=t(apply(counts,1,as.integer))
          rownames(counts_int)=rownames(counts)
          colnames(counts_int)=colnames(counts)
          counts=counts_int
        }
        if(taxoSelect != "MGS" || input$FileFormat=="fileBiom"){
          #taxoS = taxo[ordOTU,taxoSelect]
          taxoS = taxo[,taxoSelect]
          counts = aggregate(counts_annot,by=list(Taxonomy = taxoS),sum)
          rownames(counts)=counts[,1];counts=counts[,-1]
        }
svolant's avatar
svolant committed
741
      }
742
743
744
745
746
747
      
      ## Ordering the counts table according to the target labels 
      tmpOrder = OrderCounts(counts,normFactors,labels)
      counts = tmpOrder$CountsOrder
      normFactors = tmpOrder$normFactorsOrder
      CheckTarget = TRUE
svolant's avatar
svolant committed
748
749
    }
  }
750
  return(list(counts=counts,CheckTarget=CheckTarget,normFactors=normFactors, CT_noNorm=CT_noNorm, CT_Norm =CT_Norm, Error = Error))
svolant's avatar
svolant committed
751
752
753
754
755
756
757
758
759
760
761
762
763
  #return(list(counts=counts,target=target[ind,],labeled=labeled,normFactors=normFactors, CT_noNorm=CT_noNorm))
}


## Row-wise geometric mean of a counts table.
## Every value below 1 (in particular the null counts) is replaced by NA
## beforehand, and the NA values are ignored when averaging.
##
## CT : counts table, coercible with as.matrix()
## Returns one geometric mean per row of CT.
GeoMeansCT <- function(CT)
{
  mat <- as.matrix(CT)
  ## Mask the values below 1 so that they do not enter the mean
  mat[which(mat < 1)] <- NA
  apply(mat, 1, function(row_counts) geometric.mean(row_counts, na.rm = TRUE))
}

764
765
766
767
768
769
770
771
772
773
774
775
776
777
## Get weighted size factors
##
## Values below 1 are treated as missing. For every column of CT the size
## factor is the weighted median (see w.median) of the ratios
## value / row geometric mean, taken over the rows that are not missing in
## that column. The weight of a row is the number of columns in which it is
## not missing.
##
## CT : counts table, coercible with as.matrix()
##      (presumably features in rows and samples in columns -- confirm with callers)
## Returns a vector with one size factor per column, named by colnames(CT).
## A table without any column yields an empty vector instead of an error.
w.sizefactor <- function(CT)
{
  CT <- as.matrix(CT)
  nbsamp <- ncol(CT)
  CT[which(CT < 1)] <- NA

  ## TRUE where a value is available
  observed <- !is.na(CT)

  gm <- apply(CT, 1, geometric.mean, na.rm = TRUE)
  ## Number of columns in which each row is available
  weights <- rowSums(observed)

  ## seq_len() keeps the loop empty when the table has no column
  sf <- numeric(nbsamp)
  for (i in seq_len(nbsamp))
  {
    keep <- observed[, i]
    sf[i] <- w.median(CT[keep, i] / gm[keep], weights[keep], na.rm = TRUE)
  }
  names(sf) <- colnames(CT)
  return(sf)
}

## Compute the weighted median of x
##
## x     : numeric vector
## w     : non-negative weights, one per element of x (all equal to 1 when missing)
## na.rm : if TRUE, the NA values of x (and their weights) are discarded
##
## Returns the weighted median, or NA (with a warning) when all weights are zero.
## Stops when w has a different length than x, or contains NA or negative values.
w.median <- function (x, w, na.rm = TRUE) 
{
  if (missing(w)) {
    w <- rep.int(1, length(x))
  } else {
    if (length(w) != length(x)) {
      stop("'x' and 'w' must have the same length")
    }
    if (any(is.na(w))) {
      stop("NA weights not allowed")
    }
    if (any(w < 0)) {
      stop("Negative weights not allowed")
    }
  }
  if (is.integer(w)) {
    w <- as.numeric(w)
  }

  if (na.rm) {
    available <- !is.na(x)
    w <- w[available]
    x <- x[available]
  }

  if (all(w == 0)) {
    warning("All weights are zero")
    return(NA)
  }

  ## Sort the values and accumulate the share of the total weight
  rank_x <- order(x)
  x <- x[rank_x]
  w <- w[rank_x]
  cum_share <- cumsum(w) / sum(w)

  ## First position at which at least half of the total weight is reached
  k <- sum(cum_share < 0.5) + 1
  if (cum_share[k] > 0.5) {
    x[k]
  } else {
    ## Exactly half of the weight is reached: average with the next value
    (x[k] + x[k + 1]) / 2
  }
}

svolant's avatar
svolant committed
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836

## Order the counts (and, optionally, the normalisation factors) according to the labels
##
## counts      : matrix or data.frame whose column names contain every label
## normFactors : optional vector with one value per column of counts, in the
##               same order as the columns
## labels      : column names, in the wanted order
##
## Returns list(CountsOrder, normFactorsOrder):
##   CountsOrder      : the columns of counts picked in the order of labels
##   normFactorsOrder : normFactors reordered the same way (names follow the
##                      values), or NULL when normFactors is NULL
## Stops with an explicit message when a label is not a column of counts.
OrderCounts <- function(counts,normFactors=NULL,labels)
{
  ## Position of every label among the columns of counts
  pos <- match(labels, colnames(counts))
  if (anyNA(pos)) {
    stop("Labels not found in the columns of the counts table: ",
         paste(labels[is.na(pos)], collapse = ", "))
  }

  ## drop = FALSE keeps a table even when a single label is requested
  CountsOrder <- counts[, pos, drop = FALSE]
  colnames(CountsOrder) <- labels

  normFactorsOrder <- NULL
  if (!is.null(normFactors)) normFactorsOrder <- normFactors[pos]

  return(list(CountsOrder=CountsOrder,normFactorsOrder = normFactorsOrder))
}

837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860

## Find the rows of the counts table that fall under the filtering thresholds
##
## counts   : counts table (matrix or data.frame of numeric values)
## th.samp  : rows whose number of strictly positive values is <= th.samp are flagged
## th.abund : rows whose log(total + 1) is <= th.abund are flagged
##
## Returns a list with
##   ind           : indices flagged by at least one of the two criteria
##   Tot_abundance : log(rowSums(counts) + 1)
##   ind.ab        : indices flagged by the abundance criterion
##   counts.bin    : counts as a matrix, strictly positive values replaced by 1
##   ind.samp      : indices flagged by the sample criterion
##   nbSampByFeat  : row sums of counts.bin
Filtered_feature <- function(counts,th.samp,th.abund)
{
  ## Binarised table: every strictly positive value becomes 1
  presence <- as.matrix(counts)
  presence[which(counts > 0)] <- 1

  ## Log total abundance and number of positive values of each row
  log_total <- log(rowSums(counts) + 1)
  n_positive <- rowSums(presence)

  ## Rows under each threshold
  low_abund <- which(log_total <= th.abund)
  low_samp <- which(n_positive <= th.samp)

  list(
    ind = unique(c(low_samp, low_abund)),
    Tot_abundance = log_total,
    ind.ab = low_abund,
    counts.bin = presence,
    ind.samp = low_samp,
    nbSampByFeat = n_positive
  )
}



svolant's avatar
svolant committed
861
862
863



864
865
866
867
868
869
870
871
## Plot the effect of the filtering thresholds on the rows of the counts table
##
## counts   : counts table with row names
## th.samp  : threshold on the number of positive values per row (see Filtered_feature)
## th.abund : threshold on the log total abundance per row (see Filtered_feature)
## type     : "Abundance" -> ggplot bar chart of the log total abundance
##            "Samples"   -> ggplot bar chart of the number of positive values
##            "Scatter"   -> scatterD3 widget of both quantities
##
## Returns the plot object. For "Abundance" and "Samples" an empty placeholder
## plot is returned while one of the thresholds is NULL. Any other value of
## type returns NULL.
plot_filter <- function(counts,th.samp,th.abund,type="Scatter")
{
  res <- NULL

  ## Named colours: each state keeps its own colour even when the other
  ## state is absent from the data, so a single fill scale is enough
  state_colors <- c(Kept = "springgreen3", Removed = "firebrick")

  ## Bar chart of `values` (one bar per row of counts, sorted increasingly);
  ## the rows listed in `removed` are shown as "Removed" and a dashed line is
  ## drawn at `threshold`
  bar_plot <- function(values, removed, threshold)
  {
    state <- rep("Kept", nrow(counts))
    state[removed] <- "Removed"
    ord <- order(values, decreasing = FALSE)

    df <- data.frame(lab = rownames(counts)[ord], y = values[ord], State = state[ord])
    df$lab <- factor(df$lab, levels = rownames(counts)[ord])
    df$State <- factor(df$State, levels = c("Kept", "Removed"))

    ggplot(df, aes(lab, y, fill = State)) +
      geom_bar(stat = "identity") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
      geom_hline(yintercept = threshold, linetype = "longdash") +
      xlab("") +
      scale_fill_manual(values = state_colors)
  }

  ## Initial (empty) plot for plotly
  if (type == "Abundance" || type == "Samples") {
    dataNull <- data.frame(x = c(0, 0), y = c(1, 2), col = c("white", "white"))
    res <- ggplot(dataNull, aes(x = x, y = y)) +
      geom_point(aes(colour = col)) +
      theme_bw() +
      scale_color_manual(values = "white")
  }

  res_filter <- Filtered_feature(counts, th.samp, th.abund)
  thresholds_set <- !is.null(th.samp) && !is.null(th.abund)

  ## Total abundance over samples
  if (type == "Abundance" && thresholds_set) {
    res <- bar_plot(res_filter$Tot_abundance, res_filter$ind.ab, th.abund)
  }

  ## Number of non zero samples
  if (type == "Samples" && thresholds_set) {
    res <- bar_plot(res_filter$nbSampByFeat, res_filter$ind.samp, th.samp)
  }

  if (type == "Scatter") {
    ## Rows under at least one of the thresholds are marked as removed
    state <- rep("Kept", nrow(counts))
    state[res_filter$ind] <- "Removed"

    ## Create the data.frame for the scatter plot
    df <- data.frame(lab = rownames(counts),
                     y = res_filter$nbSampByFeat,
                     x = res_filter$Tot_abundance,
                     State = state)
    df$lab <- factor(df$lab, levels = rownames(counts))
    df$State <- factor(df$State, levels = c("Kept", "Removed"))

    PointSize <- 2
    colors_scat <- list(Kept = "#00CD66", Removed = "#b22222")

    ## NOTE(review): PointSize is passed as `size_lab`; check in the scatterD3
    ## documentation whether `point_size` was intended
    res <- scatterD3(x = df$x,
                     y = df$y,
                     lab = rownames(df),
                     xlab = "Abundance in log",
                     ylab = "Number of samples",
                     col_var = df$State,
                     colors = colors_scat,
                     size_lab = PointSize,
                     key_var = rownames(df),
                     point_opacity = 0.7,
                     transitions = TRUE)
  }

  return(res)
}



######################################################
## NAME: SelectThreshAb 
##
## INPUT:
##    infile : data matrix (counts, rows: taxo)
##    lambda : Tuning parameter (default is 500)
##
## OUTPUT:
##    Cut-off value 
##
######################################################

SelectThreshAb <- function(infile,lambda=500,graph=TRUE){
  
  rs <- rowSums(infile)
  test_Filtre <- sapply(c(min(rs):lambda),FUN=function(x) table(rs>x))
996
997
998
999
1000
  if(!is.list(test_Filtre))
  {
    x <- c(min(rs):lambda)
    reslm <- lm(test_Filtre[1,]~x)$coefficients
    val <- which(test_Filtre[1,]>reslm[1])[1]
For faster browsing, not all history is shown. View entire blame