diff --git a/README.md b/README.md index 762a294af045925d8ba7188806b40ff18876e68c..e90b07032bdb7df63ba92e7627b4c2ee78f78eb0 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ Gitlab developers ### v10.1.0 -1) D614G added as expected variants in B.1.1.7 and B.1.351 +1) D614G added as expected variants in B.1.1.7 and B.1.351 ### v10.0.0 diff --git a/dataset/20210326_voc.gmt b/dataset/20210326_voc.gmt deleted file mode 100644 index aa668e1471e8b0c375fd837ce5d7f4aa1624536d..0000000000000000000000000000000000000000 --- a/dataset/20210326_voc.gmt +++ /dev/null @@ -1,15 +0,0 @@ -#name=20210322 VOC Variants -B.1.1.7 B.1.1.7 MN908947_Ampliseq:3208-3218 MN908947_Ampliseq:5329-5339 MN908947_Ampliseq:6895-6905 MN908947_Ampliseq:11229-11247 MN908947_Ampliseq:21706-21721 MN908947_Ampliseq:21932-21944 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23212-23222 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:23650-23660 MN908947_Ampliseq:24447-24457 MN908947_Ampliseq:24855-24865 MN908947_Ampliseq:27913-27923 MN908947_Ampliseq:27989-27999 MN908947_Ampliseq:28052-28062 MN908947_Ampliseq:28221-28233 MN908947_Ampliseq:28918-28928 -B.1.351 B.1.351 MN908947_Ampliseq:1000-1010 MN908947_Ampliseq:5171-5181 MN908947_Ampliseq:10264-10274 MN908947_Ampliseq:11229-11239 MN908947_Ampliseq:21742-21752 MN908947_Ampliseq:22147-22157 MN908947_Ampliseq:22754-22764 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23605-23615 MN908947_Ampliseq:25504-25514 MN908947_Ampliseq:25845-25855 MN908947_Ampliseq:26397-26407 MN908947_Ampliseq:28828-28838 -P1 P1 MN908947_Ampliseq:3769-3779 MN908947_Ampliseq:5589-5599 MN908947_Ampliseq:11229-11247 MN908947_Ampliseq:17200-17210 MN908947_Ampliseq:21555-21565 MN908947_Ampliseq:21562-21572 MN908947_Ampliseq:21579-21589 MN908947_Ampliseq:21915-21925 MN908947_Ampliseq:22073-22083 MN908947_Ampliseq:22753-22763 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23466-23476 MN908947_Ampliseq:24583-24593 MN908947_Ampliseq:28108-28118 MN908947_Ampliseq:28210-28220 MN908947_Ampliseq:28453-28463 -B.1.1.7_Spike B.1.1.7_Spike MN908947_Ampliseq:21706-21721 MN908947_Ampliseq:21932-21944 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23212-23222 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:23650-23660 MN908947_Ampliseq:24447-24457 MN908947_Ampliseq:24855-24865 -B.1.351_Spike B.1.351_Spike MN908947_Ampliseq:21742-21752 MN908947_Ampliseq:22147-22157 MN908947_Ampliseq:22754-22764 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23605-23615 -P1_Spike P1_Spike MN908947_Ampliseq:21555-21565 MN908947_Ampliseq:21562-21572 MN908947_Ampliseq:21579-21589 MN908947_Ampliseq:21915-21925 MN908947_Ampliseq:22073-22083 MN908947_Ampliseq:22753-22763 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23466-23476 MN908947_Ampliseq:24583-24593 -A.23.1 A.23.1 MN908947_Ampliseq:2943-2953 MN908947_Ampliseq:11171-11181 MN908947_Ampliseq:11207-11217 MN908947_Ampliseq:21808-21818 MN908947_Ampliseq:21974-21984 MN908947_Ampliseq:22602-22612 MN908947_Ampliseq:23342-23352 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:28085-28095 MN908947_Ampliseq:28108-28118 MN908947_Ampliseq:28819-28829 MN908947_Ampliseq:29468-29478 -A.23.1_Spike A.23.1_Spike MN908947_Ampliseq:21808-21818 MN908947_Ampliseq:21974-21984 MN908947_Ampliseq:22602-22612 MN908947_Ampliseq:23342-23352 MN908947_Ampliseq:23545-23555 -B.1.525 B.1.525 MN908947_Ampliseq:14348-14358 MN908947_Ampliseq:11229-11247 MN908947_Ampliseq:21706-21721 MN908947_Ampliseq:21658-21668 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23534-23544 MN908947_Ampliseq:24165-24175 MN908947_Ampliseq:26246-26256 MN908947_Ampliseq:26708-26718 MN908947_Ampliseq:28219-28231 -B.1.525_Spike B.1.525_Spike MN908947_Ampliseq:21706-21721 MN908947_Ampliseq:21658-21668 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23534-23544 MN908947_Ampliseq:24165-24175 -B.1.427 B.1.427 MN908947_Ampliseq:1000-1010 MN908947_Ampliseq:9679-9689 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:16335-16345 MN908947_Ampliseq:16955-16965 MN908947_Ampliseq:21541-21551 MN908947_Ampliseq:21959-21969 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:25504-25514 MN908947_Ampliseq:28828-28838 -B.1.427_Spike B.1.427_Spike MN908947_Ampliseq:21541-21551 MN908947_Ampliseq:21959-21969 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:23344-23354 -B.1.429 B.1.429 MN908947_Ampliseq:1000-1010 MN908947_Ampliseq:12819-12829 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:16955-16965 MN908947_Ampliseq:21541-21551 MN908947_Ampliseq:21959-21969 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:25504-25514 MN908947_Ampliseq:28828-28838 -B.1.429_Spike B.1.429_Spike MN908947_Ampliseq:21959-21969 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:23344-23354 diff --git a/dataset/20210327_voc.gmt b/dataset/20210327_voc.gmt new file mode 100644 index 0000000000000000000000000000000000000000..9194b79155fa276c4d026fabb5b69c36d9e83de8 --- /dev/null +++ b/dataset/20210327_voc.gmt @@ -0,0 +1,7 @@ +B.1.1.7 B.1.1.7 MN908947_Ampliseq:3208-3218 MN908947_Ampliseq:5329-5339 MN908947_Ampliseq:6895-6905 MN908947_Ampliseq:11228-11238 MN908947_Ampliseq:21705-21715 MN908947_Ampliseq:21931-21941 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23212-23222 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:23650-23660 MN908947_Ampliseq:24447-24457 MN908947_Ampliseq:24855-24865 MN908947_Ampliseq:27913-27923 MN908947_Ampliseq:27989-27999 MN908947_Ampliseq:28052-28062 MN908947_Ampliseq:28221-28231 MN908947_Ampliseq:28222-28232 MN908947_Ampliseq:28223-28233 MN908947_Ampliseq:28918-28928 +B.1.351 B.1.351 MN908947_Ampliseq:1000-1010 MN908947_Ampliseq:5171-5181 MN908947_Ampliseq:10264-10274 MN908947_Ampliseq:11229-11239 MN908947_Ampliseq:21742-21752 MN908947_Ampliseq:22147-22157 MN908947_Ampliseq:22754-22764 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23605-23615 MN908947_Ampliseq:25504-25514 MN908947_Ampliseq:25845-25855 MN908947_Ampliseq:26397-26407 MN908947_Ampliseq:28828-28838 +P.1 P.1 MN908947_Ampliseq:3769-3779 MN908947_Ampliseq:5589-5599 MN908947_Ampliseq:11228-11238 MN908947_Ampliseq:17200-17210 MN908947_Ampliseq:21555-21565 MN908947_Ampliseq:21562-21572 MN908947_Ampliseq:21579-21589 MN908947_Ampliseq:21915-21925 MN908947_Ampliseq:22073-22083 MN908947_Ampliseq:22753-22763 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23004-23014 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23466-23476 MN908947_Ampliseq:24583-24593 MN908947_Ampliseq:28108-28118 MN908947_Ampliseq:28210-28220 MN908947_Ampliseq:28453-28463 +A.23.1 A.23.1 MN908947_Ampliseq:11171-11181 MN908947_Ampliseq:11207-11217 MN908947_Ampliseq:21808-21818 MN908947_Ampliseq:21974-21984 MN908947_Ampliseq:22602-22612 MN908947_Ampliseq:23342-23352 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:28085-28095 MN908947_Ampliseq:28108-28118 MN908947_Ampliseq:28819-28829 MN908947_Ampliseq:29468-29478 +B.1.525 B.1.525 MN908947_Ampliseq:14348-14358 MN908947_Ampliseq:11228-11238 MN908947_Ampliseq:21705-21715 MN908947_Ampliseq:21658-21668 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23534-23544 MN908947_Ampliseq:24165-24175 MN908947_Ampliseq:26246-26256 MN908947_Ampliseq:26708-26718 MN908947_Ampliseq:28218-28228 +B.1.427 B.1.427 MN908947_Ampliseq:1000-1010 MN908947_Ampliseq:9679-9689 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:16335-16345 MN908947_Ampliseq:16955-16965 MN908947_Ampliseq:21541-21551 MN908947_Ampliseq:21959-21969 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:25504-25514 MN908947_Ampliseq:28828-28838 +B.1.429 B.1.429 MN908947_Ampliseq:1000-1010 MN908947_Ampliseq:12819-12829 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:16955-16965 MN908947_Ampliseq:21541-21551 MN908947_Ampliseq:21959-21969 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:25504-25514 MN908947_Ampliseq:28828-28838 diff --git a/dataset/Variants of concern_V7_20210323.xlsx b/dataset/Variants of concern_V8_20210330.xlsx similarity index 100% rename from dataset/Variants of concern_V7_20210323.xlsx rename to dataset/Variants of concern_V8_20210330.xlsx diff --git a/dev/convert.voc_gatk_2_gmt.R b/dev/convert.voc_gatk_2_gmt.R new file mode 100644 index 0000000000000000000000000000000000000000..f71dcaa27a46a48d4a09ce377d1256d3dd08ecb3 --- /dev/null +++ b/dev/convert.voc_gatk_2_gmt.R @@ -0,0 +1,34 @@ +######################################################################### +## ## +## convert voc_gatk into gmt ## +## ## +## Gael A. Millot ## +## Bioinformatics and Biostatistics Hub ## +## Computational Biology Department ## +## Institut Pasteur Paris ## +## ## +######################################################################### + + +rm(list = ls()) + +date <- "20210327" +expanse <- 5 +file.in <- "_voc_gatk.txt" +file.out <- "_voc.gmt" +path <- "C:/Users/gael/Documents/Git_projects/cnr_curation/dataset/" + +in.path <- paste0(path, date, file.in) +out.path <- paste0(path, date, file.out) +txt <- read.table(in.path, header = TRUE, sep = "\t", stringsAsFactors = FALSE, comment.char = "#", fill = FALSE, check.names = FALSE, colClasses = "character") + +res <- data.frame() +voc.names <- unique(txt$voc) +for(i0 in 1:length(voc.names)){ + tempo.txt <- txt[txt$voc == voc.names[i0], ] + pos <- as.integer(tempo.txt$gatk_pos) + pos <- paste0(unique(tempo.txt$gatk_chr), ":", pos - expanse, "-", pos + expanse) + final <- paste0(voc.names[i0], "\t", voc.names[i0], "\t", paste(pos, collapse = "\t")) + res <- rbind(res, final) +} +write.table(res, file = out.path, row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") diff --git a/make new R process to convert into gmt.txt b/make new R process to convert into gmt.txt deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000