diff --git a/README.md b/README.md index d4d822a19a9cc9a7f40b52d49a8afbe130f8518b..70999fcbd45def49287220fa9dff9cfca0f76f4a 100644 --- a/README.md +++ b/README.md @@ -196,9 +196,14 @@ Hub-CDB and Institut Pasteur <br /><br /> ## WHAT'S NEW IN +### v14.7.0 + +1) B.1.620 added + + ### v14.6.0 -1) B.1.617.1, B.1.617.2, B.1.617.2 added +1) B.1.617.1, B.1.617.2, B.1.617.3 added ### v14.5.0 diff --git a/cnr_curation_voc_qc.config b/cnr_curation_voc_qc.config index 5f6af2b5e9f15543f7b343b97ea4c1d257c57d83..03cb7c0c48fc88b9c6e1eb1454fcd387b682aa4c 100644 --- a/cnr_curation_voc_qc.config +++ b/cnr_curation_voc_qc.config @@ -35,7 +35,7 @@ res.file.path <- paste0(in.path, "Full_Results.tsv") # single character string i #### files in the gitlab dataset folder -voc.file.path <- "C:/Users/gael/Documents/Git_projects/cnr_curation/dataset/20210505_voc.txt" # single character string indicating the name of the GATK Variant of Concern (VOC) file. Example: /pasteur/zeus/projets/p01/CNRVIR_bioinfoSEQ/sarscov2mapping/20200128_voc_gatk.txt +voc.file.path <- "C:/Users/gael/Documents/Git_projects/cnr_curation/dataset/20210511_voc.txt" # single character string indicating the name of the GATK Variant of Concern (VOC) file. Example: /pasteur/zeus/projets/p01/CNRVIR_bioinfoSEQ/sarscov2mapping/20200128_voc_gatk.txt critical.file.path <- "C:/Users/gael/Documents/Git_projects/cnr_curation/dataset/20210406_critical_pos.txt" # single character string indicating the name of the critical positions listed. Write NULL if not required. 
Example: "C:/Users/gael/Documents/Git_projects/cnr_curation/dataset/20210203_voc_gatk_critical_var_present.txt" #### output folder diff --git a/dataset/20210430_Variants of concern.xlsx b/dataset/20210430_Variants of concern.xlsx index 97d5038a56e47b60bc67f8666c259165f05c041a..834052617140b85b67ef2230edfca36cac58164f 100644 Binary files a/dataset/20210430_Variants of concern.xlsx and b/dataset/20210430_Variants of concern.xlsx differ diff --git a/dataset/20210505_voc.gmt b/dataset/20210511_voc.gmt similarity index 92% rename from dataset/20210505_voc.gmt rename to dataset/20210511_voc.gmt index 3944380e6175deab8d78d265df2fa6aac4ace830..1fc4a9ce52243fda8ec2b62c12494ff4b6821dba 100644 --- a/dataset/20210505_voc.gmt +++ b/dataset/20210511_voc.gmt @@ -15,3 +15,4 @@ B.1.617.1 B.1.617.1 MN908947_Ampliseq:4906-4916 MN908947_Ampliseq:11142-11152 MN B.1.617.2 B.1.617.2 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:21559-21569 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:22936-22946 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:25410-25420 MN908947_Ampliseq:26708-26718 MN908947_Ampliseq:27579-27589 MN908947_Ampliseq:28402-28412 MN908947_Ampliseq:28822-28832 MN908947_Ampliseq:29343-29353 B.1.617.3 B.1.617.3 MN908947_Ampliseq:7237-7247 MN908947_Ampliseq:8149-8159 MN908947_Ampliseq:10575-10585 MN908947_Ampliseq:11263-11273 MN908947_Ampliseq:21559-21569 MN908947_Ampliseq:22858-22868 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:27579-27589 MN908947_Ampliseq:27911-27921 MN908947_Ampliseq:28822-28832 MN908947_Ampliseq:29343-29353 B.1.214.3 B.1.214.3 MN908947_Ampliseq:4398-4408 MN908947_Ampliseq:5848-5858 MN908947_Ampliseq:11228-11247 MN908947_Ampliseq:11371-11381 MN908947_Ampliseq:12253-12263 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:15128-15138 MN908947_Ampliseq:21787-21797 MN908947_Ampliseq:22936-22946 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23650-23660 
MN908947_Ampliseq:28828-28838 +B.1.620 B.1.620 MN908947_Ampliseq:1414-1424 MN908947_Ampliseq:3714-3724 MN908947_Ampliseq:6177-6187 MN908947_Ampliseq:11228-11247 MN908947_Ampliseq:14349-14359 MN908947_Ampliseq:17051-17061 MN908947_Ampliseq:21579-21589 MN908947_Ampliseq:21705-21721 MN908947_Ampliseq:21880-21890 MN908947_Ampliseq:21931-21944 MN908947_Ampliseq:22221-22240 MN908947_Ampliseq:22236-22246 MN908947_Ampliseq:22933-22943 MN908947_Ampliseq:22953-22963 MN908947_Ampliseq:23344-23354 MN908947_Ampliseq:23545-23555 MN908947_Ampliseq:24583-24593 MN908947_Ampliseq:24855-24865 diff --git a/dataset/20210505_voc.txt b/dataset/20210511_voc.txt similarity index 91% rename from dataset/20210505_voc.txt rename to dataset/20210511_voc.txt index df389e1dbe45ae20b949f9f1aecd6b55d8b09a71..f19a3039e26bfba3a44ebef8cf96a2a20671ecbd 100644 --- a/dataset/20210505_voc.txt +++ b/dataset/20210511_voc.txt @@ -5,7 +5,7 @@ B.1.1.7 MN908947_Ampliseq 5334 C A ORF1ab:A1708D B.1.1.7 MN908947_Ampliseq 6900 T C ORF1ab:I2230T B.1.1.7 MN908947_Ampliseq 11233 GTCTGGTTTT G ORF1ab:SGF_3675-3677_del B.1.1.7 MN908947_Ampliseq 21710 ATACATG A S:HV_69-70_del -B.1.1.7 MN908947_Ampliseq 21936 TTTA T S:Y144_del +B.1.1.7 MN908947_Ampliseq 21936 ATTA A S:Y144_del B.1.1.7 MN908947_Ampliseq 23009 A T S:N501Y B.1.1.7 MN908947_Ampliseq 23217 C A S:A570D B.1.1.7 MN908947_Ampliseq 23349 A G S:D614G @@ -109,7 +109,7 @@ B.1.616 MN908947_Ampliseq 21023 C T ORF1b:T2537I B.1.616 MN908947_Ampliseq 21434 A G ORF1b:K2674R B.1.616 MN908947_Ampliseq 21704 C G S:H66D B.1.616 MN908947_Ampliseq 21933 G T S:G142V -B.1.616 MN908947_Ampliseq 21936 TTTA T S:Y144_del +B.1.616 MN908947_Ampliseq 21936 ATTA A S:Y144_del B.1.616 MN908947_Ampliseq 22152 A G S:D215G B.1.616 MN908947_Ampliseq 22956 T C S:V483A B.1.616 MN908947_Ampliseq 23349 A G S:D614G @@ -233,4 +233,22 @@ B.1.214.3 MN908947_Ampliseq 22941 C A S:T478K B.1.214.3 MN908947_Ampliseq 23349 A G S:D614G B.1.214.3 MN908947_Ampliseq 23655 C T S:T716I B.1.214.3 MN908947_Ampliseq 
28833 C T N:T205I +B.1.620 MN908947_Ampliseq 1419 C T ORF1a:T403I +B.1.620 MN908947_Ampliseq 3719 C T ORF1a:R1170C +B.1.620 MN908947_Ampliseq 6182 G A ORF1a:V1991I +B.1.620 MN908947_Ampliseq 11233 GTCTGGTTTT G ORF1ab:SGF_3675-3677_del +B.1.620 MN908947_Ampliseq 14354 C T ORF1b:P314L +B.1.620 MN908947_Ampliseq 17056 G T ORF1b:A1215S +B.1.620 MN908947_Ampliseq 21584 C T S:P26S +B.1.620 MN908947_Ampliseq 21710 ATACATG A S:HV_69-70_del +B.1.620 MN908947_Ampliseq 21885 T C S:V126A +B.1.620 MN908947_Ampliseq 21936 ATTA A S:Y144_del +B.1.620 MN908947_Ampliseq 22226 ACTTTACTTG A S:LLA_241-243_del +B.1.620 MN908947_Ampliseq 22241 C T S:H245Y +B.1.620 MN908947_Ampliseq 22938 G A S:S477N +B.1.620 MN908947_Ampliseq 22958 G A S:E484K +B.1.620 MN908947_Ampliseq 23349 A G S:D614G +B.1.620 MN908947_Ampliseq 23550 C A S:P681H +B.1.620 MN908947_Ampliseq 24588 C T S:T1027I +B.1.620 MN908947_Ampliseq 24860 G C S:D1118H diff --git a/dev/TODO.txt b/dev/TODO.txt new file mode 100644 index 0000000000000000000000000000000000000000..40b4dc0fd001adfaaf82f087fcfc3454b604e6f4 --- /dev/null +++ b/dev/TODO.txt @@ -0,0 +1,4 @@ +1) Consider line full of "" in the Nextclade annot file. This must not be considered as REF +2) Add Pangolin annot (or replace, even better) +3) Do not "ToCheck" for new critical if no problem with it +4) in gmt, solve the fact that in sessions there is a small shift, e.g., I have to write 27:38 instead of 28:38 \ No newline at end of file diff --git a/dev/convert.voc_gatk_2_gmt.R b/dev/convert.voc_gatk_2_gmt.R index 2898d603660f2ee2c92b499300550d61772e27b4..d83ba09ba205a379a6c03de202c187b08309e0f7 100644 --- a/dev/convert.voc_gatk_2_gmt.R +++ b/dev/convert.voc_gatk_2_gmt.R @@ -12,7 +12,7 @@ rm(list = ls()) -date <- "20210505" +date <- "20210511" expanse <- 5 file.in <- "_voc.txt" file.out <- "_voc.gmt"