diff --git a/Data_submission/Data_submission.snakefile b/Data_submission/Data_submission.snakefile index f7c7087090dfd47512b115585fa94025cba234aa..0a14e5bb3d3cba2fe4e88fff654c325257d39358 100644 --- a/Data_submission/Data_submission.snakefile +++ b/Data_submission/Data_submission.snakefile @@ -98,6 +98,8 @@ paper = ref_info["paper"] submission_dir = ref_info["NCBI_submitter"] gather_processed_data = config.get("gather_processed_tables", False) +if not gather_processed_data: + print("Only raw files will be gathered.") data_info = config["data"] LIBTYPES = list(data_info.keys()) diff --git a/Data_submission/libtype_info.yaml b/Data_submission/libtype_info.yaml index ba7b7100ffcc1d5e65b428f95d41b3abc9b042ff..aa9e2ae41888f2a58194284e059b0b7009ce5aa6 100644 --- a/Data_submission/libtype_info.yaml +++ b/Data_submission/libtype_info.yaml @@ -94,7 +94,7 @@ Ribo-seq: The 5' and 3' 4 nt UMIs were removed from the trimmed reads using cutadapt (version 1.18) with options -u 4 and -u -4 After removing UMIs, the reads from 28 to 30 nt were selected using bioawk version 20110810 (git commit fd40150b7c557da45e781a999d372abbc634cc21) The size-selected reads were mapped on the C. elegans genome (WBcel235) using bowtie2 (version 2.3.4.3) with options -L 6 -i S,1,0.8 -N 0 - Mapped and remapped reads were used to estimate the abundance of structural RNAs using featureCounts (version 1.6.3) with options -O -s 1 --fracOverlap 1 and annotations corresponding to tRNA, snRNA, snoRNA, rRNA or RNA (as annotated in the iGenome distribution of WBcel235 obtained at ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Caenorhabditis_elegans/Ensembl/WBcel235/Caenorhabditis_elegans_Ensembl_WBcel235.tar.gz) + Mapped reads were used to estimate the abundance of structural RNAs using featureCounts (version 1.6.3) with options -O -s 1 --fracOverlap 1 and annotations corresponding to tRNA, snRNA, snoRNA, rRNA or RNA (as annotated in the iGenome distribution of WBcel235 obtained at ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Caenorhabditis_elegans/Ensembl/WBcel235/Caenorhabditis_elegans_Ensembl_WBcel235.tar.gz) The abundance of non-structural RNAs was estimated by subtracting the above counts from the number of mapped and remapped reads. Initially mapped reads were classified using a custom python program according to their length, composition and on the annotations on which they mapped. Reads that didn't match miRNA and piRNA annotations were considered as potential endo-siRNAs. The potential endo-siRNAs of size 21 to 23 nt that started with G were classified as \"si_22G\" if they mapped antisense to annotation belonging to the following categories: DNA transposons, RNA transposons, satellites, simple repeats (as annotated in http://hgdownload.cse.ucsc.edu/goldenPath/ce11/database/rmsk.txt.gz) or pseudogene or protein-coding genes (as annotated in the iGenome distribution of WBcel235 obtained at ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Caenorhabditis_elegans/Ensembl/WBcel235/Caenorhabditis_elegans_Ensembl_WBcel235.tar.gz)