diff --git a/libhts/install.sh b/libhts/install.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cfb2c7d1ced1245323b318c40a9c543d667a3eac
--- /dev/null
+++ b/libhts/install.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+python3.6 setup.py build_ext
+pip3.6 install -e .
diff --git a/libhts/libhts/__init__.py b/libhts/libhts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..601c3757245ecc81c1130685b22e096c338d483f
--- /dev/null
+++ b/libhts/libhts/__init__.py
@@ -0,0 +1 @@
+from .libhts import do_deseq2
diff --git a/libhts/libhts/libhts.py b/libhts/libhts/libhts.py
new file mode 100644
index 0000000000000000000000000000000000000000..f28879ea0a1da1ab7ed899e89c7bffedbd2217ef
--- /dev/null
+++ b/libhts/libhts/libhts.py
@@ -0,0 +1,74 @@
+import pandas as pd
+from rpy2.robjects import r, pandas2ri, Formula, StrVector
+as_df = r("as.data.frame")
+from rpy2.robjects.packages import importr
+deseq2 = importr("DESeq2")
+
+# Values used for every option the caller leaves out of *deseq2_args*.
+DEFAULT_DESEQ2_ARGS = {
+    "betaPrior": True,
+    "addMLE": True,
+    "independentFiltering": True}
+
+
+def do_deseq2(cond_names, conditions, counts_data,
+              formula=None, contrast=None, deseq2_args=None):
+    """Run a DESeq2 differential expression analysis.
+
+    *cond_names* are used as row names of the ``colData`` table built from
+    *conditions* (anything ``pd.DataFrame`` accepts). When that table has
+    both a "lib" and a "treat" column, a combined "lib_treat" column is
+    added so that contrasts between factor combinations are possible.
+
+    *counts_data* is passed to DESeq2 as ``countData``; its index becomes
+    the index of the returned table (presumably a counts DataFrame with one
+    column per condition -- TODO confirm against callers).
+
+    *formula* is the design formula (default: ``Formula("~ lib")``).
+
+    *contrast* is passed to DESeq2 ``results`` and is required, for example
+    ``StrVector(["lib", mutant, reference])``: the mutant and reference
+    names are only known to the caller.
+
+    *deseq2_args* may set "betaPrior" (passed to ``DESeq``), "addMLE" and
+    "independentFiltering" (passed to ``results``); missing keys take the
+    values of ``DEFAULT_DESEQ2_ARGS``.
+
+    Returns the DESeq2 results as converted by ``pandas2ri.ri2py``.
+    Raises ValueError when *contrast* is None.
+    """
+    if formula is None:
+        formula = Formula("~ lib")
+    if contrast is None:
+        # There is no usable default: MUT and REF are not defined in this
+        # module, so building one here used to fail with a NameError.
+        raise ValueError(
+            'contrast is required, '
+            'e.g. StrVector(["lib", mutant, reference])')
+    # Complete the options without modifying the caller's dictionary.
+    deseq2_args = {**DEFAULT_DESEQ2_ARGS, **(deseq2_args or {})}
+    col_data = pd.DataFrame(conditions).assign(cond_name=pd.Series(cond_names).values)
+    col_data.set_index("cond_name", inplace=True)
+    # In case we want contrasts between factor combinations
+    if ("lib" in col_data.columns) and ("treat" in col_data.columns):
+        col_data = col_data.assign(
+            lib_treat = ["%s_%s" % (lib, treat) for (lib, treat) in zip(
+                col_data["lib"], col_data["treat"])])
+    # http://stackoverflow.com/a/31206596/1878788
+    pandas2ri.activate() # makes some conversions automatic
+    # r_counts_data = pandas2ri.py2ri(counts_data)
+    # r_col_data = pandas2ri.py2ri(col_data)
+    # r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib"))
+    dds = deseq2.DESeqDataSetFromMatrix(
+        countData=counts_data,
+        colData=col_data,
+        design=formula)
+    dds = deseq2.DESeq(dds, betaPrior=deseq2_args["betaPrior"])
+    res = pandas2ri.ri2py(as_df(deseq2.results(
+        dds,
+        contrast=contrast,
+        addMLE=deseq2_args["addMLE"],
+        independentFiltering=deseq2_args["independentFiltering"])))
+    res.index = counts_data.index
+    return res
+
diff --git a/libhts/setup.py b/libhts/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2248899dd64e96eb619c3e32c4d3c459b05368f
--- /dev/null
+++ b/libhts/setup.py
@@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+#from Cython.Build import cythonize
+
+name = "libhts"
+
+# Adapted from Biopython
+__version__ = "Undefined"
+# Read the version assignment from the package, closing the file afterwards.
+with open("%s/__init__.py" % name) as init_file:
+    for line in init_file:
+        if (line.startswith('__version__')):
+            exec(line.strip())
+
+
+setup(
+    name=name,
+    version=__version__,
+    description="Miscellaneous things to process high throughput sequencing data.",
+    author="Blaise Li",
+    author_email="blaise.li@normalesup.org",
+    license="MIT",
+    packages=find_packages())
+    #ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
+    #install_requires=["cytoolz"],
+    #zip_safe=False
diff --git a/libsmallrna/install.sh b/libsmallrna/install.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cfb2c7d1ced1245323b318c40a9c543d667a3eac
--- /dev/null
+++ b/libsmallrna/install.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+python3.6 setup.py build_ext
+pip3.6 install -e .
diff --git a/libworkflows/libworkflows/__init__.py b/libworkflows/libworkflows/__init__.py
index 3425888f9beb1b9161852e94d12f0d99cadc2f10..d33b9a4308ab00e35b6b059b8e0fd624edf1ba09 100644
--- a/libworkflows/libworkflows/__init__.py
+++ b/libworkflows/libworkflows/__init__.py
@@ -1 +1 @@
-from .libworkflows import file_len, filter_combinator, strip_split
+from .libworkflows import file_len, filter_combinator, read_htseq, strip_split, sum_htseq_counts
diff --git a/libworkflows/libworkflows/libworkflows.py b/libworkflows/libworkflows/libworkflows.py
index 069120cec0688f324ec1d569a75ce92994d241cd..632a78f30d88383b823f571f33ba7a17ec7a9752 100644
--- a/libworkflows/libworkflows/libworkflows.py
+++ b/libworkflows/libworkflows/libworkflows.py
@@ -7,6 +7,42 @@ def strip_split(text):
     return split(strip(text), "\t")
 
 
+def sum_htseq_counts(counts_filename):
+    """Return the sum of the feature counts in an htseq-count output file.
+
+    Each line is split with strip_split and its second field is read as an
+    integer. Lines whose first field starts with "__" (for instance the
+    "__no_feature" counter) are left out, and blank lines are skipped.
+    """
+    with open(counts_filename) as counts_file:
+        # A blank line is expected to give an empty first field (strip_split
+        # presumably strips the line before splitting it on tabs).
+        return sum(
+            int(fields[1])
+            for fields in map(strip_split, counts_file)
+            if fields[0] and not fields[0].startswith("__"))
+
+
+def read_htseq(filename):
+    """Read an htseq-count output file into a pandas DataFrame.
+
+    The file is read without header line, its first column being used as
+    index. The rows labelled with the "__"-prefixed names listed below are
+    dropped when present.
+    """
+    # No module-level pandas import is visible in this patch, hence the
+    # local one -- TODO: hoist it if the module already imports pandas.
+    import pandas as pd
+    special_counters = [
+        "__no_feature",
+        "__ambiguous",
+        "__too_low_aQual",
+        "__not_aligned",
+        "__alignment_not_unique"]
+    return pd.read_table(filename, index_col=0, header=None).drop(
+        special_counters, errors="ignore")
+
+
 # http://stackoverflow.com/a/845069/1878788
 def file_len(fname):
     p = Popen(
diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile
index 26f297c08acb75d9971cca8239040721dcc6bce8..c13807636db96c231a72218c01b0206f40824ae2 100644
--- a/small_RNA-seq/small_RNA-seq.snakefile
+++ b/small_RNA-seq/small_RNA-seq.snakefile
@@ -20,7 +20,7 @@ from fileinput import input as finput
 # Useful for functional style
 from itertools import chain, combinations, product, repeat, starmap
 from functools import reduce
-from operator import eq
+from operator import or_ as union
 
 # Useful data structures
 from collections import OrderedDict as od
@@ -87,8 +87,11 @@ ORIENTATIONS = config["orientations"]
 # See small_RNA_seq_annotate.py and
libsmallrna.pyx for more details #SMALL_TYPES = ["prot_si", "te_si", "pseu_si", "pi", "mi", "prot_siu", "te_siu", "pseu_siu"] SMALL_TYPES = config["small_types"] -SI_TYPES = ["prot_si", "te_si", "pseu_si", "satel_si", "simrep_si"] -SIU_TYPES = ["prot_siu", "te_siu", "pseu_siu", "satel_siu", "simrep_siu"] +#SI_TYPES = ["prot_si", "te_si", "pseu_si", "satel_si", "simrep_si"] +#SIU_TYPES = ["prot_siu", "te_siu", "pseu_siu", "satel_siu", "simrep_siu"] +SI_TYPE_PREFIXES = ["prot", "te", "pseu", "satel", "simrep"] +SI_TYPES = [prefix + "_si" for prefix in SI_TYPE_PREFIXES] +SIU_TYPES = [prefix + "_siu" for prefix in SI_TYPE_PREFIXES] #PISIMI_TYPES = ["si", "pi", "mi", "siu"] #STANDARDS = ["zscore", "robust", "minmax", "unit"] #STANDARDS = ["zscore", "robust", "minmax"] @@ -187,7 +190,8 @@ UNSCALED_INSIDE = 0 META_MIN_LEN = 2 * UNSCALED_INSIDE MIN_DIST = 2 * META_MARGIN -from libworkflows import strip_split, file_len, filter_combinator +from libhts import do_deseq2 +from libworkflows import strip_split, file_len, filter_combinator, sum_htseq_counts strip = str.strip # split = str.split # @@ -296,7 +300,7 @@ wildcard_constraints: biotype="|".join(COUNT_BIOTYPES + ANNOT_BIOTYPES), gene_list="|".join(GENE_LISTS), type_set="|".join(["all", "protein_coding", "protein_coding_TE"]), - small_type="[spm]i|pisimi|siu|prot_si|te_si|pseu_si|satel_si|simrep_si|prot_siu|te_siu|pseu_siu|satel_siu|simrep_siu||sisiu|all_si", + small_type="[spm]i|pisimi|siu|prot_si|te_si|pseu_si|satel_si|simrep_si|prot_siu|te_siu|pseu_siu|satel_siu|simrep_siu|sisiu|all_si", read_type="raw|trimmed|deduped|%s|nomap|nomap_siRNA|mapped|siRNA|piRNA|miRNA|siuRNA|prot_siRNA|te_siRNA|pseu_siRNA|satel_siRNA|simrep_siRNA|prot_siuRNA|te_siuRNA|pseu_siuRNA|satel_siuRNA|simrep_siuRNA|all_siRNA" % size_selected, standard="zscore|robust|minmax|unit|identity", orientation="all|fwd|rev", @@ -364,7 +368,8 @@ rule all: #expand(expand(OPJ(output_dir, "figures", "{{lib}}_{{treat}}_{{rep}}", 
"{read_type}_{orientation}_on_merged_isolated_%d_{biotype}_min_%d_meta_profile.{fig_format}" % (MIN_DIST, META_MIN_LEN)), read_type=[size_selected, "siRNA", "siuRNA", "all_siRNA"], orientation=["all"], biotype=["protein_coding"], fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS, treat=TREATS), expand(OPJ(local_annot_dir, "transcripts_{type_set}", "merged_isolated_{min_dist}.bed"), type_set=["all", "protein_coding", "protein_coding_TE"], min_dist="0 5 10 25 50 100 250 500 1000 2500 5000 10000".split()), expand(OPJ(local_annot_dir, "transcripts_{type_set}", "merged_isolated_{min_dist}_{biotype}_min_{min_len}.bed"), type_set=["all", "protein_coding", "protein_coding_TE"], min_dist="0 5 10 25 50 100 250 500 1000 2500 5000 10000".split(), biotype=["protein_coding"], min_len=[str(META_MIN_LEN)]), - expand(OPJ(output_dir, "figures", "mean_meta_profiles_meta_scale_{meta_scale}", "{read_type}_{orientation}_on_{type_set}_merged_isolated_{min_dist}_{biotype}_min_{min_len}_meta_profile.{fig_format}"), meta_scale= [str(META_SCALE)], lib=LIBS, treat=TREATS, read_type=[size_selected, "siRNA", "siuRNA", "miRNA", "piRNA", "all_siRNA"], orientation=["all"], type_set=["protein_coding", "protein_coding_TE"], min_dist=[str(MIN_DIST)], biotype=["protein_coding", "DNA_transposons_rmsk", "RNA_transposons_rmsk"], min_len=[str(META_MIN_LEN)], fig_format=FIG_FORMATS), + expand(OPJ(output_dir, "figures", "mean_meta_profiles_meta_scale_{meta_scale}", "{read_type}_{orientation}_on_{type_set}_merged_isolated_{min_dist}_{biotype}_min_{min_len}_meta_profile.{fig_format}"), meta_scale= [str(META_SCALE)], lib=LIBS, treat=TREATS, read_type=[size_selected, "siRNA", "siuRNA", "miRNA", "piRNA", "all_siRNA"], orientation=["all"], type_set=["protein_coding_TE"], min_dist=[str(MIN_DIST)], biotype=["protein_coding", "DNA_transposons_rmsk", "RNA_transposons_rmsk"], min_len=[str(META_MIN_LEN)], fig_format=FIG_FORMATS), + expand(OPJ(output_dir, "figures", 
"mean_meta_profiles_meta_scale_{meta_scale}", "{read_type}_{orientation}_on_{type_set}_merged_isolated_{min_dist}_{biotype}_min_{min_len}_meta_profile.{fig_format}"), meta_scale= [str(META_SCALE)], lib=LIBS, treat=TREATS, read_type=[size_selected, "siRNA", "siuRNA", "miRNA", "piRNA", "all_siRNA"], orientation=["all"], type_set=["protein_coding"], min_dist=[str(MIN_DIST)], biotype=["protein_coding"], min_len=[str(META_MIN_LEN)], fig_format=FIG_FORMATS), #expand(OPJ(output_dir, "figures", "{lib}_{treat}_{rep}", "{read_type}_{orientation}_on_merged_isolated_%d_{biotype}_min_%d_meta_profile.{fig_format}" % (MIN_DIST, META_MIN_LEN)), lib=LIBS, treat=TREATS, rep=REPS, read_type=[size_selected, "siRNA", "siuRNA"], orientation=["all"], biotype=["protein_coding"], fig_format=FIG_FORMATS), expand(OPJ(output_dir, "figures", "mean_meta_profiles_meta_scale_{meta_scale}", "{read_type}_{orientation}_on_{type_set}_merged_isolated_{min_dist}_{gene_list}_meta_profile.{fig_format}"), meta_scale=[str(META_SCALE)], lib=LIBS, treat=TREATS, read_type=[size_selected, "siRNA", "siuRNA", "miRNA", "piRNA", "all_siRNA"], orientation=["all"], type_set=["protein_coding_TE"], min_dist=["0"], gene_list=["replication_dependent_octamer_histone"], fig_format=FIG_FORMATS), expand(OPJ(output_dir, "figures", "{lib}_{treat}_{rep}", "{read_type}_{orientation}_pi_targets_in_{biotype}_profile.{fig_format}"), lib=LIBS, treat=TREATS, rep=REPS, read_type=[size_selected, "siRNA", "siuRNA", "miRNA", "piRNA", "all_siRNA"], orientation=["all"], biotype=["protein_coding"], fig_format=FIG_FORMATS), @@ -377,16 +382,16 @@ rule all: expand(expand(OPJ(output_dir, "figures", "{{lib}}_{{treat}}_{{rep}}", "{read_type}_size_distribution.{fig_format}"), read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS, treat=TREATS), expand(expand(OPJ(output_dir, "figures", "{{lib}}_{{treat}}_{{rep}}", "%s_smallRNA_pie.{fig_format}" % size_selected), fig_format=FIG_FORMATS), filtered_product, 
lib=LIBS, rep=REPS, treat=TREATS), expand(OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "{small_type}_median_ratios_to_pseudo_ref.txt"), small_type=SMALL_TYPES + ["pisimi", "all_si"]), - expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_deseq2_{id_type}.txt"), contrast=CONTRASTS, small_type=SMALL_TYPES + ["pisimi"], id_type=ID_TYPES), - expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_counts_and_res_{id_type}.txt"), contrast=CONTRASTS, small_type=SMALL_TYPES + ["pisimi"], id_type=ID_TYPES), - expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_up_genes_{id_type}.txt"), contrast=CONTRASTS, small_type=SMALL_TYPES + ["pisimi"], id_type=ID_TYPES), - expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_down_genes_{id_type}.txt"), contrast=CONTRASTS, small_type=SMALL_TYPES + ["pisimi"], id_type=ID_TYPES), - expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_pairplots.{fig_format}"), contrast=CONTRASTS, small_type=SMALL_TYPES + ["pisimi"], fig_format=FIG_FORMATS), - expand(OPJ(output_dir, "figures", "{small_type}_{standard}_PCA.{fig_format}"), small_type=SMALL_TYPES, standard=STANDARDS, fig_format=FIG_FORMATS), - expand(OPJ(output_dir, "figures", "{small_type}_{standard}_PC1_PC2_distrib.{fig_format}"), small_type=SMALL_TYPES, standard=STANDARDS, fig_format=FIG_FORMATS), + expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_deseq2_{id_type}.txt"), contrast=CONTRASTS, small_type=["pisimi"], id_type=ID_TYPES), + expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_counts_and_res_{id_type}.txt"), contrast=CONTRASTS, 
small_type=["pisimi"], id_type=ID_TYPES), + expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_up_genes_{id_type}.txt"), contrast=CONTRASTS, small_type=["pisimi"], id_type=ID_TYPES), + expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_down_genes_{id_type}.txt"), contrast=CONTRASTS, small_type=["pisimi"], id_type=ID_TYPES), + expand(OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_pairplots.{fig_format}"), contrast=CONTRASTS, small_type=["pisimi"], fig_format=FIG_FORMATS), + expand(OPJ(output_dir, "figures", "{small_type}_{standard}_PCA.{fig_format}"), small_type=["pisimi"], standard=STANDARDS, fig_format=FIG_FORMATS), + expand(OPJ(output_dir, "figures", "{small_type}_{standard}_PC1_PC2_distrib.{fig_format}"), small_type=["pisimi"], standard=STANDARDS, fig_format=FIG_FORMATS), #expand(OPJ(output_dir, "figures", "{small_type}_clustermap.{fig_format}"), small_type=SMALL_TYPES, fig_format=FIG_FORMATS), #expand(OPJ(output_dir, "figures", "{small_type}_zscore_clustermap.{fig_format}"), small_type=SMALL_TYPES, fig_format=FIG_FORMATS), - expand(OPJ(output_dir, "figures", "{contrast}", "{small_type}_zscore_clustermap.{fig_format}"), contrast=CONTRASTS, small_type=SMALL_TYPES, fig_format=FIG_FORMATS), + expand(OPJ(output_dir, "figures", "{contrast}", "{small_type}_zscore_clustermap.{fig_format}"), contrast=CONTRASTS, small_type=["pisimi"], fig_format=FIG_FORMATS), #expand(OPJ(output_dir, "figures", "{small_type}_unit_clustermap.{fig_format}"), small_type=SMALL_TYPES, fig_format=FIG_FORMATS), # piRNA raise ValueError: `dataset` input should have multiple elements when plotting expand(OPJ(output_dir, "figures", "{small_type}_norm_correlations.{fig_format}"), small_type=["mi", "prot_si", "te_si", "pseu_si", "satel_si", "simrep_si", "prot_siu", "te_siu", "pseu_siu", "satel_siu", "simrep_siu", "pisimi"], 
fig_format=FIG_FORMATS), @@ -621,7 +626,7 @@ rule extract_nomap_siRNAs: rule sam2indexedbam: input: - rules.map_on_genome.output.sam, + sam = rules.map_on_genome.output.sam, #source_sam, output: sorted_bam = OPJ(output_dir, aligner, "mapped_C_elegans", "{lib}_{treat}_{rep}", "{read_type}_on_C_elegans_sorted.bam"), @@ -629,8 +634,10 @@ rule sam2indexedbam: message: "Sorting and indexing sam file for {wildcards.lib}_{wildcards.treat}_{wildcards.rep}_{wildcards.read_type}." log: - indexing_log=OPJ(log_dir, "sam2indexedbam_{lib}_{treat}_{rep}_{read_type}.log"), - indexing_err=OPJ(log_dir, "sam2indexedbam_{lib}_{treat}_{rep}_{read_type}.err"), + log = OPJ(log_dir, "sam2indexedbam_{lib}_{treat}_{rep}_{read_type}.log"), + err = OPJ(log_dir, "sam2indexedbam_{lib}_{treat}_{rep}_{read_type}.err"), + threads: + 4 # shell: # """ # nice -n 19 ionice -c2 -n7 sam2indexedbam.sh {input} 1> {log.indexing_log} 2> {log.indexing_err} @@ -730,6 +737,7 @@ rule merge_bigwig_reps: expand(OPJ(output_dir, aligner, "mapped_C_elegans", "{{lib}}_{{treat}}_{rep}", "{{lib}}_{{treat}}_{rep}_{{read_type}}_on_C_elegans_norm_{{orientation}}.bw"), rep=REPS), output: bw = OPJ(output_dir, aligner, "mapped_C_elegans", "{lib}_{treat}_mean", "{lib}_{treat}_mean_{read_type}_on_C_elegans_norm_{orientation}.bw"), + threads: 12 # to limit memory usage, actually run: bws = [pyBigWig.open(bw_filename) for bw_filename in input] #for bw_filename in input: @@ -775,23 +783,15 @@ rule htseq_count_reads: counts = OPJ(output_dir, aligner, "mapped_C_elegans", "htseq_count", "{lib}_{treat}_{rep}_{read_type}_on_C_elegans_{biotype}_{orientation}_counts.txt"), params: stranded = htseq_orientation2stranded, + mode = "intersection-nonempty", + annot = lambda wildcards : "/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/{biotype}.gtf".format(biotype=wildcards.biotype) message: "Counting {wildcards.orientation} {wildcards.biotype} reads for 
{wildcards.lib}_{wildcards.treat}_{wildcards.rep}_{wildcards.read_type} with htseq-count." log: - htseq_log = OPJ(log_dir, "count_reads_{lib}_{treat}_{rep}_{read_type}.log"), - htseq_err = OPJ(log_dir, "count_reads_{lib}_{treat}_{rep}_{read_type}.err"), - shell: - """ - annot="/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/{wildcards.biotype}.gtf" - cmd="htseq-count -f bam -s {params.stranded} -a 0 -t transcript -i gene_id -m intersection-nonempty {input.sorted_bam} ${{annot}} > {output.counts}" - echo ${{cmd}} - eval ${{cmd}} 1> {log.htseq_log} 2> {log.htseq_err} || error_exit "htseq-count failed" - """ - - -def sum_htseq_counts(counts_filename): - with open(counts_filename) as counts_file: - return sum((int(fields[1]) for fields in map(strip_split, counts_file) if not fields[0].startswith("__"))) + log = OPJ(log_dir, "count_reads_{lib}_{treat}_{rep}_{read_type}.log"), + err = OPJ(log_dir, "count_reads_{lib}_{treat}_{rep}_{read_type}.err"), + wrapper: + "file:///pasteur/homes/bli/src/bioinfo_utils/snakemake_wrappers/htseq_count_reads" rule summarize_htseq_counts: @@ -1140,7 +1140,10 @@ rule resort_transcript_bed: out_bed = OPJ(local_annot_dir, "transcripts_{type_set}", "merged_resorted.bed"), shell: """ - sort -k1,1 -k4,4n -k5,5n {input.in_bed} > {output.out_bed} + # This bedops command checks bed format and finds problems like + # Error on line 36447 in annotations/transcripts_all/merged_by_gene.bed. Genomic end coordinate is less than (or equal to) start coordinate. 
+ #sort-bed {input.in_bed} > {output.out_bed} + sort -k1,1 -k2,2n -k3,3n {input.in_bed} > {output.out_bed} """ @@ -1665,6 +1668,7 @@ rule make_read_counts_summary: mi_counts = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "{lib}_{treat}_{rep}_%s_on_C_elegans" % size_selected, "mi_counts.txt"), output: summary = OPJ(output_dir, aligner, "summaries", "{lib}_{treat}_{rep}_%s_on_C_elegans_read_counts.txt" % size_selected), + #threads: 8 # to limit memory usage, actually run: with open(output.summary, "w") as summary_file: summary_file.write("%s\n" % "\t".join([ @@ -1790,9 +1794,17 @@ rule gather_small_RNA_counts: na_filter=False).drop(drop, errors="ignore") for counts_file in counts_files), axis=1).fillna(0).astype(int) counts_data.columns = COND_NAMES - if wildcards.small_type == "te_si" or wildcards.small_type == "te_siu": - te_families = [":".join(name.split(":")[:-1]) for name in counts_data.index] - counts_data = counts_data.assign(family=te_families).groupby("family").sum() + # Simple_repeat|Simple_repeat|(TTTTTTG)n:1 + # Simple_repeat|Simple_repeat|(TTTTTTG)n:2 + # Simple_repeat|Simple_repeat|(TTTTTTG)n:3 + # Simple_repeat|Simple_repeat|(TTTTTTG)n:4 + # -> Simple_repeat|Simple_repeat|(TTTTTTG)n + if wildcards.small_type in {"te_si", "te_siu", + "satel_si", "satel_siu", + "simrep_si", "simrep_siu"}: + repeat_families = [":".join(name.split(":")[:-1]) for name in counts_data.index] + # Sum the counts for a given repeat family + counts_data = counts_data.assign(family=repeat_families).groupby("family").sum() counts_data.index.names = ["gene"] counts_data.to_csv(output.counts_table, sep="\t") @@ -1876,6 +1888,59 @@ def source_small_RNA_counts(wildcards): return rules.gather_small_RNA_counts.output.counts_table
+def make_tag_association(dfs, tag):
+    """Associate the tag *tag* to the union of the indices of dataframes *dfs*.
+
+    Returns a one-column DataFrame whose index (named 0) is that union and
+    whose only column (named 1) holds *tag* on every row.
+    """
+    # The union is requested explicitly: with recent pandas versions the "|"
+    # operator is no longer a set union on Index objects.
+    idx = reduce(lambda left, right: left.union(right), (df.index for df in dfs))
+    return pd.DataFrame(list(zip(idx, repeat(tag)))).set_index(0)
+
+
+# TODO: use biotype instead?
+rule associate_small_type:
+    """This rule uses the small RNA count matrices to associate a biotype to each gene identifier."""
+    input:
+        sisiu_counts_tables = expand(OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "{small_type}_counts.txt"), small_type=SI_TYPES + SIU_TYPES),
+        pi_counts_table = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "pi_counts.txt"),
+        mi_counts_table = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "mi_counts.txt"),
+    output:
+        tags_table = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "id2tags.txt"),
+    run:
+        def sisiu_table(prefix):
+            si_table = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "{prefix}_si_counts.txt".format(prefix=prefix))
+            siu_table = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "{prefix}_siu_counts.txt".format(prefix=prefix))
+            return (si_table, siu_table)
+        sisiu_tags_tables = (make_tag_association(
+            (pd.read_table(table, index_col="gene") for table in sisiu_table(prefix)),
+            "%s_sisiu" % prefix) for prefix in SI_TYPE_PREFIXES)
+        pi_tags_table = make_tag_association(
+            (pd.read_table(input.pi_counts_table, index_col="gene"),), "pi")
+        mi_tags_table = make_tag_association(
+            (pd.read_table(input.mi_counts_table, index_col="gene"),), "mi")
+        tags_table = pd.concat(chain(sisiu_tags_tables, (pi_tags_table, mi_tags_table)))
+        tags_table.index.names = ["gene"]
+        tags_table.columns = ["small_type"]
+        tags_table.to_csv(output.tags_table, sep="\t")
+
+
+def add_tags_column(data, tags_table, tag_name):
+    """Add a column *tag_name* to *data* from the tags stored in *tags_table*.
+
+    *tags_table* is the path of a tab-separated file associating tags to row
+    names; it is read with its first column as index. Only the rows present
+    in both tables are kept (inner join).
+    """
+    tags = pd.read_table(tags_table, index_col=0)
+    df = pd.concat((data, tags), join="inner", axis=1)
+    # Assumes the tags file has exactly one data column.
+    df.columns = (*data.columns, tag_name)
+    return df
+
+
 def median_ratio_to_pseudo_ref_size_factors(counts_data): """Adapted from DESeq paper (doi:10.1186/gb-2010-11-10-r106)""" # Add pseudo-count to compute the geometric mean, then remove it @@ -2265,67 +2318,6 @@ def set_status(row): return "NS" -from rpy2.robjects import r, pandas2ri, Formula, StrVector -as_df = r("as.data.frame") -from rpy2.robjects.packages import importr -deseq2 = importr("DESeq2") - -def do_deseq2(cond_names, conditions, counts_data, formula=None, contrast=None, deseq2_args=None): - """Runs a DESeq2 differential expression analysis.""" - if formula is None: - formula = Formula("~ treat + lib") - if contrast is None: - contrast = StrVector(["lib", MUT, REF]) - if deseq2_args is None: - deseq2_args = {"betaPrior" : True, "addMLE" : True, "independentFiltering" : True} - col_data = pd.DataFrame(conditions).assign(cond_name=pd.Series(cond_names).values) - # col_data["cond_name"] = pd.Series(cond_names) - col_data.set_index("cond_name", inplace=True) - col_data = col_data.assign( - lib_treat = ["%s_%s" % (lib, treat) for (lib, treat) in zip( - col_data["lib"], col_data["treat"])]) - # col_data.to_csv(output.conditions_table, sep="\t") - # http://stackoverflow.com/a/31206596/1878788 - pandas2ri.activate() # makes some conversions automatic - # r_counts_data = pandas2ri.py2ri(counts_data) - # r_col_data = pandas2ri.py2ri(col_data) - # r.DESeqDataSetFromMatrix(countData=r_counts_data, colData=r_col_data, design=Formula("~lib")) - dds = deseq2.DESeqDataSetFromMatrix( - countData=counts_data, - colData=col_data, - design=formula) - # To be able to use lib_treat based contrasts: - #design=Formula("~ lib_treat")) - #dds_factors = dict(dds.do_slot("colData").do_slot("listData").items()) - #libs_factor = dds_factors["lib"] - #treats_factor = dds_factors["treat"] - #["%s_%s" % (libs_factor.levels[lib - 1], treats_factor.levels[treat - 1]) for (lib, treat) in zip(libs_factor, treats_factor)] - # 
robjects.globalenv["dds"] = dds - # print("Before relevel:") - # print(dds.do_slot("colData").do_slot("listData")) - ## Not necessary with explicit contrast when getting the results ? - # This hopefully works because lib is the first element of listData - #dds.do_slot("colData").do_slot("listData")[0] = r.relevel(dds.do_slot("colData").do_slot("listData")[0], ref=REF) - # This hopefully works because treat is the third element of listData - # TODO: This does not work - #dds.do_slot("colData").do_slot("listData")[2] = r.relevel(dds.do_slot("colData").do_slot("listData")[0], ref=TREATS[0]) - # print("After relevel:") - # print(dds.do_slot("colData").do_slot("listData")) - dds = deseq2.DESeq(dds, betaPrior=deseq2_args["betaPrior"]) - # dds = deseq2.DESeq(deseq2.DESeqDataSetFromMatrix( - # countData=counts_data, - # colData=col_data, - # design=Formula("~lib"))) - res = pandas2ri.ri2py(as_df(deseq2.results( - dds, - contrast=contrast, - addMLE=deseq2_args["addMLE"], - independentFiltering=deseq2_args["independentFiltering"]))) - res.index = counts_data.index - #res.index.names = ["gene"] - return res - - # TODO Make script to generate scatterplots separately def plot_scatter(data, x_column, y_column, hue_column="status"): sns.lmplot( @@ -2350,11 +2342,12 @@ def plot_counts_scatters(counts_and_res, cols): # TODO: check that padj == NA is not considered 0 ################################################### -# TODO: do joint DESeq2 analysis -> in separate "pisimi" rule +from rpy2.robjects import Formula, StrVector rule small_RNA_differential_expression: input: counts_table = source_small_RNA_counts, summary_table = rules.gather_read_counts_summaries.output.summary_table, + tags_table = rules.associate_small_type.output.tags_table, output: deseq_results = OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", "{small_type}_deseq2.txt"), up_genes = OPJ(output_dir, aligner, "mapped_C_elegans", "deseq2_%s" % size_selected, "{contrast}", 
"{small_type}_up_genes.txt"), @@ -2400,7 +2393,7 @@ rule small_RNA_differential_expression: by_norm = counts_data / size_factors by_norm.columns = by_norm.columns.map(lambda s: "%s_by_%s" % (s, normalizer)) counts_and_res = pd.concat((counts_and_res, by_norm), axis=1) - counts_and_res = pd.concat((counts_and_res, res), axis=1).assign(status=res.apply(set_status, axis=1)) + counts_and_res = add_tags_column(pd.concat((counts_and_res, res), axis=1).assign(status=res.apply(set_status, axis=1)), input.tags_table, "small_type") counts_and_res.to_csv(output.counts_and_res, sep="\t", na_rep="NA") # Saving lists of genes gaining or loosing siRNAs # diff_expressed = res.query("padj < 0.05") diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.16u5vrtp.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.16u5vrtp.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..b346e864b5a5c5af118c2b85b5d44282ba7f40ce --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.16u5vrtp.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = 
pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XR\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_HS30_2_piRNA_on_C_elegans/piRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XA\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_HS30_2/piRNA_on_C_elegans.samq\rXS\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_HS30_2_piRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq X\x04\x00\x00\x00HS30q!X\x01\x00\x00\x002q"X\x05\x00\x00\x00piRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X&\x00\x00\x00logs/map_on_genome_WT_HS30_2_piRNA.logq?X&\x00\x00\x00logs/map_on_genome_WT_HS30_2_piRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x00\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00\x00\x00tR
NAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.5v24inro.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.5v24inro.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d7c6945d49be59f8e67023075128ff241d735c --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.5v24inro.wrapper.py @@ -0,0 +1,15 @@ + +######## 
Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XR\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/prg1_RT_2_siRNA_on_C_elegans/siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XA\x00\x00\x00results/bowtie2/mapped_C_elegans/prg1_RT_2/siRNA_on_C_elegans.samq\rXS\x00\x00\x00results/bowtie2/not_mapped_C_elegans/prg1_RT_2_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x04\x00\x00\x00prg1q X\x02\x00\x00\x00RTq!X\x01\x00\x00\x002q"X\x05\x00\x00\x00siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X&\x00\x00\x00logs/map_on_genome_prg1_RT_2_siRNA.logq?X&\x00\x00\x00logs/map_on_genome_prg1_RT_2_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x00\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00\x00\x00tR
NAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.7d9py3f6.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.7d9py3f6.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..c9663447da518fbfb0b29efe53a236e4ee45f04c --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.7d9py3f6.wrapper.py @@ -0,0 +1,15 @@ + +######## 
Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XY\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/prg1_HS30RT120_2_siRNA_on_C_elegans/siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XH\x00\x00\x00results/bowtie2/mapped_C_elegans/prg1_HS30RT120_2/siRNA_on_C_elegans.samq\rXZ\x00\x00\x00results/bowtie2/not_mapped_C_elegans/prg1_HS30RT120_2_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x04\x00\x00\x00prg1q X\t\x00\x00\x00HS30RT120q!X\x01\x00\x00\x002q"X\x05\x00\x00\x00siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X-\x00\x00\x00logs/map_on_genome_prg1_HS30RT120_2_siRNA.logq?X-\x00\x00\x00logs/map_on_genome_prg1_HS30RT120_2_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x00\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04
\x00\x00\x00tRNAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.by3fj1v8.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.by3fj1v8.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..b69adf099009c8270b555beb0dffb3f170368cda --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.by3fj1v8.wrapper.py @@ -0,0 +1,15 @@ + 
+######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XT\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/prg1_HS30_2_siRNA_on_C_elegans/siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XC\x00\x00\x00results/bowtie2/mapped_C_elegans/prg1_HS30_2/siRNA_on_C_elegans.samq\rXU\x00\x00\x00results/bowtie2/not_mapped_C_elegans/prg1_HS30_2_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x04\x00\x00\x00prg1q X\x04\x00\x00\x00HS30q!X\x01\x00\x00\x002q"X\x05\x00\x00\x00siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X(\x00\x00\x00logs/map_on_genome_prg1_HS30_2_siRNA.logq?X(\x00\x00\x00logs/map_on_genome_prg1_HS30_2_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x00\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00\x00\x
00tRNAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.ftb48_zh.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.ftb48_zh.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..eaa37b22404ebf785cbc09529c7369c5a87a3cb5 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.ftb48_zh.wrapper.py @@ -0,0 +1,15 @@ + +######## 
Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05X[\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_HS30RT120_1_18-26_on_C_elegans/all_siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XJ\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_HS30RT120_1/all_siRNA_on_C_elegans.samq\rX\\\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_HS30RT120_1_all_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq X\t\x00\x00\x00HS30RT120q!X\x01\x00\x00\x001q"X\t\x00\x00\x00all_siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X/\x00\x00\x00logs/map_on_genome_WT_HS30RT120_1_all_siRNA.logq?X/\x00\x00\x00logs/map_on_genome_WT_HS30RT120_1_all_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(h"X\x01\x00\x00\x002qdeX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\t\x00\x00\x00positionsqi]qj(X\x05\x00\x00\x00firstqkX\x04\x00\x00\x00lastqleX\x0c\x00\x00\x00orientationsqm]qn(X\x03\x00\x00\x00fwdqoX\x03\x00\x00\x00revqpX\x03\x00\x00\x00allqqeX\x0b\x00\x00\x00small_typesqr]qs(X\x07\x00\x00\x00prot_siqtX\x05\x00\x00\x00te_siquX\x07\x00\x00\x00pseu_siqvX\x08\x00\x00\x00satel_siqwX\t\x00\x00\x00simrep_siqxX\x02\x00\x00\x00piqyX\x02\x00\x00\x00miqzX\x08\x00\x00\x00prot_siuq{X\x06\x00\x00\x00te_siuq|X\x08\x00\x00\x00pseu_siuq}X\t\x00\x00\x00satel_siuq~X\n\x00\x00\x00simrep_siuq\x7feX\x0e\x00\x00\x00count_biotypesq\x80
]q\x81(X\t\x00\x00\x00antisenseq\x82X\x04\x00\x00\x00tRNAq\x83X\x05\x00\x00\x00snRNAq\x84X\x06\x00\x00\x00snoRNAq\x85X\x04\x00\x00\x00rRNAq\x86X\x05\x00\x00\x00piRNAq\x87X\x05\x00\x00\x00ncRNAq\x88X\x05\x00\x00\x00miRNAq\x89X\x07\x00\x00\x00lincRNAq\x8aX\x0e\x00\x00\x00protein_codingq\x8bX\n\x00\x00\x00pseudogeneq\x8cX\t\x00\x00\x00antisenseq\x8dX\x14\x00\x00\x00DNA_transposons_rmskq\x8eX\x14\x00\x00\x00RNA_transposons_rmskq\x8fX\x0f\x00\x00\x00satellites_rmskq\x90X\x13\x00\x00\x00simple_repeats_rmskq\x91eX\x0e\x00\x00\x00annot_biotypesq\x92]q\x93(X\t\x00\x00\x00antisenseq\x94X\x04\x00\x00\x00tRNAq\x95X\x05\x00\x00\x00snRNAq\x96X\x06\x00\x00\x00snoRNAq\x97X\x04\x00\x00\x00rRNAq\x98X\x05\x00\x00\x00piRNAq\x99X\x05\x00\x00\x00ncRNAq\x9aX\x05\x00\x00\x00miRNAq\x9bX\x07\x00\x00\x00lincRNAq\x9cX\x12\x00\x00\x00protein_coding_CDSq\x9dX\x12\x00\x00\x00protein_coding_UTRq\x9eX\x1a\x00\x00\x00protein_coding_pure_intronq\x9fX\n\x00\x00\x00pseudogeneq\xa0X\t\x00\x00\x00antisenseq\xa1X\x14\x00\x00\x00DNA_transposons_rmskq\xa2X\x14\x00\x00\x00RNA_transposons_rmskq\xa3X\x0f\x00\x00\x00satellites_rmskq\xa4X\x13\x00\x00\x00simple_repeats_rmskq\xa5eX\n\x00\x00\x00gene_listsq\xa6]q\xa7X%\x00\x00\x00replication_dependent_octamer_histoneq\xa8aX\x08\x00\x00\x00data_dirq\xa9X\x04\x00\x00\x00dataq\xaaX\t\x00\x00\x00annot_dirq\xabX_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\xacX\x0f\x00\x00\x00local_annot_dirq\xadX\x0b\x00\x00\x00annotationsq\xaeX\x07\x00\x00\x00alignerq\xafX\x07\x00\x00\x00bowtie2q\xb0X\x05\x00\x00\x00indexq\xb1h\x18X\x0b\x00\x00\x00convert_dirq\xb2X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\xb3X\n\x00\x00\x00output_dirq\xb4X\x07\x00\x00\x00resultsq\xb5X\x07\x00\x00\x00log_dirq\xb6X\x04\x00\x00\x00logsq\xb7uX\x04\x00\x00\x00ruleq\xb8X\r\x00\x00\x00map_on_genomeq\xb9ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ 
+genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.km0dybww.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.km0dybww.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..d575a1df52e0589dcbf792e982eb196ffc4edf51 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.km0dybww.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XT\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/prg1_HS30_1_siRNA_on_C_elegans/siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XC\x00\x00\x00results/bowtie2/mapped_C_elegans/prg1_HS30_1/siRNA_on_C_elegans.samq\rXU\x00\x00\x00results/bowtie2/not_mapped_C_elegans/prg1_HS30_1_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x04\x00\x00\x00prg1q 
X\x04\x00\x00\x00HS30q!X\x01\x00\x00\x001q"X\x05\x00\x00\x00siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X(\x00\x00\x00logs/map_on_genome_prg1_HS30_1_siRNA.logq?X(\x00\x00\x00logs/map_on_genome_prg1_HS30_1_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(h"X\x01\x00\x00\x002qdeX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x0
0\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00\x00\x00tRNAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.pq9m29fq.wrapper.py 
b/snakemake_wrappers/htseq_count_reads/.snakemake.pq9m29fq.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..4a449e2adb551d7c77231f818d5a0903dabf22b5 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.pq9m29fq.wrapper.py @@ -0,0 +1,14 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(XL\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bamq\x06XP\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bam.baiq\x07e}q\x08(X\x06\x00\x00\x00_namesq\t}q\n(X\n\x00\x00\x00sorted_bamq\x0bK\x00N\x86q\x0cX\x03\x00\x00\x00baiq\rK\x01N\x86q\x0euh\x0bh\x06h\rh\x07ubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(Xa\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/tRNA_fwd_counts.txtq\x12Xl\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/tRNA_fwd_counts_gene_names.txtq\x13e}q\x14(h\t}q\x15(X\x06\x00\x00\x00countsq\x16K\x00N\x86q\x17X\x10\x00\x00\x00counts_convertedq\x18K\x01N\x86q\x19uh\x16h\x12h\x18h\x13ubX\x06\x00\x00\x00paramsq\x1acsnakemake.io\nParams\nq\x1b)\x81q\x1c(X\x03\x00\x00\x00yesq\x1dX\x05\x00\x00\x00unionq\x1eXh\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/tRNA.gtfq\x1fe}q 
(h\t}q!(X\x08\x00\x00\x00strandedq"K\x00N\x86q#X\x04\x00\x00\x00modeq$K\x01N\x86q%X\x05\x00\x00\x00annotq&K\x02N\x86q\'uh"h\x1dh$h\x1eh&h\x1fubX\t\x00\x00\x00wildcardsq(csnakemake.io\nWildcards\nq))\x81q*(X\x02\x00\x00\x00WTq+X\x01\x00\x00\x002q,X\x04\x00\x00\x00tRNAq-X\x03\x00\x00\x00fwdq.e}q/(h\t}q0(X\x03\x00\x00\x00libq1K\x00N\x86q2X\x03\x00\x00\x00repq3K\x01N\x86q4X\x07\x00\x00\x00biotypeq5K\x02N\x86q6X\x0b\x00\x00\x00orientationq7K\x03N\x86q8uX\x03\x00\x00\x00libq9h+X\x03\x00\x00\x00repq:h,X\x07\x00\x00\x00biotypeq;h-X\x0b\x00\x00\x00orientationq<h.ubX\x07\x00\x00\x00threadsq=K\x01X\t\x00\x00\x00resourcesq>csnakemake.io\nResources\nq?)\x81q@(K\x01K\x01e}qA(h\t}qB(X\x06\x00\x00\x00_coresqCK\x00N\x86qDX\x06\x00\x00\x00_nodesqEK\x01N\x86qFuhCK\x01hEK\x01ubX\x03\x00\x00\x00logqGcsnakemake.io\nLog\nqH)\x81qI(X)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.logqJX)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.errqKe}qL(h\t}qM(hGK\x00N\x86qNX\x03\x00\x00\x00errqOK\x01N\x86qPuhGhJhOhKubX\x06\x00\x00\x00configqQ}qR(X\x07\x00\x00\x00lib2rawqSccollections\nOrderedDict\nqT)RqU(X\x02\x00\x00\x00WTqVhT)RqW(X\x01\x00\x00\x001qXXG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_1.fastq.gzqYh,XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_2.fastq.gzqZX\x01\x00\x00\x003q[XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_3.fastq.gzq\\uX\t\x00\x00\x00met2set25q]hT)Rq^(hXXO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_1.fastq.gzq_h,XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_2.fastq.gzq`h[XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_3.fastq.gzqauuX\n\x00\x00\x00cond_pairsqb]qc]qd(X\t\x00\x00\x00met2set25qeX\x02\x00\x00\x00WTqfeahVX\x02\x00\x00\x00WTqgX\x06\x00\x00\x00mutantqhX\t\x00\x00\x00met2set25qiX\n\x00\x00\x00replicatesqj]qk(hXh,h[eX\x0c\x00\x00\x00orientationsql]qm(X\x03\x00\x00\x00
fwdqnX\x03\x00\x00\x00revqoX\x03\x00\x00\x00allqpeX\x08\x00\x00\x00biotypesqq]qr(X\t\x00\x00\x00antisenseqsX\x04\x00\x00\x00tRNAqtX\x05\x00\x00\x00snRNAquX\x06\x00\x00\x00snoRNAqvX\x04\x00\x00\x00rRNAqwX\x05\x00\x00\x00piRNAqxX\x05\x00\x00\x00ncRNAqyX\x05\x00\x00\x00miRNAqzX\x07\x00\x00\x00lincRNAq{X\x0e\x00\x00\x00protein_codingq|X\n\x00\x00\x00pseudogeneq}X\t\x00\x00\x00antisenseq~X\x14\x00\x00\x00DNA_transposons_rmskq\x7fX\x14\x00\x00\x00RNA_transposons_rmskq\x80X\x0f\x00\x00\x00satellites_rmskq\x81X\x13\x00\x00\x00simple_repeats_rmskq\x82eX\x08\x00\x00\x00data_dirq\x83X\x14\x00\x00\x00data_gonad_met2set25q\x84X\t\x00\x00\x00annot_dirq\x85X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x86X\x07\x00\x00\x00alignerq\x87X\x06\x00\x00\x00hisat2q\x88X\x07\x00\x00\x00counterq\x89X\x0b\x00\x00\x00htseq_countq\x8aX\x0b\x00\x00\x00mapping_dirq\x8bX\x17\x00\x00\x00mapping_gonad_met2set25q\x8cX\n\x00\x00\x00output_dirq\x8dX\x1b\x00\x00\x00merged_transcriptomes_gonadq\x8eX\x07\x00\x00\x00log_dirq\x8fX\x14\x00\x00\x00logs_gonad_met2set25q\x90uX\x04\x00\x00\x00ruleq\x91X\x11\x00\x00\x00htseq_count_readsq\x92ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +converter="/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes_id2name.pickle" +cmd="htseq-count -f bam -s {snakemake.params.stranded} -a 0 -t transcript -i gene_id -m {snakemake.params.mode} {snakemake.input.sorted_bam} {snakemake.params.annot} | tee {snakemake.output.counts} | id2name.py ${{converter}} > {snakemake.output.counts_converted}" +echo ${{cmd}} +eval ${{cmd}} 1> {snakemake.log.log} 2> {snakemake.log.err} || error_exit "htseq-count failed" +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.q4x91jrv.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.q4x91jrv.wrapper.py new file mode 100644 index 
0000000000000000000000000000000000000000..3b4e16626c27a914722ecc4e87e869fa4ae734f8 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.q4x91jrv.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XT\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_RT_2_18-26_on_C_elegans/all_siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XC\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_RT_2/all_siRNA_on_C_elegans.samq\rXU\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_RT_2_all_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq X\x02\x00\x00\x00RTq!X\x01\x00\x00\x002q"X\t\x00\x00\x00all_siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X(\x00\x00\x00logs/map_on_genome_WT_RT_2_all_siRNA.logq?X(\x00\x00\x00logs/map_on_genome_WT_RT_2_all_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\t\x00\x00\x00positionsqi]qj(X\x05\x00\x00\x00firstqkX\x04\x00\x00\x00lastqleX\x0c\x00\x00\x00orientationsqm]qn(X\x03\x00\x00\x00fwdqoX\x03\x00\x00\x00revqpX\x03\x00\x00\x00allqqeX\x0b\x00\x00\x00small_typesqr]qs(X\x07\x00\x00\x00prot_siqtX\x05\x00\x00\x00te_siquX\x07\x00\x00\x00pseu_siqvX\x08\x00\x00\x00satel_siqwX\t\x00\x00\x00simrep_siqxX\x02\x00\x00\x00piqyX\x02\x00\x00\x00miqzX\x08\x00\x00\x00prot_siuq{X\x06\x00\x00\x00te_siuq|X\x08\x00\x00\x00pseu_siuq}X\t\x00\x00\x00satel_siuq~X\n\x00\x00\x00simrep_siuq\x7feX\x0e\x00\x00\x00count_biotypesq\x80]q\x81(X\t\x00
\x00\x00antisenseq\x82X\x04\x00\x00\x00tRNAq\x83X\x05\x00\x00\x00snRNAq\x84X\x06\x00\x00\x00snoRNAq\x85X\x04\x00\x00\x00rRNAq\x86X\x05\x00\x00\x00piRNAq\x87X\x05\x00\x00\x00ncRNAq\x88X\x05\x00\x00\x00miRNAq\x89X\x07\x00\x00\x00lincRNAq\x8aX\x0e\x00\x00\x00protein_codingq\x8bX\n\x00\x00\x00pseudogeneq\x8cX\t\x00\x00\x00antisenseq\x8dX\x14\x00\x00\x00DNA_transposons_rmskq\x8eX\x14\x00\x00\x00RNA_transposons_rmskq\x8fX\x0f\x00\x00\x00satellites_rmskq\x90X\x13\x00\x00\x00simple_repeats_rmskq\x91eX\x0e\x00\x00\x00annot_biotypesq\x92]q\x93(X\t\x00\x00\x00antisenseq\x94X\x04\x00\x00\x00tRNAq\x95X\x05\x00\x00\x00snRNAq\x96X\x06\x00\x00\x00snoRNAq\x97X\x04\x00\x00\x00rRNAq\x98X\x05\x00\x00\x00piRNAq\x99X\x05\x00\x00\x00ncRNAq\x9aX\x05\x00\x00\x00miRNAq\x9bX\x07\x00\x00\x00lincRNAq\x9cX\x12\x00\x00\x00protein_coding_CDSq\x9dX\x12\x00\x00\x00protein_coding_UTRq\x9eX\x1a\x00\x00\x00protein_coding_pure_intronq\x9fX\n\x00\x00\x00pseudogeneq\xa0X\t\x00\x00\x00antisenseq\xa1X\x14\x00\x00\x00DNA_transposons_rmskq\xa2X\x14\x00\x00\x00RNA_transposons_rmskq\xa3X\x0f\x00\x00\x00satellites_rmskq\xa4X\x13\x00\x00\x00simple_repeats_rmskq\xa5eX\n\x00\x00\x00gene_listsq\xa6]q\xa7X%\x00\x00\x00replication_dependent_octamer_histoneq\xa8aX\x08\x00\x00\x00data_dirq\xa9X\x04\x00\x00\x00dataq\xaaX\t\x00\x00\x00annot_dirq\xabX_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\xacX\x0f\x00\x00\x00local_annot_dirq\xadX\x0b\x00\x00\x00annotationsq\xaeX\x07\x00\x00\x00alignerq\xafX\x07\x00\x00\x00bowtie2q\xb0X\x05\x00\x00\x00indexq\xb1h\x18X\x0b\x00\x00\x00convert_dirq\xb2X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\xb3X\n\x00\x00\x00output_dirq\xb4X\x07\x00\x00\x00resultsq\xb5X\x07\x00\x00\x00log_dirq\xb6X\x04\x00\x00\x00logsq\xb7uX\x04\x00\x00\x00ruleq\xb8X\r\x00\x00\x00map_on_genomeq\xb9ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" 
+genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.q_427g9f.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.q_427g9f.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..5c71cdca8be7ac2bf8be5e5c4c66cbc73aaa44b9 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.q_427g9f.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XP\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_RT_2_piRNA_on_C_elegans/piRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(X?\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_RT_2/piRNA_on_C_elegans.samq\rXQ\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_RT_2_piRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq 
X\x02\x00\x00\x00RTq!X\x01\x00\x00\x002q"X\x05\x00\x00\x00piRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X$\x00\x00\x00logs/map_on_genome_WT_RT_2_piRNA.logq?X$\x00\x00\x00logs/map_on_genome_WT_RT_2_piRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x00\x00\x00a
ntisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00\x00\x00tRNAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.ttk6hkva.wrapper.py 
b/snakemake_wrappers/htseq_count_reads/.snakemake.ttk6hkva.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..474ac9f7551b0b93ef753f592e8b988f9a0f3888 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.ttk6hkva.wrapper.py @@ -0,0 +1,14 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(XL\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bamq\x06XP\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bam.baiq\x07e}q\x08(X\x06\x00\x00\x00_namesq\t}q\n(X\n\x00\x00\x00sorted_bamq\x0bK\x00N\x86q\x0cX\x03\x00\x00\x00baiq\rK\x01N\x86q\x0euh\x0bh\x06h\rh\x07ubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(Xb\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/piRNA_all_counts.txtq\x12Xm\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/piRNA_all_counts_gene_names.txtq\x13e}q\x14(h\t}q\x15(X\x06\x00\x00\x00countsq\x16K\x00N\x86q\x17X\x10\x00\x00\x00counts_convertedq\x18K\x01N\x86q\x19uh\x16h\x12h\x18h\x13ubX\x06\x00\x00\x00paramsq\x1acsnakemake.io\nParams\nq\x1b)\x81q\x1c(X\x02\x00\x00\x00noq\x1dX\x05\x00\x00\x00unionq\x1eXi\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/piRNA.gtfq\x1fe}q 
(h\t}q!(X\x08\x00\x00\x00strandedq"K\x00N\x86q#X\x04\x00\x00\x00modeq$K\x01N\x86q%X\x05\x00\x00\x00annotq&K\x02N\x86q\'uh"h\x1dh$h\x1eh&h\x1fubX\t\x00\x00\x00wildcardsq(csnakemake.io\nWildcards\nq))\x81q*(X\x02\x00\x00\x00WTq+X\x01\x00\x00\x002q,X\x05\x00\x00\x00piRNAq-X\x03\x00\x00\x00allq.e}q/(h\t}q0(X\x03\x00\x00\x00libq1K\x00N\x86q2X\x03\x00\x00\x00repq3K\x01N\x86q4X\x07\x00\x00\x00biotypeq5K\x02N\x86q6X\x0b\x00\x00\x00orientationq7K\x03N\x86q8uX\x03\x00\x00\x00libq9h+X\x03\x00\x00\x00repq:h,X\x07\x00\x00\x00biotypeq;h-X\x0b\x00\x00\x00orientationq<h.ubX\x07\x00\x00\x00threadsq=K\x01X\t\x00\x00\x00resourcesq>csnakemake.io\nResources\nq?)\x81q@(K\x01K\x01e}qA(h\t}qB(X\x06\x00\x00\x00_coresqCK\x00N\x86qDX\x06\x00\x00\x00_nodesqEK\x01N\x86qFuhCK\x01hEK\x01ubX\x03\x00\x00\x00logqGcsnakemake.io\nLog\nqH)\x81qI(X)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.logqJX)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.errqKe}qL(h\t}qM(hGK\x00N\x86qNX\x03\x00\x00\x00errqOK\x01N\x86qPuhGhJhOhKubX\x06\x00\x00\x00configqQ}qR(X\x07\x00\x00\x00lib2rawqSccollections\nOrderedDict\nqT)RqU(X\x02\x00\x00\x00WTqVhT)RqW(X\x01\x00\x00\x001qXXG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_1.fastq.gzqYh,XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_2.fastq.gzqZX\x01\x00\x00\x003q[XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_3.fastq.gzq\\uX\t\x00\x00\x00met2set25q]hT)Rq^(hXXO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_1.fastq.gzq_h,XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_2.fastq.gzq`h[XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_3.fastq.gzqauuX\n\x00\x00\x00cond_pairsqb]qc]qd(X\t\x00\x00\x00met2set25qeX\x02\x00\x00\x00WTqfeahVX\x02\x00\x00\x00WTqgX\x06\x00\x00\x00mutantqhX\t\x00\x00\x00met2set25qiX\n\x00\x00\x00replicatesqj]qk(hXh,h[eX\x0c\x00\x00\x00orientationsql]qm(X\x03\x00\x00\x0
0fwdqnX\x03\x00\x00\x00revqoX\x03\x00\x00\x00allqpeX\x08\x00\x00\x00biotypesqq]qr(X\t\x00\x00\x00antisenseqsX\x04\x00\x00\x00tRNAqtX\x05\x00\x00\x00snRNAquX\x06\x00\x00\x00snoRNAqvX\x04\x00\x00\x00rRNAqwX\x05\x00\x00\x00piRNAqxX\x05\x00\x00\x00ncRNAqyX\x05\x00\x00\x00miRNAqzX\x07\x00\x00\x00lincRNAq{X\x0e\x00\x00\x00protein_codingq|X\n\x00\x00\x00pseudogeneq}X\t\x00\x00\x00antisenseq~X\x14\x00\x00\x00DNA_transposons_rmskq\x7fX\x14\x00\x00\x00RNA_transposons_rmskq\x80X\x0f\x00\x00\x00satellites_rmskq\x81X\x13\x00\x00\x00simple_repeats_rmskq\x82eX\x08\x00\x00\x00data_dirq\x83X\x14\x00\x00\x00data_gonad_met2set25q\x84X\t\x00\x00\x00annot_dirq\x85X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x86X\x07\x00\x00\x00alignerq\x87X\x06\x00\x00\x00hisat2q\x88X\x07\x00\x00\x00counterq\x89X\x0b\x00\x00\x00htseq_countq\x8aX\x0b\x00\x00\x00mapping_dirq\x8bX\x17\x00\x00\x00mapping_gonad_met2set25q\x8cX\n\x00\x00\x00output_dirq\x8dX\x1b\x00\x00\x00merged_transcriptomes_gonadq\x8eX\x07\x00\x00\x00log_dirq\x8fX\x14\x00\x00\x00logs_gonad_met2set25q\x90uX\x04\x00\x00\x00ruleq\x91X\x11\x00\x00\x00htseq_count_readsq\x92ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +converter="/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes_id2name.pickle" +cmd="htseq-count -f bam -s {snakemake.params.stranded} -a 0 -t transcript -i gene_id -m {snakemake.params.mode} {snakemake.input.sorted_bam} {snakemake.params.annot} | tee {snakemake.output.counts} | id2name.py ${{converter}} > {snakemake.output.counts_converted}" +echo ${{cmd}} +eval ${{cmd}} 1> {snakemake.log.log} 2> {snakemake.log.err} || error_exit "htseq-count failed" +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.uh5y8lab.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.uh5y8lab.wrapper.py new file mode 100644 index 
0000000000000000000000000000000000000000..8fbe9363b768c1d02ce63dc11ac4719c7219eaec --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.uh5y8lab.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XT\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_RT_1_18-26_on_C_elegans/all_siRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XC\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_RT_1/all_siRNA_on_C_elegans.samq\rXU\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_RT_1_all_siRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq X\x02\x00\x00\x00RTq!X\x01\x00\x00\x001q"X\t\x00\x00\x00all_siRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X(\x00\x00\x00logs/map_on_genome_WT_RT_1_all_siRNA.logq?X(\x00\x00\x00logs/map_on_genome_WT_RT_1_all_siRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(h"X\x01\x00\x00\x002qdeX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\t\x00\x00\x00positionsqi]qj(X\x05\x00\x00\x00firstqkX\x04\x00\x00\x00lastqleX\x0c\x00\x00\x00orientationsqm]qn(X\x03\x00\x00\x00fwdqoX\x03\x00\x00\x00revqpX\x03\x00\x00\x00allqqeX\x0b\x00\x00\x00small_typesqr]qs(X\x07\x00\x00\x00prot_siqtX\x05\x00\x00\x00te_siquX\x07\x00\x00\x00pseu_siqvX\x08\x00\x00\x00satel_siqwX\t\x00\x00\x00simrep_siqxX\x02\x00\x00\x00piqyX\x02\x00\x00\x00miqzX\x08\x00\x00\x00prot_siuq{X\x06\x00\x00\x00te_siuq|X\x08\x00\x00\x00pseu_siuq}X\t\x00\x00\x00satel_siuq~X\n\x00\x00\x00simrep_siuq\x7feX\x0e\x00\x00\x00count_biotypesq\x80]q\x81(X\t\x00
\x00\x00antisenseq\x82X\x04\x00\x00\x00tRNAq\x83X\x05\x00\x00\x00snRNAq\x84X\x06\x00\x00\x00snoRNAq\x85X\x04\x00\x00\x00rRNAq\x86X\x05\x00\x00\x00piRNAq\x87X\x05\x00\x00\x00ncRNAq\x88X\x05\x00\x00\x00miRNAq\x89X\x07\x00\x00\x00lincRNAq\x8aX\x0e\x00\x00\x00protein_codingq\x8bX\n\x00\x00\x00pseudogeneq\x8cX\t\x00\x00\x00antisenseq\x8dX\x14\x00\x00\x00DNA_transposons_rmskq\x8eX\x14\x00\x00\x00RNA_transposons_rmskq\x8fX\x0f\x00\x00\x00satellites_rmskq\x90X\x13\x00\x00\x00simple_repeats_rmskq\x91eX\x0e\x00\x00\x00annot_biotypesq\x92]q\x93(X\t\x00\x00\x00antisenseq\x94X\x04\x00\x00\x00tRNAq\x95X\x05\x00\x00\x00snRNAq\x96X\x06\x00\x00\x00snoRNAq\x97X\x04\x00\x00\x00rRNAq\x98X\x05\x00\x00\x00piRNAq\x99X\x05\x00\x00\x00ncRNAq\x9aX\x05\x00\x00\x00miRNAq\x9bX\x07\x00\x00\x00lincRNAq\x9cX\x12\x00\x00\x00protein_coding_CDSq\x9dX\x12\x00\x00\x00protein_coding_UTRq\x9eX\x1a\x00\x00\x00protein_coding_pure_intronq\x9fX\n\x00\x00\x00pseudogeneq\xa0X\t\x00\x00\x00antisenseq\xa1X\x14\x00\x00\x00DNA_transposons_rmskq\xa2X\x14\x00\x00\x00RNA_transposons_rmskq\xa3X\x0f\x00\x00\x00satellites_rmskq\xa4X\x13\x00\x00\x00simple_repeats_rmskq\xa5eX\n\x00\x00\x00gene_listsq\xa6]q\xa7X%\x00\x00\x00replication_dependent_octamer_histoneq\xa8aX\x08\x00\x00\x00data_dirq\xa9X\x04\x00\x00\x00dataq\xaaX\t\x00\x00\x00annot_dirq\xabX_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\xacX\x0f\x00\x00\x00local_annot_dirq\xadX\x0b\x00\x00\x00annotationsq\xaeX\x07\x00\x00\x00alignerq\xafX\x07\x00\x00\x00bowtie2q\xb0X\x05\x00\x00\x00indexq\xb1h\x18X\x0b\x00\x00\x00convert_dirq\xb2X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\xb3X\n\x00\x00\x00output_dirq\xb4X\x07\x00\x00\x00resultsq\xb5X\x07\x00\x00\x00log_dirq\xb6X\x04\x00\x00\x00logsq\xb7uX\x04\x00\x00\x00ruleq\xb8X\r\x00\x00\x00map_on_genomeq\xb9ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" 
+genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.v2tkj33z.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.v2tkj33z.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd1683a96244db4f914ab9a9385ff35a2f0ad20 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.v2tkj33z.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XW\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_HS30RT120_2_piRNA_on_C_elegans/piRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XF\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_HS30RT120_2/piRNA_on_C_elegans.samq\rXX\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_HS30RT120_2_piRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq 
X\t\x00\x00\x00HS30RT120q!X\x01\x00\x00\x002q"X\x05\x00\x00\x00piRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X+\x00\x00\x00logs/map_on_genome_WT_HS30RT120_2_piRNA.logq?X+\x00\x00\x00logs/map_on_genome_WT_HS30RT120_2_piRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(X\x01\x00\x00\x001qdh"eX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogen
equX\t\x00\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00\x00\x00tRNAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.weqd46if.wrapper.py 
b/snakemake_wrappers/htseq_count_reads/.snakemake.weqd46if.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..0d4f9733e6c0ad287e09c48a4ed780e11fc5fafd --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.weqd46if.wrapper.py @@ -0,0 +1,15 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05XW\x00\x00\x00results/bowtie2/mapped_C_elegans/reads/WT_HS30RT120_1_piRNA_on_C_elegans/piRNA.fastq.gzq\x06a}q\x07X\x06\x00\x00\x00_namesq\x08}q\tsbX\x06\x00\x00\x00outputq\ncsnakemake.io\nOutputFiles\nq\x0b)\x81q\x0c(XF\x00\x00\x00results/bowtie2/mapped_C_elegans/WT_HS30RT120_1/piRNA_on_C_elegans.samq\rXX\x00\x00\x00results/bowtie2/not_mapped_C_elegans/WT_HS30RT120_1_piRNA_unmapped_on_C_elegans.fastq.gzq\x0ee}q\x0f(h\x08}q\x10(X\x03\x00\x00\x00samq\x11K\x00N\x86q\x12X\x05\x00\x00\x00nomapq\x13K\x01N\x86q\x14uh\x11h\rh\x13h\x0eubX\x06\x00\x00\x00paramsq\x15csnakemake.io\nParams\nq\x16)\x81q\x17Xk\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/genomeq\x18a}q\x19(h\x08}q\x1aX\x05\x00\x00\x00indexq\x1bK\x00N\x86q\x1csh\x1bh\x18ubX\t\x00\x00\x00wildcardsq\x1dcsnakemake.io\nWildcards\nq\x1e)\x81q\x1f(X\x02\x00\x00\x00WTq X\t\x00\x00\x00HS30RT120q!X\x01\x00\x00\x001q"X\x05\x00\x00\x00piRNAq#e}q$(h\x08}q%(X\x03\x00\x00\x00libq&K\x00N\x86q\'X\x05\x00\x00\x00treatq(K\x01N\x86q)X\x03\x00\x00\x00repq*K\x02N\x86q+X\t\x00\x00\x00read_typeq,K\x03N\x86q-uX\x03\x00\x00\x00libq.h 
X\x05\x00\x00\x00treatq/h!X\x03\x00\x00\x00repq0h"X\t\x00\x00\x00read_typeq1h#ubX\x07\x00\x00\x00threadsq2K\x01X\t\x00\x00\x00resourcesq3csnakemake.io\nResources\nq4)\x81q5(K\x01K\x01e}q6(h\x08}q7(X\x06\x00\x00\x00_coresq8K\x00N\x86q9X\x06\x00\x00\x00_nodesq:K\x01N\x86q;uh8K\x01h:K\x01ubX\x03\x00\x00\x00logq<csnakemake.io\nLog\nq=)\x81q>(X+\x00\x00\x00logs/map_on_genome_WT_HS30RT120_1_piRNA.logq?X+\x00\x00\x00logs/map_on_genome_WT_HS30RT120_1_piRNA.errq@e}qA(h\x08}qB(X\x07\x00\x00\x00map_logqCK\x00N\x86qDX\x07\x00\x00\x00map_errqEK\x01N\x86qFuhCh?hEh@ubX\x06\x00\x00\x00configqG}qH(X\x07\x00\x00\x00lib2rawqIccollections\nOrderedDict\nqJ)RqK(X\x02\x00\x00\x00WTqLXQ\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/wt{treat}_{rep}.fastq.gzqMX\x04\x00\x00\x00prg1qNXS\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/small_RNA-seq/20162212/prg1{treat}_{rep}.fastq.gzqOuX\t\x00\x00\x00lib2adaptqPhJ)RqQ(hLX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqRhNX\x15\x00\x00\x00TGGAATTCTCGGGTGCCAAGGqSuX\x07\x00\x00\x00missingqT]qUhJ)RqVahLX\x02\x00\x00\x00WTqWX\x06\x00\x00\x00mutantqXX\x04\x00\x00\x00prg1qYX\x05\x00\x00\x00trim5qZX\x01\x00\x00\x004q[X\x05\x00\x00\x00trim3q\\h[X\n\x00\x00\x00treatmentsq]]q^(X\x02\x00\x00\x00RTq_X\x04\x00\x00\x00HS30q`X\t\x00\x00\x00HS30RT120qaeX\n\x00\x00\x00replicatesqb]qc(h"X\x01\x00\x00\x002qdeX\x07\x00\x00\x00min_lenqeX\x02\x00\x00\x0018qfX\x07\x00\x00\x00max_lenqgX\x02\x00\x00\x0026qhX\x0e\x00\x00\x00count_biotypesqi]qj(X\t\x00\x00\x00antisenseqkX\x04\x00\x00\x00tRNAqlX\x05\x00\x00\x00snRNAqmX\x06\x00\x00\x00snoRNAqnX\x04\x00\x00\x00rRNAqoX\x05\x00\x00\x00piRNAqpX\x05\x00\x00\x00ncRNAqqX\x05\x00\x00\x00miRNAqrX\x07\x00\x00\x00lincRNAqsX\x0e\x00\x00\x00protein_codingqtX\n\x00\x00\x00pseudogenequX\t\x00\x00\x00antisenseqvX\x14\x00\x00\x00DNA_transposons_rmskqwX\x14\x00\x00\x00RNA_transposons_rmskqxX\x0f\x00\x00\x00satellites_rmskqyX\x13\x00\x00\x00simple_repeats_rmskqzeX\x0e\x00\x00\x00annot_biotypesq{]q|(X\t\x00\x00\x00antisenseq}X\x04\x00
\x00\x00tRNAq~X\x05\x00\x00\x00snRNAq\x7fX\x06\x00\x00\x00snoRNAq\x80X\x04\x00\x00\x00rRNAq\x81X\x05\x00\x00\x00piRNAq\x82X\x05\x00\x00\x00ncRNAq\x83X\x05\x00\x00\x00miRNAq\x84X\x07\x00\x00\x00lincRNAq\x85X\x12\x00\x00\x00protein_coding_CDSq\x86X\x12\x00\x00\x00protein_coding_UTRq\x87X\x1a\x00\x00\x00protein_coding_pure_intronq\x88X\n\x00\x00\x00pseudogeneq\x89X\t\x00\x00\x00antisenseq\x8aX\x14\x00\x00\x00DNA_transposons_rmskq\x8bX\x14\x00\x00\x00RNA_transposons_rmskq\x8cX\x0f\x00\x00\x00satellites_rmskq\x8dX\x13\x00\x00\x00simple_repeats_rmskq\x8eeX\x08\x00\x00\x00data_dirq\x8fX\x04\x00\x00\x00dataq\x90X\t\x00\x00\x00annot_dirq\x91X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x92X\x0f\x00\x00\x00local_annot_dirq\x93X\x0b\x00\x00\x00annotationsq\x94X\x07\x00\x00\x00alignerq\x95X\x07\x00\x00\x00bowtie2q\x96X\x05\x00\x00\x00indexq\x97h\x18X\x0b\x00\x00\x00convert_dirq\x98X=\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Wormbase/WS253/geneIDsq\x99X\n\x00\x00\x00output_dirq\x9aX\x07\x00\x00\x00resultsq\x9bX\x07\x00\x00\x00log_dirq\x9cX\x04\x00\x00\x00logsq\x9duX\x04\x00\x00\x00ruleq\x9eX\r\x00\x00\x00map_on_genomeq\x9fub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.xbqcvz9a.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.xbqcvz9a.wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..b6d254cf7addcb524a1f3735fe51ef165f242572 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.xbqcvz9a.wrapper.py @@ -0,0 +1,14 @@ + 
+######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(XL\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bamq\x06XP\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bam.baiq\x07e}q\x08(X\x06\x00\x00\x00_namesq\t}q\n(X\n\x00\x00\x00sorted_bamq\x0bK\x00N\x86q\x0cX\x03\x00\x00\x00baiq\rK\x01N\x86q\x0euh\x0bh\x06h\rh\x07ubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(Xk\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/protein_coding_all_counts.txtq\x12Xv\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/protein_coding_all_counts_gene_names.txtq\x13e}q\x14(h\t}q\x15(X\x06\x00\x00\x00countsq\x16K\x00N\x86q\x17X\x10\x00\x00\x00counts_convertedq\x18K\x01N\x86q\x19uh\x16h\x12h\x18h\x13ubX\x06\x00\x00\x00paramsq\x1acsnakemake.io\nParams\nq\x1b)\x81q\x1c(X\x02\x00\x00\x00noq\x1dX\x05\x00\x00\x00unionq\x1eXr\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/protein_coding.gtfq\x1fe}q 
(h\t}q!(X\x08\x00\x00\x00strandedq"K\x00N\x86q#X\x04\x00\x00\x00modeq$K\x01N\x86q%X\x05\x00\x00\x00annotq&K\x02N\x86q\'uh"h\x1dh$h\x1eh&h\x1fubX\t\x00\x00\x00wildcardsq(csnakemake.io\nWildcards\nq))\x81q*(X\x02\x00\x00\x00WTq+X\x01\x00\x00\x002q,X\x0e\x00\x00\x00protein_codingq-X\x03\x00\x00\x00allq.e}q/(h\t}q0(X\x03\x00\x00\x00libq1K\x00N\x86q2X\x03\x00\x00\x00repq3K\x01N\x86q4X\x07\x00\x00\x00biotypeq5K\x02N\x86q6X\x0b\x00\x00\x00orientationq7K\x03N\x86q8uX\x03\x00\x00\x00libq9h+X\x03\x00\x00\x00repq:h,X\x07\x00\x00\x00biotypeq;h-X\x0b\x00\x00\x00orientationq<h.ubX\x07\x00\x00\x00threadsq=K\x01X\t\x00\x00\x00resourcesq>csnakemake.io\nResources\nq?)\x81q@(K\x01K\x01e}qA(h\t}qB(X\x06\x00\x00\x00_coresqCK\x00N\x86qDX\x06\x00\x00\x00_nodesqEK\x01N\x86qFuhCK\x01hEK\x01ubX\x03\x00\x00\x00logqGcsnakemake.io\nLog\nqH)\x81qI(X)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.logqJX)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.errqKe}qL(h\t}qM(hGK\x00N\x86qNX\x03\x00\x00\x00errqOK\x01N\x86qPuhGhJhOhKubX\x06\x00\x00\x00configqQ}qR(X\x07\x00\x00\x00lib2rawqSccollections\nOrderedDict\nqT)RqU(X\x02\x00\x00\x00WTqVhT)RqW(X\x01\x00\x00\x001qXXG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_1.fastq.gzqYh,XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_2.fastq.gzqZX\x01\x00\x00\x003q[XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_3.fastq.gzq\\uX\t\x00\x00\x00met2set25q]hT)Rq^(hXXO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_1.fastq.gzq_h,XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_2.fastq.gzq`h[XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_3.fastq.gzqauuX\n\x00\x00\x00cond_pairsqb]qc]qd(X\t\x00\x00\x00met2set25qeX\x02\x00\x00\x00WTqfeahVX\x02\x00\x00\x00WTqgX\x06\x00\x00\x00mutantqhX\t\x00\x00\x00met2set25qiX\n\x00\x00\x00replicatesqj]qk(hXh,h[eX\x0c\x00\x00\x00orientationsql]qm(X\x03\x
00\x00\x00fwdqnX\x03\x00\x00\x00revqoX\x03\x00\x00\x00allqpeX\x08\x00\x00\x00biotypesqq]qr(X\t\x00\x00\x00antisenseqsX\x04\x00\x00\x00tRNAqtX\x05\x00\x00\x00snRNAquX\x06\x00\x00\x00snoRNAqvX\x04\x00\x00\x00rRNAqwX\x05\x00\x00\x00piRNAqxX\x05\x00\x00\x00ncRNAqyX\x05\x00\x00\x00miRNAqzX\x07\x00\x00\x00lincRNAq{X\x0e\x00\x00\x00protein_codingq|X\n\x00\x00\x00pseudogeneq}X\t\x00\x00\x00antisenseq~X\x14\x00\x00\x00DNA_transposons_rmskq\x7fX\x14\x00\x00\x00RNA_transposons_rmskq\x80X\x0f\x00\x00\x00satellites_rmskq\x81X\x13\x00\x00\x00simple_repeats_rmskq\x82eX\x08\x00\x00\x00data_dirq\x83X\x14\x00\x00\x00data_gonad_met2set25q\x84X\t\x00\x00\x00annot_dirq\x85X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x86X\x07\x00\x00\x00alignerq\x87X\x06\x00\x00\x00hisat2q\x88X\x07\x00\x00\x00counterq\x89X\x0b\x00\x00\x00htseq_countq\x8aX\x0b\x00\x00\x00mapping_dirq\x8bX\x17\x00\x00\x00mapping_gonad_met2set25q\x8cX\n\x00\x00\x00output_dirq\x8dX\x1b\x00\x00\x00merged_transcriptomes_gonadq\x8eX\x07\x00\x00\x00log_dirq\x8fX\x14\x00\x00\x00logs_gonad_met2set25q\x90uX\x04\x00\x00\x00ruleq\x91X\x11\x00\x00\x00htseq_count_readsq\x92ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +converter="/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes_id2name.pickle" +cmd="htseq-count -f bam -s {snakemake.params.stranded} -a 0 -t transcript -i gene_id -m {snakemake.params.mode} {snakemake.input.sorted_bam} {snakemake.params.annot} | tee {snakemake.output.counts} | id2name.py ${{converter}} > {snakemake.output.counts_converted}" +echo ${{cmd}} +eval ${{cmd}} 1> {snakemake.log.log} 2> {snakemake.log.err} || error_exit "htseq-count failed" +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.snakemake.yh27wb6r.wrapper.py b/snakemake_wrappers/htseq_count_reads/.snakemake.yh27wb6r.wrapper.py new file mode 100644 index 
0000000000000000000000000000000000000000..95ff5f366e4f382756b4dd897d48619f7b8e92de --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/.snakemake.yh27wb6r.wrapper.py @@ -0,0 +1,14 @@ + +######## Snakemake header ######## +import sys; sys.path.insert(0, "/home/bli/.local/lib/python3.6/site-packages"); import pickle; snakemake = pickle.loads(b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(XL\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bamq\x06XP\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/WT_2_on_C_elegans_sorted.bam.baiq\x07e}q\x08(X\x06\x00\x00\x00_namesq\t}q\n(X\n\x00\x00\x00sorted_bamq\x0bK\x00N\x86q\x0cX\x03\x00\x00\x00baiq\rK\x01N\x86q\x0euh\x0bh\x06h\rh\x07ubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(Xb\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/piRNA_fwd_counts.txtq\x12Xm\x00\x00\x00mapping_gonad_met2set25/hisat2/mapped_C_elegans/htseq_count/WT_2_on_C_elegans/piRNA_fwd_counts_gene_names.txtq\x13e}q\x14(h\t}q\x15(X\x06\x00\x00\x00countsq\x16K\x00N\x86q\x17X\x10\x00\x00\x00counts_convertedq\x18K\x01N\x86q\x19uh\x16h\x12h\x18h\x13ubX\x06\x00\x00\x00paramsq\x1acsnakemake.io\nParams\nq\x1b)\x81q\x1c(X\x03\x00\x00\x00yesq\x1dX\x05\x00\x00\x00unionq\x1eXi\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/piRNA.gtfq\x1fe}q 
(h\t}q!(X\x08\x00\x00\x00strandedq"K\x00N\x86q#X\x04\x00\x00\x00modeq$K\x01N\x86q%X\x05\x00\x00\x00annotq&K\x02N\x86q\'uh"h\x1dh$h\x1eh&h\x1fubX\t\x00\x00\x00wildcardsq(csnakemake.io\nWildcards\nq))\x81q*(X\x02\x00\x00\x00WTq+X\x01\x00\x00\x002q,X\x05\x00\x00\x00piRNAq-X\x03\x00\x00\x00fwdq.e}q/(h\t}q0(X\x03\x00\x00\x00libq1K\x00N\x86q2X\x03\x00\x00\x00repq3K\x01N\x86q4X\x07\x00\x00\x00biotypeq5K\x02N\x86q6X\x0b\x00\x00\x00orientationq7K\x03N\x86q8uX\x03\x00\x00\x00libq9h+X\x03\x00\x00\x00repq:h,X\x07\x00\x00\x00biotypeq;h-X\x0b\x00\x00\x00orientationq<h.ubX\x07\x00\x00\x00threadsq=K\x01X\t\x00\x00\x00resourcesq>csnakemake.io\nResources\nq?)\x81q@(K\x01K\x01e}qA(h\t}qB(X\x06\x00\x00\x00_coresqCK\x00N\x86qDX\x06\x00\x00\x00_nodesqEK\x01N\x86qFuhCK\x01hEK\x01ubX\x03\x00\x00\x00logqGcsnakemake.io\nLog\nqH)\x81qI(X)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.logqJX)\x00\x00\x00logs_gonad_met2set25/count_reads_WT_2.errqKe}qL(h\t}qM(hGK\x00N\x86qNX\x03\x00\x00\x00errqOK\x01N\x86qPuhGhJhOhKubX\x06\x00\x00\x00configqQ}qR(X\x07\x00\x00\x00lib2rawqSccollections\nOrderedDict\nqT)RqU(X\x02\x00\x00\x00WTqVhT)RqW(X\x01\x00\x00\x001qXXG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_1.fastq.gzqYh,XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_2.fastq.gzqZX\x01\x00\x00\x003q[XG\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/wt_gonad_3.fastq.gzq\\uX\t\x00\x00\x00met2set25q]hT)Rq^(hXXO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_1.fastq.gzq_h,XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_2.fastq.gzq`h[XO\x00\x00\x00/pasteur/entites/Mhe/bli/raw_data/Zeller_et_al_2016/met2_set25_gonad_3.fastq.gzqauuX\n\x00\x00\x00cond_pairsqb]qc]qd(X\t\x00\x00\x00met2set25qeX\x02\x00\x00\x00WTqfeahVX\x02\x00\x00\x00WTqgX\x06\x00\x00\x00mutantqhX\t\x00\x00\x00met2set25qiX\n\x00\x00\x00replicatesqj]qk(hXh,h[eX\x0c\x00\x00\x00orientationsql]qm(X\x03\x00\x00\x0
0fwdqnX\x03\x00\x00\x00revqoX\x03\x00\x00\x00allqpeX\x08\x00\x00\x00biotypesqq]qr(X\t\x00\x00\x00antisenseqsX\x04\x00\x00\x00tRNAqtX\x05\x00\x00\x00snRNAquX\x06\x00\x00\x00snoRNAqvX\x04\x00\x00\x00rRNAqwX\x05\x00\x00\x00piRNAqxX\x05\x00\x00\x00ncRNAqyX\x05\x00\x00\x00miRNAqzX\x07\x00\x00\x00lincRNAq{X\x0e\x00\x00\x00protein_codingq|X\n\x00\x00\x00pseudogeneq}X\t\x00\x00\x00antisenseq~X\x14\x00\x00\x00DNA_transposons_rmskq\x7fX\x14\x00\x00\x00RNA_transposons_rmskq\x80X\x0f\x00\x00\x00satellites_rmskq\x81X\x13\x00\x00\x00simple_repeats_rmskq\x82eX\x08\x00\x00\x00data_dirq\x83X\x14\x00\x00\x00data_gonad_met2set25q\x84X\t\x00\x00\x00annot_dirq\x85X_\x00\x00\x00/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genesq\x86X\x07\x00\x00\x00alignerq\x87X\x06\x00\x00\x00hisat2q\x88X\x07\x00\x00\x00counterq\x89X\x0b\x00\x00\x00htseq_countq\x8aX\x0b\x00\x00\x00mapping_dirq\x8bX\x17\x00\x00\x00mapping_gonad_met2set25q\x8cX\n\x00\x00\x00output_dirq\x8dX\x1b\x00\x00\x00merged_transcriptomes_gonadq\x8eX\x07\x00\x00\x00log_dirq\x8fX\x14\x00\x00\x00logs_gonad_met2set25q\x90uX\x04\x00\x00\x00ruleq\x91X\x11\x00\x00\x00htseq_count_readsq\x92ub.') +######## Original script ######### +from snakemake.shell import shell + +cmd = """ +converter="/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes_id2name.pickle" +cmd="htseq-count -f bam -s {snakemake.params.stranded} -a 0 -t transcript -i gene_id -m {snakemake.params.mode} {snakemake.input.sorted_bam} {snakemake.params.annot} | tee {snakemake.output.counts} | id2name.py ${{converter}} > {snakemake.output.counts_converted}" +echo ${{cmd}} +eval ${{cmd}} 1> {snakemake.log.log} 2> {snakemake.log.err} || error_exit "htseq-count failed" +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/.wrapper.py.swp b/snakemake_wrappers/htseq_count_reads/.wrapper.py.swp new file mode 100644 index 
0000000000000000000000000000000000000000..cbf6217eaf449cea6c705ff98d39e321ac39390f Binary files /dev/null and b/snakemake_wrappers/htseq_count_reads/.wrapper.py.swp differ diff --git a/snakemake_wrappers/htseq_count_reads/wrapper.py b/snakemake_wrappers/htseq_count_reads/wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..490d04a0cfda3fd5278c32a659782fc1e1bdc4fa --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/wrapper.py @@ -0,0 +1,10 @@ +from snakemake.shell import shell + +cmd = """ +converter="/pasteur/entites/Mhe/Genomes/C_elegans/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes_id2name.pickle" +cmd="htseq-count -f bam -s {snakemake.params.stranded} -a 0 -t transcript -i gene_id -m {snakemake.params.mode} {snakemake.input.sorted_bam} {snakemake.params.annot} | tee {snakemake.output.counts} | id2name.py ${{converter}} > {snakemake.output.counts_converted}" +echo ${{cmd}} +eval ${{cmd}} 1> {snakemake.log.log} 2> {snakemake.log.err} || error_exit "htseq-count failed" +""" + +shell(cmd) diff --git a/snakemake_wrappers/htseq_count_reads/wrapper.py~ b/snakemake_wrappers/htseq_count_reads/wrapper.py~ new file mode 100644 index 0000000000000000000000000000000000000000..af58cc0ce192a6897a373af48797f3ba396bea98 --- /dev/null +++ b/snakemake_wrappers/htseq_count_reads/wrapper.py~ @@ -0,0 +1,11 @@ +from snakemake.shell import shell + +cmd = """ +genome_dir="${{HOME}}/Genomes" +genome="C_elegans" +cmd="bowtie2 --seed 123 -t -L 6 -i S,1,0.8 -N 0 --mm -x {snakemake.params.index} -U {snakemake.input[0]} --no-unal --un-gz {snakemake.output.nomap} -S {snakemake.output.sam}" + echo ${{cmd}} + eval ${{cmd}} 1> {snakemake.log.map_log} 2> {snakemake.log.map_err} +""" + +shell(cmd) diff --git a/snakemake_wrappers/sam2indexedbam/wrapper.py b/snakemake_wrappers/sam2indexedbam/wrapper.py index 00cb2a11086baad3d2667a95e26d495f5a3157b0..07d1dab71288811730858287ada91abf805ac4c3 100644 --- a/snakemake_wrappers/sam2indexedbam/wrapper.py +++ 
b/snakemake_wrappers/sam2indexedbam/wrapper.py @@ -1,7 +1,8 @@ from snakemake.shell import shell cmd = """ -nice -n 19 ionice -c2 -n7 sam2indexedbam.sh {snakemake.input[0]} 1> {snakemake.log.indexing_log} 2> {snakemake.log.indexing_err} +export SAMTOOLS_THREADS={snakemake.threads} +nice -n 19 ionice -c2 -n7 sam2indexedbam.sh {snakemake.input.sam} 1> {snakemake.log.log} 2> {snakemake.log.err} """ shell(cmd)