diff --git a/Genome_preparation/extract_info_from_gtf.py b/Genome_preparation/extract_info_from_gtf.py index e61773a4bf452c8b1973f2c02b30240e5ce3d9a5..6eda28305922a9568a2eea57bcf30c9dd50147a6 100755 --- a/Genome_preparation/extract_info_from_gtf.py +++ b/Genome_preparation/extract_info_from_gtf.py @@ -134,6 +134,9 @@ def main(): ]) + ";" dest_files[biotype].write("\t".join(fields) + "\n") + # Note that "convert_dir" is not set in the the genome config. + # This is just one converter, hopefully accessible via the "converter" config element. + # The other converters will be obtained from the libcelegans package. with open(f"{base}_id2name.pickle", "wb") as pickle_file: print(f"Storing id2name in {pickle_file.name}") dump(id2name, pickle_file, HIGHEST_PROTOCOL) diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile index 58f61707d97d0158e62e03b0171f72335954cc27..237118666af56162b1126a4a10781c6711f7d351 100644 --- a/small_RNA-seq/small_RNA-seq.snakefile +++ b/small_RNA-seq/small_RNA-seq.snakefile @@ -435,8 +435,21 @@ genome_db = genome_dict["db"][aligner] genome_binned = genome_dict["binned"] annot_dir = genome_dict["annot_dir"] exon_lengths_file = OPJ(annot_dir, "union_exon_lengths.txt"), -# TODO: figure out the difference between OPJ(convert_dir, "wormid2name.pickle") and genome_dict["converter"] +# What are the difference between +# OPJ(convert_dir, "wormid2name.pickle") and genome_dict["converter"]? +# /!\ gene_ids_data_dir contains more conversion dicts, +# but is not influenced by genome preparation customization, +# like splitting of miRNAs into 3p and 5p. convert_dir = genome_dict.get("convert_dir", gene_ids_data_dir) +# For wormid2name, load in priority the one +# that might contain custom gene names, like for splitted miRNAs +with open( + genome_dict.get( + "converter", + OPJ(convert_dir, "wormid2name.pickle")), + "rb") as dict_file: + wormid2name = load(dict_file) + gene_lists_dir = genome_dict["gene_lists_dir"] avail_id_lists = set(glob(OPJ(gene_lists_dir, "*_ids.txt"))) index = genome_db @@ -2373,8 +2386,9 @@ rule compute_RPM_folds: ###################### with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file: lfc = lfc.assign(cosmid=lfc.apply(column_converter(load(dict_file)), axis=1)) - with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: - lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1)) + #with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: + # lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1)) + lfc = lfc.assign(name=lfc.apply(column_converter(wormid2name), axis=1)) # logfile.write(f"Columns in lfc are: {lfc.columns}\n") # logfile.write(f"Index in lfc is: {lfc.index.name}\n") assert lfc.index.name == "gene", f"Wrong index: {lfc.index.name}" @@ -2483,8 +2497,9 @@ rule compute_remapped_RPM_folds: ###################### with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file: lfc = lfc.assign(cosmid=lfc.apply(column_converter(load(dict_file)), axis=1)) - with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: - lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1)) + #with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: + # lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1)) + lfc = lfc.assign(name=lfc.apply(column_converter(wormid2name), axis=1)) #logfile.write("Adding small read type info\n") ##pd.concat((counts_data.loc[common], RPM, add_tags_column(lfc, input.tags_table, "small_type")), axis=1).to_csv(output.fold_results, sep="\t") #pd.concat((RPM, add_tags_column(lfc, input.tags_table, "small_type")), axis=1).to_csv(output.fold_results, sep="\t") @@ -3891,8 +3906,9 @@ rule small_RNA_differential_expression: ###################### with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file: res = res.assign(cosmid=res.apply(column_converter(load(dict_file)), axis=1)) - with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: - res = res.assign(name=res.apply(column_converter(load(dict_file)), axis=1)) + #with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: + # res = res.assign(name=res.apply(column_converter(load(dict_file)), axis=1)) + res = res.assign(name=res.apply(column_converter(wormid2name), axis=1)) ########################################## # res.to_csv(output.deseq_results, sep="\t", na_rep="NA", decimal=",") res.to_csv(output.deseq_results, sep="\t", na_rep="NA")