Skip to content
Snippets Groups Projects
Commit 8919f01f authored by Blaise Li's avatar Blaise Li
Browse files

Tryng to fix gene id conversion for miRNAs.

parent 34dcef40
No related branches found
No related tags found
No related merge requests found
...@@ -134,6 +134,9 @@ def main(): ...@@ -134,6 +134,9 @@ def main():
]) + ";" ]) + ";"
dest_files[biotype].write("\t".join(fields) + "\n") dest_files[biotype].write("\t".join(fields) + "\n")
# Note that "convert_dir" is not set in the the genome config.
# This is just one converter, hopefully accessible via the "converter" config element.
# The other converters will be obtained from the libcelegans package.
with open(f"{base}_id2name.pickle", "wb") as pickle_file: with open(f"{base}_id2name.pickle", "wb") as pickle_file:
print(f"Storing id2name in {pickle_file.name}") print(f"Storing id2name in {pickle_file.name}")
dump(id2name, pickle_file, HIGHEST_PROTOCOL) dump(id2name, pickle_file, HIGHEST_PROTOCOL)
......
...@@ -435,8 +435,21 @@ genome_db = genome_dict["db"][aligner] ...@@ -435,8 +435,21 @@ genome_db = genome_dict["db"][aligner]
genome_binned = genome_dict["binned"] genome_binned = genome_dict["binned"]
annot_dir = genome_dict["annot_dir"] annot_dir = genome_dict["annot_dir"]
exon_lengths_file = OPJ(annot_dir, "union_exon_lengths.txt"), exon_lengths_file = OPJ(annot_dir, "union_exon_lengths.txt"),
# TODO: figure out the difference between OPJ(convert_dir, "wormid2name.pickle") and genome_dict["converter"] # What are the difference between
# OPJ(convert_dir, "wormid2name.pickle") and genome_dict["converter"]?
# /!\ gene_ids_data_dir contains more conversion dicts,
# but is not influenced by genome preparation customization,
# like splitting of miRNAs into 3p and 5p.
convert_dir = genome_dict.get("convert_dir", gene_ids_data_dir) convert_dir = genome_dict.get("convert_dir", gene_ids_data_dir)
# For wormid2name, load in priority the one
# that might contain custom gene names, like for splitted miRNAs
with open(
genome_dict.get(
"converter",
OPJ(convert_dir, "wormid2name.pickle")),
"rb") as dict_file:
wormid2name = load(dict_file)
gene_lists_dir = genome_dict["gene_lists_dir"] gene_lists_dir = genome_dict["gene_lists_dir"]
avail_id_lists = set(glob(OPJ(gene_lists_dir, "*_ids.txt"))) avail_id_lists = set(glob(OPJ(gene_lists_dir, "*_ids.txt")))
index = genome_db index = genome_db
...@@ -2373,8 +2386,9 @@ rule compute_RPM_folds: ...@@ -2373,8 +2386,9 @@ rule compute_RPM_folds:
###################### ######################
with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file: with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file:
lfc = lfc.assign(cosmid=lfc.apply(column_converter(load(dict_file)), axis=1)) lfc = lfc.assign(cosmid=lfc.apply(column_converter(load(dict_file)), axis=1))
with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: #with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file:
lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1)) # lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1))
lfc = lfc.assign(name=lfc.apply(column_converter(wormid2name), axis=1))
# logfile.write(f"Columns in lfc are: {lfc.columns}\n") # logfile.write(f"Columns in lfc are: {lfc.columns}\n")
# logfile.write(f"Index in lfc is: {lfc.index.name}\n") # logfile.write(f"Index in lfc is: {lfc.index.name}\n")
assert lfc.index.name == "gene", f"Wrong index: {lfc.index.name}" assert lfc.index.name == "gene", f"Wrong index: {lfc.index.name}"
...@@ -2483,8 +2497,9 @@ rule compute_remapped_RPM_folds: ...@@ -2483,8 +2497,9 @@ rule compute_remapped_RPM_folds:
###################### ######################
with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file: with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file:
lfc = lfc.assign(cosmid=lfc.apply(column_converter(load(dict_file)), axis=1)) lfc = lfc.assign(cosmid=lfc.apply(column_converter(load(dict_file)), axis=1))
with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: #with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file:
lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1)) # lfc = lfc.assign(name=lfc.apply(column_converter(load(dict_file)), axis=1))
lfc = lfc.assign(name=lfc.apply(column_converter(wormid2name), axis=1))
#logfile.write("Adding small read type info\n") #logfile.write("Adding small read type info\n")
##pd.concat((counts_data.loc[common], RPM, add_tags_column(lfc, input.tags_table, "small_type")), axis=1).to_csv(output.fold_results, sep="\t") ##pd.concat((counts_data.loc[common], RPM, add_tags_column(lfc, input.tags_table, "small_type")), axis=1).to_csv(output.fold_results, sep="\t")
#pd.concat((RPM, add_tags_column(lfc, input.tags_table, "small_type")), axis=1).to_csv(output.fold_results, sep="\t") #pd.concat((RPM, add_tags_column(lfc, input.tags_table, "small_type")), axis=1).to_csv(output.fold_results, sep="\t")
...@@ -3891,8 +3906,9 @@ rule small_RNA_differential_expression: ...@@ -3891,8 +3906,9 @@ rule small_RNA_differential_expression:
###################### ######################
with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file: with open(OPJ(convert_dir, "wormid2cosmid.pickle"), "rb") as dict_file:
res = res.assign(cosmid=res.apply(column_converter(load(dict_file)), axis=1)) res = res.assign(cosmid=res.apply(column_converter(load(dict_file)), axis=1))
with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file: #with open(OPJ(convert_dir, "wormid2name.pickle"), "rb") as dict_file:
res = res.assign(name=res.apply(column_converter(load(dict_file)), axis=1)) # res = res.assign(name=res.apply(column_converter(load(dict_file)), axis=1))
res = res.assign(name=res.apply(column_converter(wormid2name), axis=1))
########################################## ##########################################
# res.to_csv(output.deseq_results, sep="\t", na_rep="NA", decimal=",") # res.to_csv(output.deseq_results, sep="\t", na_rep="NA", decimal=",")
res.to_csv(output.deseq_results, sep="\t", na_rep="NA") res.to_csv(output.deseq_results, sep="\t", na_rep="NA")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment