Skip to content
Snippets Groups Projects
Commit db1ea7ac authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

split on_target_predict and add functions off_target_per_seed_size

parent ea3bc37e
No related branches found
No related tags found
No related merge requests found
Pipeline #28704 passed with stage
in 1 minute and 1 second
......@@ -94,7 +94,7 @@ def from_str(
guide_rnas = on_target_predict(target, genome_fh)
click.echo("\t".join(HEADER), file=output_file)
write_guide_rnas(guide_rnas, output_file)
write_guide_rnas(guide_rnas, output_file, len(HEADER))
@predict.command()
......@@ -150,6 +150,7 @@ def from_seq(
fg = "blue"
if config.verbose:
print_parameters(target.name, fg)
click.echo("\t".join(HEADER), file=output_file)
for record in SeqIO.parse(target, seq_format):
if config.verbose:
......@@ -160,7 +161,7 @@ def from_seq(
genome_fh = None
guide_rnas = on_target_predict(str(record.seq), genome_fh)
# print(guide_rnas)
write_guide_rnas(guide_rnas, output_file, record.id)
write_guide_rnas(guide_rnas, output_file, len(HEADER), record.id)
def print_parameters(target, fg="blue"):
......@@ -168,7 +169,9 @@ def print_parameters(target, fg="blue"):
click.secho("Target sequence : %s" % target, fg=fg)
def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"):
def write_guide_rnas(
guide_rnas, output_file, header_size, seq_id="N/A",
):
for guide_rna in guide_rnas:
row = [
str(guide_rna["target_id"]),
......@@ -179,6 +182,7 @@ def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"):
str(guide_rna["pred"]),
seq_id,
]
# seed_size = ""
if len(guide_rna["off_targets_per_seed"]) > 0:
for off_target_per_seed in guide_rna["off_targets_per_seed"]:
for off_target in off_target_per_seed["off_targets"]:
......@@ -191,14 +195,16 @@ def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"):
return ""
details = map(extract_off_target_detail, OFF_TARGET_DETAILS)
line_to_print = row + [str(seed_size)] + list(details)
assert len(line_to_print) == header_size
click.echo(
"\t".join(row + [str(seed_size)] + list(details)),
file=output_file,
"\t".join(line_to_print), file=output_file,
)
else:
line_to_print = row + [""] + list(map(lambda x: "", OFF_TARGET_DETAILS))
assert len(line_to_print) == header_size
click.echo(
"\t".join(row + list(map(lambda x: "", OFF_TARGET_DETAILS))),
file=output_file,
"\t".join(line_to_print), file=output_file,
)
......
......@@ -68,6 +68,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12, GUIDE_LEN]
seq = re.sub(r"\s", "", seq) # removes white space
records = None
genome_features = None
if genome:
records = extract_records(genome)
if records is None:
......@@ -87,69 +88,11 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12, GUIDE_LEN]
target.update({"target_id": target_id})
target.update({"pred": preds[i]})
if genome:
off_targets_per_seed = []
for seed_size in seed_sizes:
# off-target found for a guide
off_target_df = compute_off_target_df(
target["guide"], seed_size, records, genome_features
)
off_targets_list = []
if off_target_df is not None and not off_target_df.empty:
off_targets = slice_off_targets_results(off_target_df)
for j, off_t in enumerate(off_targets):
off_target_dict = {
"off_target_id": str(target_id)
+ "-"
+ str(seed_size)
+ "-"
+ str(j),
"off_target_start": off_t[0],
"off_target_end": off_t[1],
"off_target_pampos": off_t[2],
"off_target_strand": off_t[3],
"off_target_recid": off_t[4],
"off_target_longest_perfect_match": off_t[5],
"off_target_pam_seq": off_t[6],
"off_target_good_orientation": None,
}
if (
seed_size == GUIDE_LEN
or off_target_dict["off_target_longest_perfect_match"]
!= GUIDE_LEN
):
index_features = 7
# Get feature details associated
# to an off-target position
if len(off_t[index_features]) > 0:
feat = off_t[index_features][0]
off_target_dict[
"off_target_good_orientation"
] = is_good_orientation(
feat, off_target_dict["off_target_strand"]
)
off_target_feature = get_off_target_feature(feat)
off_targets_list.append(
{**off_target_feature, **off_target_dict}
)
else:
off_targets_list.append(off_target_dict)
off_targets_per_seed.append(
{
"id": str(i) + "-" + str(seed_size),
"seed_size": seed_size,
"off_targets": off_targets_list,
}
)
else:
off_targets_per_seed.append(
{
"id": str(i) + "-" + str(seed_size),
"seed_size": seed_size,
"off_targets": [],
}
)
target.update({"off_targets_per_seed": off_targets_per_seed})
# lookup for off-targets in genome
off_targets_per_seed = gen_off_target_per_seed_size(
i, target["guide"], records, genome_features, target_id, seed_sizes
)
target.update({"off_targets_per_seed": list(off_targets_per_seed)})
else:
target.update({"off_targets_per_seed": []})
return alltargets
......@@ -204,4 +147,58 @@ def get_off_target_feature(feat):
return feature_dict
# def gen_off_target_per_seed_size(seed_sizes):
def gen_off_target_per_seed_size(
index, guide, records, genome_features, target_id, seed_sizes
):
for seed_size in seed_sizes:
# off-target found for a guide
off_target_df = compute_off_target_df(
guide, seed_size, records, genome_features
)
if off_target_df is not None and not off_target_df.empty:
off_targets = slice_off_targets_results(off_target_df)
off_targets_list = gen_off_targets_convert(
target_id, seed_size, off_targets
)
yield {
"id": str(index) + "-" + str(seed_size),
"seed_size": seed_size,
"off_targets": off_targets_list,
}
else:
yield {
"id": str(index) + "-" + str(seed_size),
"seed_size": seed_size,
"off_targets": [],
}
def gen_off_targets_convert(target_id, seed_size, off_targets):
for j, off_t in enumerate(off_targets):
off_target_dict = {
"off_target_id": str(target_id) + "-" + str(seed_size) + "-" + str(j),
"off_target_start": off_t[0],
"off_target_end": off_t[1],
"off_target_pampos": off_t[2],
"off_target_strand": off_t[3],
"off_target_recid": off_t[4],
"off_target_longest_perfect_match": off_t[5],
"off_target_pam_seq": off_t[6],
"off_target_good_orientation": None,
}
if (
seed_size == GUIDE_LEN
or off_target_dict["off_target_longest_perfect_match"] != GUIDE_LEN
):
index_features = 7
# Get feature details associated
# to an off-target position
if len(off_t[index_features]) > 0:
feat = off_t[index_features][0]
off_target_dict["off_target_good_orientation"] = is_good_orientation(
feat, off_target_dict["off_target_strand"]
)
off_target_feature = get_off_target_feature(feat)
yield {**off_target_feature, **off_target_dict}
else:
yield off_target_dict
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment