Skip to content
Snippets Groups Projects
Commit f29fb5a9 authored by Remi PLANEL
Browse files

Filter off-target feature (keep reverse off-target feature)

parent 497b1eb0
No related branches found
No related tags found
1 merge request: !4 "Multi seed size"
......@@ -43,7 +43,12 @@ def find_targets(seq):
)
def on_target_predict(seq, genome=None, seed_size=7):
def get_strand_value(value):
    """Normalise a strand marker to the integer 1 or -1.

    The argument is converted with ``str()`` before matching, so both the
    textual forms ("+", "-", "1", "-1") and the integers 1 and -1 are
    accepted.

    Raises:
        KeyError: if the string form of ``value`` is not one of the four
            recognised markers (this includes ``None`` and ``0``).
    """
    marker = str(value)
    if marker in ("+", "1"):
        return 1
    if marker in ("-", "-1"):
        return -1
    # Same failure mode as a plain dict lookup on the unknown marker.
    raise KeyError(marker)
def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
seq = seq.upper() # make uppercase
seq = re.sub(r"\s", "", seq) # removes white space
......@@ -65,47 +70,80 @@ def on_target_predict(seq, genome=None, seed_size=7):
preds = predict(X)
for i, target in enumerate(alltargets):
target_id = i + 1
target.update({"id": target_id})
target.update({"pred": preds[i]})
target.update({"seed_size": seed_size})
if genome:
off_target_df = compute_off_target_df(
target["guide"], seed_size, records, genome_features
)
off_targets_list = []
if not off_target_df.empty:
off_targets = off_target_df.loc[
0:, ["start", "end", "pampos", "strand", "recid", "features"]
]
for index, off_t in enumerate(off_targets.values.tolist()):
off_target_dict = {
"off_target_start": off_t[0],
"off_target_end": off_t[1],
"pampos": off_t[2],
"strand": off_t[3],
"recid": off_t[4],
}
if len(off_t[5]) > 0:
# Loop for each feature
for feat in off_t[5]:
feature_dict = {
"feat_strand": feat.location.strand,
"feat_start": feat.location.start,
"feat_end": feat.location.end,
"feat_type": feat.type,
}
for k, feat in feat.qualifiers.items():
if k != "translation":
feature_dict[k] = "::".join(feat)
off_targets_list.append(
{**feature_dict, **off_target_dict}
off_targets_per_seed = []
for seed_size in seed_sizes:
off_target_df = compute_off_target_df(
target["guide"], seed_size, records, genome_features
)
off_targets_list = []
if not off_target_df.empty:
off_targets = off_target_df.loc[
0:,
["start", "end", "pampos", "strand", "recid", "features"],
]
for j, off_t in enumerate(off_targets.values.tolist()):
off_target_dict = {
"id": str(target_id)
+ "-"
+ str(seed_size)
+ "-"
+ str(j),
"off_target_start": off_t[0],
"off_target_end": off_t[1],
"off_target_pampos": off_t[2],
"off_target_strand": off_t[3],
"off_target_recid": off_t[4],
}
off_t[5] = list(
filter(
lambda feat: get_strand_value(
off_target_dict["off_target_strand"]
)
!= get_strand_value(feat.location.strand),
off_t[5],
)
else:
off_targets_list.append(off_target_dict)
target.update({"off_targets": off_targets_list})
else:
target.update({"off_targets": off_targets_list})
)
if len(off_t[5]) > 0:
# Loop for each feature
for feat in off_t[5]:
feature_dict = {
"off_target_feat_strand": feat.location.strand,
"off_target_feat_start": feat.location.start,
"off_target_feat_end": feat.location.end,
"off_target_feat_type": feat.type,
}
for k, feat in feat.qualifiers.items():
if k != "translation":
feature_dict[k] = "::".join(feat)
off_targets_list.append(
{**feature_dict, **off_target_dict}
)
else:
off_targets_list.append(off_target_dict)
off_targets_per_seed.append(
{
"id": str(i) + "-" + str(seed_size),
"seed_size": seed_size,
"off_targets": off_targets_list,
}
)
else:
off_targets_per_seed.append(
{
"id": str(i) + "-" + str(seed_size),
"seed_size": seed_size,
"off_targets": off_targets_list,
}
)
# target.update({"off_targets": off_targets_list})
target.update({"off_targets_per_seed": off_targets_per_seed})
else:
target.update({"off_targets": []})
target.update({"off_targets_per_seed": []})
return alltargets
else:
return []
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment