Skip to content
Snippets Groups Projects
Commit 6c6d982c authored by Remi PLANEL
Browse files

Raise exception on wrong off-target format. Fix #7

parent a4c0663c
No related branches found
No related tags found
1 merge request: !5 Raise exception on wrong off-target format. Fix #7
Pipeline #25164 passed with stage
in 45 seconds
......@@ -4,10 +4,12 @@ from crisprbact.off_target import (
extract_features,
extract_records,
)
from crisprbact.utils import NoRecordsException
# Names exported by the package on `from <package> import *`.
__all__ = [
"extract_records",
"on_target_predict",
"compute_off_target_df",
"extract_features",
# Exported so callers can refer to the exception by name.
"NoRecordsException",
]
......@@ -12,16 +12,25 @@ def get_pos_features(position, f_df):
return []
# Earlier variant of get_off_target_pos as it appears in this diff view.
# NOTE(review): indentation was lost in this view, so whether the last two
# statements sit inside or after the `for` loop cannot be told from here.
def get_off_target_pos(guide, recs, records):
# NOTE(review): `records` is used as a slice length (guide[-records:]), i.e. the
# number of trailing guide characters to match, not as a collection of records.
for rec in recs:
# + ori
# Match the guide tail followed by any base and "GG" in the record sequence.
offs_plus = re.finditer(guide[-records:] + "[ATGC]GG", str(rec.seq))
# Each hit is (start, end, pampos, strand, recid); pampos is the match end here.
offs = [match.span() + (match.end(), "+", rec.id) for match in offs_plus]
# - ori
# Match "CC", any base, then the reverse complement of the guide tail.
offs_minus = re.finditer("CC[ATGC]" + rev_comp(guide[-records:]), str(rec.seq))
# For this orientation pampos is the match start.
offs += [match.span() + (match.start(), "-", rec.id) for match in offs_minus]
# Transpose the hit tuples into one named column per field.
offs_dict = dict(zip(["start", "end", "pampos", "strand", "recid"], zip(*offs)))
return pd.DataFrame(offs_dict)
def get_off_target_pos(guide, recs, seed_size):
    """Find seed + PAM matches of ``guide`` on both strands of every record.

    The last ``seed_size`` characters of ``guide`` are searched followed by
    ``[ATGC]GG`` ("+" orientation), and their reverse complement preceded by
    ``CC[ATGC]`` ("-" orientation), in ``str(rec.seq)`` of each record.

    Args:
        guide: guide sequence as a string.
        recs: iterable of records exposing ``seq`` and ``id``, or ``None``.
        seed_size: number of trailing guide characters to match.

    Returns:
        ``None`` when ``recs`` is ``None``. Otherwise a ``pandas.DataFrame``
        with the columns ``start``, ``end``, ``pampos``, ``strand`` and
        ``recid``, one row per match; the frame is empty (no columns) when
        nothing matched. ``pampos`` is the match end for "+" hits and the
        match start for "-" hits.
    """
    if recs is None:
        return None

    seed = guide[-seed_size:]
    # The patterns do not depend on the record, so build them once.
    plus_pattern = re.compile(seed + "[ATGC]GG")
    minus_pattern = re.compile("CC[ATGC]" + rev_comp(seed))

    # Accumulate over *all* records: rebinding the list inside the loop would
    # drop the hits of every record but one, and would leave it undefined
    # when ``recs`` is empty.
    offs = []
    for rec in recs:
        sequence = str(rec.seq)
        # + ori
        offs.extend(
            match.span() + (match.end(), "+", rec.id)
            for match in plus_pattern.finditer(sequence)
        )
        # - ori
        offs.extend(
            match.span() + (match.start(), "-", rec.id)
            for match in minus_pattern.finditer(sequence)
        )

    # Transpose the (start, end, pampos, strand, recid) tuples into columns.
    offs_dict = dict(zip(["start", "end", "pampos", "strand", "recid"], zip(*offs)))
    return pd.DataFrame(offs_dict)
def extract_records(genome):
......@@ -33,24 +42,31 @@ def extract_records(genome):
def extract_features(recs):
f_list = []
for rec in recs:
for f in rec.features:
if f.type in ["CDS", "ncRNA", "rRNA", "tRNA"]:
f_list.append(
(
f.location.start.position,
f.location.end.position,
f.location.strand,
f.type,
f,
rec.id,
if recs is not None:
f_list = []
for rec in recs:
for f in rec.features:
if f.type in ["CDS", "ncRNA", "rRNA", "tRNA"]:
f_list.append(
(
f.location.start.position,
f.location.end.position,
f.location.strand,
f.type,
f,
rec.id,
)
)
)
f_dict = dict(
zip(["start", "end", "strand", "type", "feature", "recid"], zip(*f_list[1:]),)
) # starts at 1 to get rid of the first feature which is the whole chromosome
return pd.DataFrame(f_dict)
f_dict = dict(
zip(
["start", "end", "strand", "type", "feature", "recid"],
zip(*f_list[1:]),
)
) # starts at 1 to get rid of the first feature which is the whole chromosome
return pd.DataFrame(f_dict)
else:
return None
def compute_off_target_df(guide, seed_size, records, feature_df):
......@@ -58,7 +74,10 @@ def compute_off_target_df(guide, seed_size, records, feature_df):
The features column contains a list of biopython SeqFeature objects that overlap
with the off-target"""
offs_df = get_off_target_pos(guide, records, seed_size)
offs_df["features"] = [
get_pos_features(off.pampos, feature_df) for i, off in offs_df.iterrows()
]
return offs_df
if offs_df is not None:
offs_df["features"] = [
get_pos_features(off.pampos, feature_df) for i, off in offs_df.iterrows()
]
return offs_df
else:
return None
import numpy as np
import re
from importlib.resources import open_binary
from crisprbact.utils import rev_comp
from crisprbact.utils import rev_comp, NoRecordsException
from crisprbact.off_target import (
compute_off_target_df,
extract_records,
......@@ -56,7 +56,12 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
genome_features = None
if genome:
records = extract_records(genome)
genome_features = extract_features(records)
if records is None:
raise NoRecordsException(
"No records found in sequence file. Check the sequence or the format"
)
else:
genome_features = extract_features(records)
alltargets = list(find_targets(seq))
if alltargets:
......@@ -81,7 +86,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
target["guide"], seed_size, records, genome_features
)
off_targets_list = []
if not off_target_df.empty:
if off_target_df is not None and not off_target_df.empty:
off_targets = off_target_df.loc[
0:,
["start", "end", "pampos", "strand", "recid", "features"],
......
def rev_comp(seq):
    """Return the reverse complement of the string ``seq``.

    Upper-case ``A``/``T`` and ``G``/``C`` are swapped; any other character
    is kept unchanged. The result is in reversed order.
    """
    pairing = {"A": "T", "T": "A", "G": "C", "C": "G"}
    return "".join(pairing.get(base, base) for base in reversed(seq))
class NoRecordsException(Exception):
    """Signal that no record was found in the sequence file."""
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment