Merge branch 'multi-seed-size' into 'master'

Raise exception on wrong off-target format. Fix #7. Closes #7. See merge request !5.

Merge branch 'multi-seed-size' into 'master'
Raise exception on wrong off-target format. Fix #7. Closes #7. See merge request !5.
a7fa4bae · Remi PLANEL · 924f5953 · 6c6d982c · a7fa4bae · a7fa4bae
Commit a7fa4bae authored 5 years ago by Remi PLANEL
--- a/crisprbact/__init__.py
+++ b/crisprbact/__init__.py
@@ -4,10 +4,12 @@ from crisprbact.off_target import (
    extract_features,
    extract_records,
 )
+from crisprbact.utils import NoRecordsException
 __all__ = [
    "extract_records",
    "on_target_predict",
    "compute_off_target_df",
    "extract_features",
+    "NoRecordsException",
 ]
--- a/crisprbact/off_target.py
+++ b/crisprbact/off_target.py
@@ -12,16 +12,25 @@ def get_pos_features(position, f_df):
        return []
-def get_off_target_pos(guide, recs, records):
+def get_off_target_pos(guide, recs, seed_size):
-    for rec in recs:
+    if recs is not None:
-        # + ori
+        for rec in recs:
-        offs_plus = re.finditer(guide[-records:] + "[ATGC]GG", str(rec.seq))
+            # + ori
-        offs = [match.span() + (match.end(), "+", rec.id) for match in offs_plus]
+            offs_plus = re.finditer(guide[-seed_size:] + "[ATGC]GG", str(rec.seq))
-        # - ori
+            offs = [match.span() + (match.end(), "+", rec.id) for match in offs_plus]
-        offs_minus = re.finditer("CC[ATGC]" + rev_comp(guide[-records:]), str(rec.seq))
+            # - ori
-        offs += [match.span() + (match.start(), "-", rec.id) for match in offs_minus]
+            offs_minus = re.finditer(
-        offs_dict = dict(zip(["start", "end", "pampos", "strand", "recid"], zip(*offs)))
+                "CC[ATGC]" + rev_comp(guide[-seed_size:]), str(rec.seq)
-        return pd.DataFrame(offs_dict)
+            )
+            offs += [
+                match.span() + (match.start(), "-", rec.id) for match in offs_minus
+            ]
+            offs_dict = dict(
+                zip(["start", "end", "pampos", "strand", "recid"], zip(*offs))
+            )
+            return pd.DataFrame(offs_dict)
+    else:
+        return None
 def extract_records(genome):
@@ -33,24 +42,31 @@ def extract_records(genome):
 def extract_features(recs):
-    f_list = []
-    for rec in recs:
+    if recs is not None:
-        for f in rec.features:
+        f_list = []
-            if f.type in ["CDS", "ncRNA", "rRNA", "tRNA"]:
+        for rec in recs:
-                f_list.append(
+            for f in rec.features:
-                    (
+                if f.type in ["CDS", "ncRNA", "rRNA", "tRNA"]:
-                        f.location.start.position,
+                    f_list.append(
-                        f.location.end.position,
+                        (
-                        f.location.strand,
+                            f.location.start.position,
-                        f.type,
+                            f.location.end.position,
-                        f,
+                            f.location.strand,
-                        rec.id,
+                            f.type,
+                            f,
+                            rec.id,
+                        )
                    )
-                )
+        f_dict = dict(
-    f_dict = dict(
+            zip(
-        zip(["start", "end", "strand", "type", "feature", "recid"], zip(*f_list[1:]),)
+                ["start", "end", "strand", "type", "feature", "recid"],
-    )  # starts at 1 to get rid of the first feature which is the whole chromosome
+                zip(*f_list[1:]),
-    return pd.DataFrame(f_dict)
+            )
+        )  # starts at 1 to get rid of the first feature which is the whole chromosome
+        return pd.DataFrame(f_dict)
+    else:
+        return None
 def compute_off_target_df(guide, seed_size, records, feature_df):
@@ -58,7 +74,10 @@ def compute_off_target_df(guide, seed_size, records, feature_df):
    The features column contains a list of biopython SeqFeature objects that overlap
    with the off-target"""
    offs_df = get_off_target_pos(guide, records, seed_size)
-    offs_df["features"] = [
+    if offs_df is not None:
-        get_pos_features(off.pampos, feature_df) for i, off in offs_df.iterrows()
+        offs_df["features"] = [
-    ]
+            get_pos_features(off.pampos, feature_df) for i, off in offs_df.iterrows()
-    return offs_df
+        ]
+        return offs_df
+    else:
+        return None
--- a/crisprbact/predict.py
+++ b/crisprbact/predict.py
 import numpy as np
 import re
 from importlib.resources import open_binary
-from crisprbact.utils import rev_comp
+from crisprbact.utils import rev_comp, NoRecordsException
 from crisprbact.off_target import (
    compute_off_target_df,
    extract_records,
@@ -56,7 +56,12 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
    genome_features = None
    if genome:
        records = extract_records(genome)
-        genome_features = extract_features(records)
+        if records is None:
+            raise NoRecordsException(
+                "No records found in sequence file. Check the sequence or the format"
+            )
+        else:
+            genome_features = extract_features(records)
    alltargets = list(find_targets(seq))
    if alltargets:
@@ -81,7 +86,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
                        target["guide"], seed_size, records, genome_features
                    )
                    off_targets_list = []
-                    if not off_target_df.empty:
+                    if off_target_df is not None and not off_target_df.empty:
                        off_targets = off_target_df.loc[
                            0:,
                            ["start", "end", "pampos", "strand", "recid", "features"],

--- a/crisprbact/utils.py
+++ b/crisprbact/utils.py
 def rev_comp(seq):
    comp = str.maketrans("ATGC", "TACG")
    return seq.translate(comp)[::-1]
+class NoRecordsException(Exception):
+    """No Record found in the sequence file"""
+    pass