From f03c3f3026792d5f5146e56b99a732196a2b6ee1 Mon Sep 17 00:00:00 2001
From: Remi Planel <rplanel@pasteur.fr>
Date: Fri, 3 Apr 2020 12:08:18 +0200
Subject: [PATCH] Fix bug on guide sequence #10

---
 crisprbact/predict.py | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/crisprbact/predict.py b/crisprbact/predict.py
index 49ed322..d4dc6a5 100644
--- a/crisprbact/predict.py
+++ b/crisprbact/predict.py
@@ -29,19 +29,32 @@ def find_targets(seq):
     repam = "[ATGC]GG"
     L = len(seq)
     seq_revcomp = rev_comp(seq)
-    matching_target = re.finditer("(?=([ATGC]{6}" + repam + "[ATGC]{16}))", seq_revcomp)
-    for target in matching_target:
+    matching_targets = re.finditer(
+        "(?=([ATGC]{6}" + repam + "[ATGC]{16}))", seq_revcomp
+    )
+    for target in matching_targets:
         matching_target = target.group(1)
-        yield dict(
-            [
-                ("target", matching_target),
-                ("guide", matching_target[:20]),
-                ("start", L - target.start() - 20),
-                ("stop", L - target.start()),
-                ("pam", L - target.start() - 22),
-                ("ori", "-"),
-            ]
-        )
+        start, end = target.span(1)
+        start_min = 13
+        if start >= start_min:
+            guide_start = start - start_min
+            guide_end = start + 7
+            guide = seq_revcomp[guide_start:guide_end]
+            assert len(guide) == 20
+            pos_seq_start = L - guide_start - 20
+            pos_seq_stop = L - guide_start
+            pos_seq_pam = pos_seq_start - 3
+            yield dict(
+                [
+                    ("target", matching_target),
+                    # 13 nt before the match + its first 7 nt; was matching_target[:20]
+                    ("guide", guide),
+                    ("start", pos_seq_start),
+                    ("stop", pos_seq_stop),
+                    ("pam", pos_seq_pam),
+                    ("ori", "-"),
+                ]
+            )
 
 
 def get_strand_value(value):
@@ -63,7 +76,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
             )
         else:
             genome_features = extract_features(records)
-
+    print(seq)
     alltargets = list(find_targets(seq))
     if alltargets:
         X = np.array(
-- 
GitLab