From f03c3f3026792d5f5146e56b99a732196a2b6ee1 Mon Sep 17 00:00:00 2001 From: Remi Planel <rplanel@pasteur.fr> Date: Fri, 3 Apr 2020 12:08:18 +0200 Subject: [PATCH] Fix bug on guide sequence #10 --- crisprbact/predict.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/crisprbact/predict.py b/crisprbact/predict.py index 49ed322..d4dc6a5 100644 --- a/crisprbact/predict.py +++ b/crisprbact/predict.py @@ -29,19 +29,32 @@ def find_targets(seq): repam = "[ATGC]GG" L = len(seq) seq_revcomp = rev_comp(seq) - matching_target = re.finditer("(?=([ATGC]{6}" + repam + "[ATGC]{16}))", seq_revcomp) - for target in matching_target: + matching_targets = re.finditer( + "(?=([ATGC]{6}" + repam + "[ATGC]{16}))", seq_revcomp + ) + for target in matching_targets: matching_target = target.group(1) - yield dict( - [ - ("target", matching_target), - ("guide", matching_target[:20]), - ("start", L - target.start() - 20), - ("stop", L - target.start()), - ("pam", L - target.start() - 22), - ("ori", "-"), - ] - ) + start, end = target.span(1) + start_min = 13 + if start >= start_min: + guide_start = start - start_min + guide_end = start + 7 + guide = seq_revcomp[guide_start:guide_end] + assert len(guide) == 20 + pos_seq_start = L - guide_start - 20 + pos_seq_stop = L - guide_start + pos_seq_pam = pos_seq_start - 3 + yield dict( + [ + ("target", matching_target), + # ("guide", matching_target[:20]), + ("guide", guide), + ("start", pos_seq_start), + ("stop", pos_seq_stop), + ("pam", pos_seq_pam), + ("ori", "-"), + ] + ) def get_strand_value(value): @@ -63,7 +76,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]): ) else: genome_features = extract_features(records) - + print(seq) alltargets = list(find_targets(seq)) if alltargets: X = np.array( -- GitLab