Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CRISPRbact
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
David BIKARD
CRISPRbact
Commits
8e22bb2d
Commit
8e22bb2d
authored
5 years ago
by
Remi PLANEL
Browse files
Options
Downloads
Patches
Plain Diff
Compute the biggest match between the guide and the off-target
parent
2592f048
No related branches found
No related tags found
No related merge requests found
Pipeline
#25960
passed with stage
in 57 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
crisprbact/off_target.py
+71
-9
71 additions, 9 deletions
crisprbact/off_target.py
with
71 additions
and
9 deletions
crisprbact/off_target.py
+
71
−
9
View file @
8e22bb2d
...
@@ -12,24 +12,86 @@ def get_pos_features(position, f_df):
...
@@ -12,24 +12,86 @@ def get_pos_features(position, f_df):
return
[]
return
[]
def gen_extract_off_target_strand_plus(off_target_matches, rec, guide, seed_size):
    """Yield one off-target record per seed match on the "+" strand.

    For every match, the sequence located just before the matched region is
    compared with the part of the guide that precedes the seed, reading both
    outwards from the seed, to find how far the identity extends.

    Args:
        off_target_matches: iterable of regex match objects exposing
            ``start()``, ``end()``, ``span()`` and ``group(0)``.
        rec: record exposing a sliceable ``seq`` and an ``id``.
        guide: guide sequence; its last ``seed_size`` characters are the seed.
        seed_size: number of guide characters that make up the seed.

    Yields:
        tuple: ``(start, end, pampos, strand, recid, max_matching_len,
        max_matching_seq)`` where ``strand`` is ``"+"`` and ``pampos`` is
        ``match.end()``.
    """
    # Length of the guide part that is not covered by the seed. Derived from
    # the guide itself rather than assuming a 20-character guide.
    seq_extension_len = len(guide) - seed_size
    # Loop-invariant: non-seed part of the guide, reversed so that position 0
    # is the character adjacent to the seed.
    guide_subseq = guide[:seq_extension_len][::-1]

    for match in off_target_matches:
        # With the "seed + 3-character PAM" pattern built by the caller this
        # is the position where the seed starts.
        end_pos_seq = match.end() - seed_size - 3
        # Clamp to 0: a negative start would be interpreted by Python as an
        # index counted from the end of the sequence and return the wrong
        # (usually empty) slice for matches close to the record start.
        start_pos_seq = max(0, match.start() - seq_extension_len)
        sub_sequence_to_match = rec.seq[start_pos_seq:end_pos_seq][::-1]

        # common_start stops at the shorter input, so a truncated upstream
        # region near the record boundary is handled without an assertion.
        matching_chars = list(common_start(sub_sequence_to_match, guide_subseq))
        matching_substr = "".join(matching_chars)

        yield match.span() + (
            match.end(),
            "+",
            rec.id,
            seed_size + len(matching_chars),
            # Put the extension back in sequence order in front of the match.
            matching_substr[::-1] + match.group(0),
        )
def common_start(seq1, seq2):
    """Yield the leading items that ``seq1`` and ``seq2`` have in common.

    The two iterables are walked in lockstep; items are yielded while they
    are equal and iteration stops at the first difference or as soon as the
    shorter iterable is exhausted.
    """
    for left, right in zip(seq1, seq2):
        if left != right:
            # First mismatch ends the shared prefix.
            break
        yield left
def gen_extract_off_target_strand_minus(off_target_matches, rec, guide, seed_size):
    """Yield one off-target record per seed match on the "-" strand.

    For every match, the sequence located just after the matched region is
    compared with the reverse complement of the part of the guide that
    precedes the seed, to find how far the identity extends.

    Args:
        off_target_matches: iterable of regex match objects exposing
            ``start()``, ``end()``, ``span()`` and ``group(0)``.
        rec: record exposing a sliceable ``seq`` and an ``id``.
        guide: guide sequence; its last ``seed_size`` characters are the seed.
        seed_size: number of guide characters that make up the seed.

    Yields:
        tuple: ``(start, end, pampos, strand, recid, max_matching_len,
        max_matching_seq)`` where ``strand`` is ``"-"`` and ``pampos`` is
        ``match.start()``.
    """
    # Length of the guide part that is not covered by the seed. Derived from
    # the guide itself rather than assuming a 20-character guide.
    seq_extension_len = len(guide) - seed_size
    # Loop-invariant part of the guide to compare against.
    # NOTE(review): hoisted out of the loop on the assumption that rev_comp
    # is a pure function of its argument - confirm.
    guide_subseq = rev_comp(guide[:seq_extension_len])

    for match in off_target_matches:
        # Since the match is on the reverse complement, the extension lies
        # after the matched region.
        start_pos_seq = match.start() + seed_size + 3
        end_pos_seq = match.end() + seq_extension_len
        # Slicing past the end of the record simply returns a shorter
        # sequence; common_start stops at the shorter input, so matches close
        # to the record end are handled without an assertion.
        sub_sequence_to_match = rec.seq[start_pos_seq:end_pos_seq]

        matching_chars = list(common_start(sub_sequence_to_match, guide_subseq))
        matching_substr = "".join(matching_chars)

        yield match.span() + (
            match.start(),
            "-",
            rec.id,
            seed_size + len(matching_chars),
            match.group(0) + matching_substr,
        )
def
get_off_target_pos
(
guide
,
recs
,
seed_size
):
def
get_off_target_pos
(
guide
,
recs
,
seed_size
):
if
recs
is
not
None
:
if
recs
is
not
None
:
for
rec
in
recs
:
for
rec
in
recs
:
# + ori
offs_plus
=
re
.
finditer
(
guide
[
-
seed_size
:]
+
"
[ATGC]GG
"
,
str
(
rec
.
seq
))
offs_plus
=
re
.
finditer
(
guide
[
-
seed_size
:]
+
"
[ATGC]GG
"
,
str
(
rec
.
seq
))
offs
=
[
match
.
span
()
+
(
match
.
end
(),
"
+
"
,
rec
.
id
)
for
match
in
offs_plus
]
offs
=
list
(
# TODO: comparer guide avec rec.seq[match.start():match.end()]
gen_extract_off_target_strand_plus
(
offs_plus
,
rec
,
guide
,
seed_size
)
)
# - ori
# - ori
offs_minus
=
re
.
finditer
(
offs_minus
=
re
.
finditer
(
"
CC[ATGC]
"
+
rev_comp
(
guide
[
-
seed_size
:]),
str
(
rec
.
seq
)
"
CC[ATGC]
"
+
rev_comp
(
guide
[
-
seed_size
:]),
str
(
rec
.
seq
)
)
)
offs
+=
[
offs
+=
list
(
match
.
span
()
+
(
match
.
start
(),
"
-
"
,
rec
.
id
)
for
match
in
offs_minus
gen_extract_off_target_strand_minus
(
offs_minus
,
rec
,
guide
,
seed_size
)
]
)
# comparer guide avec rec.seq[match.start():match.end()].reverse_complement()
# comparer les positions identique à partir de la fin et d'affilé.
offs_dict
=
dict
(
offs_dict
=
dict
(
zip
([
"
start
"
,
"
end
"
,
"
pampos
"
,
"
strand
"
,
"
recid
"
],
zip
(
*
offs
))
zip
(
[
"
start
"
,
"
end
"
,
"
pampos
"
,
"
strand
"
,
"
recid
"
,
"
max_matching_len
"
,
"
max_matching_seq
"
,
],
zip
(
*
offs
),
)
)
)
return
pd
.
DataFrame
(
offs_dict
)
return
pd
.
DataFrame
(
offs_dict
)
else
:
else
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment