Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CRISPRbact
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
David BIKARD
CRISPRbact
Commits
60ca2962
Commit
60ca2962
authored
5 years ago
by
Remi PLANEL
Browse files
Options
Downloads
Patches
Plain Diff
compute the longest perfect match between the guide and the off-target sequence
parent
c9f5cced
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
crisprbact/off_target.py
+52
-49
52 additions, 49 deletions
crisprbact/off_target.py
with
52 additions
and
49 deletions
crisprbact/off_target.py
+
52
−
49
View file @
60ca2962
...
...
@@ -4,6 +4,54 @@ import pandas as pd
from
crisprbact.utils
import
rev_comp
def compute_off_target_df(guide, seed_size, records, feature_df):
    """
    Build the off-target table for ``guide`` and annotate it with features.

    The search itself is delegated to ``get_off_target_pos``. When that call
    yields a DataFrame, a "features" column is added: for every row it holds
    the result of ``get_pos_features`` evaluated at the row's ``pampos``
    (a list of biopython SeqFeature objects that overlap with the off-target).

    Returns None when ``get_off_target_pos`` returns None.
    """
    off_targets = get_off_target_pos(guide, records, seed_size)
    if off_targets is None:
        return None

    # Row-wise iteration is kept on purpose: it also works when the table
    # has no rows (and therefore possibly no "pampos" column at all).
    features_per_row = []
    for _, row in off_targets.iterrows():
        features_per_row.append(get_pos_features(row.pampos, feature_df))

    off_targets["features"] = features_per_row
    return off_targets
def get_off_target_pos(guide, recs, seed_size):
    """
    Search every record for candidate off-targets of ``guide``.

    Two patterns are built from the last ``seed_size`` characters of
    ``guide`` (the seed):

    * "+" orientation: the seed followed by ``[ATGC]GG``
    * "-" orientation: ``CC[ATGC]`` followed by the reverse complement of
      the seed

    The matches found in each record sequence are handed to
    ``gen_extract_off_target_strand_plus`` / ``..._minus``, and the tuples
    they yield are collected over ALL records, so hits from an earlier
    record are no longer discarded when a later record is processed.

    Parameters
    ----------
    guide : str
        Guide sequence; only its trailing ``seed_size`` characters are used
        to build the search patterns.
    recs : iterable or None
        Records exposing ``.seq`` (converted with ``str``); they are also
        passed on to the extraction generators.
    seed_size : int
        Number of trailing guide characters forming the seed.

    Returns
    -------
    pandas.DataFrame or None
        One row per off-target with the columns start, end, pampos, strand,
        recid, max_matching_len, max_matching_seq and pam_seq. The frame is
        empty (no columns) when nothing matched. None is returned only when
        ``recs`` is None.
    """
    if recs is None:
        return None

    # The patterns depend only on the guide, so build them once instead of
    # once per record.
    seed = guide[-seed_size:]
    plus_pattern = re.compile(seed + "[ATGC]GG")
    minus_pattern = re.compile("CC[ATGC]" + rev_comp(seed))

    offs = []
    for rec in recs:
        rec_seq = str(rec.seq)
        # + ori
        offs.extend(
            gen_extract_off_target_strand_plus(
                plus_pattern.finditer(rec_seq), rec, guide, seed_size
            )
        )
        # - ori
        offs.extend(
            gen_extract_off_target_strand_minus(
                minus_pattern.finditer(rec_seq), rec, guide, seed_size
            )
        )

    column_names = [
        "start",
        "end",
        "pampos",
        "strand",
        "recid",
        "max_matching_len",
        "max_matching_seq",
        "pam_seq",
    ]
    # zip(*offs) transposes the list of per-hit tuples into per-column tuples.
    offs_dict = dict(zip(column_names, zip(*offs)))
    return pd.DataFrame(offs_dict)
def
get_pos_features
(
position
,
f_df
):
if
len
(
f_df
)
>
0
:
feature_at_pos
=
f_df
[(
f_df
.
start
<
position
)
&
(
f_df
.
end
>
position
)]
...
...
@@ -29,7 +77,8 @@ def gen_extract_off_target_strand_plus(off_target_matches, rec, guide, seed_size
"
+
"
,
rec
.
id
,
seed_size
+
len
(
matching_chars
),
matching_substr
[::
-
1
]
+
match
.
group
(
0
),
matching_substr
[::
-
1
]
+
match
.
group
(
0
)[:
-
3
],
match
.
group
(
0
)[
-
3
:],
)
...
...
@@ -61,43 +110,11 @@ def gen_extract_off_target_strand_minus(off_target_matches, rec, guide, seed_siz
"
-
"
,
rec
.
id
,
seed_size
+
len
(
matching_chars
),
match
.
group
(
0
)
+
matching_substr
,
match
.
group
(
0
)[
3
:]
+
matching_substr
,
match
.
group
(
0
)[:
3
],
)
def get_off_target_pos(guide, recs, seed_size):
    """
    Search every record for candidate off-targets of ``guide``.

    Two patterns are built from the last ``seed_size`` characters of
    ``guide`` (the seed):

    * "+" orientation: the seed followed by ``[ATGC]GG``
    * "-" orientation: ``CC[ATGC]`` followed by the reverse complement of
      the seed

    The matches found in each record sequence are handed to
    ``gen_extract_off_target_strand_plus`` / ``..._minus``, and the tuples
    they yield are collected over ALL records, so hits from an earlier
    record are no longer discarded when a later record is processed.

    Parameters
    ----------
    guide : str
        Guide sequence; only its trailing ``seed_size`` characters are used
        to build the search patterns.
    recs : iterable or None
        Records exposing ``.seq`` (converted with ``str``); they are also
        passed on to the extraction generators.
    seed_size : int
        Number of trailing guide characters forming the seed.

    Returns
    -------
    pandas.DataFrame or None
        One row per off-target with the columns start, end, pampos, strand,
        recid, max_matching_len and max_matching_seq. The frame is empty
        (no columns) when nothing matched. None is returned only when
        ``recs`` is None.
    """
    if recs is None:
        return None

    # The patterns depend only on the guide, so build them once instead of
    # once per record.
    seed = guide[-seed_size:]
    plus_pattern = re.compile(seed + "[ATGC]GG")
    minus_pattern = re.compile("CC[ATGC]" + rev_comp(seed))

    offs = []
    for rec in recs:
        rec_seq = str(rec.seq)
        # + ori
        offs.extend(
            gen_extract_off_target_strand_plus(
                plus_pattern.finditer(rec_seq), rec, guide, seed_size
            )
        )
        # - ori
        offs.extend(
            gen_extract_off_target_strand_minus(
                minus_pattern.finditer(rec_seq), rec, guide, seed_size
            )
        )

    column_names = [
        "start",
        "end",
        "pampos",
        "strand",
        "recid",
        "max_matching_len",
        "max_matching_seq",
    ]
    # zip(*offs) transposes the list of per-hit tuples into per-column tuples.
    offs_dict = dict(zip(column_names, zip(*offs)))
    return pd.DataFrame(offs_dict)
def
extract_records
(
genome
):
records
=
list
(
genome
)
if
records
and
len
(
records
)
>
0
:
...
...
@@ -132,17 +149,3 @@ def extract_features(recs):
return
pd
.
DataFrame
(
f_dict
)
else
:
return
None
def compute_off_target_df(guide, seed_size, records, feature_df):
    """
    Return a pandas DataFrame describing the off-targets found for ``guide``.

    The table comes from ``get_off_target_pos``; a "features" column is then
    filled, row by row, with ``get_pos_features`` evaluated at the row's
    ``pampos`` (a list of biopython SeqFeature objects that overlap with the
    off-target).

    Returns None when ``get_off_target_pos`` returns None.
    """
    table = get_off_target_pos(guide, records, seed_size)
    if table is None:
        return None

    # Iterating rows (rather than the "pampos" column) keeps this working
    # for a table without any rows.
    overlapping = []
    for _, hit in table.iterrows():
        overlapping.append(get_pos_features(hit.pampos, feature_df))

    table["features"] = overlapping
    return table
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment