Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CRISPRbact
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
David BIKARD
CRISPRbact
Commits
5d74c21d
Commit
5d74c21d
authored
5 years ago
by
Remi PLANEL
Browse files
Options
Downloads
Patches
Plain Diff
off-target features per record id
parent
222412cd
No related branches found
No related tags found
No related merge requests found
Pipeline
#26159
passed with stage
in 48 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
crisprbact/off_target.py
+46
-37
46 additions, 37 deletions
crisprbact/off_target.py
with
46 additions
and
37 deletions
crisprbact/off_target.py
+
46
−
37
View file @
5d74c21d
...
...
@@ -2,6 +2,7 @@ import re
import
pandas
as
pd
from
crisprbact.utils
import
rev_comp
from
itertools
import
chain
def
compute_off_target_df
(
guide
,
seed_size
,
records
,
feature_df
):
...
...
@@ -11,71 +12,79 @@ def compute_off_target_df(guide, seed_size, records, feature_df):
offs_df
=
get_off_target_pos
(
guide
,
records
,
seed_size
)
if
offs_df
is
not
None
:
offs_df
[
"
features
"
]
=
[
get_pos_features
(
off
.
pampos
,
feature_df
)
for
i
,
off
in
offs_df
.
iterrows
()
get_pos_features
(
off
.
recid
,
off
.
pampos
,
feature_df
)
for
i
,
off
in
offs_df
.
iterrows
()
]
return
offs_df
else
:
return
None
def
get_off_target_pos
(
guide
,
recs
,
seed_size
):
if
recs
is
not
None
:
for
rec
in
recs
:
offs_plus
=
re
.
finditer
(
guide
[
-
seed_size
:]
+
"
[ATGC]GG
"
,
str
(
rec
.
seq
))
offs
=
list
(
gen_extract_off_target_strand_plus
(
offs_plus
,
rec
,
guide
,
seed_size
)
def
get_off_target_pos
(
guide
,
records
,
seed_size
):
if
records
is
not
None
:
offs
=
iter
([])
for
rec
in
records
:
sequence_str
=
str
(
rec
.
seq
)
seqid
=
rec
.
id
offs_plus
=
re
.
finditer
(
guide
[
-
seed_size
:]
+
"
[ATGC]GG
"
,
sequence_str
)
offs_plus
=
gen_extract_off_target_strand_plus
(
offs_plus
,
seqid
,
sequence_str
,
guide
,
seed_size
)
# - ori
offs_minus
=
re
.
finditer
(
"
CC[ATGC]
"
+
rev_comp
(
guide
[
-
seed_size
:]),
s
tr
(
rec
.
seq
)
"
CC[ATGC]
"
+
rev_comp
(
guide
[
-
seed_size
:]),
s
equence_str
)
offs
+=
list
(
gen_extract_off_target_strand_minus
(
offs_minus
,
rec
,
guide
,
seed_size
)
offs
_minus
=
gen_extract_off_target_strand_minus
(
offs_minus
,
seqid
,
sequence_str
,
guide
,
seed_size
)
offs
_dict
=
dict
(
zip
(
[
"
start
"
,
"
end
"
,
"
pampos
"
,
"
strand
"
,
"
reci
d
"
,
"
max_matching_len
"
,
"
max_matching_
seq
"
,
"
pam
_seq
"
,
]
,
zip
(
*
offs
)
,
)
offs
=
chain
(
offs
,
offs_plus
,
offs_minus
)
offs_dict
=
dict
(
zip
(
[
"
start
"
,
"
end
"
,
"
pampos
"
,
"
stran
d
"
,
"
recid
"
,
"
max_matching_
len
"
,
"
max_matching
_seq
"
,
"
pam_seq
"
,
]
,
zip
(
*
offs
),
)
return
pd
.
DataFrame
(
offs_dict
)
)
return
pd
.
DataFrame
(
offs_dict
)
else
:
return
None
def
get_pos_features
(
position
,
f_df
):
def
get_pos_features
(
recid
,
position
,
f_df
):
if
len
(
f_df
)
>
0
:
feature_at_pos
=
f_df
[(
f_df
.
start
<
position
)
&
(
f_df
.
end
>
position
)]
feature_at_pos
=
f_df
[
(
f_df
.
start
<
position
)
&
(
f_df
.
end
>
position
)
&
(
f_df
.
recid
==
recid
)
]
return
feature_at_pos
.
feature
.
values
else
:
return
[]
def
gen_extract_off_target_strand_plus
(
off_target_matches
,
rec
,
guide
,
seed_size
):
def
gen_extract_off_target_strand_plus
(
off_target_matches
,
seqid
,
sequence_string
,
guide
,
seed_size
):
for
match
in
off_target_matches
:
guide_subseq
=
guide
[:
20
-
seed_size
][::
-
1
]
# extract part of the sequence
seq_extension_len
=
len
(
guide
)
-
seed_size
start_pos_seq
=
match
.
start
()
-
seq_extension_len
end_pos_seq
=
match
.
end
()
-
seed_size
-
3
sub_sequence_to_match
=
rec
.
seq
[
start_pos_seq
:
end_pos_seq
][::
-
1
]
assert
len
(
sub_sequence_to_match
)
==
len
(
guide_subseq
)
sub_sequence_to_match
=
sequence_string
[
start_pos_seq
:
end_pos_seq
][::
-
1
]
matching_chars
=
list
(
common_start
(
sub_sequence_to_match
,
guide_subseq
))
matching_substr
=
""
.
join
(
matching_chars
)
yield
match
.
span
()
+
(
match
.
end
(),
"
+
"
,
rec
.
id
,
seq
id
,
seed_size
+
len
(
matching_chars
),
matching_substr
[::
-
1
]
+
match
.
group
(
0
)[:
-
3
],
match
.
group
(
0
)[
-
3
:],
...
...
@@ -90,25 +99,25 @@ def common_start(seq1, seq2):
return
def
gen_extract_off_target_strand_minus
(
off_target_matches
,
rec
,
guide
,
seed_size
):
def
gen_extract_off_target_strand_minus
(
off_target_matches
,
seqid
,
sequence_string
,
guide
,
seed_size
):
for
match
in
off_target_matches
:
# Extract the sequence.
# since rev_comp, extend the sequence to the end
seq_extension_len
=
len
(
guide
)
-
seed_size
end_pos_seq
=
match
.
end
()
+
seq_extension_len
start_pos_seq
=
match
.
start
()
+
seed_size
+
3
sub_sequence_to_match
=
rec
.
seq
[
start_pos_seq
:
end_pos_seq
]
sub_sequence_to_match
=
sequence_string
[
start_pos_seq
:
end_pos_seq
]
# Extract part of the guide to match
guide_subseq
=
rev_comp
(
guide
[:
20
-
seed_size
])
assert
len
(
sub_sequence_to_match
)
==
len
(
guide_subseq
)
matching_chars
=
list
(
common_start
(
sub_sequence_to_match
,
guide_subseq
))
matching_substr
=
""
.
join
(
matching_chars
)
yield
match
.
span
()
+
(
match
.
start
(),
"
-
"
,
rec
.
id
,
seq
id
,
seed_size
+
len
(
matching_chars
),
match
.
group
(
0
)[
3
:]
+
matching_substr
,
match
.
group
(
0
)[:
3
],
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment