Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CRISPRbact
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
David BIKARD
CRISPRbact
Commits
6c6d982c
Commit
6c6d982c
authored
5 years ago
by
Remi PLANEL
Browse files
Options
Downloads
Patches
Plain Diff
Raise exception on wrong off-target format. Fix #7
parent
a4c0663c
No related branches found
No related tags found
1 merge request
!5
Raise exception on wrong off-target format. Fix #7
Pipeline
#25164
passed with stage
Stage:
in 45 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
crisprbact/__init__.py
+2
-0
2 additions, 0 deletions
crisprbact/__init__.py
crisprbact/off_target.py
+50
-31
50 additions, 31 deletions
crisprbact/off_target.py
crisprbact/predict.py
+8
-3
8 additions, 3 deletions
crisprbact/predict.py
crisprbact/utils.py
+6
-0
6 additions, 0 deletions
crisprbact/utils.py
with
66 additions
and
34 deletions
crisprbact/__init__.py
+
2
−
0
View file @
6c6d982c
...
...
@@ -4,10 +4,12 @@ from crisprbact.off_target import (
extract_features
,
extract_records
,
)
from
crisprbact.utils
import
NoRecordsException
__all__
=
[
"
extract_records
"
,
"
on_target_predict
"
,
"
compute_off_target_df
"
,
"
extract_features
"
,
"
NoRecordsException
"
,
]
This diff is collapsed.
Click to expand it.
crisprbact/off_target.py
+
50
−
31
View file @
6c6d982c
...
...
@@ -12,16 +12,25 @@ def get_pos_features(position, f_df):
return
[]
def get_off_target_pos(guide, recs, seed_size):
    """Find every seed + PAM occurrence of a guide on both strands of all records.

    The last ``seed_size`` bases of ``guide`` are searched followed by an
    ``NGG`` PAM on the + strand, and as ``CCN`` followed by the reverse
    complement of the seed on the - strand.

    Parameters
    ----------
    guide : str
        Guide sequence; only its last ``seed_size`` characters are used.
    recs : iterable or None
        Sequence records exposing ``.seq`` and ``.id``.
    seed_size : int
        Number of trailing guide bases that must match.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when ``recs`` is ``None``. Otherwise one row per match with
        the columns ``start``, ``end``, ``pampos``, ``strand`` and ``recid``;
        the frame is empty (but keeps these columns) when nothing matches.
    """
    if recs is None:
        return None

    seed = guide[-seed_size:]
    # Both patterns depend only on the guide, so compile them once rather
    # than once per record.
    plus_pattern = re.compile(seed + "[ATGC]GG")
    minus_pattern = re.compile("CC[ATGC]" + rev_comp(seed))

    # Accumulate over *all* records: re-creating the list for each record
    # would silently discard the matches found in earlier records.
    offs = []
    for rec in recs:
        sequence = str(rec.seq)
        # + ori: pampos is the end of the match (the PAM is at the 3' end).
        offs.extend(
            match.span() + (match.end(), "+", rec.id)
            for match in plus_pattern.finditer(sequence)
        )
        # - ori: pampos is the start of the match (the PAM comes first).
        offs.extend(
            match.span() + (match.start(), "-", rec.id)
            for match in minus_pattern.finditer(sequence)
        )

    # Passing explicit column names keeps the schema stable even when
    # there are no matches at all.
    return pd.DataFrame(offs, columns=["start", "end", "pampos", "strand", "recid"])
def
extract_records
(
genome
):
...
...
@@ -33,24 +42,31 @@ def extract_records(genome):
def extract_features(recs):
    """Tabulate the CDS, ncRNA, rRNA and tRNA features of all records.

    Parameters
    ----------
    recs : iterable or None
        Records exposing ``.id`` and ``.features``; each feature must expose
        ``.type`` and ``.location`` (with ``start``, ``end`` and ``strand``).

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when ``recs`` is ``None``. Otherwise one row per retained
        feature with the columns ``start``, ``end``, ``strand``, ``type``,
        ``feature`` (the feature object itself) and ``recid``.
    """
    if recs is None:
        return None

    wanted_types = ("CDS", "ncRNA", "rRNA", "tRNA")
    # Only the wanted types are collected, so a feature of any other type
    # (such as an entry spanning the whole sequence) never enters the list.
    # Nothing is therefore sliced off the front: doing so would drop the
    # first genuine feature.
    rows = [
        (
            # int() instead of the legacy ``.position`` attribute
            # NOTE(review): assumes positions are int-like, as Biopython
            # position objects are - confirm for other record sources.
            int(f.location.start),
            int(f.location.end),
            f.location.strand,
            f.type,
            f,
            rec.id,
        )
        for rec in recs
        for f in rec.features
        if f.type in wanted_types
    ]
    # Explicit column names keep the schema stable when no feature matches.
    return pd.DataFrame(
        rows, columns=["start", "end", "strand", "type", "feature", "recid"]
    )
def
compute_off_target_df
(
guide
,
seed_size
,
records
,
feature_df
):
...
...
@@ -58,7 +74,10 @@ def compute_off_target_df(guide, seed_size, records, feature_df):
The features column contains a list of biopython SeqFeature objects that overlap
with the off-target
"""
offs_df
=
get_off_target_pos
(
guide
,
records
,
seed_size
)
offs_df
[
"
features
"
]
=
[
get_pos_features
(
off
.
pampos
,
feature_df
)
for
i
,
off
in
offs_df
.
iterrows
()
]
return
offs_df
if
offs_df
is
not
None
:
offs_df
[
"
features
"
]
=
[
get_pos_features
(
off
.
pampos
,
feature_df
)
for
i
,
off
in
offs_df
.
iterrows
()
]
return
offs_df
else
:
return
None
This diff is collapsed.
Click to expand it.
crisprbact/predict.py
+
8
−
3
View file @
6c6d982c
import
numpy
as
np
import
re
from
importlib.resources
import
open_binary
from
crisprbact.utils
import
rev_comp
from
crisprbact.utils
import
rev_comp
,
NoRecordsException
from
crisprbact.off_target
import
(
compute_off_target_df
,
extract_records
,
...
...
@@ -56,7 +56,12 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
genome_features
=
None
if
genome
:
records
=
extract_records
(
genome
)
genome_features
=
extract_features
(
records
)
if
records
is
None
:
raise
NoRecordsException
(
"
No records found in sequence file. Check the sequence or the format
"
)
else
:
genome_features
=
extract_features
(
records
)
alltargets
=
list
(
find_targets
(
seq
))
if
alltargets
:
...
...
@@ -81,7 +86,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12]):
target
[
"
guide
"
],
seed_size
,
records
,
genome_features
)
off_targets_list
=
[]
if
not
off_target_df
.
empty
:
if
off_target_df
is
not
None
and
not
off_target_df
.
empty
:
off_targets
=
off_target_df
.
loc
[
0
:,
[
"
start
"
,
"
end
"
,
"
pampos
"
,
"
strand
"
,
"
recid
"
,
"
features
"
],
...
...
This diff is collapsed.
Click to expand it.
crisprbact/utils.py
+
6
−
0
View file @
6c6d982c
# Complement lookup for DNA bases, built once at import time. Lowercase
# bases are mapped too, so they are complemented rather than passed through.
_COMPLEMENT_TABLE = str.maketrans("ATGCatgc", "TACGtacg")


def rev_comp(seq):
    """Return the reverse complement of a DNA string.

    ``A``/``T`` and ``G``/``C`` are swapped (case is preserved) and the
    result is reversed. Any other character is kept unchanged, only its
    position is mirrored.
    """
    return seq.translate(_COMPLEMENT_TABLE)[::-1]
class NoRecordsException(Exception):
    """Signal that no record was found in the sequence file."""
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment