Skip to content
Snippets Groups Projects
Commit 69e2daff authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

Merge branch 'off-target' into 'master'

Off target

See merge request !3
parents 888da704 3a753216
No related branches found
No related tags found
1 merge request!3Off target
Pipeline #23198 passed
......@@ -4,3 +4,4 @@
dist
*.egg-info
__pycache__
data-test/
from .predict import on_target_predict
from crisprbact.predict import on_target_predict
from crisprbact.off_target import (
compute_off_target_df,
extract_features,
extract_records,
)
__all__ = ["on_target_predict"]
__all__ = [
"extract_records",
"on_target_predict",
"compute_off_target_df",
"extract_features",
]
from crisprbact import on_target_predict
from crisprbact.predict import on_target_predict
from Bio import SeqIO
import click
......@@ -48,7 +48,7 @@ def from_str(config, target, output_file):
@predict.command()
@click.option(
"-t", "--target", type=click.File("rU"), required=True, help="Sequence file"
"-t", "--target", type=click.File("rU"), required=True, help="Sequence file",
)
@click.option(
"-f",
......@@ -58,9 +58,10 @@ def from_str(config, target, output_file):
default="fasta",
show_default=True,
)
# @click.option("-g", "--genome", type=click.File("rU"), required=True, help="Genome")
@click.argument("output-file", type=click.File("w"), default="-")
@pass_config
def from_seq(config, target, seq_format, output_file):
def from_seq(config, target, seq_format, output_file): # genome,
"""
Outputs candidate guide RNAs for the S. pyogenes dCas9 with predicted on-target
activity from a target gene.
......@@ -76,6 +77,7 @@ def from_seq(config, target, seq_format, output_file):
if config.verbose:
click.secho(" - search guide RNAs for %s " % record.id, fg=fg)
guide_rnas = on_target_predict(str(record.seq))
write_guide_rnas(guide_rnas, output_file, record.id)
......@@ -85,9 +87,7 @@ def print_parameters(target, fg="blue"):
def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"):
for guide_rna in guide_rnas:
# click.echo(guide_rna)
click.echo(
"\t".join(
[
......
import re
import pandas as pd
from crisprbact.utils import rev_comp
def get_pos_features(position, f_df):
if len(f_df) > 0:
feature_at_pos = f_df[(f_df.start < position) & (f_df.end > position)]
return feature_at_pos.feature.values
else:
return []
def get_off_target_pos(guide, recs, records):
for rec in recs:
# + ori
offs_plus = re.finditer(guide[-records:] + "[ATGC]GG", str(rec.seq))
offs = [match.span() + (match.end(), "+", rec.id) for match in offs_plus]
# - ori
offs_minus = re.finditer("CC[ATGC]" + rev_comp(guide[-records:]), str(rec.seq))
offs += [match.span() + (match.start(), "-", rec.id) for match in offs_minus]
offs_dict = dict(zip(["start", "end", "pampos", "strand", "recid"], zip(*offs)))
return pd.DataFrame(offs_dict)
def extract_records(genome):
records = list(genome)
if records and len(records) > 0:
return records
else:
return None
def extract_features(recs):
f_list = []
for rec in recs:
for f in rec.features:
if f.type in ["CDS", "ncRNA", "rRNA", "tRNA"]:
f_list.append(
(
f.location.start.position,
f.location.end.position,
f.location.strand,
f.type,
f,
rec.id,
)
)
f_dict = dict(
zip(["start", "end", "strand", "type", "feature", "recid"], zip(*f_list[1:]),)
) # starts at 1 to get rid of the first feature which is the whole chromosome
return pd.DataFrame(f_dict)
def compute_off_target_df(guide, seed_size, records, feature_df):
""" Returns a pandas DataFrame with data about the identified off-targets.
The features column contains a list of biopython SeqFeature objects that overlap
with the off-target"""
offs_df = get_off_target_pos(guide, records, seed_size)
offs_df["features"] = [
get_pos_features(off.pampos, feature_df) for i, off in offs_df.iterrows()
]
return offs_df
import numpy as np
import re
from importlib.resources import open_binary
from crisprbact.utils import rev_comp
from crisprbact.off_target import (
compute_off_target_df,
extract_records,
extract_features,
)
with open_binary("crisprbact", "reg_coef.pkl") as handle:
coef = np.load(handle, allow_pickle=True)
......@@ -19,45 +25,62 @@ def predict(X):
return [np.sum(x * coef) for x in X]
def rev_comp(seq):
comp = str.maketrans("ATGC", "TACG")
return seq.translate(comp)[::-1]
def find_targets(seq):
repam = "[ATGC]GG"
L = len(seq)
seq_revcomp = rev_comp(seq)
return (
dict(
matching_target = re.finditer("(?=([ATGC]{6}" + repam + "[ATGC]{16}))", seq_revcomp)
for target in matching_target:
yield dict(
[
("target", m.group(1)),
("guide", m.group(1)[:20]),
("start", L - m.start() - 20),
("stop", L - m.start()),
("pam", L - m.start() - 22),
("target", target.group(1)),
("guide", target.group(1)[:20]),
("start", L - target.start() - 20),
("stop", L - target.start()),
("pam", L - target.start() - 22),
("ori", "-"),
]
)
for m in re.finditer("(?=([ATGC]{6}" + repam + "[ATGC]{16}))", seq_revcomp)
)
def on_target_predict(seq):
def on_target_predict(seq, genome=None):
seq = seq.upper() # make uppercase
seq = re.sub(r"\s", "", seq) # removes white space
records = None
genome_features = None
if genome:
records = extract_records(genome)
genome_features = extract_features(records)
alltargets = list(find_targets(seq))
if alltargets:
X = np.array(
[
encode(tar["target"][:7] + tar["target"][9:]) for tar in alltargets
encode(target["target"][:7] + target["target"][9:])
for target in alltargets
] # encode and remove GG of PAM
)
X = X.reshape(X.shape[0], -1)
preds = predict(X)
for i, target in enumerate(alltargets):
target.update({"pred": preds[i]})
if genome:
off_target_df = compute_off_target_df(
target["guide"], 7, records, genome_features
)
off_target_list = []
features = off_target_df.loc[0:, "features"]
for feat in features:
features_list = []
for x in feat:
for k, feat in x.qualifiers.items():
if k != "translation":
features_list.append((k, feat))
off_target_list.append(features_list)
target.update({"off_targets": off_target_list})
else:
target.update({"off_targets": []})
return alltargets
else:
return []
def rev_comp(seq):
comp = str.maketrans("ATGC", "TACG")
return seq.translate(comp)[::-1]
......@@ -191,6 +191,22 @@ version = "19.2"
pyparsing = ">=2.0.2"
six = "*"
[[package]]
category = "main"
description = "Powerful data structures for data analysis, time series, and statistics"
name = "pandas"
optional = false
python-versions = ">=3.5.3"
version = "0.25.3"
[package.dependencies]
numpy = ">=1.13.3"
python-dateutil = ">=2.6.1"
pytz = ">=2017.2"
[package.extras]
test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"]
[[package]]
category = "dev"
description = "Utility library for gitignore style pattern matching of file paths."
......@@ -294,13 +310,32 @@ version = ">=0.12"
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
[[package]]
category = "main"
description = "Extensions to the standard Python datetime module"
name = "python-dateutil"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
version = "2.8.1"
[package.dependencies]
six = ">=1.5"
[[package]]
category = "main"
description = "World timezone definitions, modern and historical"
name = "pytz"
optional = false
python-versions = "*"
version = "2019.3"
[[package]]
category = "dev"
description = "YAML parser and emitter for Python"
name = "pyyaml"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "5.2"
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "5.3"
[[package]]
category = "dev"
......@@ -311,7 +346,18 @@ python-versions = "*"
version = "2019.12.20"
[[package]]
category = "dev"
category = "main"
description = "a python refactoring library..."
name = "rope"
optional = false
python-versions = "*"
version = "0.16.0"
[package.extras]
dev = ["pytest"]
[[package]]
category = "main"
description = "Python 2 and 3 compatibility utilities"
name = "six"
optional = false
......@@ -371,7 +417,7 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"]
testing = ["pathlib2", "contextlib2", "unittest2"]
[metadata]
content-hash = "74295930225fa16a026b2f57e2f50e2bb3919702d8845b1f834a5217b8d655a4"
content-hash = "fd6a383dbb81068f75ead2da55eccaea5f426d79758f22ebaf6eef7ac197234a"
python-versions = "^3.7"
[metadata.files]
......@@ -489,6 +535,27 @@ packaging = [
{file = "packaging-19.2-py2.py3-none-any.whl", hash = "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108"},
{file = "packaging-19.2.tar.gz", hash = "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47"},
]
pandas = [
{file = "pandas-0.25.3-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:df8864824b1fe488cf778c3650ee59c3a0d8f42e53707de167ba6b4f7d35f133"},
{file = "pandas-0.25.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:7458c48e3d15b8aaa7d575be60e1e4dd70348efcd9376656b72fecd55c59a4c3"},
{file = "pandas-0.25.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:61741f5aeb252f39c3031d11405305b6d10ce663c53bc3112705d7ad66c013d0"},
{file = "pandas-0.25.3-cp35-cp35m-win32.whl", hash = "sha256:adc3d3a3f9e59a38d923e90e20c4922fc62d1e5a03d083440468c6d8f3f1ae0a"},
{file = "pandas-0.25.3-cp35-cp35m-win_amd64.whl", hash = "sha256:975c461accd14e89d71772e89108a050fa824c0b87a67d34cedf245f6681fc17"},
{file = "pandas-0.25.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ee50c2142cdcf41995655d499a157d0a812fce55c97d9aad13bc1eef837ed36c"},
{file = "pandas-0.25.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4545467a637e0e1393f7d05d61dace89689ad6d6f66f267f86fff737b702cce9"},
{file = "pandas-0.25.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:bbe3eb765a0b1e578833d243e2814b60c825b7fdbf4cdfe8e8aae8a08ed56ecf"},
{file = "pandas-0.25.3-cp36-cp36m-win32.whl", hash = "sha256:8153705d6545fd9eb6dd2bc79301bff08825d2e2f716d5dced48daafc2d0b81f"},
{file = "pandas-0.25.3-cp36-cp36m-win_amd64.whl", hash = "sha256:26382aab9c119735908d94d2c5c08020a4a0a82969b7e5eefb92f902b3b30ad7"},
{file = "pandas-0.25.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:00dff3a8e337f5ed7ad295d98a31821d3d0fe7792da82d78d7fd79b89c03ea9d"},
{file = "pandas-0.25.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e45055c30a608076e31a9fcd780a956ed3b1fa20db61561b8d88b79259f526f7"},
{file = "pandas-0.25.3-cp37-cp37m-win32.whl", hash = "sha256:255920e63850dc512ce356233081098554d641ba99c3767dde9e9f35630f994b"},
{file = "pandas-0.25.3-cp37-cp37m-win_amd64.whl", hash = "sha256:22361b1597c8c2ffd697aa9bf85423afa9e1fcfa6b1ea821054a244d5f24d75e"},
{file = "pandas-0.25.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9962957a27bfb70ab64103d0a7b42fa59c642fb4ed4cb75d0227b7bb9228535d"},
{file = "pandas-0.25.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:78bf638993219311377ce9836b3dc05f627a666d0dbc8cec37c0ff3c9ada673b"},
{file = "pandas-0.25.3-cp38-cp38-win32.whl", hash = "sha256:6a3ac2c87e4e32a969921d1428525f09462770c349147aa8e9ab95f88c71ec71"},
{file = "pandas-0.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:33970f4cacdd9a0ddb8f21e151bfb9f178afb7c36eb7c25b9094c02876f385c2"},
{file = "pandas-0.25.3.tar.gz", hash = "sha256:52da74df8a9c9a103af0a72c9d5fdc8e0183a90884278db7f386b5692a2220a4"},
]
pathspec = [
{file = "pathspec-0.7.0-py2.py3-none-any.whl", hash = "sha256:163b0632d4e31cef212976cf57b43d9fd6b0bac6e67c26015d611a647d5e7424"},
{file = "pathspec-0.7.0.tar.gz", hash = "sha256:562aa70af2e0d434367d9790ad37aed893de47f1693e4201fd1d3dca15d19b96"},
......@@ -521,18 +588,26 @@ pytest = [
{file = "pytest-5.2.2-py3-none-any.whl", hash = "sha256:58cee9e09242937e136dbb3dab466116ba20d6b7828c7620f23947f37eb4dae4"},
{file = "pytest-5.2.2.tar.gz", hash = "sha256:27abc3fef618a01bebb1f0d6d303d2816a99aa87a5968ebc32fe971be91eb1e6"},
]
python-dateutil = [
{file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"},
{file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"},
]
pytz = [
{file = "pytz-2019.3-py2.py3-none-any.whl", hash = "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d"},
{file = "pytz-2019.3.tar.gz", hash = "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"},
]
pyyaml = [
{file = "PyYAML-5.2-cp27-cp27m-win32.whl", hash = "sha256:35ace9b4147848cafac3db142795ee42deebe9d0dad885ce643928e88daebdcc"},
{file = "PyYAML-5.2-cp27-cp27m-win_amd64.whl", hash = "sha256:ebc4ed52dcc93eeebeae5cf5deb2ae4347b3a81c3fa12b0b8c976544829396a4"},
{file = "PyYAML-5.2-cp35-cp35m-win32.whl", hash = "sha256:38a4f0d114101c58c0f3a88aeaa44d63efd588845c5a2df5290b73db8f246d15"},
{file = "PyYAML-5.2-cp35-cp35m-win_amd64.whl", hash = "sha256:483eb6a33b671408c8529106df3707270bfacb2447bf8ad856a4b4f57f6e3075"},
{file = "PyYAML-5.2-cp36-cp36m-win32.whl", hash = "sha256:7f38e35c00e160db592091751d385cd7b3046d6d51f578b29943225178257b31"},
{file = "PyYAML-5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:0e7f69397d53155e55d10ff68fdfb2cf630a35e6daf65cf0bdeaf04f127c09dc"},
{file = "PyYAML-5.2-cp37-cp37m-win32.whl", hash = "sha256:e4c015484ff0ff197564917b4b4246ca03f411b9bd7f16e02a2f586eb48b6d04"},
{file = "PyYAML-5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:4b6be5edb9f6bb73680f5bf4ee08ff25416d1400fbd4535fe0069b2994da07cd"},
{file = "PyYAML-5.2-cp38-cp38-win32.whl", hash = "sha256:8100c896ecb361794d8bfdb9c11fce618c7cf83d624d73d5ab38aef3bc82d43f"},
{file = "PyYAML-5.2-cp38-cp38-win_amd64.whl", hash = "sha256:2e9f0b7c5914367b0916c3c104a024bb68f269a486b9d04a2e8ac6f6597b7803"},
{file = "PyYAML-5.2.tar.gz", hash = "sha256:c0ee8eca2c582d29c3c2ec6e2c4f703d1b7f1fb10bc72317355a746057e7346c"},
{file = "PyYAML-5.3-cp27-cp27m-win32.whl", hash = "sha256:940532b111b1952befd7db542c370887a8611660d2b9becff75d39355303d82d"},
{file = "PyYAML-5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:059b2ee3194d718896c0ad077dd8c043e5e909d9180f387ce42012662a4946d6"},
{file = "PyYAML-5.3-cp35-cp35m-win32.whl", hash = "sha256:4fee71aa5bc6ed9d5f116327c04273e25ae31a3020386916905767ec4fc5317e"},
{file = "PyYAML-5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:dbbb2379c19ed6042e8f11f2a2c66d39cceb8aeace421bfc29d085d93eda3689"},
{file = "PyYAML-5.3-cp36-cp36m-win32.whl", hash = "sha256:e3a057b7a64f1222b56e47bcff5e4b94c4f61faac04c7c4ecb1985e18caa3994"},
{file = "PyYAML-5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:74782fbd4d4f87ff04159e986886931456a1894c61229be9eaf4de6f6e44b99e"},
{file = "PyYAML-5.3-cp37-cp37m-win32.whl", hash = "sha256:24521fa2890642614558b492b473bee0ac1f8057a7263156b02e8b14c88ce6f5"},
{file = "PyYAML-5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:1cf708e2ac57f3aabc87405f04b86354f66799c8e62c28c5fc5f88b5521b2dbf"},
{file = "PyYAML-5.3-cp38-cp38-win32.whl", hash = "sha256:70024e02197337533eef7b85b068212420f950319cc8c580261963aefc75f811"},
{file = "PyYAML-5.3-cp38-cp38-win_amd64.whl", hash = "sha256:cb1f2f5e426dc9f07a7681419fe39cee823bb74f723f36f70399123f439e9b20"},
{file = "PyYAML-5.3.tar.gz", hash = "sha256:e9f45bd5b92c7974e59bcd2dcc8631a6b6cc380a904725fce7bc08872e691615"},
]
regex = [
{file = "regex-2019.12.20-cp27-cp27m-win32.whl", hash = "sha256:7bbbdbada3078dc360d4692a9b28479f569db7fc7f304b668787afc9feb38ec8"},
......@@ -557,6 +632,11 @@ regex = [
{file = "regex-2019.12.20-cp38-cp38-win_amd64.whl", hash = "sha256:d3ee0b035816e0520fac928de31b6572106f0d75597f6fa3206969a02baba06f"},
{file = "regex-2019.12.20.tar.gz", hash = "sha256:106e25a841921d8259dcef2a42786caae35bc750fb996f830065b3dfaa67b77e"},
]
rope = [
{file = "rope-0.16.0-py2-none-any.whl", hash = "sha256:ae1fa2fd56f64f4cc9be46493ce54bed0dd12dee03980c61a4393d89d84029ad"},
{file = "rope-0.16.0-py3-none-any.whl", hash = "sha256:52423a7eebb5306a6d63bdc91a7c657db51ac9babfb8341c9a1440831ecf3203"},
{file = "rope-0.16.0.tar.gz", hash = "sha256:d2830142c2e046f5fc26a022fe680675b6f48f81c7fc1f03a950706e746e9dfe"},
]
six = [
{file = "six-1.13.0-py2.py3-none-any.whl", hash = "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd"},
{file = "six-1.13.0.tar.gz", hash = "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66"},
......
......@@ -22,6 +22,8 @@ python = "^3.7"
numpy = "^1.17"
click = "^7.0"
biopython = "^1.75"
rope = "^0.16.0"
pandas = "^0.25.3"
[tool.poetry.dev-dependencies]
pytest = "^5.2"
......@@ -32,10 +34,6 @@ black = "^19.10b0"
[tool.poetry.scripts]
crisprbact= "crisprbact.cli:main"
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"
[tool.black]
target-version = ['py37']
include = '\.pyi?$'
......@@ -60,6 +58,10 @@ exclude = '''
)
'''
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"
[flake8]
max-line-length = 89
max-complexity = 18
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment