Skip to content
Snippets Groups Projects
Commit 4df90d3a authored by Remi PLANEL
Browse files

Write off target feature when a genbank file loaded

parent f29fb5a9
No related branches found
No related tags found
1 merge request: !4 "Multi seed size"
......@@ -10,7 +10,32 @@ class Config(object):
pass_config = click.make_pass_decorator(Config, ensure=True)
# Per-off-target fields. Each name is both a column title in the output table
# and the key looked up in an off-target record when a row is written; a key
# that is absent from a record is rendered as an empty cell.
OFF_TARGET_DETAILS = [
    "off_target_recid",
    "off_target_start",
    "off_target_end",
    "off_target_pampos",
    "off_target_strand",
    "off_target_feat_type",
    "off_target_feat_start",
    "off_target_feat_end",
    "off_target_feat_strand",
    "locus_tag",
    "gene",
    "note",
    "product",
    "protein_id",
]

# Column titles of the tab-separated output: six guide-level columns followed
# by one column per off-target detail. The earlier four-column HEADER
# assignment was a dead store (immediately overwritten) and has been dropped.
HEADER = [
    "target_id",
    "target",
    "PAM position",
    "prediction",
    "seq_id",
    "seed_size",
] + OFF_TARGET_DETAILS

# Default seed size. NOTE(review): in the visible code it is only referenced
# by a commented-out --seed-size option; confirm whether it is still needed.
SEED_SIZE = 8

# Default value of the --genome-format command-line option.
GENOME_FORMAT = "genbank"
@click.group()
......@@ -28,9 +53,21 @@ def predict(config):
@predict.command()
@click.option("-t", "--target", type=str, required=True)
# @click.option(
# "-s", "--seed-size", type=int, required=False, show_default=True, default=SEED_SIZE
# )
@click.option("-g", "--genome", type=click.File("rU"), required=True, help="Genome")
@click.option(
"-gf",
"--genome-format",
type=click.Choice(["fasta", "gb", "genbank"]),
default=GENOME_FORMAT,
show_default=True,
help="Genome Format",
)
@click.argument("output-file", type=click.File("w"), default="-")
@pass_config
def from_str(config, target, output_file):
def from_str(config, target, genome, genome_format, output_file):
"""
Outputs candidate guide RNAs for the S. pyogenes dCas9 with predicted on-target
activity from a target gene.
......@@ -38,10 +75,12 @@ def from_str(config, target, output_file):
[OUTPUT_FILE] file where the candidate guide RNAs are saved. Default = "stdout"
"""
if config.verbose:
print_parameters(target)
genome_fh = SeqIO.parse(genome, genome_format)
guide_rnas = on_target_predict(target, genome_fh)
guide_rnas = on_target_predict(target)
click.echo("\t".join(HEADER), file=output_file)
write_guide_rnas(guide_rnas, output_file)
......@@ -53,15 +92,26 @@ def from_str(config, target, output_file):
@click.option(
"-f",
"--seq-format",
type=click.Choice(["fasta", "fa", "gb", "genbank"]),
type=click.Choice(["fasta", "gb", "genbank"]),
help="Sequence file format",
default="fasta",
show_default=True,
)
# @click.option("-g", "--genome", type=click.File("rU"), required=True, help="Genome")
# @click.option(
# "-s", "--seed-size", type=click.IntRange(8, 15, clamp=True),
# )
@click.option("-g", "--genome", type=click.File("rU"), required=True, help="Genome")
@click.option(
"-gf",
"--genome-format",
type=click.Choice(["fasta", "gb", "genbank"]),
default=GENOME_FORMAT,
show_default=True,
help="Genome Format",
)
@click.argument("output-file", type=click.File("w"), default="-")
@pass_config
def from_seq(config, target, seq_format, output_file): # genome,
def from_seq(config, target, seq_format, genome, genome_format, output_file):
"""
Outputs candidate guide RNAs for the S. pyogenes dCas9 with predicted on-target
activity from a target gene.
......@@ -76,9 +126,9 @@ def from_seq(config, target, seq_format, output_file): # genome,
for record in SeqIO.parse(target, seq_format):
if config.verbose:
click.secho(" - search guide RNAs for %s " % record.id, fg=fg)
# g = SeqIO.parse(genome, "genbank")
guide_rnas = on_target_predict(str(record.seq))
genome_fh = SeqIO.parse(genome, genome_format)
guide_rnas = on_target_predict(str(record.seq), genome_fh)
# print(guide_rnas)
write_guide_rnas(guide_rnas, output_file, record.id)
......@@ -89,17 +139,34 @@ def print_parameters(target, fg="blue"):
def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"):
    """Write guide RNAs as tab-separated rows, one row per off-target hit.

    Every row starts with the guide-level cells (id, target, PAM position,
    prediction, ``seq_id``), followed by the seed size and one cell per entry
    of ``OFF_TARGET_DETAILS``, so each row has the same number of cells as
    the module-level ``HEADER``.

    A guide for which no off-target hit is written still gets exactly one
    row, with the seed-size and detail cells left empty.

    Args:
        guide_rnas: iterable of mappings providing the keys ``"id"``,
            ``"target"``, ``"pam"``, ``"pred"`` and ``"off_targets_per_seed"``.
            Each item of ``"off_targets_per_seed"`` is a mapping with a
            ``"seed_size"`` value and an ``"off_targets"`` iterable of
            mappings keyed by (a subset of) ``OFF_TARGET_DETAILS``.
        output_file: file-like object handed to ``click.echo(file=...)``.
        seq_id: identifier of the sequence the guides were searched in;
            written verbatim into the ``seq_id`` column.
    """
    # Empty seed_size cell plus one empty cell per off-target detail, so that
    # rows without off-targets are as wide as rows with them.
    blank_tail = [""] * (1 + len(OFF_TARGET_DETAILS))

    for guide_rna in guide_rnas:
        row = [
            str(guide_rna["id"]),
            guide_rna["target"],
            str(guide_rna["pam"]),
            str(guide_rna["pred"]),
            seq_id,
        ]

        wrote_off_target = False
        for off_targets_per_seed in guide_rna["off_targets_per_seed"]:
            seed_size = str(off_targets_per_seed["seed_size"])
            for off_target in off_targets_per_seed["off_targets"]:
                # Missing detail keys become empty cells to keep columns aligned.
                details = [
                    str(off_target[key]) if key in off_target else ""
                    for key in OFF_TARGET_DETAILS
                ]
                click.echo(
                    "\t".join(row + [seed_size] + details),
                    file=output_file,
                )
                wrote_off_target = True

        if not wrote_off_target:
            # Covers both an empty "off_targets_per_seed" and per-seed entries
            # whose "off_targets" are all empty, so the guide is never dropped
            # from the output.
            click.echo("\t".join(row + blank_tail), file=output_file)
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment