diff --git a/crisprbact/cli.py b/crisprbact/cli.py
index 4eeaebd2e2700a0dd9450dcef52143d77f21264b..2b2849aeda39d585c0ddfc46dd2d9542815d9440 100644
--- a/crisprbact/cli.py
+++ b/crisprbact/cli.py
@@ -10,7 +10,37 @@ class Config(object):
 
 pass_config = click.make_pass_decorator(Config, ensure=True)
 
-HEADER = ["target", "PAM position", "prediction", "seq_id"]
+# Off-target detail columns, in output order; write_guide_rnas leaves a column empty
+# when an off-target has no value for that key.
+OFF_TARGET_DETAILS = [
+    "off_target_recid",
+    "off_target_start",
+    "off_target_end",
+    "off_target_pampos",
+    "off_target_strand",
+    "off_target_feat_type",
+    "off_target_feat_start",
+    "off_target_feat_end",
+    "off_target_feat_strand",
+    "locus_tag",
+    "gene",
+    "note",
+    "product",
+    "protein_id",
+]
+# Column names of the tab-separated output (one name per field of a row).
+HEADER = [
+    "target_id",
+    "target",
+    "PAM position",
+    "prediction",
+    "seq_id",
+    "seed_size",
+] + OFF_TARGET_DETAILS
+# Default value of the (currently commented-out) --seed-size option.
+SEED_SIZE = 8
+# Default value of the --genome-format option.
+GENOME_FORMAT = "genbank"
 
 
 @click.group()
@@ -28,9 +58,21 @@ def predict(config):
 
 @predict.command()
 @click.option("-t", "--target", type=str, required=True)
+# @click.option(
+#     "-s", "--seed-size", type=int, required=False, show_default=True, default=SEED_SIZE
+# )
+@click.option("-g", "--genome", type=click.File("r"), required=True, help="Genome")
+@click.option(
+    "-gf",
+    "--genome-format",
+    type=click.Choice(["fasta", "gb", "genbank"]),
+    default=GENOME_FORMAT,
+    show_default=True,
+    help="Genome Format",
+)
 @click.argument("output-file", type=click.File("w"), default="-")
 @pass_config
-def from_str(config, target, output_file):
+def from_str(config, target, genome, genome_format, output_file):
     """
     Outputs candidate guide RNAs for the S. pyogenes dCas9 with predicted on-target
     activity from a target gene.
@@ -38,10 +80,12 @@ def from_str(config, target, output_file):
     [OUTPUT_FILE] file where the candidate guide RNAs are saved.
     Default = "stdout"
     """
+
     if config.verbose:
         print_parameters(target)
+    genome_fh = SeqIO.parse(genome, genome_format)
+    guide_rnas = on_target_predict(target, genome_fh)
 
-    guide_rnas = on_target_predict(target)
     click.echo("\t".join(HEADER), file=output_file)
     write_guide_rnas(guide_rnas, output_file)
 
@@ -53,15 +97,26 @@ def from_str(config, target, output_file):
 @click.option(
     "-f",
     "--seq-format",
-    type=click.Choice(["fasta", "fa", "gb", "genbank"]),
+    type=click.Choice(["fasta", "gb", "genbank"]),
     help="Sequence file format",
     default="fasta",
     show_default=True,
 )
-# @click.option("-g", "--genome", type=click.File("rU"), required=True, help="Genome")
+# @click.option(
+#     "-s", "--seed-size", type=click.IntRange(8, 15, clamp=True),
+# )
+@click.option("-g", "--genome", type=click.File("r"), required=True, help="Genome")
+@click.option(
+    "-gf",
+    "--genome-format",
+    type=click.Choice(["fasta", "gb", "genbank"]),
+    default=GENOME_FORMAT,
+    show_default=True,
+    help="Genome Format",
+)
 @click.argument("output-file", type=click.File("w"), default="-")
 @pass_config
-def from_seq(config, target, seq_format, output_file):  # genome,
+def from_seq(config, target, seq_format, genome, genome_format, output_file):
     """
     Outputs candidate guide RNAs for the S. pyogenes dCas9 with predicted on-target
     activity from a target gene.
@@ -76,9 +131,12 @@ def from_seq(config, target, seq_format, output_file):  # genome,
+    # Parse the genome once: the handle is at EOF after a full parse, so re-parsing
+    # it for each target record would yield no genome records from the second on.
+    genome_records = list(SeqIO.parse(genome, genome_format))
     for record in SeqIO.parse(target, seq_format):
         if config.verbose:
             click.secho(" - search guide RNAs for %s " % record.id, fg=fg)
-        # g = SeqIO.parse(genome, "genbank")
-        guide_rnas = on_target_predict(str(record.seq))
+        # Hand each target a fresh iterator over the already-parsed records.
+        guide_rnas = on_target_predict(str(record.seq), iter(genome_records))
 
         write_guide_rnas(guide_rnas, output_file, record.id)
 
 
@@ -89,17 +147,35 @@ def print_parameters(target, fg="blue"):
 
 def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"):
+    """
+    Write one tab-separated row per (guide RNA, off-target) pair to output_file.
+
+    The fields follow HEADER. A guide RNA for which no off-target is listed still
+    gets one row, with the seed_size and off-target columns left empty, so every
+    row has as many fields as HEADER.
+    """
     for guide_rna in guide_rnas:
-        click.echo(
-            "\t".join(
-                [
-                    guide_rna["target"],
-                    str(guide_rna["pam"]),
-                    str(guide_rna["pred"]),
-                    seq_id,
-                ]
-            ),
-            file=output_file,
-        )
+        row = [
+            str(guide_rna["id"]),
+            guide_rna["target"],
+            str(guide_rna["pam"]),
+            str(guide_rna["pred"]),
+            seq_id,
+        ]
+        wrote_row = False
+        for per_seed in guide_rna["off_targets_per_seed"]:
+            seed_size = str(per_seed["seed_size"])
+            for off_target in per_seed["off_targets"]:
+                details = [
+                    str(off_target[key]) if key in off_target else ""
+                    for key in OFF_TARGET_DETAILS
+                ]
+                click.echo("\t".join(row + [seed_size] + details), file=output_file)
+                wrote_row = True
+        if not wrote_row:
+            # Pad seed_size plus every off-target column: the row must stay aligned
+            # with HEADER even when there is nothing to report.
+            padding = [""] * (1 + len(OFF_TARGET_DETAILS))
+            click.echo("\t".join(row + padding), file=output_file)
 
 
 if __name__ == "__main__":