diff --git a/crisprbact/cli.py b/crisprbact/cli.py index 9568ddddf5980ad73bb864e2dd5a1ddfc9b549db..390fc8f202aabe0a1c2dcf5841fe8d6b43bc2931 100644 --- a/crisprbact/cli.py +++ b/crisprbact/cli.py @@ -94,7 +94,7 @@ def from_str( guide_rnas = on_target_predict(target, genome_fh) click.echo("\t".join(HEADER), file=output_file) - write_guide_rnas(guide_rnas, output_file) + write_guide_rnas(guide_rnas, output_file, len(HEADER)) @predict.command() @@ -150,6 +150,7 @@ def from_seq( fg = "blue" if config.verbose: print_parameters(target.name, fg) + click.echo("\t".join(HEADER), file=output_file) for record in SeqIO.parse(target, seq_format): if config.verbose: @@ -160,7 +161,7 @@ def from_seq( genome_fh = None guide_rnas = on_target_predict(str(record.seq), genome_fh) # print(guide_rnas) - write_guide_rnas(guide_rnas, output_file, record.id) + write_guide_rnas(guide_rnas, output_file, len(HEADER), record.id) def print_parameters(target, fg="blue"): @@ -168,7 +169,9 @@ def print_parameters(target, fg="blue"): click.secho("Target sequence : %s" % target, fg=fg) -def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"): +def write_guide_rnas( + guide_rnas, output_file, header_size, seq_id="N/A", +): for guide_rna in guide_rnas: row = [ str(guide_rna["target_id"]), @@ -179,6 +182,7 @@ def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"): str(guide_rna["pred"]), seq_id, ] + # seed_size = "" if len(guide_rna["off_targets_per_seed"]) > 0: for off_target_per_seed in guide_rna["off_targets_per_seed"]: for off_target in off_target_per_seed["off_targets"]: @@ -191,14 +195,16 @@ def write_guide_rnas(guide_rnas, output_file, seq_id="N/A"): return "" details = map(extract_off_target_detail, OFF_TARGET_DETAILS) + line_to_print = row + [str(seed_size)] + list(details) + assert len(line_to_print) == header_size click.echo( - "\t".join(row + [str(seed_size)] + list(details)), - file=output_file, + "\t".join(line_to_print), file=output_file, ) else: + line_to_print = row + [""] + list(map(lambda x: "", OFF_TARGET_DETAILS)) + assert len(line_to_print) == header_size click.echo( - "\t".join(row + list(map(lambda x: "", OFF_TARGET_DETAILS))), - file=output_file, + "\t".join(line_to_print), file=output_file, ) diff --git a/crisprbact/predict.py b/crisprbact/predict.py index 3a10c6f7e6a40fe73b8a9b31032b1bda7d4c7d7f..41d1579ce5d19cd37d27b8bae304c2467ae05971 100644 --- a/crisprbact/predict.py +++ b/crisprbact/predict.py @@ -68,6 +68,7 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12, GUIDE_LEN] seq = re.sub(r"\s", "", seq) # removes white space records = None genome_features = None + if genome: records = extract_records(genome) if records is None: @@ -87,69 +88,11 @@ def on_target_predict(seq, genome=None, seed_sizes=[8, 9, 10, 11, 12, GUIDE_LEN] target.update({"target_id": target_id}) target.update({"pred": preds[i]}) if genome: - off_targets_per_seed = [] - for seed_size in seed_sizes: - # off-target found for a guide - off_target_df = compute_off_target_df( - target["guide"], seed_size, records, genome_features - ) - off_targets_list = [] - if off_target_df is not None and not off_target_df.empty: - off_targets = slice_off_targets_results(off_target_df) - for j, off_t in enumerate(off_targets): - off_target_dict = { - "off_target_id": str(target_id) - + "-" - + str(seed_size) - + "-" - + str(j), - "off_target_start": off_t[0], - "off_target_end": off_t[1], - "off_target_pampos": off_t[2], - "off_target_strand": off_t[3], - "off_target_recid": off_t[4], - "off_target_longest_perfect_match": off_t[5], - "off_target_pam_seq": off_t[6], - "off_target_good_orientation": None, - } - if ( - seed_size == GUIDE_LEN - or off_target_dict["off_target_longest_perfect_match"] - != GUIDE_LEN - ): - index_features = 7 - # Get feature details associated - # to an off-target position - if len(off_t[index_features]) > 0: - feat = off_t[index_features][0] - off_target_dict[ - "off_target_good_orientation" - ] = is_good_orientation( - feat, off_target_dict["off_target_strand"] - ) - off_target_feature = get_off_target_feature(feat) - off_targets_list.append( - {**off_target_feature, **off_target_dict} - ) - else: - off_targets_list.append(off_target_dict) - - off_targets_per_seed.append( - { - "id": str(i) + "-" + str(seed_size), - "seed_size": seed_size, - "off_targets": off_targets_list, - } - ) - else: - off_targets_per_seed.append( - { - "id": str(i) + "-" + str(seed_size), - "seed_size": seed_size, - "off_targets": [], - } - ) - target.update({"off_targets_per_seed": off_targets_per_seed}) + # lookup for off-targets in genome + off_targets_per_seed = gen_off_target_per_seed_size( + i, target["guide"], records, genome_features, target_id, seed_sizes + ) + target.update({"off_targets_per_seed": list(off_targets_per_seed)}) else: target.update({"off_targets_per_seed": []}) return alltargets @@ -204,4 +147,58 @@ def get_off_target_feature(feat): return feature_dict -# def gen_off_target_per_seed_size(seed_sizes): +def gen_off_target_per_seed_size( + index, guide, records, genome_features, target_id, seed_sizes +): + for seed_size in seed_sizes: + # off-target found for a guide + off_target_df = compute_off_target_df( + guide, seed_size, records, genome_features + ) + if off_target_df is not None and not off_target_df.empty: + off_targets = slice_off_targets_results(off_target_df) + off_targets_list = gen_off_targets_convert( + target_id, seed_size, off_targets + ) + yield { + "id": str(index) + "-" + str(seed_size), + "seed_size": seed_size, + "off_targets": off_targets_list, + } + else: + yield { + "id": str(index) + "-" + str(seed_size), + "seed_size": seed_size, + "off_targets": [], + } + + +def gen_off_targets_convert(target_id, seed_size, off_targets): + for j, off_t in enumerate(off_targets): + off_target_dict = { + "off_target_id": str(target_id) + "-" + str(seed_size) + "-" + str(j), + "off_target_start": off_t[0], + "off_target_end": off_t[1], + "off_target_pampos": off_t[2], + "off_target_strand": off_t[3], + "off_target_recid": off_t[4], + "off_target_longest_perfect_match": off_t[5], + "off_target_pam_seq": off_t[6], + "off_target_good_orientation": None, + } + if ( + seed_size == GUIDE_LEN + or off_target_dict["off_target_longest_perfect_match"] != GUIDE_LEN + ): + index_features = 7 + # Get feature details associated + # to an off-target position + if len(off_t[index_features]) > 0: + feat = off_t[index_features][0] + off_target_dict["off_target_good_orientation"] = is_good_orientation( + feat, off_target_dict["off_target_strand"] + ) + off_target_feature = get_off_target_feature(feat) + yield {**off_target_feature, **off_target_dict} + else: + yield off_target_dict