Commit 5caa8ef3 authored by Blaise Li
Browse files

Keep all bed by default in extract_annot_start.py

parent 2f357ca8
......@@ -26,6 +26,45 @@ from argparse import (
import sys
def make_bed_size_filter(start_size, keep_short):
    """
    Make a function that processes bed data, shortening it to *start_size*.

    If *keep_short* is True, bed entries shorter than *start_size* are still
    processed (a warning is written to stderr for each of them).
    Otherwise, such entries are skipped and nothing is printed for them.
    """
    def bed_size_filter(chrom, start, end, name, score, strand):
        """
        Print the bed entry reduced to its *start_size* first positions.

        The extracted fragment starts at *start* for entries on the "+"
        strand (or any strand other than "-"), and ends at *end* for
        entries on the "-" strand.

        Return 1 if the entry is shorter than *start_size* (whether it was
        printed or skipped), 0 otherwise.
        """
        start = int(start)
        end = int(end)
        too_short = 0
        if end - start < start_size:
            if not keep_short:
                # Skipped, but still reported to the caller as too short.
                return 1
            sys.stderr.write(
                "Extracted fragment will be longer "
                f"than annotation size for {name}\n")
            too_short = 1
        if strand == "-":
            # A short annotation close to the chromosome start would
            # otherwise yield a negative start, which is not valid in bed
            # format: clamp it to 0.
            print(
                chrom, max(0, end - start_size), end,
                name, score, strand, sep="\t")
        else:
            print(
                chrom, start, start + start_size,
                name, score, strand, sep="\t")
        return too_short
    return bed_size_filter
def keep_all_bed(chrom, start, end, name, score, strand):
    """
    Write one bed record to stdout as a tab-separated line.

    *start* and *end* are converted with int() before being written;
    the other fields are written as they are. Always return 0.
    """
    first = int(start)
    last = int(end)
    record = (chrom, first, last, name, score, strand)
    print("\t".join(map(str, record)))
    return 0
def main():
"""Run the command-line script."""
parser = ArgumentParser(
......@@ -43,7 +82,7 @@ def main():
parser.add_argument(
"-s", "--start_size",
type=int,
default=200,
default=0,
help="Number of positions to extract.")
parser.add_argument(
"-k", "--keep_short",
......@@ -52,7 +91,10 @@ def main():
args = parser.parse_args()
start_size = args.start_size
keep_short = args.keep_short
if start_size == 0:
bed_processor = keep_all_bed
else:
bed_processor = make_bed_size_filter(start_size, args.keep_short)
if args.gene_list:
with open(args.gene_list) as gene_list_fh:
gene_ids = set(line.strip() for line in gene_list_fh)
......@@ -62,22 +104,8 @@ def main():
(chrom, start, end, name, score, strand) = line.strip().split("\t")
if gene_ids and name not in gene_ids:
continue
if int(end) - int(start) < start_size:
nb_too_short += 1
if keep_short:
sys.stderr.write(
"Extracted fragment will be longer "
f"than annotation size for {name}\n")
else:
continue
if strand == "-":
print(
chrom, int(end) - start_size, int(end),
name, score, strand, sep="\t")
else:
print(
chrom, int(start), int(start) + start_size,
name, score, strand, sep="\t")
nb_too_short += bed_processor(
chrom, start, end, name, score, strand)
if nb_too_short:
sys.stderr.write(
f"{nb_too_short} annotations were shorter "
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment