Commit c6820e38 authored by Blaise Li
Browse files

Added script extracting first positions from bed.

parent 09d8dd85
#!/usr/bin/env python3
# Copyright (C) 2020 Blaise Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Extracts the first N positions from the annotations provided in a bed file.
The result is written in bed format on the standard output.
No bounds checks are performed. Some resulting bed entries might exceed
chromosome boundaries.
"""
from argparse import (
ArgumentParser,
ArgumentDefaultsHelpFormatter)
import sys
def main():
    """Run the command-line script.

    Reads the bed file given with ``--bedfile`` and, for each annotation,
    prints (tab-separated, on the standard output) a six-column bed entry
    covering ``--start_size`` positions: starting at ``start`` for
    annotations whose strand is anything other than "-", and ending at
    ``end`` for annotations on the "-" strand.

    Annotations shorter than ``--start_size`` are skipped, unless
    ``--keep_short`` is set, in which case a warning is written on the
    standard error and the fragment is emitted anyway. No bounds checks
    are performed on the resulting coordinates. The number of too-short
    annotations is reported on the standard error at the end.

    Blank lines and bed header lines (starting with "#", "track " or
    "browser ") are ignored. Only the first six columns of each line are
    used; any extra columns are not reproduced in the output.

    Returns:
        0, meant to be used as the exit status of the process.

    Raises:
        ValueError: if a data line has fewer than six tab-separated
            fields, or if its start or end is not an integer.
    """
    parser = ArgumentParser(
        description=__doc__,
        formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-b", "--bedfile",
        required=True,
        help="Input bed file.")
    parser.add_argument(
        "-s", "--start_size",
        type=int,
        default=200,
        help="Number of positions to extract.")
    parser.add_argument(
        "-k", "--keep_short",
        help="Set this option to keep annotations that are too short.",
        action="store_true")
    args = parser.parse_args()
    start_size = args.start_size
    keep_short = args.keep_short
    nb_too_short = 0
    with open(args.bedfile) as bedfile:
        for (line_num, line) in enumerate(bedfile, start=1):
            stripped = line.strip()
            # Blank lines and header lines carry no annotation.
            # Header keywords are only recognized when followed by a space
            # (or alone), so that a chromosome named e.g. "track" in a
            # tab-separated data line is still processed.
            if (not stripped
                    or stripped.startswith(("#", "track ", "browser "))
                    or stripped in ("track", "browser")):
                continue
            fields = stripped.split("\t")
            if len(fields) < 6:
                raise ValueError(
                    f"{args.bedfile}, line {line_num}: expected at least "
                    f"6 tab-separated fields, found {len(fields)}.")
            # Extra columns (e.g. bed12 block descriptions) would not be
            # consistent with the new coordinates, so they are ignored.
            (chrom, start, end, name, score, strand) = fields[:6]
            try:
                # Convert once; the values are used several times below.
                start = int(start)
                end = int(end)
            except ValueError as err:
                raise ValueError(
                    f"{args.bedfile}, line {line_num}: start and end "
                    "must be integers.") from err
            if end - start < start_size:
                nb_too_short += 1
                if keep_short:
                    sys.stderr.write(
                        "Extracted fragment will be longer "
                        f"than annotation size for {name}\n")
                else:
                    continue
            if strand == "-":
                # On the reverse strand, the annotation begins at its end
                # coordinate.
                print(
                    chrom, end - start_size, end,
                    name, score, strand, sep="\t")
            else:
                print(
                    chrom, start, start + start_size,
                    name, score, strand, sep="\t")
    if nb_too_short:
        sys.stderr.write(
            f"{nb_too_short} annotations were shorter "
            "than the extracted fragment.\n")
    return 0
if __name__ == "__main__":
    # Only run when executed as a script; the value returned by main()
    # becomes the exit status of the process.
    exit_status = main()
    sys.exit(exit_status)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment