diff --git a/scripts/extract_annot_start.py b/scripts/extract_annot_start.py
new file mode 100755
index 0000000000000000000000000000000000000000..10b3483994df331bc896d97132fa1f692535fd3f
--- /dev/null
+++ b/scripts/extract_annot_start.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# Copyright (C) 2020 Blaise Li
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""Extracts the first N positions from the annotations provided in a bed file.
+
+The result is written in bed format on the standard output.
+No bounds checks are performed. Some resulting bed entries might exceed
+chromosome boundaries.
+"""
+
+from argparse import (
+ ArgumentParser,
+ ArgumentDefaultsHelpFormatter)
+import sys
+
+
+def main():
+    """Run the command-line script.
+
+    Print, as 6-column bed, the first ``--start_size`` positions of each
+    annotation of ``--bedfile``, counted from ``end`` for "-" strand entries.
+    Blank, "#", "track" and "browser" lines are skipped and columns beyond
+    the sixth are ignored. Annotations shorter than the fragment are dropped
+    unless ``--keep_short`` is set. Returns 0.
+    """
+    parser = ArgumentParser(
+        description=__doc__, formatter_class=ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "-b", "--bedfile", required=True, help="Input bed file.")
+    parser.add_argument(
+        "-s", "--start_size", type=int, default=200,
+        help="Number of positions to extract.")
+    parser.add_argument(
+        "-k", "--keep_short", action="store_true",
+        help="Set this option to keep annotations that are too short.")
+    args = parser.parse_args()
+
+    start_size = args.start_size
+    nb_too_short = 0
+    with open(args.bedfile) as bedfile:
+        for line in bedfile:
+            line = line.strip()
+            # Bed files may contain blank lines and header/comment lines.
+            if not line or line.startswith(("#", "track ", "browser ")):
+                continue
+            # Only the first 6 columns are used; any extra ones are ignored.
+            (chrom, start, end, name, score, strand) = line.split("\t")[:6]
+            (start, end) = (int(start), int(end))
+            if end - start < start_size:
+                nb_too_short += 1
+                if not args.keep_short:
+                    continue
+                sys.stderr.write(
+                    "Extracted fragment will be longer "
+                    f"than annotation size for {name}\n")
+            # On the "-" strand the fragment is counted back from `end`.
+            first = end - start_size if strand == "-" else start
+            print(chrom, first, first + start_size, name, score, strand, sep="\t")
+    if nb_too_short:
+        sys.stderr.write(
+            f"{nb_too_short} annotations were shorter "
+            "than the extracted fragment.\n")
+    return 0
+
+
+if __name__ == "__main__":  # Executed as a script, not imported.
+    raise SystemExit(main())  # Exit status is main()'s return value.