From 56cfa04d8e2cc980b36829069d9606d27646dc37 Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Thu, 17 Dec 2020 17:42:23 +0100 Subject: [PATCH] Gene-list filtering in extract_annot_start.py. Also have setup.py install the script. --- README.md | 5 +++-- scripts/extract_annot_start.py | 10 ++++++++++ setup.py | 3 ++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6b9334c..4ac7e5a 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,10 @@ This Python package also installs some shell scripts: * `sam2indexedbam.sh` (depends on samtools) * `bam2bigwig.sh` (depends on bedops, python3, samtools, bedtools, niceload from parallel and bedGraphToBigWig from UCSC-tools) -It also provides a Python script to transfer bigwig data from one region to another: +It also provides some Python scripts. -* `copypaste_bigwig_regions.py` +* `copypaste_bigwig_regions.py` to transfer bigwig data from one region to another. +* `extract_annot_start.py` to extract starting portions of bed records. ## Installing diff --git a/scripts/extract_annot_start.py b/scripts/extract_annot_start.py index 10b3483..2da9b6a 100755 --- a/scripts/extract_annot_start.py +++ b/scripts/extract_annot_start.py @@ -35,6 +35,11 @@ def main(): "-b", "--bedfile", required=True, help="Input bed file.") + parser.add_argument( + "-g", "--gene_list", + help="File containing a list of gene identifiers. " + "If provided, only annotations whose 4th column " + "matches one of these identifiers will be considered.") parser.add_argument( "-s", "--start_size", type=int, @@ -48,10 +53,15 @@ def main(): start_size = args.start_size keep_short = args.keep_short + if args.gene_list: + with open(args.gene_list) as gene_list_fh: + gene_ids = set(line.strip() for line in gene_list_fh) nb_too_short = 0 with open(args.bedfile) as bedfile: for line in bedfile: (chrom, start, end, name, score, strand) = line.strip().split("\t") + if gene_ids and name not in gene_ids: + continue if int(end) - int(start) < start_size: nb_too_short += 1 if keep_short: diff --git a/setup.py b/setup.py index 3b05700..e0abc75 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,8 @@ setup( packages=find_packages(), scripts=[ "scripts/bam2bigwig.sh", "scripts/sam2indexedbam.sh", - "scripts/copypaste_bigwig_regions.py"], + "scripts/copypaste_bigwig_regions.py", + "scripts/extract_annot_start.py"], install_requires=[ #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1", #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1", -- GitLab