diff --git a/README.md b/README.md index 6b9334cd6b0e62e582833972a4da2234034cc768..4ac7e5a46fc6e1bf1bcf28b64af2be62042a1563 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,10 @@ This Python package also installs some shell scripts: * `sam2indexedbam.sh` (depends on samtools) * `bam2bigwig.sh` (depends on bedops, python3, samtools, bedtools, niceload from parallel and bedGraphToBigWig from UCSC-tools) -It also provides a Python script to transfer bigwig data from one region to another: +It also provides some Python scripts. -* `copypaste_bigwig_regions.py` +* `copypaste_bigwig_regions.py` to transfer bigwig data from one region to another. +* `extract_annot_start.py` to extract starting portions of bed records. ## Installing diff --git a/scripts/extract_annot_start.py b/scripts/extract_annot_start.py index 10b3483994df331bc896d97132fa1f692535fd3f..2da9b6ad99fac01cdb8ecdaa62e535f7644a927f 100755 --- a/scripts/extract_annot_start.py +++ b/scripts/extract_annot_start.py @@ -35,6 +35,11 @@ def main(): "-b", "--bedfile", required=True, help="Input bed file.") + parser.add_argument( + "-g", "--gene_list", + help="File containing a list of gene identifiers. 
" + "If provided, only annotations whose 4th column " + "matches one of these identifiers will be considered.") parser.add_argument( "-s", "--start_size", type=int, @@ -48,10 +53,15 @@ def main(): start_size = args.start_size keep_short = args.keep_short + if args.gene_list: + with open(args.gene_list) as gene_list_fh: + gene_ids = set(line.strip() for line in gene_list_fh) nb_too_short = 0 with open(args.bedfile) as bedfile: for line in bedfile: (chrom, start, end, name, score, strand) = line.strip().split("\t") + if args.gene_list and gene_ids and name not in gene_ids: + continue if int(end) - int(start) < start_size: nb_too_short += 1 if keep_short: diff --git a/setup.py b/setup.py index 3b057003884a7369d5b4e9ce8684c84b7d893120..e0abc7599a43b8af0e05e4f220b83adeb07f2782 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,8 @@ setup( packages=find_packages(), scripts=[ "scripts/bam2bigwig.sh", "scripts/sam2indexedbam.sh", - "scripts/copypaste_bigwig_regions.py"], + "scripts/copypaste_bigwig_regions.py", + "scripts/extract_annot_start.py"], install_requires=[ #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1", #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",