Commit 56cfa04d authored by Blaise Li
Browse files

Gene-list filtering in extract_annot_start.py.

Also have setup.py install the script.
parent c6820e38
......@@ -5,9 +5,10 @@ This Python package also installs some shell scripts:
* `sam2indexedbam.sh` (depends on samtools)
* `bam2bigwig.sh` (depends on bedops, python3, samtools, bedtools, niceload from parallel and bedGraphToBigWig from UCSC-tools)
It also provides a Python script to transfer bigwig data from one region to another:
It also provides some Python scripts:
* `copypaste_bigwig_regions.py`
* `copypaste_bigwig_regions.py` to transfer bigwig data from one region to another.
* `extract_annot_start.py` to extract starting portions of bed records.
## Installing
......
......@@ -35,6 +35,11 @@ def main():
"-b", "--bedfile",
required=True,
help="Input bed file.")
parser.add_argument(
"-g", "--gene_list",
help="File containing a list of gene identifiers. "
"If provided, only annotations whose 4th column "
"matches one of these identifiers will be considered.")
parser.add_argument(
"-s", "--start_size",
type=int,
......@@ -48,10 +53,15 @@ def main():
start_size = args.start_size
keep_short = args.keep_short
if args.gene_list:
with open(args.gene_list) as gene_list_fh:
gene_ids = set(line.strip() for line in gene_list_fh)
nb_too_short = 0
with open(args.bedfile) as bedfile:
for line in bedfile:
(chrom, start, end, name, score, strand) = line.strip().split("\t")
if gene_ids and name not in gene_ids:
continue
if int(end) - int(start) < start_size:
nb_too_short += 1
if keep_short:
......
......@@ -34,7 +34,8 @@ setup(
packages=find_packages(),
scripts=[
"scripts/bam2bigwig.sh", "scripts/sam2indexedbam.sh",
"scripts/copypaste_bigwig_regions.py"],
"scripts/copypaste_bigwig_regions.py",
"scripts/extract_annot_start.py"],
install_requires=[
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment