Skip to content
Snippets Groups Projects
Commit 56cfa04d authored by Blaise Li's avatar Blaise Li
Browse files

Gene-list filtering in extract_annot_start.py.

Also have setup.py install the script.
parent c6820e38
No related branches found
No related tags found
No related merge requests found
...@@ -5,9 +5,10 @@ This Python package also installs some shell scripts: ...@@ -5,9 +5,10 @@ This Python package also installs some shell scripts:
* `sam2indexedbam.sh` (depends on samtools) * `sam2indexedbam.sh` (depends on samtools)
* `bam2bigwig.sh` (depends on bedops, python3, samtools, bedtools, niceload from parallel and bedGraphToBigWig from UCSC-tools) * `bam2bigwig.sh` (depends on bedops, python3, samtools, bedtools, niceload from parallel and bedGraphToBigWig from UCSC-tools)
It also provides a Python script to transfer bigwig data from one region to another: It also provides some Python scripts:
* `copypaste_bigwig_regions.py` * `copypaste_bigwig_regions.py` to transfer bigwig data from one region to another.
* `extract_annot_start.py` to extract starting portions of bed records.
## Installing ## Installing
......
...@@ -35,6 +35,11 @@ def main(): ...@@ -35,6 +35,11 @@ def main():
"-b", "--bedfile", "-b", "--bedfile",
required=True, required=True,
help="Input bed file.") help="Input bed file.")
parser.add_argument(
"-g", "--gene_list",
help="File containing a list of gene identifiers. "
"If provided, only annotations whose 4th column "
"matches one of these identifiers will be considered.")
parser.add_argument( parser.add_argument(
"-s", "--start_size", "-s", "--start_size",
type=int, type=int,
...@@ -48,10 +53,15 @@ def main(): ...@@ -48,10 +53,15 @@ def main():
start_size = args.start_size start_size = args.start_size
keep_short = args.keep_short keep_short = args.keep_short
if args.gene_list:
with open(args.gene_list) as gene_list_fh:
gene_ids = set(line.strip() for line in gene_list_fh)
nb_too_short = 0 nb_too_short = 0
with open(args.bedfile) as bedfile: with open(args.bedfile) as bedfile:
for line in bedfile: for line in bedfile:
(chrom, start, end, name, score, strand) = line.strip().split("\t") (chrom, start, end, name, score, strand) = line.strip().split("\t")
if gene_ids and name not in gene_ids:
continue
if int(end) - int(start) < start_size: if int(end) - int(start) < start_size:
nb_too_short += 1 nb_too_short += 1
if keep_short: if keep_short:
......
...@@ -34,7 +34,8 @@ setup( ...@@ -34,7 +34,8 @@ setup(
packages=find_packages(), packages=find_packages(),
scripts=[ scripts=[
"scripts/bam2bigwig.sh", "scripts/sam2indexedbam.sh", "scripts/bam2bigwig.sh", "scripts/sam2indexedbam.sh",
"scripts/copypaste_bigwig_regions.py"], "scripts/copypaste_bigwig_regions.py",
"scripts/extract_annot_start.py"],
install_requires=[ install_requires=[
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1", #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1", #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment