diff --git a/.gitignore b/.gitignore index f080b5bb752cd4bce30b0a06fc9acfdf4b201a16..b52dddc49bdb0d116111a6e3e4867c10a02be30d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ *\.pyc *\.pyo *\__pycache__/ - +craw.egg-info ######################### # distribution packages # ######################### @@ -41,5 +41,11 @@ htmlcov/ ###################### # Singularity images # ###################### -.img - +*\.img +*\.simg + +##################### +# My stuff +##################### +Sandbox/ +data/ \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 94f511e06219f35645811f2a861a5a13c477bb9c..a8170acc20a15b598c3f4eeb57a3592e6c70deef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,15 +3,11 @@ stages: - coverage - release -variables: - CRAW_HOME: ${CI_PROJECT_DIR} - CRAW_VERSION: ${CI_COMMIT_REF_NAME} - testing: stage: test image: registry-gitlab.pasteur.fr/bneron/craw:centos-run-dep script: - - python3 ${CI_PROJECT_DIR}/tests/run_tests.py -vvv + - python36 setup.py test except: - /^feat.*$/ - /^fix.*$/ @@ -20,8 +16,7 @@ coverage: stage: coverage image: registry-gitlab.pasteur.fr/bneron/craw:coverage script: - - cd ${CI_PROJECT_DIR} - - coverage run --source=craw tests/run_tests.py --unit + - coverage run --source=craw tests/run_tests.py - coverage html - coverage report artifacts: diff --git a/COPYRIGHT b/COPYRIGHT index 9e43a85aaff719c7d35475b2d931d1004d8c6ad5..12a949ec154f3f56cb75fdb89c7b58de15ae9ea5 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,4 +1,4 @@ Counter RNAseq Window - compute the coverage of RNAseq experiments on a specified window on both strand and display results as images. -Copyright © 2017 Institut Pasteur (Paris). \ No newline at end of file +Copyright © 2017-2019 Institut Pasteur (Paris), and CNRS. 
\ No newline at end of file diff --git a/Docker/Docker_release/Dockerfile b/Docker/Docker_release/Dockerfile index 9b7525ff6d5f86258ab8cc9c7ab85243b4bb1d04..4b17470cd0e8bff4febfdbed29d43b7b7bfcd27b 100644 --- a/Docker/Docker_release/Dockerfile +++ b/Docker/Docker_release/Dockerfile @@ -15,7 +15,7 @@ RUN yum install -y texlive-collection-latexrecommended \ COPY tabulary.sty fncychap.sty capt-of.sty \ eqparbox.sty environ.sty trimspaces.sty \ - /usr/lib64/python3.4/site-packages/sphinx/texinputs/ + /usr/lib64/python3.6/site-packages/sphinx/texinputs/ CMD ["/bin/bash"] diff --git a/Docker/Docker_run_dep/Dockerfile b/Docker/Docker_run_dep/Dockerfile index 0b8b7a777502fce2a63872224168ebb5b8c7a712..8f15d01e0fb2a410acaba8509e7949a69a547cc7 100644 --- a/Docker/Docker_run_dep/Dockerfile +++ b/Docker/Docker_run_dep/Dockerfile @@ -6,19 +6,19 @@ USER root RUN yum clean all && \ yum install -y epel-release &&\ - yum install -y make gcc gcc-c++ python34-devel zlib-devel python34-tkinter && \ + yum install -y make gcc gcc-c++ python36-devel zlib-devel python36-tkinter && \ yum clean all WORKDIR /tmp RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py &&\ - python3 get-pip.py -RUN pip3 install psutil>=4.0.0 -RUN pip3 install pysam==0.9.1.4 -RUN pip3 install numpy==1.11.2 -RUN pip3 install cython && pip3 install pandas==0.17.1 -RUN pip3 install scipy==0.16.1 -RUN pip3 install matplotlib==1.5.3 -RUN pip3 install pillow==3.4.2 + python36 get-pip.py +RUN pip3 install psutil>=5.6 +RUN pip3 install pysam==0.15.2 +RUN pip3 install numpy==1.16.2 +RUN pip3 install cython && pip3 install pandas==0.24.2 +RUN pip3 install scipy==1.2.1 +RUN pip3 install matplotlib==3.0.3 +RUN pip3 install pillow==5.4.1 CMD ["/bin/bash"] diff --git a/MANIFEST.in b/MANIFEST.in index 7551840b4dcb77eaf3d8dd309cbb87b01406e8ca..6275f31a8e455f0093d5edf68a604ff125a95238 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,13 +3,10 @@ include RELEASE_NOTES.md include COPYRIGHT include COPYING include 
requirements.txt +include requirements_dev.txt include setup.py -include bin/craw_coverage -include bin/craw_htmp - -include craw/*.py recursive-include tests *.py recursive-include tests/data * diff --git a/README.md b/README.md index c2a3ab9dbc0356173dbe3f1e2c5e8fdaead27456..b18627d668cd592afda506f44aa2f4018175f667 100644 --- a/README.md +++ b/README.md @@ -13,14 +13,14 @@ Installation Requirements ------------ -- python > 3 -- psutil >= 4.0 -- pysam >= 0.9.1.4 -- pandas >= 0.17.1 -- scipy >= 0.18.1 -- numpy >= 1.11.2 -- matplotlib >= 1.5.3 -- pillow >= 3.4.2 +- python > 3.4 +- psutil >= 5.6 +- pysam == 0.15.2 +- pandas >= 0.24 +- scipy >= 0.16.1 +- numpy >= 1.16 +- matplotlib >= 3.0 +- pillow >= 5.4 From package @@ -28,7 +28,7 @@ From package using pip -`pip install craw` +`pip install craw==1.0.2` if you use virtualenv do not forget to configure the matplotlib backend @@ -60,10 +60,6 @@ clone the project and install with the setup.py pip3 install dist/craw-master-devxxxxx.tar.gz -You can also use the package without install it. -You have to export the **CRAW_HOME** environment variable. -Then you can use it directly - ### Testing my installation @@ -71,7 +67,7 @@ The release come from with some unit and functional tests. to test if everything work fine. cd craw - python3 tests/run_tests.py -vvv + python3 tests/run_tests.py -vv This step is only available from the sources (a *clone* of the repository or a *tarball* release). 
You **cannot** perform tests if you installed craw from pypi (*pip install craw*) diff --git a/Singularity b/Singularity deleted file mode 100644 index 6d7584593ad7281065afbe1f3debcccb423a5661..0000000000000000000000000000000000000000 --- a/Singularity +++ /dev/null @@ -1,97 +0,0 @@ -Bootstrap: debootstrap -OSVersion: zesty -MirrorURL: http://archive.ubuntu.com/ubuntu/ - - -%labels - maintainer Bertrand Neron <bneron@pasteur.fr> - package.name craw - package.version 1.0.0 - package.homepage https://gitlab.pasteur.fr/bneron/craw - package.license GPLv3 - -%post - #################################### - # Installing system # - #################################### - - echo "deb http://archive.ubuntu.com/ubuntu/ zesty main restricted" > /etc/apt/sources.list - echo "deb http://archive.ubuntu.com/ubuntu/ zesty-updates main restricted" >> /etc/apt/sources.list - - echo "deb http://archive.ubuntu.com/ubuntu/ zesty universe" >> /etc/apt/sources.list - echo "deb-src http://archive.ubuntu.com/ubuntu/ zesty universe" >> /etc/apt/sources.list - echo "deb http://archive.ubuntu.com/ubuntu/ zesty-updates universe" >> /etc/apt/sources.list - echo "deb-src http://archive.ubuntu.com/ubuntu/ zesty-updates universe" >> /etc/apt/sources.list - - echo "deb http://archive.ubuntu.com/ubuntu/ zesty multiverse" >> /etc/apt/sources.list - echo "deb http://archive.ubuntu.com/ubuntu/ zesty-updates multiverse" >> /etc/apt/sources.list - - echo "deb http://archive.ubuntu.com/ubuntu/ zesty-backports main restricted universe multiverse" >> /etc/apt/sources.list - - echo "deb http://security.ubuntu.com/ubuntu/ zesty-security main restricted" >> /etc/apt/sources.list - echo "deb http://security.ubuntu.com/ubuntu/ zesty-security universe" >> /etc/apt/sources.list - echo "deb-src http://security.ubuntu.com/ubuntu/ zesty-security universe" >> /etc/apt/sources.list - echo "deb http://security.ubuntu.com/ubuntu/ zesty-security multiverse" >> /etc/apt/sources.list - - apt-get update -y - apt-get install -y 
--no-install-recommends wget python3 python3-tk - apt-get install -y python3-pip - - ################################# - # installing craw # - ################################# - cd / - CRAW_VERS="1.0.0" - #wget -O craw-${CRAW_VERS}.tar.gz https://gitlab.pasteur.fr/bneron/craw/repository/${CRAW_VERS}/archive.tar.gz - wget -O craw-${CRAW_VERS}.tar.gz https://gitlab.pasteur.fr/bneron/craw/repository/branch_1.0/archive.tar.gz - mkdir craw-${CRAW_VERS} && tar xzf craw-${CRAW_VERS}.tar.gz -C craw-${CRAW_VERS} --strip-components 1 - cd craw-${CRAW_VERS} - python3 setup.py sdist - #pip3 install dist/craw-${CRAW_VERS}.tar.gz - pip3 install dist/craw-*.tar.gz - - mkdir /craw - mv tests /craw/ - - ################################# - # cleaning image # - ################################# - cd / - rm craw-${CRAW_VERS}.tar.gz - rm -Rf craw-${CRAW_VERS} - apt-get purge -y wget - apt-get autoremove -y - apt-get clean -y - -%test - /usr/bin/python3 /craw/tests/run_tests.py --functional -vv - -%help - This singularity image contains the "Counter RNAseq Window (CRAW) package. - Two commands are available \"coverage\" and \"htmp\. - To run command: - ./craw.img [coverage|htmp] [options]... [args]... . - - To get help about each command ./craw.img [coverage|htmp] --help. - The detailed documentation is accessible here: http://bneron.pages.pasteur.fr/craw/ - -%runscript - -# the following syntax allow to get the command and args -# in POSIX manner so compliant with dash which -# is the debian/ubuntu /bin/sh shell - -CMD="$1" -shift -ARGS=${@} - -case ${CMD} in - coverage ) - exec /usr/local/bin/craw_coverage ${ARGS} ;; - htmp ) - exec /usr/local/bin/craw_htmp ${ARGS} ;; - * ) - echo "command \"${CMD}\" is not supported. 
available commands: \"coverage\"|\"htmp\"" - exit 127 - ;; -esac diff --git a/bin/craw_coverage b/bin/craw_coverage deleted file mode 100755 index 94d30fcb7ac485232650436e19702704de2a99d1..0000000000000000000000000000000000000000 --- a/bin/craw_coverage +++ /dev/null @@ -1,467 +0,0 @@ -#! /usr/bin/env python3 - -########################################################################### -# # -# This file is part of Counter RNAseq Window (craw) package. # -# # -# Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # -# see COPYRIGHT file for details. # -# # -# craw is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# craw is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# See the GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with craw (see COPYING file). # -# If not, see <http://www.gnu.org/licenses/>. 
# -# # -########################################################################### - -import os -import sys -import argparse -import itertools -import logging -import pysam - -if 'CRAW_HOME' in os.environ and os.environ['CRAW_HOME']: - if os.environ['CRAW_HOME'] not in sys.path: - sys.path.insert(0, os.environ['CRAW_HOME']) -try: - import craw -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - sys.exit(msg) - -from craw.util import progress -from craw import argparse_util -from craw import annotation, coverage -from craw.wig import WigParser - - -def positive_int(value): - """ - Parse value given by the parser - - :param value: the value given by the parser - :type value: string - :return: the integer corresponding to the value - :rtype: int - :raise: :class:`argparse.ArgumentTypeError` - """ - try: - value = int(value) - except ValueError: - raise argparse.ArgumentTypeError("must be a positive integer, got: {}".format(value)) - if value < 0: - msg = "must be a positive integer, got: {}".format(value) - raise argparse.ArgumentTypeError(msg) - return value - - -def quality_checker(value): - """ - Parse value given by the parser - - :param value: the value given by the parser - :type value: string - :return: the integer >=0 and <=42 corresponding to the value - :rtype: int - :raise: :class:`argparse.ArgumentTypeError` if value does not represent a integer >=0 and <=42 - """ - - try: - value = int(value) - except ValueError: - raise argparse.ArgumentTypeError("must be a integer between 0 and 42, got: {}".format(value)) - if not 0 <= value <= 42: - raise argparse.ArgumentTypeError("must be a integer between 0 and 42, got: {}".format(value)) - return value - - -def get_result_header(annot_parser, cmdline_args): - """ - - :param annot_parser: - :param cmdline_args: - :return: - """ - def version_infos(): - header = "# Running Counter RnAseq Window craw_coverage\n" - commented_ver = 
get_version_message().rstrip().replace('\n', '\n# ') - header += """# -# Version: {} -# -# craw_coverage run with the following arguments: -""".format(commented_ver) - return header - - def options(): - options = '' - for a, v in sorted(cmdline_args.__dict__.items()): - if v is None or v is False: - continue - else: - if v is True: - options += "# --{opt}\n".format(opt=a.replace('_', '-')) - else: - options += "# --{opt}={val}\n".format(opt=a.replace('_', '-'), val=v) - options.rstrip() + '\n' - return options - - def padded_header(): - metadata = '\t'.join([str(f) for f in annot_parser.header]) - if cmdline_args.start_col: - max_left, max_right = annot_parser.max() - pos = '\t'.join(str(p) for p in range(0 - max_left, max_right + 1)) - else: - pos = '\t'.join(str(p) for p in range(0 - cmdline_args.before, cmdline_args.after + 1)) - s = "sense\t{metadata}\t{pos}".format(metadata=metadata, pos=pos) - return s - - def sum_header(): - metadata = '\t'.join([str(f) for f in annot_parser.header]) - return "sense\t{metadata}\tcoverage".format(metadata=metadata) - - def resized_header(new_size): - metadata = '\t'.join([str(f) for f in annot_parser.header]) - pos = '\t'.join([str(i) for i in range(new_size)]) - return "sense\t{metadata}\t{pos}".format(metadata=metadata, pos=pos) - - header = version_infos() - header += options() - - if args.justify: - header += resized_header(args.justify) - elif args.sum: - header += sum_header() - else: - header += padded_header() - return header - - -def get_version_message(): - version_text = craw.get_version_message() - version_text += """ -Using: - - pysam {pysam_ver} (samtools {samtools_ver}) - - scipy {sp_ver} (only for --justify opt) -""".format(pysam_ver=pysam.__version__, - samtools_ver=pysam.__samtools_version__, - sp_ver=craw.coverage.scipy.__version__) - return version_text - - -def get_results_file(sense_opt, basename, suffix): - if sense_opt == 'S': - sense_filename = "{filename}.sense.{suffix}".format(filename=basename, 
suffix=suffix) - sense = open(sense_filename, 'w') - antisense = open(os.devnull, 'w') - elif sense_opt == 'AS': - sense = open(os.devnull, 'w') - antisense_filename = "{filename}.antisense.{suffix}".format(filename=basename, suffix=suffix) - antisense = open(antisense_filename, 'w') - elif sense_opt == 'split': - sense_filename = "{filename}.sense.{suffix}".format(filename=basename, suffix=suffix) - sense = open(sense_filename, 'w') - antisense_filename = "{filename}.antisense.{suffix}".format(filename=basename, suffix=suffix) - antisense = open(antisense_filename, 'w') - else: - output_filename = "{filename}.{suffix}".format(filename=basename, suffix=suffix) - sense = open(output_filename, 'w') - antisense = sense - return sense, antisense - - -parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter) -input_grp = parser.add_argument_group() -input_grp.add_argument("-b", "--bam", - help="""The path of the bam file to analyse. ---bam option is not compatible with any --wig or --wig-for or --wig-rev options. -but at least --bam or any of --wig* options is required.""") -input_grp.add_argument("-w", "--wig", - help="""The path of the wig file to analyse. -The file encode the coverage for the both strand. -The positive coverage ar on the forward strand whereas the negative coverage a located on the reverse one. -The --wig option is incompatible with both --bam or --wig-for or --wig-reverse options.""") -input_grp.add_argument("--wig-for", - metavar='FORWARD WIG', - help="""The path of a wig file to analyse. -This file encode the coverage for the forward strand. -The --wig-for option is incompatible with both --bam or --wig options.""") -input_grp.add_argument("--wig-rev", - metavar='REVERSE WIG', - help="""The path of a wig file to analyse. -This file encode the coverage for the reverse strand. 
-The --wig-rev option is incompatible with both --bam or --wig options.""") -parser.add_argument("-a", "--annot", - required=True, - help="The path of the annotation file (required).") -parser.add_argument("--qual-thr", - dest='qual_thr', - type=quality_checker, - default=15, - help="The minimal quality of read mapping to take it in account") -parser.add_argument("-s", "--suffix", - default="cov", - help="The name of the suffix to use for the output file.") -parser.add_argument('-o', '--output', - dest='output', - help="The path of the output (default= base name of annotation file with --suffix)") -parser.add_argument('--sep', - default='\t', - help="the separator use to delimit the annotation fields") -mutually_exclusive_opt = parser.add_mutually_exclusive_group() -mutually_exclusive_opt.add_argument('--justify', - type=positive_int, - help="to resize all genes coverage to this new size.") -mutually_exclusive_opt.add_argument('--sum', - action='store_true', - default=False, - help="sum all the coverages on the window.") - -region_grp = parser.add_argument_group(title="region of interest", - description="""Parameters which define regions to compute. - -There is 2 way to define regions: - * all regions have same length. - * each region have different lengths. - -In both case a position of reference must be define (--ref-col). - -If all regions have same length: - - --window define the number of nucleotide to take in account before and - after the reference position (the window will be centered on reference) - --before define the number of nucleotide to take in account before the - reference position. - --after define the number of nucleotide to take in account after the - reference position. - --before and --after allow to define non centered window. - - --after and --before options must be set together and are - incompatible with --window option. - -If all regions have different lengths: - - The regions must be specified in the annotation file. 
- --start-col define the name of the column in annotation file which define - the start position of the region to compute. - --stop-col define the name of the column in annotation file which define - the stop position of the region to compute. -""") -region_grp.add_argument("--ref-col", - default="position", - help="The name of the column for the reference position (default: position).") -region_grp.add_argument("--before", - type=positive_int, - help="The number of base to compute after the position of reference.") -region_grp.add_argument("--after", - type=positive_int, - help="The number of base to compute before the position of reference.") -region_grp.add_argument("--window", - type=positive_int, - help="The number of base to compute around the position of reference.") -region_grp.add_argument("--start-col", - help="The name of the column to define the start position.") -region_grp.add_argument("--stop-col", - help="The name of the column to define the stop position.") -col_name = parser.add_argument_group(title="specify the name of columns") -col_name.add_argument("--strand-col", - default='strand', - help="Specify the name of the column representing the strand (default: strand)") -col_name.add_argument("--chr-col", - default='chromosome', - help="Specify the name of the column representing the chromosome (default: chromosome)") - -parser.add_argument("--sense", - choices=('S', 'AS', 'split', 'mixed'), - default='mixed', - help="compute result only on: " - "sense (S), " - "antisense (AS), " - "on both senses but produce two separated files (split), " - "or in one file (mixed)." 
- "(default: mixed)" - ) - -parser.add_argument("--version", - action=argparse_util.VersionAction, - version=get_version_message()) -parser.add_argument("-q", "--quiet", - action="count", - default=0, - help="Reduce verbosity.") -parser.add_argument("-v", "--verbose", - action="count", - default=0, - help="Increase verbosity.") - -args = parser.parse_args() - -input_opt_group = (args.bam, args.wig, args.wig_for, args.wig_rev) -wig_opt_group = (args.wig, args.wig_for, args.wig_rev) - -############################# -# Check wig and bam options # -############################# -if not any(input_opt_group): - raise argparse.ArgumentError(None, "At least one of these options must be specified" - " '--bam', '--wig' , '--wig-for', '--wig-rev'.") -elif all(input_opt_group): - raise argparse.ArgumentError(None, "'--bam', '--wig' , '--wig-for', '--wig-rev' cannot specify at the same time.") -elif args.bam and any(wig_opt_group): - raise argparse.ArgumentError(None,"'--bam' option cannot be specified in the same time as" - " '--wig', '--wig-for' or '--wig-rev' options.") -elif args.wig and any((args.wig_for, args.wig_rev)): - raise argparse.ArgumentError(None,"'--wig' option cannot be specified in the same time as" - " '--wig-for' or '--wig-rev' options.") -########################### -# Checking window options # -########################### -group_one = (args.before, args.after, args.window) -group_two = (args.start_col, args.stop_col) -if all([v is None for v in itertools.chain(group_one, group_two)]): - raise argparse.ArgumentError(None, "[--window or [--before, --after] or [--start-col, --stop-col] options" - " must be specified") -elif any([v is not None for v in group_one]) and any([v is not None for v in group_two]): - raise argparse.ArgumentError(None, "Options [--before, --after, --window] and [--start-col, --stop-col] " - "are mutually exclusives.") -elif all([v is None for v in group_two]): - if args.window is None: - if any([v is None for v in (args.before, 
args.after)]): - raise argparse.ArgumentError(None, "The two options --after and --before work together." - " The both options must be specified in same time") - else: - pass - # window is None, before and after are specify - # => nothing to do - else: - # args.window is not None: - if any([v is not None for v in (args.before, args.after)]): - raise argparse.ArgumentError(None, "options [--before, --after] and --window are mutually exclusives.") - else: - # --before, --after are None - args.before = args.after = args.window -elif not all(group_two): - raise argparse.ArgumentError(None, "The two options --start-col and --stop-col work together. " - "The both options must be specified in same time") - -verbosity = max(logging.INFO + (args.quiet - args.verbose) * 10, 1) -craw.init_logger(verbosity) - -####################### -# Parsing input files # -####################### - -annot_line_number = sum(1 for line in open(args.annot)) -annot_parser = annotation.AnnotationParser(args.annot, args.ref_col, - chr_col=args.chr_col, - strand_col=args.strand_col, - start_col=args.start_col, - stop_col=args.stop_col, - sep=args.sep) - -if args.bam: - # input_data is a samfile - input_file = args.bam - input_data = pysam.AlignmentFile(args.bam, "rb") -elif args.wig: - # input_data is a wig.Genome object - input_file = args.wig - wig_parser = WigParser(mixed_wig=args.wig) - input_data = wig_parser.parse() -else: - # input_data is a wig.Genome object - input_file = args.wig_for - wig_parser = WigParser(for_wig=args.wig_for, rev_wig=args.wig_rev) - input_data = wig_parser.parse() - -annotations = annot_parser.get_annotations() - -############################ -# checking outputs options # -############################ -if not args.output: - args.output = os.path.splitext(input_file)[0] - out_name = args.output - suffix = args.suffix -else: - out_name, suffix = os.path.splitext(args.output) - suffix = suffix.strip('.') - if not suffix: - suffix = args.suffix - -sense_file, 
antisense_file = get_results_file(args.sense, out_name, suffix) - -########################### -# Computing output matrix # -########################### -with sense_file, antisense_file: - header = get_result_header(annot_parser, args) - - if args.sense in ('S', 'split', 'mixed'): - # if args.sense is mixed the sense_file and antisense_file are the same object - print(header, file=sense_file) - if args.sense in ('AS', 'split'): - print(header, file=antisense_file) - - # get the appropriate function according to the input type - # the 2 functions - # - get_wig_coverage - # - get_bam_coverage - # have exactly the same api - if args.justify: - get_coverage = coverage.resized_coverage_maker(input_data, args.justify, qual_thr=None) - elif args.sum: - get_coverage = coverage.sum_coverage_maker(input_data, qual_thr=args.qual_thr) - else: - if args.window is not None: - max_left = max_right = args.window - elif args.before and args.after: - max_left = args.before - max_right = args.after - else: - max_left, max_right = annot_parser.max() - get_coverage = coverage.padded_coverage_maker(input_data, max_left, max_right, qual_thr=args.qual_thr) - - for annot_num, annot_entry in enumerate(annotations, 1): - if verbosity <= logging.INFO: - progress(annot_num, annot_line_number) - if args.start_col: - # pos in get_coverage functions are - # 0 based whereas in annotation they are 1 based - # start is included, stop is excluded - start = annot_entry.start - 1 - stop = annot_entry.stop - else: - if annot_entry.strand == '+': - start = annot_entry.ref - args.before - 1 - stop = annot_entry.ref + args.after - else: - # if feature is on reverse strand - # the before and after are inverted - start = annot_entry.ref - args.after - 1 - stop = annot_entry.ref + args.before - forward_cov, reverse_cov = get_coverage(annot_entry, start=start, stop=stop) - - sens = 'S' if annot_entry.strand == '+' else 'AS' - if sens == 'S': - print(sens, annot_entry, *forward_cov, sep='\t', file=sense_file) - 
else: - print(sens, annot_entry, *forward_cov, sep='\t', file=antisense_file) - - sens = 'S' if annot_entry.strand == '-' else 'AS' - if sens == 'S': - print(sens, annot_entry, *reverse_cov, sep='\t', file=sense_file) - else: - print(sens, annot_entry, *reverse_cov, sep='\t', file=antisense_file) - -print(file=sys.stderr) diff --git a/bin/craw_htmp b/bin/craw_htmp deleted file mode 100755 index 2be0f414571df8dd49f8b78aed955882d03f57fb..0000000000000000000000000000000000000000 --- a/bin/craw_htmp +++ /dev/null @@ -1,525 +0,0 @@ -#! /usr/bin/env python3 - -########################################################################### -# # -# This file is part of Counter RNAseq Window (craw) package. # -# # -# Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # -# see COPYRIGHT file for details. # -# # -# craw is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# craw is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# See the GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with craw (see COPYING file). # -# If not, see <http://www.gnu.org/licenses/>. 
# -# # -########################################################################### - -import sys -import os -import argparse -import logging - -import matplotlib as mtp - - -if 'CRAW_HOME' in os.environ and os.environ['CRAW_HOME']: - if os.environ['CRAW_HOME'] not in sys.path: - sys.path.insert(0, os.environ['CRAW_HOME']) -try: - import craw -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - sys.exit(msg) - - -root_logger = logging.getLogger() -handler = logging.StreamHandler() -formatter = logging.Formatter(fmt='{levelname} : {name} : {message}', style='{') -handler.setFormatter(formatter) -root_logger.addHandler(handler) -root_logger.setLevel(logging.DEBUG) - - -def _gene_size_parser(value): - """ - Parse value given by the parser - - :param value: the value given by the parser for --sort-by-gene-size option - :type value: string - :return: name of column representing the start of gene, name of the column representing the end of gene - :rtype: tuple of 2 string - :raise: :class:`argparse.ArgumentError` object if value cannot be parsed - """ - # this function is called only if value is provided to sort-by-gene-size option - rv = value.split(',') - if len(rv) == 0: - rv = ['', ''] - elif len(rv) == 2: - pass - else: - raise argparse.ArgumentError(None, "--sort-by-gene-size {} invalid value. " - "Must be start_col, stop_col (separated by a comma), " - "default= annotation_start,annotation_end".format(value)) from None - return rv - -def _size_fig_parser(value): - """ - Parse value given by the parser for --size option - the value must follow the syntax widexheight[unit] - if the unit is omitted unit is inch - otherwise unit must be - - 'mm' for millimeters - - 'cm' for centimeters - - 'in' for inches - - 'px' for pixels - wide and height must be positive integers. 
- - :param value: the size of the figure - :type value: string - :return: the size in inch aks by the user - :rtype: tuple of float - :raise: :class:`argparse.ArgumentError` object - """ - def mm2in(value): - return value * (1 / 25.4) - - def cm2in(value): - return mm2in(value * 10) - - def px2in(value): - dpi = plt.rcParams['figure.dpi'] - return value / dpi - - err_msg = """--size {} invalid value. - The value must be widexheight[unit] or 'raw'. - 'wide' and 'height' must be positive integers - By default unit is in inches. - eg: 7x10 or 7x10in for 7 inches wide by 10 inches height - 70x100mm for 70 mm by 100 mm. - default=7x10 or 10x7 depending of the figure orientation.""" - if value == 'raw': - return 'raw' - else: - unit = 'in' - if value[-2:] in ('mm', 'cm', 'in', 'px'): - unit = value[-2:] - value = value[:-2] - try: - wide, height = value.split('x') - except ValueError: - raise argparse.ArgumentError(None, err_msg.format(value)) - try: - wide = int(wide) - height = int(height) - except ValueError: - raise argparse.ArgumentError(None, err_msg.format(value)) - if wide < 0 or height < 0: - raise argparse.ArgumentError(None, err_msg.format(value)) - - if unit == 'mm': - wide = mm2in(wide) - height = mm2in(height) - elif unit == 'cm': - wide = cm2in(wide) - height = cm2in(height) - elif unit == 'px': - wide = px2in(wide) - height = px2in(height) - - return wide, height - - -def get_version_message(): - """ - - :return: a human readable of craw_htmp version and it's main dependencies. - """ - # pyplot must be import after the argument parsing - # because in function of the environment ($DYSPLAY and options) the craw_htmp behavior - # is not the same - # so heatmap cannot be import before argument parsing - # so I import the dependencies in get_version_message. - # it's not very important as get_version_message is called only when --version is set - # so the program quit after displaying this message. 
- import numpy as np - import pandas as pd - import matplotlib as mtp - import PIL - version_text = craw.get_version_message() - version_text += """ -Using: - - numpy {np_ver} - - pandas {pd_ver} - - matplotlib {mtp_ver} - - pillow {pil_ver} -""".format(np_ver=np.__version__, - pd_ver=pd.__version__, - mtp_ver=mtp.__version__, - pil_ver=PIL.PILLOW_VERSION - ) - return version_text - -parser = argparse.ArgumentParser(description="Compute a figure from a file of coverage compute by craw_coverage.", - formatter_class=argparse.RawTextHelpFormatter) - -parser.add_argument(dest="cov_file", - help="the path to the coverage file.") - -data_grp = parser.add_argument_group('optional data options') -data_grp.add_argument("--crop", - nargs=2, - help="""crop the matrix. -This option need two values the name of the first and last column to keep -[start col, stop col] eg --crop -10 1000 .""") - -sort_grp = data_grp.add_mutually_exclusive_group() -sort_grp.add_argument("--sort-using-col", - default=False, - help="sort the rows using the column COL.") -sort_grp.add_argument("--sort-using-file", - type=argparse.FileType('r'), - help="""Sort the rows using a file. -The file must have on the first line the name of the column -which will use to sort. -Each following lines must match to a value of this column in the data.""") -sort_grp.add_argument("--sort-by-gene-size", - type=_gene_size_parser, - nargs='*', - metavar='start_col,stop_col', - help="""The rows will be sorted by gene size -using start-col and stop-col to compute length. - -start-col and stop-col must be a string separated by comma. -If start-col and stop-col are not specified annotation_start and annotation_end -for start-col and stop-col respectively will be used. 
-(Don't put this option without value just before the coverage file.)""") - -matrix_grp = data_grp.add_mutually_exclusive_group() -matrix_grp.add_argument("--sense-only", - action="store_true", - default=False, - help="Display only sense matrix (default is display both).") -matrix_grp.add_argument("--antisense-only", - action="store_true", - default=False, - help="Display only anti sense matrix (default is display both).") - -fig_grp = parser.add_argument_group("optional figure options") -fig_grp.add_argument("--cmap", - default="Blues", - help="""The color map used to display data. -The allowed values are defined in -http:matplotlib.org/examples/color/colormaps_reference.html -eg: Blues, BuGn, Greens, GnBu, ... (default: Blues).""") -fig_grp.add_argument("--title", - help="""The figure title. -It will display on the top of the figure. -(default: the name of the coverage file without extension).""") -fig_grp.add_argument("--dpi", - type=int, - help="""The resolution of the output . -This option work only if --out option is specified with size not raw. -(default: matplolibrc figure.dpi""") -fig_grp.add_argument("--size", - type=_size_fig_parser, - help="""Specify the figure size. -The value must be widexheight[unit] or 'raw'. - -If value is raw it will be produce two image files (for sense and antisense) " -with one pixel correspond to one coverage value for one nucleotide. - -Otherwise, 'wide' and 'height' must be positive integers -By default units are in inches eg: - -* 7x10 or 7x10in for 7 inches wide by 10 inches height -* 70x100mm for 70 mm by 100 mm. - -(default: 7x10 or 10x7 depending of the figure orientation).""") - -fig_grp.add_argument("--norm", - choices=["lin", "log", "row", "log+row", "row+log"], - default="lin", - help="""Which normalization to apply to the data before display them. - - * lin a linear normalization is applied on the whole matrix. - * log a 10 base logarithm will be applied on the data before matrix - normalization. 
- * row mean that a linear normalisation is compute row by row. - * log+row mean a 10 base logarithm will be applied before a normalisation - row by row. ('row+log' is an alias for 'log+row'). - (default: lin""") - -fig_grp.add_argument("--mark", - action='append', - nargs='*', - metavar='POS [COLOR]', - help="""* POS is mandatory and must be a positive integer. -* COLOR is optional - The supported color formats are: - - Hexadecimal color specifiers, given as '#rgb' or '#rrggbb'. For example, '#ff0000' specifies pure red. - - Common HTML color names. - -If COLOR is omitted the color corresponding to the highest value in the color map (--cmap) will be used. -(this option cannot be the last one just before the coverage file, on the command line.)""") - -layout = fig_grp.add_mutually_exclusive_group() -layout.add_argument("--sense-on-left", - action="store_true", - default=False, - help="Where to display the sense matrix relative to antisense matrix (default is top).") -layout.add_argument("--sense-on-right", - action="store_true", - default=False, - help="Where to display the sense matrix relative to antisense matrix (default is top).") -layout.add_argument("--sense-on-top", - action="store_true", - default=False, - help="Where to display the sense matrix relative to antisense matrix (default is top).") -layout.add_argument("--sense-on-bottom", - action="store_true", - default=False, - help="Where to display the sense matrix relative to antisense matrix (default is top).") - -parser.add_argument("--out", - help="""The name of the file (the format will based on the extension) -to save the figure. -Instead of displaying the figure on the screen, save it directly in this file. - -If this option is used with --size raw 2 files will be produced -for respectively sense and anti sense. -The extension 'sense' or 'antisense' will be added between the name and the suffix eg: ---size raw --out foo.png give 2 files 'foo.sense.png' and 'foo.antisense.png'. 
- -If no format (determine using the suffix) is given 'png' will be used.""") - -parser.add_argument("-v", "--verbose", - action='count', - default=0, - help="Increase output verbosity.") -parser.add_argument("-q", "--quiet", - action="count", - default=0, - help="Reduce output verbosity.") -parser.add_argument("--version", - action='version', - version=get_version_message(), - help="display the version information and quit.") -try: - args = parser.parse_args() -except Exception as err: - print(err) - -verbosity = max(logging.INFO + (args.quiet - args.verbose) * 10, 1) -craw.init_logger(verbosity) - -log = logging.getLogger('craw.htmp') - - -log.debug("args={}".format(args)) -########################### -# validating some options # -########################### -# test if DISPlAY -if not os.environ.get('DISPLAY'): - if args.out is not None: - _, out_format = os.path.splitext(args.out) - from craw.util import non_interactive_backends - try: - backend = non_interactive_backends[out_format.lstrip('.')] - except KeyError: - raise RuntimeError("The '{}' format is not supported, choose among {}.".format( - out_format, - list(non_interactive_backends.keys()) - )) - mtp.use(backend) - else: - raise RuntimeError(""" -'DISPLAY' variable is not set (you probably run craw_htmp in non graphic environment) -So you cannot use interactive output -please specify an output file (--out). -""") - -# the import of pyplot must be done after setting the backend -import matplotlib.pyplot as plt -from craw import heatmap - - -try: - color_map = plt.cm.get_cmap(args.cmap) -except AttributeError as err: - raise RuntimeError("{} : http:matplotlib.org/examples/color/colormaps_reference.html for example".format(err)) - -if args.sort_by_gene_size == []: - args.sort_by_gene_size = ['annotation_start', 'annotation_end'] -elif args.sort_by_gene_size: - args.sort_by_gene_size = args.sort_by_gene_size[0] - -if args.out: - out_dir = os.path.dirname(args.out) or '.' 
- if not os.access(out_dir, os.W_OK): - msg = "{} is not writable".format(out_dir) - log.error(msg) - raise RuntimeError(msg) - - -log.info("Parsing coverage file") -data = heatmap.get_data(args.cov_file) - - -def mark_converter(data, marks): - converted_marks = [] - for value in marks: - pos = int(value[0]) - color = value[1] if len(value) == 2 else None - converted_marks.append(heatmap.Mark(pos, data, color_map, color=color)) - return converted_marks - -if args.mark is not None: - args.mark = mark_converter(data, args.mark) - -sense_data, antisense_data = heatmap.split_data(data) - -if args.sense_only: - antisense_data = None -if args.antisense_only: - sense_data = None - -################ -# sorting data # -################ -# if data is empty or data is None sort return data -# so it's error safe, and not time and space consuming -if args.sort_by_gene_size: - start_col, stop_col = args.sort_by_gene_size - sense_data = heatmap.sort(sense_data, 'by_gene_size', start_col=start_col, stop_col=stop_col) - antisense_data = heatmap.sort(antisense_data, 'by_gene_size', start_col=start_col, stop_col=stop_col) -elif args.sort_using_col: - sense_data = heatmap.sort(sense_data, 'using_col', col=args.sort_using_col) - antisense = heatmap.sort(antisense_data, 'using_col', col=args.sort_using_col) -elif args.sort_using_file: - sense_data = heatmap.sort(sense_data, 'using_file', file=args.sort_using_file) - antisense_data = heatmap.sort(antisense_data, 'using_file', file=args.sort_using_file) - -sense_data = heatmap.remove_metadata(sense_data) -antisense_data = heatmap.remove_metadata(antisense_data) - -################ -# croping data # -################ -log.info("Croping matrix") -if args.crop: - start_col, stop_col = args.crop - sense_data = heatmap.crop(sense_data, start_col, stop_col) - antisense_data = heatmap.crop(antisense_data, start_col, stop_col) - -#################### -# Normalizing data # -#################### -log.info("Normalizing data") - -if args.norm == 
"lin": - log.info("Linear normalisation") - sense_data = heatmap.lin_norm(sense_data) - antisense_data = heatmap.lin_norm(antisense_data) -elif args.norm == 'log': - log.info("10 base logarithm normalisation") - sense_data = heatmap.log_norm(sense_data) - antisense_data = heatmap.log_norm(antisense_data) -elif args.norm == 'row': - log.info("Linear normalisation by row") - sense_data = heatmap.lin_norm_row_by_row(sense_data) - antisense_data = heatmap.lin_norm_row_by_row(antisense_data) -else: - # log+row or row+log - log.info("10 base logarithm and normalisation by row") - sense_data = heatmap.log_norm_row_by_row(sense_data) - antisense_data = heatmap.log_norm_row_by_row(antisense_data) - - -################## -# Drawing figure # -################## -log.info("Drawing figure") -if args.size == 'raw': - # pillow backend - if args.sense_only: - sense_to_compute = ('sense', ) - elif args.antisense_only: - sense_to_compute = ('antisense', ) - else: - sense_to_compute = ('sense', 'antisense') - - if args.out: - root_name, im_format = os.path.splitext(args.out) - im_format = im_format.lstrip('.') - if not im_format: - im_format = 'png' - else: - im_format = 'png' - root_name = os.path.splitext(args.cov_file)[0] - - out_filename = {} - for sense in sense_to_compute: - filename = "{filename}.{sense}.{format}".format(filename=root_name, sense=sense, format=im_format) - out_filename[sense] = filename - if os.path.exists(filename): - msg = "{} already exists".format(filename) - log.error(msg) - raise RuntimeError(msg) - - for sense in sense_to_compute: - log.info("Drawing {}".format(sense)) - data = globals()[sense + '_data'] - if data is not None: - heatmap.draw_raw_image(data, out_filename[sense], color_map, marks=args.mark) - else: - log.warning("{} data are empty: skip drawing.".format(sense)) -else: - # matplotlib backend - ########## - # layout # - ########## - if args.sense_on_left: - sense_on = 'left' - elif args.sense_on_right: - sense_on = 'right' - elif 
args.sense_on_top: - sense_on = 'top' - elif args.sense_on_bottom: - sense_on = 'bottom' - else: - sense_on = 'top' - - if args.title is None: - title = os.path.basename(os.path.splitext(args.cov_file)[0]) - else: - title = args.title - - fig = heatmap.draw_heatmap(sense_data, antisense_data, - color_map=color_map, - title=title, - sense_on=sense_on, - size=args.size, - marks=args.mark) - - if args.out: - if os.path.exists(args.out): - msg = "The output file: {} already exists.".format(args.out) - log.error(msg) - raise RuntimeError(msg) - - if args.dpi: - fig.savefig(args.out, dpi=args.dpi) - fig.savefig(args.out) - else: - plt.show() diff --git a/craw/__init__.py b/craw/__init__.py index 5a79ae4c75b1728e85d2378bc258af158c73a659..bc07de53501a4ca19eb7e7b6040da091dba67e5a 100644 --- a/craw/__init__.py +++ b/craw/__init__.py @@ -3,7 +3,7 @@ # This file is part of Counter RNAseq Window (craw) package. # # # # Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. 
# # # # craw is free software: you can redistribute it and/or modify # @@ -25,8 +25,10 @@ import sys import logging +import time + +__version__ = '1.dev{}'.format(time.strftime('%Y%m%d')) -__version__ = '$VERSION' def get_version_message(): """ @@ -34,17 +36,13 @@ def get_version_message(): :return: A human readable version of the craw package version :rtype: string """ - # if I keep '$ VERSION' (without space) as is - # the setup.py will replace it by the value set in setup - # so the test become True even if craw is installed using setup.py - if __version__ == '$' + 'VERSION': - version = "NOT packaged, it should be a development version" - else: - version = __version__ - version_text = "craw {0} | Python {1}.{2}".format(version, sys.version_info.major, sys.version_info.minor) + version_text = "craw {0} | Python {1}.{2}".format(__version__, + sys.version_info.major, + sys.version_info.minor) return version_text -def init_logger(log_level): + +def init_logger(log_level, out=True): """ Initiate the "root" logger for craw library all logger create in craw package inherits from this root logger @@ -54,9 +52,13 @@ def init_logger(log_level): :type log_level: int """ craw_log = logging.getLogger('craw') - handler = logging.StreamHandler() - formatter = logging.Formatter(fmt='{levelname} : {name} : {message}', style='{') - handler.setFormatter(formatter) - craw_log.addHandler(handler) + if out: + handler = logging.StreamHandler() + formatter = logging.Formatter(fmt='{levelname} : {name} : {message}', style='{') + handler.setFormatter(formatter) + craw_log.addHandler(handler) + else: + null_handler = logging.NullHandler() + craw_log.addHandler(null_handler) craw_log.setLevel(log_level) craw_log.propagate = False diff --git a/craw/annotation.py b/craw/annotation.py index da8b18b1e65f5a458422dd9beb45b2a8156ab8ed..1811616489ca8b0a9b59dc11093ac5639b5118a1 100644 --- a/craw/annotation.py +++ b/craw/annotation.py @@ -3,7 +3,7 @@ # This file is part of Counter RNAseq Window 
(craw) package. # # # # Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # diff --git a/craw/argparse_util.py b/craw/argparse_util.py index 77bfae93455aa7e24da17363439240412503423c..9da41f87238b5693fa830f5d2e87dc404f873ff0 100644 --- a/craw/argparse_util.py +++ b/craw/argparse_util.py @@ -3,7 +3,7 @@ # This file is part of Counter RNAseq Window (craw) package. # # # # Authors: Bertrand Neron # -# Copyright c 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # diff --git a/craw/coverage.py b/craw/coverage.py index 451a35b785e67ec78d16af4975937621cb7b14c4..4a42e8cca6943475478aaa7941c62dde35bf0ebc 100644 --- a/craw/coverage.py +++ b/craw/coverage.py @@ -3,7 +3,7 @@ # This file is part of Counter RNAseq Window (craw) package. # # # # Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. 
# # # # craw is free software: you can redistribute it and/or modify # @@ -26,20 +26,13 @@ import logging import numpy as np import scipy.interpolate -try: - # for pysam>=0.9.1.4 - from pysam.calignmentfile import AlignmentFile -except ImportError: - # for pysam>=0.10 - from pysam import AlignmentFile +from pysam import AlignmentFile from .wig import Genome _log = logging.getLogger(__name__) - - def sum_coverage_maker(input_data, qual_thr=None): """ This function return a new function :func:`get_sum_coverage` @@ -232,7 +225,6 @@ def get_raw_coverage_function(input): "'pysam.calignmentfile.AlignmentFile' as Input, not {}".format(input.__class__.__name__)) - def get_raw_wig_coverage(genome, annot_entry, start, stop, qual_thr=None): """ :param genome: The genome which store all coverages. @@ -314,20 +306,24 @@ def get_raw_bam_coverage(sam_file, annot_entry, start, stop, qual_thr=15): call_back = on_forward if strand == '+' else on_reverse try: - coverage = sam_file.count_coverage(reference=chromosome, + coverage = sam_file.count_coverage(chromosome, start=start, - end=stop, + stop=stop, quality_threshold=qual, read_callback=call_back) except SystemError as err: import sys - print("ERROR when call count_coverage with following arguments\n", - "reference=", chromosome, "\n", - "start=", start, "\n", - "end=", stop, "\n", - "quality_threshold=", qual, "\n", - "read_callback=", call_back, - file=sys.stderr) + _log.critical("ERROR when call count_coverage with following arguments\n" + "reference= {chromosome} \n" + "start={start}\n" + "end={stop}\n" + "quality_threshold={qual}\n" + "read_callback={call_back}".format(chromosome=chromosome, + start=start, + stop=stop, + qual=qual, + call_back=call_back) + ) raise err coverage = [array.tolist() for array in coverage] diff --git a/craw/heatmap.py b/craw/heatmap.py index 6651dccc0cf632542e3a668efb52c7a04e94d7e3..4e0c113cc859369bc350f244d03e7c22d847d6dd 100644 --- a/craw/heatmap.py +++ b/craw/heatmap.py @@ -3,7 +3,7 @@ # This 
file is part of Counter RNAseq Window (craw) package. # # # # Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # @@ -41,7 +41,7 @@ def get_data(coverage_file): :return: the data as 2 dimension dataframe :rtype: a :class:`pandas.DataFrame` object """ - data = pd.read_table(coverage_file, comment='#', na_values='None') + data = pd.read_csv(coverage_file, sep="\t", comment='#', na_values='None') return data @@ -140,7 +140,7 @@ def _sort_using_file(data, file=None): :rtype: a :class:`pandas.DataFrame` object. """ _log.info("Sorting data using file {}".format(file)) - ref = pd.read_table(file, comment="#") + ref = pd.read_csv(file, comment="#", sep='\t') col_name = ref.columns[0] # change the index of the data using the col_name @@ -319,6 +319,8 @@ def draw_one_matrix(mat, ax, cmap=plt.cm.Blues, y_label=None, marks=None): :type cmap: a :class:`matplotlib.pyplot.cm` object. :param y_label: the label for the data draw on y-axis. :type y_label: string + :param marks: list of vertical marks + :type marks: list of :class:`Mark` object :return: the mtp image corresponding to data :rtype: a :class:`matplotlib.image` object. """ @@ -342,7 +344,7 @@ def draw_one_matrix(mat, ax, cmap=plt.cm.Blues, y_label=None, marks=None): ax.set_ylabel(y_label, size='large') if marks: for mark in marks: - ax.axvline(x=mark.to_px(), linewidth=0.5, color=mark.rgb_float()) + ax.axvline(x=mark.to_px(), linewidth=0.5, color=mark.rgb_float) return mat_img @@ -363,6 +365,8 @@ def draw_heatmap(sense, antisense, color_map=plt.cm.Blues, title='', sense_on='t :type sense_on: string. :param size: the size of the figure in inches (wide, height). :type size: tuple of 2 float. + :param marks: list of vertical marks + :type marks: list of :class:`Mark` object :return: The figure. 
:rtype: a :class:`matplotlib.pyplot.Figure` object. """ diff --git a/craw/scripts/__init__.py b/craw/scripts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d7911b88114661ac217c46cc0ded248be64b8031 --- /dev/null +++ b/craw/scripts/__init__.py @@ -0,0 +1,23 @@ +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. # +# # +# craw is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# craw is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with craw (see COPYING file). # +# If not, see <http://www.gnu.org/licenses/>. # +# # +########################################################################### \ No newline at end of file diff --git a/craw/scripts/craw_coverage.py b/craw/scripts/craw_coverage.py new file mode 100755 index 0000000000000000000000000000000000000000..0ed034f7754de5189e99a09a9a55c2ca31442fa7 --- /dev/null +++ b/craw/scripts/craw_coverage.py @@ -0,0 +1,517 @@ +#! /usr/bin/env python3 + +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. 
def positive_int(value):
    """
    Argparse ``type=`` converter for integers which cannot be negative.

    Despite the wording of the error message, zero is accepted:
    only values strictly lower than 0 are rejected.

    :param value: the value given by the parser
    :type value: string
    :return: the integer corresponding to the value
    :rtype: int
    :raise: :class:`argparse.ArgumentTypeError` if value is not an integer or is negative
    """
    msg = "must be a positive integer, got: {}".format(value)
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(msg)
    if number < 0:
        raise argparse.ArgumentTypeError(msg)
    return number


def quality_checker(value):
    """
    Argparse ``type=`` converter for a mapping quality threshold.

    :param value: the value given by the parser
    :type value: string
    :return: the integer >=0 and <=42 corresponding to the value
    :rtype: int
    :raise: :class:`argparse.ArgumentTypeError` if value does not represent a integer >=0 and <=42
    """
    msg = "must be a integer between 0 and 42, got: {}".format(value)
    try:
        quality = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(msg)
    if not 0 <= quality <= 42:
        raise argparse.ArgumentTypeError(msg)
    return quality


def get_result_header(annot_parser, parsed_args):
    """
    Compute the header for the results.
    The first lines start with # ;
    they contain some general information about craw (version)
    and the options used (for traceability).
    The last line is the header of the columns, separated by tabulations,
    and can be used as header with pandas.

    :param annot_parser: the annotation parser
    :type annot_parser: :class:`annotation.AnnotationParser` object
    :param parsed_args: the command line argument parsed with argparse
    :type parsed_args: :class:`argparse.Namespace`
    :return: The header of the result file
    :rtype: str
    """
    # the annotation columns are the same whatever the kind of coverage computed
    metadata = '\t'.join([str(f) for f in annot_parser.header])

    def version_infos():
        header = "# Running Counter RnAseq Window craw_coverage\n"
        # every line of the version message must stay a comment line
        commented_ver = get_version_message().rstrip().replace('\n', '\n# ')
        header += """#
# Version: {}
#
# craw_coverage run with the following arguments:
""".format(commented_ver)
        return header

    def options():
        lines = []
        for opt, val in sorted(vars(parsed_args).items()):
            # options which are not set (None) or switched off (False) are not reported.
            # identity tests are used on purpose: 0 is a value to report.
            if val is None or val is False:
                continue
            opt = opt.replace('_', '-')
            if val is True:
                lines.append("# --{opt}\n".format(opt=opt))
            else:
                lines.append("# --{opt}={val}\n".format(opt=opt, val=val))
        return ''.join(lines)

    def padded_header():
        if parsed_args.start_col:
            # variable length regions: the columns span the widest region
            max_left, max_right = annot_parser.max()
            pos = '\t'.join(str(p) for p in range(0 - max_left, max_right + 1))
        else:
            pos = '\t'.join(str(p) for p in range(0 - parsed_args.before, parsed_args.after + 1))
        return "sense\t{metadata}\t{pos}".format(metadata=metadata, pos=pos)

    def sum_header():
        return "sense\t{metadata}\tcoverage".format(metadata=metadata)

    def resized_header(new_size):
        pos = '\t'.join([str(i) for i in range(new_size)])
        return "sense\t{metadata}\t{pos}".format(metadata=metadata, pos=pos)

    header = version_infos()
    header += options()

    if parsed_args.justify:
        header += resized_header(parsed_args.justify)
    elif parsed_args.sum:
        header += sum_header()
    else:
        header += padded_header()
    return header


def get_version_message():
    """
    :return: The human readable CRAW version, followed by the versions
             of pysam, samtools and scipy.
    :rtype: str
    """
    version_text = craw.get_version_message()
    version_text += """
Using:
   - pysam {pysam_ver} (samtools {samtools_ver})
   - scipy {sp_ver} (only for --justify opt)
""".format(pysam_ver=pysam.__version__,
           samtools_ver=pysam.__samtools_version__,
           sp_ver=craw.coverage.scipy.__version__)
    return version_text


def get_results_file(sense_opt, basename, suffix):
    """
    Open the file(s) where the results will be written.
    The sense which is not requested is redirected to :data:`os.devnull`.

    :param str sense_opt: how to managed the sense and antisense results

                          * **mixed**: sense and antisense are interleaved in same file
                          * **split**: sense and antisense are in separated files
                          * **S**: only sense results are write down
                          * **AS**: only antisense are write down

                          any other value is handled as **mixed**.

    :param str basename: the basename of the results file
    :param str suffix: the suffix of the results file
    :return: the file objects where to write sense and antisense results
             (the same object twice for *mixed*)
    :rtype: tuple (`file object` sense, `file object` antisense)
    """
    sense_filename = "{filename}.sense.{suffix}".format(filename=basename, suffix=suffix)
    antisense_filename = "{filename}.antisense.{suffix}".format(filename=basename, suffix=suffix)

    if sense_opt == 'S':
        sense_path, antisense_path = sense_filename, os.devnull
    elif sense_opt == 'AS':
        sense_path, antisense_path = os.devnull, antisense_filename
    elif sense_opt == 'split':
        sense_path, antisense_path = sense_filename, antisense_filename
    else:
        output_filename = "{filename}.{suffix}".format(filename=basename, suffix=suffix)
        output = open(output_filename, 'w')
        return output, output

    sense = open(sense_path, 'w')
    try:
        antisense = open(antisense_path, 'w')
    except OSError:
        # do not leak the first handle if the second file cannot be opened
        sense.close()
        raise
    return sense, antisense


def parse_args(args):
    """
    Parse the command line and check the consistency between the options.

    When ``--window`` is used alone, ``before`` and ``after`` are both set
    to the window value in the returned namespace.

    :param args: The options set on the command line (without the program name)
    :type args: list of string
    :return: the parsed and validated options
    :rtype: :class:`argparse.Namespace` object
    :raise: :class:`argparse.ArgumentError` if the input options
            (--bam, --wig, --wig-for, --wig-rev) or the region options
            (--window, --before, --after, --start-col, --stop-col) are inconsistent.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
    input_grp = parser.add_argument_group()
    input_grp.add_argument("-b", "--bam",
                           help="""The path of the bam file to analyse.
                           --bam option is not compatible with any --wig or --wig-for or --wig-rev options.
                           but at least --bam or any of --wig* options is required.""")
    input_grp.add_argument("-w", "--wig",
                           help="""The path of the wig file to analyse.
                           The file encode the coverage for the both strand.
                           The positive coverage ar on the forward strand whereas the negative coverage a located on the reverse one.
                           The --wig option is incompatible with both --bam or --wig-for or --wig-reverse options.""")
    input_grp.add_argument("--wig-for",
                           metavar='FORWARD WIG',
                           help="""The path of a wig file to analyse.
                           This file encode the coverage for the forward strand.
                           The --wig-for option is incompatible with both --bam or --wig options.""")
    input_grp.add_argument("--wig-rev",
                           metavar='REVERSE WIG',
                           help="""The path of a wig file to analyse.
                           This file encode the coverage for the reverse strand.
                           The --wig-rev option is incompatible with both --bam or --wig options.""")
    parser.add_argument("-a", "--annot",
                        required=True,
                        help="The path of the annotation file (required).")
    parser.add_argument("--qual-thr",
                        dest='qual_thr',
                        type=quality_checker,
                        default=15,
                        help="The minimal quality of read mapping to take it in account")
    parser.add_argument("-s", "--suffix",
                        default="cov",
                        help="The name of the suffix to use for the output file.")
    parser.add_argument('-o', '--output',
                        dest='output',
                        help="The path of the output (default= base name of annotation file with --suffix)")
    parser.add_argument('--sep',
                        default='\t',
                        help="the separator use to delimit the annotation fields")
    mutually_exclusive_opt = parser.add_mutually_exclusive_group()
    mutually_exclusive_opt.add_argument('--justify',
                                        type=positive_int,
                                        help="to resize all genes coverage to this new size.")
    mutually_exclusive_opt.add_argument('--sum',
                                        action='store_true',
                                        default=False,
                                        help="sum all the coverages on the window.")

    region_grp = parser.add_argument_group(title="region of interest",
                                           description="""Parameters which define regions to compute.

    There is 2 way to define regions:
        * all regions have same length.
        * each region have different lengths.

    In both case a position of reference must be define (--ref-col).

    If all regions have same length:

        --window define the number of nucleotide to take in account before and
                 after the reference position (the window will be centered on reference)
        --before define the number of nucleotide to take in account before the
                 reference position.
        --after define the number of nucleotide to take in account after the
                reference position.
        --before and --after allow to define non centered window.

        --after and --before options must be set together and are
        incompatible with --window option.

    If all regions have different lengths:

        The regions must be specified in the annotation file.
        --start-col define the name of the column in annotation file which define
                    the start position of the region to compute.
        --stop-col define the name of the column in annotation file which define
                   the stop position of the region to compute.
    """)
    region_grp.add_argument("--ref-col",
                            default="position",
                            help="The name of the column for the reference position (default: position).")
    # the help of --before and --after were swapped,
    # they now agree with the description of the group above
    region_grp.add_argument("--before",
                            type=positive_int,
                            help="The number of base to compute before the position of reference.")
    region_grp.add_argument("--after",
                            type=positive_int,
                            help="The number of base to compute after the position of reference.")
    region_grp.add_argument("--window",
                            type=positive_int,
                            help="The number of base to compute around the position of reference.")
    region_grp.add_argument("--start-col",
                            help="The name of the column to define the start position.")
    region_grp.add_argument("--stop-col",
                            help="The name of the column to define the stop position.")
    col_name = parser.add_argument_group(title="specify the name of columns")
    col_name.add_argument("--strand-col",
                          default='strand',
                          help="Specify the name of the column representing the strand (default: strand)")
    col_name.add_argument("--chr-col",
                          default='chromosome',
                          help="Specify the name of the column representing the chromosome (default: chromosome)")

    parser.add_argument("--sense",
                        choices=('S', 'AS', 'split', 'mixed'),
                        default='mixed',
                        help="compute result only on: "
                             "sense (S), "
                             "antisense (AS), "
                             "on both senses but produce two separated files (split), "
                             "or in one file (mixed)."
                             "(default: mixed)"
                        )

    parser.add_argument("--version",
                        action=argparse_util.VersionAction,
                        version=get_version_message())
    parser.add_argument("-q", "--quiet",
                        action="count",
                        default=0,
                        help="Reduce verbosity.")
    parser.add_argument("-v", "--verbose",
                        action="count",
                        default=0,
                        help="Increase verbosity.")

    parsed_args = parser.parse_args(args)

    input_opt_group = (parsed_args.bam, parsed_args.wig, parsed_args.wig_for, parsed_args.wig_rev)
    wig_opt_group = (parsed_args.wig, parsed_args.wig_for, parsed_args.wig_rev)

    #############################
    # Check wig and bam options #
    #############################
    if not any(input_opt_group):
        raise argparse.ArgumentError(None, "At least one of these options must be specified"
                                           " '--bam', '--wig' , '--wig-for', '--wig-rev'.")
    elif all(input_opt_group):
        raise argparse.ArgumentError(None,
                                     "'--bam', '--wig' , '--wig-for', '--wig-rev' cannot specify at the same time.")
    elif parsed_args.bam and any(wig_opt_group):
        raise argparse.ArgumentError(None, "'--bam' option cannot be specified in the same time as"
                                           " '--wig', '--wig-for' or '--wig-rev' options.")
    elif parsed_args.wig and any((parsed_args.wig_for, parsed_args.wig_rev)):
        raise argparse.ArgumentError(None, "'--wig' option cannot be specified in the same time as"
                                           " '--wig-for' or '--wig-rev' options.")

    ###########################
    # Checking window options #
    ###########################
    group_one = (parsed_args.before, parsed_args.after, parsed_args.window)
    group_two = (parsed_args.start_col, parsed_args.stop_col)
    # 0 is a legal value for --before, --after and --window,
    # so the options are tested against None and not on their truthiness
    fixed_size_given = any(v is not None for v in group_one)
    columns_given = any(v is not None for v in group_two)
    before_after = (parsed_args.before, parsed_args.after)

    if not fixed_size_given and not columns_given:
        raise argparse.ArgumentError(None, "[--window or [--before, --after] or [--start-col, --stop-col] options"
                                           " must be specified")
    elif fixed_size_given and columns_given:
        raise argparse.ArgumentError(None, "Options [--before, --after, --window] and [--start-col, --stop-col] "
                                           "are mutually exclusives.")
    elif not columns_given:
        if parsed_args.window is None:
            # without --window, --before and --after must come as a pair
            if any(v is None for v in before_after):
                raise argparse.ArgumentError(None, "The two options --after and --before work together."
                                                   " The both options must be specified in same time")
        else:
            if any(v is not None for v in before_after):
                raise argparse.ArgumentError(None, "options [--before, --after] and --window are mutually exclusives.")
            # --window is a shortcut for a window centered on the reference
            parsed_args.before = parsed_args.after = parsed_args.window
    elif not all(group_two):
        raise argparse.ArgumentError(None, "The two options --start-col and --stop-col work together. "
                                           "The both options must be specified in same time")
    return parsed_args
strand_col=parsed_args.strand_col, + start_col=parsed_args.start_col, + stop_col=parsed_args.stop_col, + sep=parsed_args.sep) + + if parsed_args.bam: + # input_data is a samfile + input_file = parsed_args.bam + input_data = pysam.AlignmentFile(parsed_args.bam, "rb") + elif parsed_args.wig: + # input_data is a wig.Genome object + input_file = parsed_args.wig + wig_parser = WigParser(mixed_wig=parsed_args.wig) + input_data = wig_parser.parse() + else: + # input_data is a wig.Genome object + input_file = parsed_args.wig_for + wig_parser = WigParser(for_wig=parsed_args.wig_for, rev_wig=parsed_args.wig_rev) + input_data = wig_parser.parse() + + annotations = annot_parser.get_annotations() + + ############################ + # checking outputs options # + ############################ + if not parsed_args.output: + parsed_args.output = os.path.splitext(input_file)[0] + out_name = parsed_args.output + suffix = parsed_args.suffix + else: + out_name, suffix = os.path.splitext(parsed_args.output) + suffix = suffix.strip('.') + if not suffix: + suffix = parsed_args.suffix + + sense_file, antisense_file = get_results_file(parsed_args.sense, out_name, suffix) + + ########################### + # Computing output matrix # + ########################### + with sense_file, antisense_file: + header = get_result_header(annot_parser, parsed_args) + + if parsed_args.sense in ('S', 'split', 'mixed'): + # if parsed_args.sense is mixed the sense_file and antisense_file are the same object + print(header, file=sense_file) + if parsed_args.sense in ('AS', 'split'): + print(header, file=antisense_file) + + # get the appropriate function according to the input type + # the 2 functions + # - get_wig_coverage + # - get_bam_coverage + # have exactly the same api + if parsed_args.justify: + get_coverage = coverage.resized_coverage_maker(input_data, parsed_args.justify, qual_thr=None) + elif parsed_args.sum: + get_coverage = coverage.sum_coverage_maker(input_data, qual_thr=parsed_args.qual_thr) + 
else: + if parsed_args.window is not None: + max_left = max_right = parsed_args.window + elif parsed_args.before and parsed_args.after: + max_left = parsed_args.before + max_right = parsed_args.after + else: + max_left, max_right = annot_parser.max() + get_coverage = coverage.padded_coverage_maker(input_data, max_left, max_right, qual_thr=parsed_args.qual_thr) + + for annot_num, annot_entry in enumerate(annotations, 1): + if verbosity <= logging.INFO: + progress(annot_num, annot_line_number) + if parsed_args.start_col: + # pos in get_coverage functions are + # 0 based whereas in annotation they are 1 based + # start is included, stop is excluded + start = annot_entry.start - 1 + stop = annot_entry.stop + else: + if annot_entry.strand == '+': + start = annot_entry.ref - parsed_args.before - 1 + stop = annot_entry.ref + parsed_args.after + else: + # if feature is on reverse strand + # the before and after are inverted + start = annot_entry.ref - parsed_args.after - 1 + stop = annot_entry.ref + parsed_args.before + forward_cov, reverse_cov = get_coverage(annot_entry, start=start, stop=stop) + + sens = 'S' if annot_entry.strand == '+' else 'AS' + if sens == 'S': + print(sens, annot_entry, *forward_cov, sep='\t', file=sense_file) + else: + print(sens, annot_entry, *forward_cov, sep='\t', file=antisense_file) + + sens = 'S' if annot_entry.strand == '-' else 'AS' + if sens == 'S': + print(sens, annot_entry, *reverse_cov, sep='\t', file=sense_file) + else: + print(sens, annot_entry, *reverse_cov, sep='\t', file=antisense_file) + + print(file=sys.stderr) + + +if __name__ == '__main__': + main() diff --git a/craw/scripts/craw_htmp.py b/craw/scripts/craw_htmp.py new file mode 100755 index 0000000000000000000000000000000000000000..da602a51348e7e1b5b89a507dd60140247d87477 --- /dev/null +++ b/craw/scripts/craw_htmp.py @@ -0,0 +1,556 @@ +#! 
/usr/bin/env python3 + +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. # +# # +# craw is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# craw is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with craw (see COPYING file). # +# If not, see <http://www.gnu.org/licenses/>. # +# # +########################################################################### + +import sys +import os +import argparse +import logging + +import matplotlib as mtp +import matplotlib.pyplot as plt + +import craw + + +def _file_readable(value): + """ + check value given by the parser + + :param str value: the value given by the parser for --sort-using-file option + :return: the normpath of the value + :raises: ArgumentError if the file does not exists, or is not a file, or not readable + """ + if not os.path.exists(value): + raise argparse.ArgumentError(None, "No such file: {}".format(value)) + elif not os.path.isfile(value): + raise argparse.ArgumentError(None, "{} is not a regular file".format(value)) + elif not os.access(value, os.R_OK): + raise argparse.ArgumentError(None, "{} is not readable".format(value)) + return os.path.normpath(value) + + +def _gene_size_parser(value): + """ + Parse value given by the parser + + :param value: the value given by the parser for --sort-by-gene-size option + :type value: 
string + :return: name of column representing the start of gene, name of the column representing the end of gene + :rtype: tuple of 2 string + :raise: :class:`argparse.ArgumentError` object if value cannot be parsed + """ + # this function is called only if value is provided to sort-by-gene-size option + rv = value.split(',') + if len(rv) == 0: + rv = ['', ''] + elif len(rv) == 2: + pass + else: + raise argparse.ArgumentError(None, "--sort-by-gene-size {} invalid value. " + "Must be start_col, stop_col (separated by a comma), " + "default= annotation_start,annotation_end".format(value)) from None + return rv + + +def _size_fig_parser(value): + """ + Parse value given by the parser for --size option + the value must follow the syntax widexheight[unit] + if the unit is omitted unit is inch + otherwise unit must be + + * 'mm' for millimeters + * 'cm' for centimeters + * 'in' for inches + * 'px' for pixels + + wide and height must be positive integers. + + :param value: the size of the figure + :type value: string + :return: the size in inch ask by the user + :rtype: tuple of float + :raise: :class:`argparse.ArgumentError` object + """ + def mm2in(value): + return value * (1 / 25.4) + + def cm2in(value): + return mm2in(value * 10) + + def px2in(value): + dpi = plt.rcParams['figure.dpi'] + return value / dpi + + err_msg = """--size {} invalid value. + The value must be widexheight[unit] or 'raw'. + 'wide' and 'height' must be positive integers + By default unit is in inches. + eg: 7x10 or 7x10in for 7 inches wide by 10 inches height + 70x100mm for 70 mm by 100 mm. 
+ default=7x10 or 10x7 depending of the figure orientation.""" + if value == 'raw': + return 'raw' + else: + unit = 'in' + if value[-2:] in ('mm', 'cm', 'in', 'px'): + unit = value[-2:] + value = value[:-2] + try: + wide, height = value.split('x') + except ValueError: + raise argparse.ArgumentError(None, err_msg.format(value)) + try: + wide = int(wide) + height = int(height) + except ValueError: + raise argparse.ArgumentError(None, err_msg.format(value)) + if wide < 0 or height < 0: + raise argparse.ArgumentError(None, err_msg.format(value)) + + if unit == 'mm': + wide = mm2in(wide) + height = mm2in(height) + elif unit == 'cm': + wide = cm2in(wide) + height = cm2in(height) + elif unit == 'px': + wide = px2in(wide) + height = px2in(height) + + return wide, height + + +def get_version_message(): + """ + :return: a human readable of craw_htmp version and it's main dependencies. + :rtype: str + """ + # pyplot must be import after the argument parsing + # because in function of the environment ($DYSPLAY and options) the craw_htmp behavior + # is not the same + # so heatmap cannot be import before argument parsing + # so I import the dependencies in get_version_message. + # it's not very important as get_version_message is called only when --version is set + # so the program quit after displaying this message. 
+ import numpy as np + import pandas as pd + import matplotlib as mtp + import PIL + version_text = craw.get_version_message() + version_text += """ +Using: + - numpy {np_ver} + - pandas {pd_ver} + - matplotlib {mtp_ver} + - pillow {pil_ver} +""".format(np_ver=np.__version__, + pd_ver=pd.__version__, + mtp_ver=mtp.__version__, + pil_ver=PIL.PILLOW_VERSION + ) + return version_text + + +def parse_args(args): + """ + :param args: the arguments and option as provided by sys.argv without the program name + :return: the argument parsed + :rtype: :class:`argparse.Namespace` object + """ + parser = argparse.ArgumentParser(description="Compute a figure from a file of coverage compute by craw_coverage.py.", + formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument(dest="cov_file", + help="the path to the coverage file.") + + data_grp = parser.add_argument_group('optional data options') + data_grp.add_argument("--crop", + nargs=2, + help="""crop the matrix. + This option need two values the name of the first and last column to keep + [start col, stop col] eg --crop -10 1000 .""") + + sort_grp = data_grp.add_mutually_exclusive_group() + sort_grp.add_argument("--sort-using-col", + default=False, + help="sort the rows using the column COL.") + sort_grp.add_argument("--sort-using-file", + type=_file_readable, + help="""Sort the rows using a file. + The file must have on the first line the name of the column + which will use to sort. + Each following lines must match to a value of this column in the data.""") + sort_grp.add_argument("--sort-by-gene-size", + type=_gene_size_parser, + nargs='*', + metavar='start_col,stop_col', + help="""The rows will be sorted by gene size + using start-col and stop-col to compute length. + + start-col and stop-col must be a string separated by comma. + If start-col and stop-col are not specified annotation_start and annotation_end + for start-col and stop-col respectively will be used. 
+ (Don't put this option without value just before the coverage file.)""") + + matrix_grp = data_grp.add_mutually_exclusive_group() + matrix_grp.add_argument("--sense-only", + action="store_true", + default=False, + help="Display only sense matrix (default is display both).") + matrix_grp.add_argument("--antisense-only", + action="store_true", + default=False, + help="Display only anti sense matrix (default is display both).") + + fig_grp = parser.add_argument_group("optional figure options") + fig_grp.add_argument("--cmap", + default="Blues", + help="""The color map used to display data. + The allowed values are defined in + http:matplotlib.org/examples/color/colormaps_reference.html + eg: Blues, BuGn, Greens, GnBu, ... (default: Blues).""") + fig_grp.add_argument("--title", + help="""The figure title. + It will display on the top of the figure. + (default: the name of the coverage file without extension).""") + fig_grp.add_argument("--dpi", + type=int, + help="""The resolution of the output . + This option work only if --out option is specified with size not raw. + (default: matplolibrc figure.dpi""") + fig_grp.add_argument("--size", + type=_size_fig_parser, + help="""Specify the figure size. + The value must be widexheight[unit] or 'raw'. + + If value is raw it will be produce two image files (for sense and antisense) " + with one pixel correspond to one coverage value for one nucleotide. + + Otherwise, 'wide' and 'height' must be positive integers + By default units are in inches eg: + + * 7x10 or 7x10in for 7 inches wide by 10 inches height + * 70x100mm for 70 mm by 100 mm. + + (default: 7x10 or 10x7 depending of the figure orientation).""") + + fig_grp.add_argument("--norm", + choices=["lin", "log", "row", "log+row", "row+log"], + default="lin", + help="""Which normalization to apply to the data before display them. + + * lin a linear normalization is applied on the whole matrix. 
+ * log a 10 base logarithm will be applied on the data before matrix + normalization. + * row mean that a linear normalisation is compute row by row. + * log+row mean a 10 base logarithm will be applied before a normalisation + row by row. ('row+log' is an alias for 'log+row'). + (default: lin""") + + fig_grp.add_argument("--mark", + action='append', + nargs='*', + metavar='POS [COLOR]', + help="""* POS is mandatory and must be a positive integer. + * COLOR is optional + The supported color formats are: + - Hexadecimal color specifiers, given as '#rgb' or '#rrggbb'. For example, '#ff0000' specifies pure red. + - Common HTML color names. + + If COLOR is omitted the color corresponding to the highest value in the color map (--cmap) will be used. + (this option cannot be the last one just before the coverage file, on the command line.)""") + + layout = fig_grp.add_mutually_exclusive_group() + layout.add_argument("--sense-on-left", + action="store_true", + default=False, + help="Where to display the sense matrix relative to antisense matrix (default is top).") + layout.add_argument("--sense-on-right", + action="store_true", + default=False, + help="Where to display the sense matrix relative to antisense matrix (default is top).") + layout.add_argument("--sense-on-top", + action="store_true", + default=False, + help="Where to display the sense matrix relative to antisense matrix (default is top).") + layout.add_argument("--sense-on-bottom", + action="store_true", + default=False, + help="Where to display the sense matrix relative to antisense matrix (default is top).") + + parser.add_argument("--out", + help="""The name of the file (the format will based on the extension) + to save the figure. + Instead of displaying the figure on the screen, save it directly in this file. + + If this option is used with --size raw 2 files will be produced + for respectively sense and anti sense. 
+ The extension 'sense' or 'antisense' will be added between the name and the suffix eg: + --size raw --out foo.png give 2 files 'foo.sense.png' and 'foo.antisense.png'. + + If no format (determine using the suffix) is given 'png' will be used.""") + + parser.add_argument("-v", "--verbose", + action='count', + default=0, + help="Increase output verbosity.") + parser.add_argument("-q", "--quiet", + action="count", + default=0, + help="Reduce output verbosity.") + parser.add_argument("--version", + action='version', + version=get_version_message(), + help="display the version information and quit.") + parsed_args = parser.parse_args(args) + return parsed_args + + +def main(args=None, log_level=None, logger_out=True): + """ + The entrypoint for craw_html script + + It will generate a heatmap representing the coverage matrix around the position of interest + it can display the results on the screen or write it on file depending the options + + :param args: The arguments and option representing the command line + :type args: list of string + :param log_level: the level of verbosity + :param logger_out: True if you want to display logs on stdout, False otherwise + :return: + """ + args = sys.argv[1:] if args is None else args + parsed_args = parse_args(args) + + verbosity = max(logging.INFO + (parsed_args.quiet - parsed_args.verbose) * 10, 1) + craw.init_logger(verbosity, out=logger_out) + + log = logging.getLogger('craw.htmp') + log.debug("args={}".format(parsed_args)) + ########################### + # validating some options # + ########################### + # test if DISPlAY + if not os.environ.get('DISPLAY'): + if parsed_args.out is not None: + _, out_format = os.path.splitext(parsed_args.out) + if not out_format: + raise RuntimeError(""" + 'DISPLAY' variable is not set (you probably run craw_htmp in non graphic environment) + So you must specify an output format (add ext to the output file option as 'my_file.png') + """) + from craw.util import 
non_interactive_backends + try: + backend = non_interactive_backends[out_format.lstrip('.')] + except KeyError: + raise RuntimeError("The '{}' format is not supported, choose among {}.".format( + out_format, + list(non_interactive_backends.keys()) + )) + mtp.use(backend) + else: + raise RuntimeError(""" + 'DISPLAY' variable is not set (you probably run craw_htmp in non graphic environment) + So you cannot use interactive output + please specify an output file (--out). + """) + + # the import of pyplot must be done after setting the backend + import matplotlib.pyplot as plt + from craw import heatmap + + try: + color_map = plt.cm.get_cmap(parsed_args.cmap) + except(AttributeError, ValueError) as err: + raise RuntimeError("{} : http:matplotlib.org/examples/color/colormaps_reference.html for example".format(err)) + + if parsed_args.sort_by_gene_size == []: + parsed_args.sort_by_gene_size = ['annotation_start', 'annotation_end'] + elif parsed_args.sort_by_gene_size: + parsed_args.sort_by_gene_size = parsed_args.sort_by_gene_size[0] + + if parsed_args.out: + out_dir = os.path.dirname(parsed_args.out) or '.' 
+ if not os.access(out_dir, os.W_OK): + msg = "{} is not writable".format(out_dir) + log.error(msg) + raise RuntimeError(msg) + + log.info("Parsing coverage file") + data = heatmap.get_data(parsed_args.cov_file) + + + def mark_converter(data, marks): + converted_marks = [] + for value in marks: + pos = int(value[0]) + color = value[1] if len(value) == 2 else None + converted_marks.append(heatmap.Mark(pos, data, color_map, color=color)) + return converted_marks + + if parsed_args.mark is not None: + parsed_args.mark = mark_converter(data, parsed_args.mark) + + sense_data, antisense_data = heatmap.split_data(data) + + if parsed_args.sense_only: + antisense_data = None + if parsed_args.antisense_only: + sense_data = None + + ################ + # sorting data # + ################ + # if data is empty or data is None sort return data + # so it's error safe, and not time and space consuming + if parsed_args.sort_by_gene_size: + start_col, stop_col = parsed_args.sort_by_gene_size + sense_data = heatmap.sort(sense_data, 'by_gene_size', start_col=start_col, stop_col=stop_col) + antisense_data = heatmap.sort(antisense_data, 'by_gene_size', start_col=start_col, stop_col=stop_col) + elif parsed_args.sort_using_col: + sense_data = heatmap.sort(sense_data, 'using_col', col=parsed_args.sort_using_col) + antisense = heatmap.sort(antisense_data, 'using_col', col=parsed_args.sort_using_col) + elif parsed_args.sort_using_file: + sense_data = heatmap.sort(sense_data, 'using_file', file=parsed_args.sort_using_file) + antisense_data = heatmap.sort(antisense_data, 'using_file', file=parsed_args.sort_using_file) + + sense_data = heatmap.remove_metadata(sense_data) + antisense_data = heatmap.remove_metadata(antisense_data) + + ################ + # croping data # + ################ + log.info("Croping matrix") + if parsed_args.crop: + start_col, stop_col = parsed_args.crop + sense_data = heatmap.crop_matrix(sense_data, start_col, stop_col) + antisense_data = 
heatmap.crop_matrix(antisense_data, start_col, stop_col) + + #################### + # Normalizing data # + #################### + log.info("Normalizing data") + + if parsed_args.norm == "lin": + log.info("Linear normalisation") + sense_data = heatmap.lin_norm(sense_data) + antisense_data = heatmap.lin_norm(antisense_data) + elif parsed_args.norm == 'log': + log.info("10 base logarithm normalisation") + sense_data = heatmap.log_norm(sense_data) + antisense_data = heatmap.log_norm(antisense_data) + elif parsed_args.norm == 'row': + log.info("Linear normalisation by row") + sense_data = heatmap.lin_norm_row_by_row(sense_data) + antisense_data = heatmap.lin_norm_row_by_row(antisense_data) + else: + # log+row or row+log + log.info("10 base logarithm and normalisation by row") + sense_data = heatmap.log_norm_row_by_row(sense_data) + antisense_data = heatmap.log_norm_row_by_row(antisense_data) + + ################## + # Drawing figure # + ################## + log.info("Drawing figure") + if parsed_args.size == 'raw': + # pillow backend + if parsed_args.sense_only: + sense_to_compute = ('sense', ) + elif parsed_args.antisense_only: + sense_to_compute = ('antisense', ) + else: + sense_to_compute = ('sense', 'antisense') + + if parsed_args.out: + root_name, im_format = os.path.splitext(parsed_args.out) + im_format = im_format.lstrip('.') + if not im_format: + im_format = 'png' + else: + im_format = 'png' + root_name = os.path.splitext(parsed_args.cov_file)[0] + + out_filename = {} + for sense in sense_to_compute: + filename = "{filename}.{sense}.{format}".format(filename=root_name, sense=sense, format=im_format) + out_filename[sense] = filename + if os.path.exists(filename): + msg = "{} already exists".format(filename) + log.error(msg) + raise RuntimeError(msg) + + for sense in sense_to_compute: + log.info("Drawing {}".format(sense)) + data = locals()[sense + '_data'] + if data is not None: + heatmap.draw_raw_image(data, out_filename[sense], color_map, 
marks=parsed_args.mark) + else: + log.warning("{} data are empty: skip drawing.".format(sense)) + else: + # matplotlib backend + ########## + # layout # + ########## + if parsed_args.sense_on_left: + sense_on = 'left' + elif parsed_args.sense_on_right: + sense_on = 'right' + elif parsed_args.sense_on_top: + sense_on = 'top' + elif parsed_args.sense_on_bottom: + sense_on = 'bottom' + else: + sense_on = 'top' + + if parsed_args.title is None: + title = os.path.basename(os.path.splitext(parsed_args.cov_file)[0]) + else: + title = parsed_args.title + + fig = heatmap.draw_heatmap(sense_data, antisense_data, + color_map=color_map, + title=title, + sense_on=sense_on, + size=parsed_args.size, + marks=parsed_args.mark) + + if parsed_args.out: + if os.path.exists(parsed_args.out): + msg = "The output file: {} already exists.".format(parsed_args.out) + log.error(msg) + raise RuntimeError(msg) + + if parsed_args.dpi: + fig.savefig(parsed_args.out, dpi=parsed_args.dpi) + fig.savefig(parsed_args.out) + else: + plt.show() + + +if __name__ == '__name__': + main() diff --git a/craw/wig.py b/craw/wig.py index bcc272df5774855dad3f9eee73ca7a25ade5e515..27ce458f278686b336ce5d08e684c6ac75c2f5ad 100644 --- a/craw/wig.py +++ b/craw/wig.py @@ -3,7 +3,7 @@ # This file is part of Counter RNAseq Window (craw) package. # # # # Authors: Bertrand Neron # -# Copyright (c) 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # diff --git a/doc/source/craw_coverage.rst b/doc/source/craw_coverage.rst new file mode 100644 index 0000000000000000000000000000000000000000..3ab79b42f45a48d629968b11dc2ef2e672bc43cf --- /dev/null +++ b/doc/source/craw_coverage.rst @@ -0,0 +1,14 @@ +.. _craw_coverage: + +============= +craw_coverage +============= + + +`craw_coverage.py` is the entry point script to compute coverage. + +.. 
automodule:: craw.scripts.craw_coverage + :members: + :private-members: + :special-members: + diff --git a/doc/source/craw_htmp.rst b/doc/source/craw_htmp.rst new file mode 100644 index 0000000000000000000000000000000000000000..7f4a22ee46b88f6e81c1c0fe0e8d37536ca62207 --- /dev/null +++ b/doc/source/craw_htmp.rst @@ -0,0 +1,14 @@ +.. _craw_htmp: + +========= +craw_htmp +========= + + +`craw_htmp.py` is the entry point script to compute heatmap. + +.. automodule:: craw.scripts.craw_htmp + :members: + :private-members: + :special-members: + diff --git a/doc/source/index.rst b/doc/source/index.rst index 4e6ee288b690f42190dd38204a0049220dfa4781..8eb3bb89e081d73908a812ebd57d042fe240ce30 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -46,6 +46,8 @@ reference API coverage heatmap argparse_util + craw_coverage + craw_htmp .. only:: html diff --git a/doc/source/installation.rst b/doc/source/installation.rst index 6b8a6b0874c2db7981c9156d4ba4ff0dcd06d8f7..ccf18f2547eaef416ea05ae3c09e50362bb3ab97 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -17,12 +17,13 @@ For craw_coverage For craw_htmp ------------- - - python > 3 - - pysam >= 0.9.1.4 - - pandas >= 0.17.1 - - numpy >= 1.11.2 - - matplotlib >= 1.5.3 - - pillow >= 3.4.2 + - python >= 3.5 + - pysam == 0.15.2 + - pandas >= 0.24 + - numpy >= 1.16 + - matplotlib >= 3.0 + - pillow >= 5.4 + - scipy >= 0.16.1 Installation @@ -61,7 +62,7 @@ Clone the project and install with the setup.py :: git clone https://gitlab.pasteur.fr/bneron/craw.git cd craw - python3 setup.py install + pip install . .. note:: Instead of installing craw you can directly use the scripts from the repository. 
diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 309f8dd7f8186fd6d4f5f36a230acef43dd7d984..57847b5f46ac1b0a289a6ec9cc2094976da66f03 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -93,6 +93,6 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -Authors: Bertrand Néron -Copyright © 2017 Institut Pasteur (Paris). +Authors: Bertrand Neron +Copyright © 2017-2019 Institut Pasteur (Paris). see COPYRIGHT file for details. diff --git a/requirements.txt b/requirements.txt index 833198647c04bb7cb1d21e60b9c752e1c6d82753..51d2d8dec1292ff5e78b6c23b3ac8eb9fce6ee53 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ -# integron_finder requirements are specified in setup.py - ---index-url https://pypi.python.org/simple/ - --e . \ No newline at end of file +pysam==0.15.2 +matplotlib>=3.0 +pandas>=0.24 +numpy>=1.16 +pillow>=5.4 +psutil>=5.6 +scipy>=0.16.1 \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..9cfac1170c84f44ea3b9f438d63700b132fc6593 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,3 @@ +sphinx +sphinx_rtd_theme +coverage \ No newline at end of file diff --git a/setup.py b/setup.py index 22ee7f538492b00d9d0e0b29444387ac3c2a3d33..4d0616f5384b2cb293c354328bae9b98446dbdcf 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ # This file is part of Counter RNAseq Window (craw) package. # # # # Authors: Bertrand Neron # -# Copyright c 2017 Institut Pasteur (Paris). # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. 
# # # # craw is free software: you can redistribute it and/or modify # @@ -24,73 +24,13 @@ # # ########################################################################### - -import sys -if sys.version_info[0] == 2: - sys.exit("Sorry, Python 2 is not supported") - import os -import time import sysconfig -from distutils.errors import DistutilsFileError -from distutils.util import subst_vars as distutils_subst_vars - -from setuptools import setup +from setuptools import setup, find_packages from setuptools.dist import Distribution -from setuptools.command.install_scripts import install_scripts as _install_scripts -from setuptools.command.install_lib import install_lib as _install_lib - - - -class install_lib(_install_lib): - - def finalize_options(self): - _install_lib.finalize_options(self) - - def run(self): - def subst_file(_file, vars_2_subst): - input_file = os.path.join(self.build_dir, _file) - output_file = input_file + '.tmp' - subst_vars(input_file, output_file, vars_2_subst) - os.unlink(input_file) - self.move_file(output_file, input_file) - - inst = self.distribution.command_options.get('install') - if inst: - if self.distribution.fix_lib is not None: - vars_2_subst = {'PREFIX': inst['prefix'][1] if 'prefix' in inst else '', - 'VERSION': self.distribution.get_version() - } - for _file in self.distribution.fix_lib: - subst_file(_file, vars_2_subst) - _install_lib.run(self) - -class install_scripts(_install_scripts): - - def finalize_options(self): - inst = self.distribution.command_options.get('install') - inst = {} if inst is None else inst - _install_scripts.finalize_options(self) - - def run(self): - def subst_file(_file, vars_2_subst): - input_file = os.path.join(self.build_dir, _file) - output_file = input_file + '.tmp' - subst_vars(input_file, output_file, vars_2_subst) - os.unlink(input_file) - self.move_file(output_file, input_file) - - inst = self.distribution.command_options.get('install') - inst = {} if inst is None else inst - if 
self.distribution.fix_scripts is not None: - vars_2_subst = {'PREFIX': inst['prefix'][1] if 'prefix' in inst else '', - 'PREFIXDATA': os.path.join(get_install_data_dir(inst), 'craw'), - } - for _file in self.distribution.fix_scripts: - subst_file(_file, vars_2_subst) - _install_scripts.run(self) +from craw import __version__ as cr_vers class UsageDistribution(Distribution): @@ -99,13 +39,13 @@ class UsageDistribution(Distribution): # It's important to define options before to call __init__ # otherwise AttributeError: UsageDistribution instance has no attribute 'conf_files' self.fix_lib = None - self.fix_scripts = None Distribution.__init__(self, attrs=attrs) self.common_usage = """\ Common commands: (see '--help-commands' for more) setup.py build will build the package underneath 'build/' setup.py install will install the package + setup.py test run tests after in-place build """ @@ -138,30 +78,6 @@ def get_install_data_dir(inst): return install_dir -def subst_vars(src, dst, vars): - """ - substitute variables (string starting with $) in file - :param src: the file containing variable to substitute - :type src: string - :param dst: the destination file - :type dst: string - :param vars: the variables to substitute in dict key are variable name - :type vars: dict - """ - try: - src_file = open(src, "r") - except os.error as err: - raise DistutilsFileError("could not open '{0}': {1}".format(src, err)) - try: - dest_file = open(dst, "w") - except os.error as err: - raise DistutilsFileError("could not create '{0}': {1}".format(dst, err)) - with src_file, dest_file: - for line in src_file: - new_line = distutils_subst_vars(line, vars) - dest_file.write(new_line) - - def expand_data(data_to_expand): """ From data structure like setup.py data_files (see http://) @@ -199,18 +115,12 @@ def expand_data(data_to_expand): data_struct.append((base_dest_dir, [one_src])) return data_struct -try: - from pypandoc import convert - def read_md(f): - return convert(f, 'rst') -except 
ImportError: - print("warning: pypandoc module not found, " - "could not convert Markdown to RST") - def read_md(f): return open(f, 'r').read() +def read_md(f): return open(f, 'r').read() + setup(name="craw", - version='branch1.0.dev{}'.format(time.strftime('%Y%m%d')), + version=cr_vers, author='Bertrand Neron', author_email='bneron@pasteur.fr', url="https://gitlab.pasteur.fr/bneron/craw", @@ -221,29 +131,27 @@ setup(name="craw", 'Intended Audience :: Science/Research', 'Operating System :: Unix', 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Scientific/Engineering :: Bio-Informatics' + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', ], - description="Counter RNA seq Window is a package which aim to compute and visualize the coverage of RNA seq experiment.", - # gitlab and github use md format for README whereas pypi use restructuredtext - # so to display correctly the readme on the pypi page we need - # to convert md -> rst using pandoc/pypandoc + description="Counter RNA seq Window is a package which aim to compute and " + "visualize the coverage of RNA seq experiment.", long_description=read_md('README.md'), + long_description_content_type='text/markdown', platforms=["Unix"], - install_requires=['pysam>=0.9.1.4', 'matplotlib>=1.5.3', 'pandas>=0.17.1', 'numpy>=1.11.2', 'pillow>=3.4.2', - 'psutil>=4.0.0', 'scipy>=0.16.1'], - packages=['craw'], - scripts=['bin/craw_coverage', 'bin/craw_htmp'], + python_requires='>=3.5', + install_requires=open("requirements.txt").read().split(), + extras_require={'dev': open("requirements_dev.txt").read().split()}, + packages=find_packages(), + entry_points={ + 'console_scripts': [ + 'craw_coverage=craw.scripts.craw_coverage:main', + 'craw_htmp=craw.scripts.craw_htmp:main', + ] + }, data_files=expand_data([('share/craw/doc/html', ['doc/build/html/']), ('share/craw/doc/pdf/', ['doc/build/latex/CounterRNAseqWindow.pdf'])]), - # library file where 
some variable must be fix by install_lib - fix_lib=['craw/__init__.py'], - # scripts file where some variable must be fix by install_scripts - fix_scripts=['craw_coverage'], - - cmdclass={'install_lib': install_lib, - 'install_scripts': install_scripts, - }, distclass=UsageDistribution ) diff --git a/singularity/Singularity b/singularity/Singularity new file mode 100644 index 0000000000000000000000000000000000000000..f5620ce4e34e831798d58d4ba48850b8def563f4 --- /dev/null +++ b/singularity/Singularity @@ -0,0 +1,74 @@ +Bootstrap: docker +from: ubuntu:bionic + + +%labels + maintainer Bertrand Neron <bneron@pasteur.fr> + package.name craw + package.version latest + package.homepage https://gitlab.pasteur.fr/bneron/craw + package.license GPLv3 + +%post + #################################### + # Installing system # + #################################### + + apt-get update -y + apt-get install -y --no-install-recommends python3 python3-tk + apt-get install -y git + apt-get install -y python3-pip + + ################################# + # installing craw # + ################################# + cd / + CRAW_VERS="1.0.2" + git clone https://gitlab.pasteur.fr/bneron/craw/ + cd craw + pip3 install . + + mkdir /craw + mv tests /craw/ + + ################################# + # cleaning image # + ################################# + cd / + rm -Rf craw + apt-get purge -y git + apt-get autoremove -y + apt-get clean -y + +%test + /usr/bin/python3 /craw/tests/run_tests.py -vv + +%help + This singularity image contains the "Counter RNAseq Window (CRAW) package. + Two commands are available \"coverage\" and \"htmp\. + To run command: + ./craw.img [coverage|htmp] [options]... [args]... . + + To get help about each command ./craw.img [coverage|htmp] --help. 
+ The detailed documentation is accessible here: http://bneron.pages.pasteur.fr/craw/ + +%runscript + +# the following syntax allow to get the command and args +# in POSIX manner so compliant with dash which +# is the debian/ubuntu /bin/sh shell + +CMD="$1" +shift +ARGS=${@} + +case ${CMD} in + coverage ) + exec /usr/local/bin/craw_coverage ${ARGS} ;; + htmp ) + exec /usr/local/bin/craw_htmp ${ARGS} ;; + * ) + echo "command \"${CMD}\" is not supported. available commands: \"coverage\"|\"htmp\"" + exit 127 + ;; +esac diff --git a/tests/__init__.py b/tests/__init__.py index 29ccf1e1aa727257778b8c3839ef6fcdd4343e9e..1863f47cb1cbc87305cf2b3fb001d38e6f1c56f3 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,6 +1,34 @@ +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. # +# # +# craw is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# craw is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with craw (see COPYING file). # +# If not, see <http://www.gnu.org/licenses/>. 
# +# # +########################################################################### + +import logging import os.path import unittest import platform +from contextlib import contextmanager +from io import StringIO + import numpy as np from PIL import ImageChops @@ -39,6 +67,17 @@ class CRAWTest(unittest.TestCase): msg = self._formatMessage(msg, 'image 1 != image 2 within {} delta'.format(delta)) raise self.failureException(msg) + @contextmanager + def catch_log(self): + logger = logging.getLogger('craw') + handlers_ori = logger.handlers + fake_handler = logging.StreamHandler(StringIO()) + try: + logger.handlers = [fake_handler] + yield LoggerWrapper(logger) + finally: + logger.handlers = handlers_ori + def which(name, flags=os.X_OK): """ @@ -63,3 +102,16 @@ def which(name, flags=os.X_OK): result = p break return result + + +class LoggerWrapper(object): + + def __init__(self, logger): + self.logger = logger + + def __getattr__(self, item): + return getattr(self.logger, item) + + def get_value(self): + return self.logger.handlers[0].stream.getvalue() + diff --git a/tests/data/4_htmp.antisense.png b/tests/data/4_htmp.antisense.png new file mode 100644 index 0000000000000000000000000000000000000000..ccdd95123bd88b412317946feba3e92a50b61ca3 Binary files /dev/null and b/tests/data/4_htmp.antisense.png differ diff --git a/tests/data/4_htmp.sense.png b/tests/data/4_htmp.sense.png new file mode 100644 index 0000000000000000000000000000000000000000..bc226115c9664e1e97dac0a130bd8f92b0df22c4 Binary files /dev/null and b/tests/data/4_htmp.sense.png differ diff --git a/tests/data/4_htmp_sorting_file.txt b/tests/data/4_htmp_sorting_file.txt new file mode 100644 index 0000000000000000000000000000000000000000..64a061f4694101c4ab98af74966e94f3f08da2e1 --- /dev/null +++ b/tests/data/4_htmp_sorting_file.txt @@ -0,0 +1,9 @@ +name +YPR036W +YEL043W +snR67 +YEL071W +YEL072W + + + diff --git a/tests/data/htmp_raw_lin+row.antisense.png b/tests/data/htmp_raw_lin+row.antisense.png new file 
mode 100644 index 0000000000000000000000000000000000000000..ccdd95123bd88b412317946feba3e92a50b61ca3 Binary files /dev/null and b/tests/data/htmp_raw_lin+row.antisense.png differ diff --git a/tests/data/htmp_raw_lin+row.sense.png b/tests/data/htmp_raw_lin+row.sense.png new file mode 100644 index 0000000000000000000000000000000000000000..bc226115c9664e1e97dac0a130bd8f92b0df22c4 Binary files /dev/null and b/tests/data/htmp_raw_lin+row.sense.png differ diff --git a/tests/data/htmp_raw_lin_crop.antisense.png b/tests/data/htmp_raw_lin_crop.antisense.png new file mode 100644 index 0000000000000000000000000000000000000000..dd084f8ed123f0f63a917a5ad5b977d3913e61ac Binary files /dev/null and b/tests/data/htmp_raw_lin_crop.antisense.png differ diff --git a/tests/data/htmp_raw_lin_crop.sense.png b/tests/data/htmp_raw_lin_crop.sense.png new file mode 100644 index 0000000000000000000000000000000000000000..95e02db22b9c881492f19c01be900333e7011ef7 Binary files /dev/null and b/tests/data/htmp_raw_lin_crop.sense.png differ diff --git a/tests/data/htmp_raw_lin_sort_file.antisense.png b/tests/data/htmp_raw_lin_sort_file.antisense.png new file mode 100644 index 0000000000000000000000000000000000000000..4158ecbae65eb42f5d03823919878c8afd39d6d6 Binary files /dev/null and b/tests/data/htmp_raw_lin_sort_file.antisense.png differ diff --git a/tests/data/htmp_raw_lin_sort_file.sense.png b/tests/data/htmp_raw_lin_sort_file.sense.png new file mode 100644 index 0000000000000000000000000000000000000000..293c2e9d7e1fd90672aa86454e384b30e5d6617d Binary files /dev/null and b/tests/data/htmp_raw_lin_sort_file.sense.png differ diff --git a/tests/data/htmp_raw_log_marks.antisense.png b/tests/data/htmp_raw_log_marks.antisense.png new file mode 100644 index 0000000000000000000000000000000000000000..d7afb34596f70a6801f1f129a40d3292d24c8913 Binary files /dev/null and b/tests/data/htmp_raw_log_marks.antisense.png differ diff --git a/tests/data/htmp_raw_log_marks.sense.png 
b/tests/data/htmp_raw_log_marks.sense.png new file mode 100644 index 0000000000000000000000000000000000000000..fc1dca5da515fccc85bb1570bf0d16b0f66b59c1 Binary files /dev/null and b/tests/data/htmp_raw_log_marks.sense.png differ diff --git a/tests/data/small.cov b/tests/data/small.cov index 41600ffb51030b5491bcefe5f925ff14afc0a631..55842ba1162ed5fe5008465d1ebc6504410128b2 100644 --- a/tests/data/small.cov +++ b/tests/data/small.cov @@ -1,17 +1,17 @@ # Running Counter RnAseq Window craw_coverage # -# Version: craw NOT packaged, it should be a development version | Python 3.4 +# Version: craw NOT packaged, it should be a development version | Python 3.7 # Using: -# - pysam 0.9.1.4 (samtools 1.3.1) -# - scipy 0.18.1 (only for --justify opt) +# - pysam 0.15.2 (samtools 1.9) +# - scipy 1.2.1 (only for --justify opt) # # craw_coverage run with the following arguments: # --after=3 -# --annot=/home/bneron/Projects/gwenael/src/tests/data/annotation_wo_start.txt -# --bam=/home/bneron/Projects/gwenael/src/tests/data/small.bam +# --annot=tests/data/annotation_wo_start.txt +# --bam=tests/data/small.bam # --before=5 # --chr-col=chromosome -# --output=/tmp/craw_test/small.cov +# --output=tests/data/small # --qual-thr=0 # --quiet=1 # --ref-col=Position diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tests/functional/test_craw_coverage.py b/tests/functional/test_craw_coverage.py deleted file mode 100644 index f3cb46499f459392ad0e94381da27f352b0b43ce..0000000000000000000000000000000000000000 --- a/tests/functional/test_craw_coverage.py +++ /dev/null @@ -1,854 +0,0 @@ -########################################################################### -# # -# This file is part of Counter RNAseq Window (craw) package. # -# # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # -# see COPYRIGHT file for details. 
# -# # -# craw is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# craw is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# See the GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with craw (see COPYING file). # -# If not, see <http://www.gnu.org/licenses/>. # -# # -########################################################################### - -import shutil -import tempfile -import os -from subprocess import Popen, PIPE -from itertools import zip_longest - -from tests import CRAWTest, which - - -class Test(CRAWTest): - - def setUp(self): - if 'CRAW_HOME' in os.environ: - self.craw_home = os.environ['CRAW_HOME'] - self.local_install = True - else: - self.local_install = False - self.craw_home = os.path.normpath(os.path.abspath(os.path.join(os.path.dirname(__file__), '..' 
'..'))) - self.tmp_dir = tempfile.gettempdir() - self.bin = os.path.join(self.craw_home, 'bin', 'craw_coverage') if self.local_install else which('craw_coverage') - - - def tearDown(self): - try: - shutil.rmtree(self.out_dir) - pass - except: - pass - - - def test_bam_with_fixed_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'small.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --bam={bam_file} --annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - bam_file=os.path.join(self._data_dir, 'small.bam'), - annot_file=os.path.join(self._data_dir, 'annotation_wo_start.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: 
- test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_bam_with_chr_strand_col(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'coverage_fix_window_chr_strand_col.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --bam={bam_file} --annot={annot_file} " \ - "--chr-col={chr_col} " \ - "--strand-col={strand_col} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - bam_file=os.path.join(self._data_dir, 'small.bam'), - annot_file=os.path.join(self._data_dir, 'annotation_wo_start_chr_strand_col.txt'), - ref_col='Position', - chr_col='chr', - strand_col='brin', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - 
test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_bam_with_var_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'coverage_var_window.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --bam={bam_file} --annot={annot_file} " \ - "--ref-col={ref_col} " \ - "--start-col={start_col} " \ - "--stop-col={stop_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - bam_file=os.path.join(self._data_dir, 'small.bam'), - annot_file=os.path.join(self._data_dir, 'annotation_w_start.txt'), - ref_col='Position', - start_col='beg', - stop_col='end', - qual=15, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - 
- def test_wig_with_fixed_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_fixed_window.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig={wig_file} --annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - wig_file=os.path.join(self._data_dir, 'small_fixed.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_wig_with_var_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated 
file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_var_window.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig={wig_file} --annot={annot_file} " \ - "--ref-col={ref_col} " \ - "--start-col={start_col} " \ - "--stop-col={stop_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - wig_file=os.path.join(self._data_dir, 'small_variable.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_var_win.txt'), - ref_col='Position', - start_col='beg', - stop_col='end', - qual=15, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_2wig_with_fixed_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 
'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_splited_fixed_window.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig-for={wig_for} --wig-rev={wig_rev} " \ - "--annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - wig_for=os.path.join(self._data_dir, 'small_fixed.wig'), - wig_rev=os.path.join(self._data_dir, 'small_fixed_reverse.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_only_forward_wig(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - 
os.makedirs(self.out_dir) - output_filename = 'wig_only_forward.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig-for={wig_for} " \ - "--annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - wig_for=os.path.join(self._data_dir, 'small_fixed.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_only_reverse_wig(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_only_reverse.cov' - test_result_path = os.path.join(self.out_dir, 
output_filename) - command = "{bin} --wig-rev={wig_rev} " \ - "--annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - wig_rev=os.path.join(self._data_dir, 'small_fixed_reverse.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_forward_n_mixed_wig(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_only_forward.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig={wig_file} " \ - "--wig-for={wig_for} " \ - 
"--annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - wig_file=os.path.join(self._data_dir, 'small_variable.wig'), - wig_for=os.path.join(self._data_dir, 'small_fixed.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 1) - last_line = cov_process.stderr.readlines()[-1].decode('utf-8') - self.assertEqual(last_line, - "argparse.ArgumentError: '--wig' option cannot be specified in the same time as '--wig-for' or '--wig-rev' options.\n") - - - def test_bam_n_wig(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_only_forward.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} " \ - "--bam={bam_file} " \ - "--wig={wig_file} " \ - "--annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - bam_file=os.path.join(self._data_dir, 'small.bam'), - wig_file=os.path.join(self._data_dir, 'small_variable.wig'), - annot_file=os.path.join(self._data_dir, 
'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 1) - last_line = cov_process.stderr.readlines()[-1].decode('utf-8') - self.assertEqual(last_line, - "argparse.ArgumentError: '--bam' option cannot be specified in the same time as '--wig', '--wig-for' or '--wig-rev' options.\n") - - def test_no_input_file(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_only_forward.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} " \ - "--annot={annot_file} " \ - "--before={before} " \ - "--after={after} " \ - "--ref-col={ref_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} ".format( - bin=self.bin, - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), - ref_col='Position', - before=5, - after=3, - qual=0, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : 
{1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 1) - last_line = cov_process.stderr.readlines()[-1].decode('utf-8') - self.assertEqual(last_line, - "argparse.ArgumentError: At least one of these options must be specified '--bam', '--wig' , '--wig-for', '--wig-rev'.\n") - - - def test_resized_var_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_var_window_justify.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig={wig_file} --annot={annot_file} " \ - "--ref-col={ref_col} " \ - "--start-col={start_col} " \ - "--stop-col={stop_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} " \ - "--justify 10".format( - bin=self.bin, - wig_file=os.path.join(self._data_dir, 'small_variable.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_var_win.txt'), - ref_col='Position', - start_col='beg', - stop_col='end', - qual=15, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, 
output_filename) - with open(expected_result_path) as expected_result_file: - expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - def test_sum_var_window(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'wig_var_window_sum.cov' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} --wig={wig_file} --annot={annot_file} " \ - "--ref-col={ref_col} " \ - "--start-col={start_col} " \ - "--stop-col={stop_col} " \ - "--qual-thr={qual} " \ - "--quiet " \ - "--output={out_file} " \ - "--sum".format( - bin=self.bin, - wig_file=os.path.join(self._data_dir, 'small_variable.wig'), - annot_file=os.path.join(self._data_dir, 'annotation_4_wig_var_win.txt'), - ref_col='Position', - start_col='beg', - stop_col='end', - qual=15, - out_file=test_result_path - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "coverage execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "coverage finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - - expected_result_path = os.path.join(self._data_dir, output_filename) - with open(expected_result_path) as expected_result_file: - 
expected_result = expected_result_file.readlines() - - with open(test_result_path) as test_result_file: - test_result = test_result_file.readlines() - - self._check_coverage_file(expected_result, test_result) - - - - def _check_coverage_file(self, expected_result, test_result): - for expected, result in zip_longest(expected_result, test_result, fillvalue=''): - if expected.startswith("# Version:"): - continue - elif expected.startswith("# - pysam"): - continue - elif expected.startswith("# - scipy"): - continue - elif expected.startswith("# --annot="): - continue - elif expected.startswith("# --bam="): - continue - elif expected.startswith("# --wig="): - continue - elif expected.startswith("# --wig-for="): - continue - elif expected.startswith("# --wig-rev="): - continue - elif expected.startswith("# --output="): - continue - else: - self.assertEqual(expected, result) - diff --git a/tests/functional/test_craw_htmp.py b/tests/functional/test_craw_htmp.py deleted file mode 100644 index 32ed22cad099f5618f25850fb4c3a1de9c44c48d..0000000000000000000000000000000000000000 --- a/tests/functional/test_craw_htmp.py +++ /dev/null @@ -1,221 +0,0 @@ -########################################################################### -# # -# This file is part of Counter RNAseq Window (craw) package. # -# # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # -# see COPYRIGHT file for details. # -# # -# craw is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# craw is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# See the GNU General Public License for more details. 
# -# # -# You should have received a copy of the GNU General Public License # -# along with craw (see COPYING file). # -# If not, see <http://www.gnu.org/licenses/>. # -# # -########################################################################### - -import shutil -import tempfile -import os -from subprocess import Popen, PIPE -from PIL import Image - -from tests import CRAWTest, which - - -class Test(CRAWTest): - - def setUp(self): - if 'CRAW_HOME' in os.environ: - self.craw_home = os.environ['CRAW_HOME'] - self.local_install = True - else: - self.local_install = False - self.craw_home = os.path.normpath(os.path.abspath(os.path.join(os.path.dirname(__file__), '..' '..'))) - self.tmp_dir = tempfile.gettempdir() - self.bin = os.path.join(self.craw_home, 'bin', 'craw_htmp') if self.local_install else which('craw_htmp') - - - def tearDown(self): - try: - shutil.rmtree(self.out_dir) - except: - pass - - - def test_raw(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'htmp_raw_lin.png' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} " \ - "--size {size} " \ - "--out={out_file} " \ - "--quiet " \ - "{cov_file}".format( - bin=self.bin, - size='raw', - out_file=test_result_path, - cov_file=os.path.join(self._data_dir, '4_htmp.cov') - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "craw_htmp execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - 
"craw_htmp finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - for sense in ('sense', 'antisense'): - filename, suffix = os.path.splitext(output_filename) - filename = "{}.{}{}".format(filename, sense, suffix) - expected_result_path = os.path.join(self._data_dir, filename) - expected_img = Image.open(expected_result_path) - result_path, suffix = os.path.splitext(test_result_path) - result_path = "{}.{}{}".format(result_path, sense, suffix) - result_img = Image.open(result_path) - self.assertImageAlmostEqual(expected_img, result_img) - - - def test_raw_log(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'htmp_raw_log.png' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} " \ - "--size {size} " \ - "--norm {norm} " \ - "--out={out_file} " \ - "--quiet " \ - "{cov_file}".format( - bin=self.bin, - size='raw', - norm='log', - out_file=test_result_path, - cov_file=os.path.join(self._data_dir, '4_htmp.cov') - ) - ##print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "craw_htmp execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "craw_htmp finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in 
cov_process.stderr.readlines()]), - )) - for sense in ('sense', 'antisense'): - filename, suffix = os.path.splitext(output_filename) - filename = "{}.{}{}".format(filename, sense, suffix) - expected_result_path = os.path.join(self._data_dir, filename) - result_path, suffix = os.path.splitext(test_result_path) - result_path = "{}.{}{}".format(result_path, sense, suffix) - expected_img = Image.open(expected_result_path) - result_img = Image.open(result_path) - self.assertImageAlmostEqual(expected_img, result_img, delta=1.0, msg=sense) - - - def test_raw_log_row(self): - """ - | test if returncode of coverage is 0 and - | then test if the generated file is the same as a reference file - """ - self.out_dir = os.path.join(self.tmp_dir, 'craw_test') - os.makedirs(self.out_dir) - output_filename = 'htmp_raw_log+row.png' - test_result_path = os.path.join(self.out_dir, output_filename) - command = "{bin} " \ - "--size {size} " \ - "--norm {norm} " \ - "--out={out_file} " \ - "--quiet " \ - "{cov_file}".format( - bin=self.bin, - size='raw', - norm='log+row', - out_file=test_result_path, - cov_file=os.path.join(self._data_dir, '4_htmp.cov') - ) - # print("\n@@@", command) - if not self.bin: - raise RuntimeError('coverage not found, CRAW_HOME must be either in your path or CRAW_HOME must be defined ' - 'command launch: \n{}'.format(command)) - - try: - cov_process = Popen(command, - shell=True, - stdin=None, - stderr=PIPE, - close_fds=False - ) - except Exception as err: - msg = "craw_htmp execution failed: command = {0} : {1}".format(command, err) - print() - print(msg) - raise err from None - - cov_process.wait() - self.assertEqual(cov_process.returncode, 0, - "craw_htmp finished with non zero exit code: {0} command launched=\n{1}\n{2}".format( - cov_process.returncode, - command, - ''.join([l.decode('utf-8') for l in cov_process.stderr.readlines()]), - )) - for sense in ('sense', 'antisense'): - filename, suffix = os.path.splitext(output_filename) - filename = 
"{}.{}{}".format(filename, sense, suffix) - expected_result_path = os.path.join(self._data_dir, filename) - result_path, suffix = os.path.splitext(test_result_path) - result_path = "{}.{}{}".format(result_path, sense, suffix) - expected_img = Image.open(expected_result_path) - result_img = Image.open(result_path) - self.assertImageAlmostEqual(expected_img, result_img) - diff --git a/tests/run_tests.py b/tests/run_tests.py index f65568920eab7af083619573366df8a27b2fc49b..cf4723ff658f0bcdaecd0851aea01cc775c3c905 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -2,8 +2,8 @@ # # # This file is part of Counter RNAseq Window (craw) package. # # # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # @@ -28,10 +28,15 @@ import sys import os -def _run(test_files, test_root_path, verbosity=0): +def discover(test_files=None, test_root_path=None): + if not test_root_path: + test_root_path = os.path.dirname(__file__) + if not test_files: suite = unittest.TestLoader().discover(test_root_path, pattern="test_*.py") + else: + test_files = [os.path.abspath(f) for f in test_files] test_files = [t for t in test_files if test_root_path in t] suite = unittest.TestSuite() for test_file in test_files: @@ -44,31 +49,12 @@ def _run(test_files, test_root_path, verbosity=0): else: sys.stderr.write("{0} : no such file or directory\n".format(test_file)) - test_runner = unittest.TextTestRunner(verbosity=verbosity).run(suite) - return test_runner + return suite -def run_unittests(test_files, verbosity=0): +def run_tests(test_files, verbosity=0): """ Execute Unit Tests - - :param test_files: the file names of tests to run. - of it is empty, discover recursively tests from 'tests/unit' directory. 
- a test is python module with the test_*.py pattern - :type test_files: list of string - :param verbosity: the verbosity of the output - :type verbosity: positive int - :return: True if the test passed successfully, False otherwise. - :rtype: bool - """ - test_root_path = os.path.join(os.path.dirname(__file__), 'unit') - return _run(test_files, test_root_path, verbosity) - - -def run_functional_tests(test_files, verbosity=0): - """ - Execute Functional Tests - :param test_files: the file names of tests to run. of it is empty, discover recursively tests from 'tests/unit' directory. a test is python module with the test_*.py pattern @@ -78,12 +64,15 @@ def run_functional_tests(test_files, verbosity=0): :return: True if the test passed successfully, False otherwise. :rtype: bool """ - test_root_path = os.path.join(os.path.dirname(__file__), 'functional') - return _run(test_files, test_root_path, verbosity) - + test_root_path = os.path.abspath(os.path.dirname(__file__)) + suite = discover(test_files, test_root_path) + test_runner = unittest.TextTestRunner(verbosity=verbosity).run(suite) + return test_runner -if __name__ == '__main__': +def main(args=None): + if args is None: + args = sys.argv[1:] from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument("tests", @@ -91,18 +80,6 @@ if __name__ == '__main__': default=False, help="name of test to execute") - parser.add_argument("--unit", - dest='unit', - action='store_true', - default=False, - help="execute unit tests") - - parser.add_argument("--functional", - dest='functional', - action='store_true', - default=False, - help="execute functional tests") - parser.add_argument("-v", "--verbose", dest="verbosity", action="count", @@ -110,52 +87,27 @@ if __name__ == '__main__': default=0 ) - args = parser.parse_args() - if not any((args.unit, args.functional)): - args.unit, args.functional = True, True - - result_all_tests = [] - - if args.unit: - print("\n", "#" * 70, sep='') - print("Test Runner: 
Unit tests") - print("#" * 70) - - old_path = sys.path - if 'CRAW_HOME' in os.environ and os.environ['CRAW_HOME']: - CRAW_HOME = os.environ['CRAW_HOME'] - if CRAW_HOME not in sys.path: - sys.path.insert(0, CRAW_HOME) - test_runner = run_unittests(args.tests, verbosity=args.verbosity) - unit_results = test_runner.wasSuccessful() - result_all_tests.append(unit_results) - sys.path = old_path - - if args.functional: - print("\n", "#" * 70, sep='') - print("Test Runner: Functional tests") - print("#" * 70) - - old_path = sys.path - if 'CRAW_HOME' in os.environ and os.environ['CRAW_HOME']: - CRAW_HOME = os.environ['CRAW_HOME'] - if CRAW_HOME not in sys.path: - sys.path.insert(0, CRAW_HOME) - else: - home_tests = os.path.normpath(os.path.join(os.path.dirname(__file__), '..')) - # we are in the case where we tests an installed CRAW - # so the libraries are already in PYTHONPATH - # but test are not - # we must had tests parent dir in pythonpath - # but after the standard libraries containing craw - # as we want to run CRAW using installed libraries - sys.path.append(home_tests) - test_runner = run_functional_tests(args.tests, verbosity=args.verbosity) - functional_results = test_runner.wasSuccessful() - result_all_tests.append(functional_results) - sys.path = old_path - - if all(result_all_tests): + args = parser.parse_args(args) + + CRAW_HOME = os.path.abspath(os.path.join(__file__, '..', '..')) + + old_path = sys.path + + if CRAW_HOME not in sys.path: + # need to add tests in path + # tests inherits from IntegronTest which is located in tests/__init__.py + sys.path.insert(0, CRAW_HOME) + + test_runner = run_tests(args.tests, verbosity=args.verbosity) + unit_results = test_runner.wasSuccessful() + sys.path = old_path + return unit_results + + +if __name__ == '__main__': + + unit_results = main(sys.argv[1:]) + if unit_results: sys.exit(0) else: sys.exit(1) diff --git a/tests/unit/test_annotation.py b/tests/test_annotation.py similarity index 97% rename from 
tests/unit/test_annotation.py rename to tests/test_annotation.py index 07d4e5c00fdfd6dd47782645b273e3bbe2daf4f1..7e575e8fbafd9a6dbb3138bb35053a7ceb8c67a9 100644 --- a/tests/unit/test_annotation.py +++ b/tests/test_annotation.py @@ -2,8 +2,8 @@ # # # This file is part of Counter RNAseq Window (craw) package. # # # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # @@ -24,12 +24,7 @@ import os -try: - from tests import CRAWTest -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - raise ImportError("Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err)) - +from tests import CRAWTest from craw.annotation import Entry, Idx, new_entry_type, AnnotationParser diff --git a/tests/unit/test_coverage.py b/tests/test_coverage.py similarity index 87% rename from tests/unit/test_coverage.py rename to tests/test_coverage.py index f5ddb84acb699004e8507ec102a43250a555a17b..161f362f5d8d6823799ff3cde897c23c41aa5708 100644 --- a/tests/unit/test_coverage.py +++ b/tests/test_coverage.py @@ -2,8 +2,8 @@ # # # This file is part of Counter RNAseq Window (craw) package. # # # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. 
# # # # craw is free software: you can redistribute it and/or modify # @@ -28,15 +28,11 @@ import pysam import logging from itertools import zip_longest -try: - from tests import CRAWTest -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - raise ImportError("Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err)) +from tests import CRAWTest from craw.wig import Genome, WigParser, _log -from craw.coverage import get_raw_bam_coverage, get_raw_wig_coverage, get_raw_coverage_function -from craw.coverage import sum_coverage_maker, padded_coverage_maker, resized_coverage_maker +import craw.coverage + from craw.annotation import new_entry_type @@ -49,13 +45,13 @@ class TestCoverage(CRAWTest): def test_get_coverage_function(self): sam_path = os.path.join(self._data_dir, 'small.bam') bam_obj = pysam.AlignmentFile(sam_path, "rb") - func = get_raw_coverage_function(bam_obj) - self.assertEqual(get_raw_bam_coverage, func) + func = craw.coverage.get_raw_coverage_function(bam_obj) + self.assertEqual(craw.coverage.get_raw_bam_coverage, func) genome = Genome() - func = get_raw_coverage_function(genome) - self.assertEqual(func, get_raw_wig_coverage) + func = craw.coverage.get_raw_coverage_function(genome) + self.assertEqual(func, craw.coverage.get_raw_wig_coverage) with self.assertRaises(RuntimeError) as ctx: - get_raw_coverage_function('foo') + craw.coverage.get_raw_coverage_function('foo') self.assertEqual(str(ctx.exception), "get_coverage support only 'wig.Genome' or 'pysam.calignmentfile.AlignmentFile' " "as Input, not str") @@ -100,15 +96,35 @@ class TestCoverage(CRAWTest): stop = values[-1] + before start = max(start, 0) stop = max(stop, 0) - forward_cov, reverse_cov = get_raw_bam_coverage(sam_file, - annot_entry, - start, - stop, - qual_thr=0) + forward_cov, reverse_cov = craw.coverage.get_raw_bam_coverage(sam_file, + annot_entry, + start, + stop, + qual_thr=0) 
self.assertListEqual(forward_cov, exp_val['for']) self.assertListEqual(reverse_cov, exp_val['rev']) + def test_get_raw_bam_coverage_bad_pysam(self): + class FakeAlignmentFile: + def __init__(self, *args, **kwargs): + pass + + def count_coverage(self, *args, **kwarg): + raise SystemError + + annot_fields = ['name', 'gene', 'chromosome', 'strand', 'Position'] + ref_col = 'Position' + ne_class = new_entry_type('Foo', annot_fields, ref_col) + annot_entry = ne_class(['YEL072W', 'RMD6', 'chrV', '+', '14415']) + with self.catch_log(): + with self.assertRaises(SystemError) as ctx: + craw.coverage.get_raw_bam_coverage(FakeAlignmentFile(), + annot_entry, + 0, + 3, + qual_thr=0) + def test_get_raw_wig_coverage(self): wig_parser = WigParser(os.path.join(self._data_dir, 'small_fixed.wig')) @@ -156,7 +172,7 @@ class TestCoverage(CRAWTest): stop = (values[-1] - 1) + before + 1 start = max(start, 0) stop = max(stop, 0) - forward_cov, reverse_cov = get_raw_wig_coverage(genome, + forward_cov, reverse_cov = craw.coverage.get_raw_wig_coverage(genome, annot_entry, start, stop, @@ -192,7 +208,7 @@ class TestCoverage(CRAWTest): # get_wig_coverage work with 0-based positions # whereas annot_entry with 1-based positions - get_coverage = padded_coverage_maker(genome, 5, 9, qual_thr=0) + get_coverage = craw.coverage.padded_coverage_maker(genome, 5, 9, qual_thr=0) for values, exp_val in zip_longest(value_lines, exp_values): annot_entry = ne_class([str(v) for v in values]) # get_bam_coverage work with 0-based positions @@ -237,7 +253,7 @@ class TestCoverage(CRAWTest): before = 5 after = 3 - get_coverage = padded_coverage_maker(genome, 5, 3, qual_thr=0) + get_coverage = craw.coverage.padded_coverage_maker(genome, 5, 3, qual_thr=0) for values, exp_val in zip_longest(value_lines, exp_values): annot_entry = ne_class([str(v) for v in values]) # get_wig_coverage work with 0-based positions @@ -288,7 +304,7 @@ class TestCoverage(CRAWTest): before = 5 after = 3 - get_coverage = 
sum_coverage_maker(genome, qual_thr=0) + get_coverage = craw.coverage.sum_coverage_maker(genome, qual_thr=0) for values, exp_val in zip_longest(value_lines, exp_values): annot_entry = ne_class([str(v) for v in values]) # get_wig_coverage work with 0-based positions @@ -331,7 +347,7 @@ class TestCoverage(CRAWTest): ################################### # keep the number of values as is # ################################### - get_coverage = resized_coverage_maker(genome, 8) + get_coverage = craw.coverage.resized_coverage_maker(genome, 8) annot_entry = ne_class([str(v) for v in value_line]) # get_bam_coverage work with 0-based positions # whereas annot_entry with 1-based positions @@ -353,7 +369,7 @@ class TestCoverage(CRAWTest): 'rev': [12.0, 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5, 16.0, 16.5, 17.0, 17.5, 18.0, 18.5, 19.0] } - get_coverage = resized_coverage_maker(genome, 15) + get_coverage = craw.coverage.resized_coverage_maker(genome, 15) forward_cov, reverse_cov = get_coverage(annot_entry, start, stop) self.assertListEqual(forward_cov, exp_values['for']) @@ -364,7 +380,7 @@ class TestCoverage(CRAWTest): ############################### value_line = ['YEL072W', 'RMD6', 'chrV', '+', 15, 12, 18] annot_entry = ne_class([str(v) for v in value_line]) - get_coverage = resized_coverage_maker(genome, 4) + get_coverage = craw.coverage.resized_coverage_maker(genome, 4) start = annot_entry.start - 1 stop = annot_entry.stop forward_cov, reverse_cov = get_coverage(annot_entry, start, stop) @@ -379,7 +395,7 @@ class TestCoverage(CRAWTest): ########################### value_line = ['YEL072W', 'RMD6', 'chrV', '+', 2, -2, 4] annot_entry = ne_class([str(v) for v in value_line]) - get_coverage = resized_coverage_maker(genome, 5) + get_coverage = craw.coverage.resized_coverage_maker(genome, 5) start = annot_entry.start - 1 stop = annot_entry.stop forward_cov, reverse_cov = get_coverage(annot_entry, start, stop) diff --git a/tests/unit/test_craw.py b/tests/test_craw.py similarity index 
75% rename from tests/unit/test_craw.py rename to tests/test_craw.py index fd632a0ad8527c3a8b24708f2b55beb8c3c5dca7..189d644fa25d6d3e13df1728ad78c440826f3596 100644 --- a/tests/unit/test_craw.py +++ b/tests/test_craw.py @@ -2,8 +2,8 @@ # # # This file is part of Counter RNAseq Window (craw) package. # # # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # @@ -24,22 +24,20 @@ import logging -try: - from tests import CRAWTest -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - raise ImportError("Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err)) - +from tests import CRAWTest import craw + class MyTestCase(CRAWTest): def test_get_version_message(self): - self.assertTrue(craw.get_version_message().startswith( - "craw NOT packaged, it should be a development version | Python 3.")) + craw_version = craw.__version__ craw.__version__ = 1.0 - self.assertTrue(craw.get_version_message().startswith("craw 1.0 | Python 3.")) + try: + self.assertTrue(craw.get_version_message().startswith("craw 1.0 | Python 3.")) + finally: + craw.__version__ = craw_version def test_init_logger(self): @@ -49,4 +47,4 @@ class MyTestCase(CRAWTest): craw_log = logging.getLogger('craw') self.assertEqual(len(craw_log.handlers), 1) self.assertEqual(craw_log.handlers[0].__class__.__name__, 'StreamHandler') - self.assertEqual(craw_log.getEffectiveLevel(), log_level) \ No newline at end of file + self.assertEqual(craw_log.getEffectiveLevel(), log_level) diff --git a/tests/test_craw_coverage.py b/tests/test_craw_coverage.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8b239ae88e9216168aa220e64fe580db842d1d --- /dev/null +++ 
b/tests/test_craw_coverage.py @@ -0,0 +1,635 @@ +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. # +# # +# craw is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# craw is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with craw (see COPYING file). # +# If not, see <http://www.gnu.org/licenses/>. # +# # +########################################################################### + +import shutil +import tempfile +import os +from builtins import type +from itertools import zip_longest +import argparse +import sys +import pysam + +import craw +from craw.scripts import craw_coverage as craw_cov +from tests import CRAWTest + + +class TestCoverage(CRAWTest): + + def setUp(self): + tmp_dir = tempfile.gettempdir() + self.out_dir = os.path.join(tmp_dir, 'test_craw') + if os.path.exists(self.out_dir) and os.path.isdir(self.out_dir): + shutil.rmtree(self.out_dir) + os.makedirs(self.out_dir) + self.bin = 'craw_coverage' + + + def tearDown(self): + try: + shutil.rmtree(self.out_dir) + pass + except: + pass + + + def test_positive_int(self): + self.assertEqual(craw_cov.positive_int(3), 3) + + with self.assertRaises(argparse.ArgumentTypeError) as ctx: + craw_cov.positive_int('Foo') + self.assertEqual(str(ctx.exception), "must be a positive integer, got: Foo") + + with 
self.assertRaises(argparse.ArgumentTypeError) as ctx: + craw_cov.positive_int(-1) + self.assertEqual(str(ctx.exception), "must be a positive integer, got: -1") + + + def test_quality_checker(self): + self.assertEqual(craw_cov.quality_checker(3), 3) + + with self.assertRaises(argparse.ArgumentTypeError) as ctx: + craw_cov.quality_checker('Foo') + self.assertEqual(str(ctx.exception), "must be a integer between 0 and 42, got: Foo") + + with self.assertRaises(argparse.ArgumentTypeError) as ctx: + craw_cov.quality_checker(-1) + self.assertEqual(str(ctx.exception), "must be a integer between 0 and 42, got: -1") + + with self.assertRaises(argparse.ArgumentTypeError) as ctx: + craw_cov.quality_checker(45) + self.assertEqual(str(ctx.exception), "must be a integer between 0 and 42, got: 45") + + + def test_get_result_header(self): + header = """# Running Counter RnAseq Window craw_coverage +# +# Version: craw {cr_ver} | Python {py} +# Using: +# - pysam {pysam} (samtools {sam}) +# - scipy {scipy} (only for --justify opt) +# +# craw_coverage run with the following arguments: +# --after=3 +# --annot=tests/data/annotation_wo_start.txt +# --bam=tests/data/small.bam +# --before=5 +# --chr-col=chromosome +# --output=foo.cov +# --qual-thr=0 +# --quiet=1 +# --ref-col=Position +# --sense=mixed +# --sep=\t +# --strand-col=strand +# --suffix=cov +# --verbose=0 +sense\tname\tgene\tchromosome\tstrand\tPosition\t-5\t-4\t-3\t-2\t-1\t0\t1\t2\t3""".format( + cr_ver=craw.__version__, + py='{}.{}'.format(sys.version_info.major, sys.version_info.minor), + pysam=pysam.__version__, + sam=pysam.__samtools_version__, + scipy=craw.coverage.scipy.__version__) + + args = argparse.Namespace() + args.bam = 'tests/data/small.bam' + args.annot = 'tests/data/annotation_wo_start.txt' + args.ref_col = 'Position' + args.before = 5 + args.after = 3 + args.qual_thr = 0 + args.quiet = 1 + args.output = 'foo.cov' + args.justify = False + args.sum = False + args.start_col = False + args.chr_col = 'chromosome' + 
args.sense = 'mixed' + args.sep = '\t' + args.strand_col = 'strand' + args.suffix = 'cov' + args.verbose = 0 + + annot = self.fake_annotation_parser(['name', 'gene', 'chromosome', 'strand', 'Position'], (0, 0)) + self.maxDiff = None + + self.assertEqual(craw_cov.get_result_header(annot, args), + header.rstrip()) + + def test_get_version_message(self): + msg = """craw {cr_ver} | Python {py} +Using: + - pysam {pysam} (samtools {sam}) + - scipy {scipy} (only for --justify opt) +""".format(cr_ver=craw.__version__, + py='{}.{}'.format(sys.version_info.major, sys.version_info.minor), + pysam=pysam.__version__, + sam=pysam.__samtools_version__, + scipy=craw.coverage.scipy.__version__) + + self.assertEqual(craw_cov.get_version_message(), + msg) + + def test_get_results_file(self): + try: + sense, antisense = craw_cov.get_results_file('S', os.path.join(self.out_dir, 'foo'), 'cov') + sense_file_name = os.path.join(self.out_dir, 'foo.sense.cov') + self.assertTrue(os.path.exists( + sense_file_name) + ) + self.assertEqual(sense.name, sense_file_name) + self.assertEqual(antisense.name, os.devnull) + finally: + sense.close() + antisense.close() + self.tearDown() + + try: + self.setUp() + sense, antisense = craw_cov.get_results_file('AS', os.path.join(self.out_dir, 'foo'), 'cov') + antisense_file_name = os.path.join(self.out_dir, 'foo.antisense.cov') + self.assertTrue(os.path.exists( + antisense_file_name) + ) + self.assertEqual(sense.name, os.devnull) + self.assertEqual(antisense.name, antisense_file_name) + finally: + sense.close() + antisense.close() + self.tearDown() + + try: + self.setUp() + sense, antisense = craw_cov.get_results_file('split', os.path.join(self.out_dir, 'foo'), 'cov') + sense_file_name = os.path.join(self.out_dir, 'foo.sense.cov') + antisense_file_name = os.path.join(self.out_dir, 'foo.antisense.cov') + self.assertTrue(os.path.exists( + sense_file_name) + ) + self.assertTrue(os.path.exists( + antisense_file_name) + ) + self.assertEqual(sense.name, 
sense_file_name) + self.assertEqual(antisense.name, antisense_file_name) + finally: + sense.close() + antisense.close() + self.tearDown() + + try: + self.setUp() + sense, antisense = craw_cov.get_results_file('mixed', os.path.join(self.out_dir, 'foo'), 'cov') + output_file_name = os.path.join(self.out_dir, 'foo.cov') + self.assertTrue(os.path.exists( + output_file_name) + ) + self.assertEqual(sense.name, output_file_name) + self.assertIs(antisense, sense) + finally: + sense.close() + antisense.close() + self.tearDown() + + + def test_parse_args(self): + args = {'bam': 'small.bam', + 'annot': 'annotation_wo_start.txt', + 'ref_col': 'Position', + 'before': 5, + 'after': 3, + 'qual_thr': 0, + 'output': 'result_path'} + command = "--bam {bam} --annot {annot} --before={before} --after={after} " \ + "--ref-col={ref_col} --qual-thr={qual_thr} --quiet --output={output} ".format(**args) + self.check_args(args, command) + + args = {'bam': 'small.bam', + 'annot': 'annotation_wo_start_chr_strand_col.txt', + 'ref_col': 'Position', + 'chr_col': 'chr', + 'strand_col': 'brin', + 'before': 5, + 'after': 3, + 'qual_thr': 0, + 'output': 'test_result_path' + } + command = "--bam={bam} --annot={annot} --chr-col={chr_col} " \ + "--strand-col={strand_col} --before={before} --after={after} --ref-col={ref_col} " \ + "--qual-thr={qual_thr} --quiet --output={output}" + self.check_args(args, command) + + args = {'bam': 'small.bam', + 'annot': 'annotation_w_start.txt', + 'ref_col': 'Position', + 'start_col': 'beg', + 'stop_col': 'end', + 'qual_thr': 15, + 'output': 'result_path'} + + command = "--bam={bam} --annot={annot} " \ + "--ref-col={ref_col} --start-col={start_col} --stop-col={stop_col} " \ + "--qual-thr={qual_thr} --quiet --output={output}" + self.check_args(args, command) + + args = {'wig': 'small_fixed.wig', + 'annot': 'annotation_4_wig_fixed_win.txt', + 'ref_col': 'Position', + 'before': 5, + 'after': 3, + 'qual_thr': 0, + 'output': 'result_path'} + command = "--wig={wig} 
--annot={annot} --before={before} --after={after} " \ + "--ref-col={ref_col} --qual-thr={qual_thr} --quiet --output={output} " + self.check_args(args, command) + + args = {'wig': 'small_fixed.wig', + 'annot': 'annotation_4_wig_fixed_win.txt', + 'ref_col': 'Position', + 'window': 5, + 'qual_thr': 0, + 'output': 'result_path'} + command = "--wig={wig} --annot={annot} --window={window} " \ + "--ref-col={ref_col} --qual-thr={qual_thr} --quiet --output={output} " + self.check_args(args, command) + + command = "--annot=annot_file --before=3 --after=5 --ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "At least one of these options must be specified " + "'--bam', '--wig' , '--wig-for', '--wig-rev'.") + + command = "--wig=wig --bam=bam --wig-for=wig_for --wig-rev=wig_rev --annot=annot_file " \ + "--before=3 --after=5 --ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), + "'--bam', '--wig' , '--wig-for', '--wig-rev' cannot specify at the same time.") + + command = "--wig=wig --bam=bam --annot=annot_file " \ + "--before=3 --after=5 --ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "'--bam' option cannot be specified in the same time as " + "'--wig', '--wig-for' or '--wig-rev' options.") + + command = "--wig=wig --wig-for=wig_for --wig-rev=wig_rev --annot=annot_file " \ + "--before=3 --after=5 --ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "'--wig' option cannot be specified in the same time as " + "'--wig-for' or 
'--wig-rev' options.") + + command = "--wig=wig --annot=annot_file " \ + "--ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "[--window or [--before, --after] or [--start-col, --stop-col] options " + "must be specified") + + command = "--wig=wig --annot=annot_file " \ + "--window=5 --before=3 --after=5 --ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "options [--before, --after] and --window are mutually exclusives.") + + command = "--wig=wig --annot=annot_file --after=3 --start-col=start --stop-col=stop " \ + "--ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "Options [--before, --after, --window] and [--start-col, --stop-col] " + "are mutually exclusives.") + + command = "--wig=wig --annot=annot_file --after=3 " \ + "--ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "The two options --after and --before work together. " + "The both options must be specified in same time") + + command = "--wig=wig --annot=annot_file --start-col=start " \ + "--ref-col=Position --qual-thr=0 --quiet --output=output" + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_cov.parse_args(command.split()) + self.assertEqual(str(ctx.exception), "The two options --start-col and --stop-col work together. 
" + "The both options must be specified in same time") + + + def check_args(self, args, tpl): + command = tpl.format(**args) + got_args = craw_cov.parse_args(command.split()) + for opt in args: + self.assertEqual(getattr(got_args, opt), args[opt]) + + + def test_bam_with_fixed_window(self): + output_filename = 'small.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + + args = {'bam': os.path.join(self._data_dir, 'small.bam'), + 'annot': os.path.join(self._data_dir, 'annotation_wo_start.txt'), + 'ref_col': 'Position', + 'before': 5, + 'after': 3, + 'qual_thr': 0, + 'output': test_result_path} + command = "--bam {bam} --annot {annot} --before={before} --after={after} " \ + "--ref-col={ref_col} --qual-thr={qual_thr} --quiet --output={output} ".format(**args) + craw_cov.main(command.split()) + + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_bam_with_chr_strand_col(self): + output_filename = 'coverage_fix_window_chr_strand_col.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + args = {'bam': os.path.join(self._data_dir, 'small.bam'), + 'annot': os.path.join(self._data_dir, 'annotation_wo_start_chr_strand_col.txt'), + 'ref_col': 'Position', + 'chr_col': 'chr', + 'strand_col': 'brin', + 'before': 5, + 'after': 3, + 'qual_thr': 0, + 'output': test_result_path + } + command = "--bam={bam} --annot={annot} --chr-col={chr_col} " \ + "--strand-col={strand_col} --before={before} --after={after} --ref-col={ref_col} " \ + "--qual-thr={qual_thr} --quiet --output={output}".format(**args) + + craw_cov.main(command.split()) + + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as 
expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_bam_with_var_window(self): + output_filename = 'coverage_var_window.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + args = {'bam': os.path.join(self._data_dir, 'small.bam'), + 'annot': os.path.join(self._data_dir, 'annotation_w_start.txt'), + 'ref_col': 'Position', + 'start_col': 'beg', + 'stop_col': 'end', + 'qual_thr': 15, + 'output': test_result_path + } + command = "--bam={bam} --annot={annot} " \ + "--ref-col={ref_col} --start-col={start_col} --stop-col={stop_col} " \ + "--qual-thr={qual_thr} --quiet --output={output}".format(**args) + + craw_cov.main(command.split()) + + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_wig_with_fixed_window(self): + output_filename = 'wig_fixed_window.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + args = {'wig': os.path.join(self._data_dir, 'small_fixed.wig'), + 'annot': os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), + 'ref_col': 'Position', + 'before': 5, + 'after': 3, + 'qual_thr': 0, + 'output': test_result_path} + command = "--wig={wig} --annot={annot} --before={before} --after={after} " \ + "--ref-col={ref_col} --qual-thr={qual_thr} --quiet --output={output}".format(**args) + craw_cov.main(command.split()) + + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with 
open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_wig_with_var_window(self): + output_filename = 'wig_var_window.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--wig={wig_file} --annot={annot_file} " \ + "--ref-col={ref_col} --start-col={start_col} --stop-col={stop_col} " \ + "--qual-thr={qual} --quiet --output={out_file} ".format( + wig_file=os.path.join(self._data_dir, 'small_variable.wig'), + annot_file=os.path.join(self._data_dir, 'annotation_4_wig_var_win.txt'), + ref_col='Position', + start_col='beg', + stop_col='end', + qual=15, + out_file=test_result_path + ) + + craw_cov.main(command.split()) + + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_2wig_with_fixed_window(self): + """ + """ + output_filename = 'wig_splited_fixed_window.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--wig-for={wig_for} --wig-rev={wig_rev} --annot={annot_file} " \ + "--before={before} --after={after} --ref-col={ref_col} " \ + "--qual-thr={qual} --quiet " \ + "--output={out_file} ".format(wig_for=os.path.join(self._data_dir, 'small_fixed.wig'), + wig_rev=os.path.join(self._data_dir, 'small_fixed_reverse.wig'), + annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), + ref_col='Position', + before=5, + after=3, + qual=0, + out_file=test_result_path + ) + craw_cov.main(command.split()) + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with 
open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_only_forward_wig(self): + """ + """ + output_filename = 'wig_only_forward.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--wig-for={wig_for} --annot={annot_file} " \ + "--before={before} --after={after} " \ + "--ref-col={ref_col} --qual-thr={qual} " \ + "--quiet --output={out_file} ".format( + wig_for=os.path.join(self._data_dir, 'small_fixed.wig'), + annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), + ref_col='Position', + before=5, + after=3, + qual=0, + out_file=test_result_path + ) + craw_cov.main(command.split()) + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + + self._check_coverage_file(expected_result, test_result) + + + def test_only_reverse_wig(self): + """ + """ + output_filename = 'wig_only_reverse.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--wig-rev={wig_rev} --annot={annot_file} " \ + "--before={before} --after={after} " \ + "--ref-col={ref_col} --qual-thr={qual} --quiet " \ + "--output={out_file} ".format(wig_rev=os.path.join(self._data_dir, 'small_fixed_reverse.wig'), + annot_file=os.path.join(self._data_dir, 'annotation_4_wig_fixed_win.txt'), + ref_col='Position', + before=5, + after=3, + qual=0, + out_file=test_result_path + ) + craw_cov.main(command.split()) + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + + 
self._check_coverage_file(expected_result, test_result) + + + def test_resized_var_window(self): + """ + """ + output_filename = 'wig_var_window_justify.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--wig={wig_file} --annot={annot_file} " \ + "--ref-col={ref_col} --start-col={start_col} --stop-col={stop_col} " \ + "--qual-thr={qual} --quiet --output={out_file} " \ + "--justify 10".format(wig_file=os.path.join(self._data_dir, 'small_variable.wig'), + annot_file=os.path.join(self._data_dir, 'annotation_4_wig_var_win.txt'), + ref_col='Position', + start_col='beg', + stop_col='end', + qual=15, + out_file=test_result_path + ) + craw_cov.main(command.split()) + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, test_result) + + + def test_sum_var_window(self): + """ + """ + output_filename = 'wig_var_window_sum.cov' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--wig={wig_file} --annot={annot_file} " \ + "--ref-col={ref_col} --start-col={start_col} --stop-col={stop_col} " \ + "--qual-thr={qual} --quiet --output={out_file} " \ + "--sum".format(wig_file=os.path.join(self._data_dir, 'small_variable.wig'), + annot_file=os.path.join(self._data_dir, 'annotation_4_wig_var_win.txt'), + ref_col='Position', + start_col='beg', + stop_col='end', + qual=15, + out_file=test_result_path + ) + craw_cov.main(command.split()) + expected_result_path = os.path.join(self._data_dir, output_filename) + with open(expected_result_path) as expected_result_file: + expected_result = expected_result_file.readlines() + + with open(test_result_path) as test_result_file: + test_result = test_result_file.readlines() + self._check_coverage_file(expected_result, 
test_result) + + + def _check_coverage_file(self, expected_result, test_result): + for expected, result in zip_longest(expected_result, test_result, fillvalue=''): + if expected.startswith("# Version:"): + continue + elif expected.startswith("# - pysam"): + continue + elif expected.startswith("# - scipy"): + continue + elif expected.startswith("# --annot="): + continue + elif expected.startswith("# --bam="): + continue + elif expected.startswith("# --wig="): + continue + elif expected.startswith("# --wig-for="): + continue + elif expected.startswith("# --wig-rev="): + continue + elif expected.startswith("# --output="): + continue + else: + self.assertEqual(expected, result) + + + def fake_annotation_parser(self, header, max): + return type("AnnotationParser", + (object,), + {"header": header, + "max": lambda: max}) diff --git a/tests/test_craw_htmp.py b/tests/test_craw_htmp.py new file mode 100644 index 0000000000000000000000000000000000000000..99c5f2d9ad9763160b1523bf87a0ee54290b2260 --- /dev/null +++ b/tests/test_craw_htmp.py @@ -0,0 +1,720 @@ +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. # +# # +# craw is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# craw is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with craw (see COPYING file). 
# +# If not, see <http://www.gnu.org/licenses/>. # +# # +########################################################################### + +import shutil +import tempfile +import os +import stat +import argparse +import logging + +import PIL +from PIL import Image +import numpy as np +import pandas as pd +import matplotlib as mtp + +from tests import CRAWTest +import craw +from craw.scripts import craw_htmp +from unittest import skipIf + + +class TestCrawHtmp(CRAWTest): + + def setUp(self): + self.tmp_dir = tempfile.gettempdir() + self.bin = 'craw_htmp' + + + def tearDown(self): + try: + shutil.rmtree(self.out_dir) + logger = logging.getLogger('craw') + logger.handlers = [] + except: + pass + + def test_file_readable(self): + self.assertEqual(craw_htmp._file_readable(__file__), __file__) + + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_htmp._file_readable('nimportnaoik') + self.assertEqual(str(ctx.exception), "No such file: {}".format('nimportnaoik')) + + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_htmp._file_readable(os.path.dirname(__file__)) + self.assertEqual(str(ctx.exception), + "{} is not a regular file".format(os.path.dirname(__file__))) + + + @skipIf(os.getuid() == 0, "root have always right to access file") + def test_file_readable_bad_permission(self): + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.mkdir(self.out_dir) + file = os.path.join(self.out_dir, 'fake_file') + open(file, 'w').close() + os.chmod(file, stat.S_IWUSR) + with self.assertRaises(argparse.ArgumentError) as ctx: + craw_htmp._file_readable(file) + self.assertEqual(str(ctx.exception), + "{} is not readable".format(file)) + + + def test_gene_size_parser(self): + self.assertListEqual(craw_htmp._gene_size_parser("start,stop"), ['start', 'stop']) + with self.assertRaises(argparse.ArgumentError): + craw_htmp._gene_size_parser("start_stop") + + with self.assertRaises(argparse.ArgumentError): + craw_htmp._gene_size_parser("start,middle,stop") + + + def 
test_size_fig_parser(self): + self.assertTupleEqual(craw_htmp._size_fig_parser("12x22"), (12, 22)) + self.assertTupleEqual(craw_htmp._size_fig_parser("12x22in"), (12, 22)) + wide, height = craw_htmp._size_fig_parser("12x22cm") + self.assertAlmostEqual(wide, 4.72, places=2) + self.assertAlmostEqual(height, 8.66, places=2) + wide, height = craw_htmp._size_fig_parser("12x22mm") + self.assertAlmostEqual(wide, 0.472, places=3) + self.assertAlmostEqual(height, 0.866, places=3) + self.assertTupleEqual(craw_htmp._size_fig_parser("12x22px"), (0.12, 0.22)) + self.assertEqual(craw_htmp._size_fig_parser("raw"), "raw") + + with self.assertRaises(argparse.ArgumentError): + craw_htmp._size_fig_parser("12cm") + + with self.assertRaises(argparse.ArgumentError): + craw_htmp._size_fig_parser("douze_x_vingtcm") + + with self.assertRaises(argparse.ArgumentError): + craw_htmp._size_fig_parser("-2x22") + + + def test_get_version_message(self): + msg_expected = craw.get_version_message() + msg_expected += """ +Using: + - numpy {np_ver} + - pandas {pd_ver} + - matplotlib {mtp_ver} + - pillow {pil_ver} +""".format(np_ver=np.__version__, + pd_ver=pd.__version__, + mtp_ver=mtp.__version__, + pil_ver=PIL.PILLOW_VERSION + ) + get_message = craw_htmp.get_version_message() + self.assertEqual(msg_expected, get_message) + + + def test_parse_args(self): + output_filename = 'htmp_raw_lin.png' + test_result_path = os.path.join('result_dir', output_filename) + command = "--size {size} " \ + "--out={out_file} " \ + "--sort-using-col {sort_using_col} " \ + "-q " \ + "{cov_file}".format(size='raw', + out_file=test_result_path, + sort_using_col="start", + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + args = craw_htmp.parse_args(command.split()) + expected_args = self.get_expected_htmp_args(size='raw', + out=test_result_path, + quiet=1, + sort_using_col="start", + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + for k in expected_args.__dict__: + self.assertEqual(getattr(args, k), 
getattr(expected_args, k), + msg="for option {}".format(k)) + + + def test_raw(self): + """ + | test if returncode of coverage is 0 and + | then test if the generated file is the same as a reference file + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + bin=self.bin, + size='raw', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + #print("\n@@@", command) + + craw_htmp.main(command.split()) + + for sense in ('sense', 'antisense'): + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, filename) + expected_img = Image.open(expected_result_path) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + + def test_raw_log(self): + """ + | test if returncode of coverage is 0 and + | then test if the generated file is the same as a reference file + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_log.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + ##print("\n@@@", command) + craw_htmp.main(command.split()) + + for sense in ('sense', 'antisense'): + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, 
filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img, delta=1.0, msg=sense) + + + def test_raw_log_row(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_log+row.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log+row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + + for sense in ('sense', 'antisense'): + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + + def test_raw_lin_row(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin+row.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + + for sense in ('sense', 'antisense'): + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + 
expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + + def test_with_marks(self): + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_log_marks.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--mark -2 red " \ + "--mark 2 green " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + + for sense in ('sense', 'antisense'): + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + def test_raw_sense_only(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin+row.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--sense-only " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + + sense = 'sense' + filename, suffix = os.path.splitext(output_filename) + 
filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + self.assertFalse(os.path.exists("{}.{}{}".format(result_path, 'antisense', suffix))) + + + def test_raw_antisense_only(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin+row.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--antisense-only " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + + sense = 'antisense' + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + self.assertFalse(os.path.exists("{}.{}{}".format(result_path, 'sense', suffix))) + + + def test_raw_crop(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin_crop' + ext = '.png' + test_result_path = os.path.join(self.out_dir, output_filename + ext) + command = "--size {size} " \ + "--out={out_file} " \ + "--crop -2 2 " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + out_file=test_result_path, + 
cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + for sense in ('sense', 'antisense'): + filename = "{}.{}{}".format(output_filename, sense, ext) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + def test_raw_sort(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin' + ext = '.png' + test_result_path = os.path.join(self.out_dir, output_filename + ext) + command = "--size {size} " \ + "--out={out_file} " \ + "--sort-using-col Position " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + for sense in ('sense', 'antisense'): + filename = "{}.{}{}".format(output_filename, sense, ext) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + + def test_raw_sort_file(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin_sort_file' + ext = '.png' + test_result_path = os.path.join(self.out_dir, output_filename + ext) + command = "--size {size} " \ + "--out={out_file} " \ + "--sort-using-file {sort_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + out_file=test_result_path, + sort_file=os.path.join(self._data_dir, 
'4_htmp_sorting_file.txt'), + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + craw_htmp.main(command.split()) + for sense in ('sense', 'antisense'): + filename = "{}.{}{}".format(output_filename, sense, ext) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + + def test_raw_no_fmt(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp' + ext = 'png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + if 'DISPLAY' in os.environ: + craw_htmp.main(command.split()) + for sense in ('sense', 'antisense'): + self.assertTrue(os.path.exists(os.path.join(self.out_dir, + "{}.{}.{}".format(output_filename, sense, ext)) + ) + ) + else: + with self.assertRaises(RuntimeError) as ctx: + craw_htmp.main(command.split()) + self.assertEqual(str(ctx.exception), + """ + 'DISPLAY' variable is not set (you probably run craw_htmp in non graphic environment) + So you must specify an output format (add ext to the output file option as 'my_file.png') + """) + + @skipIf('DISPLAY' not in os.environ, "run in non interactive environment") + def test_raw_no_out(self): + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + dir_ori = os.getcwd() + output_filename = '4_htmp' + # craw_htmp create result_file beside source file if --out is not specify + shutil.copy(os.path.join(self._data_dir, output_filename + 
'.cov'), + self.out_dir) + os.chdir(self.out_dir) + command = "--size {size} " \ + "--norm {norm} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='row', + cov_file=os.path.join(self.out_dir, output_filename + '.cov') + ) + # print("\n@@@", command) + try: + craw_htmp.main(command.split()) + for sense in ('sense', 'antisense'): + path = "{}.{}.{}".format(output_filename, sense, 'png') + self.assertTrue(os.path.exists(path)) + finally: + os.chdir(dir_ori) + + + def test_non_display(self): + """ + """ + ############################################################# + # run in not interactive environment + ############################################################# + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_log+row.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log+row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + display = os.environ.get('DISPLAY', None) + if display: + del os.environ['DISPLAY'] + # print("\n@@@", command) + try: + craw_htmp.main(command.split()) + finally: + if display: + os.environ['DISPLAY'] = display + + for sense in ('sense', 'antisense'): + filename, suffix = os.path.splitext(output_filename) + filename = "{}.{}{}".format(filename, sense, suffix) + expected_result_path = os.path.join(self._data_dir, filename) + result_path, suffix = os.path.splitext(test_result_path) + result_path = "{}.{}{}".format(result_path, sense, suffix) + expected_img = Image.open(expected_result_path) + result_img = Image.open(result_path) + self.assertImageAlmostEqual(expected_img, result_img) + + ############################################################# + # use file not compatible with not interactive environment + ############################################################# + 
output_filename = 'htmp_raw_log+row.nimpornaoik' + test_result_path = os.path.join(self.out_dir, output_filename) + + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log+row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + display = os.environ.get('DISPLAY', None) + if display: + del os.environ['DISPLAY'] + # print("\n@@@", command) + try: + with self.assertRaises(RuntimeError)as ctx: + craw_htmp.main(command.split()) + finally: + if display: + os.environ['DISPLAY'] = display + self.assertTrue(str(ctx.exception).strip().startswith( + "The '.nimpornaoik' format is not supported, choose among" + )) + + ############################################################# + # not interactive environment and no file specified + ############################################################# + command = "--quiet " \ + "{cov_file}".format( + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + #print("\n@@@", command) + + display = os.environ.get('DISPLAY', None) + if display: + del os.environ['DISPLAY'] + try: + with self.assertRaises(RuntimeError)as ctx: + craw_htmp.main(command.split()) + finally: + if display: + os.environ['DISPLAY'] = display + self.assertEqual(str(ctx.exception).strip(), + """ + 'DISPLAY' variable is not set (you probably run craw_htmp in non graphic environment) + So you cannot use interactive output + please specify an output file (--out).""".strip()) + + + def test_bad_cmap(self): + output_filename = 'htmp_raw_log+row.png' + test_result_path = os.path.join('foo', output_filename) + + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--cmap=nimportnaoik "\ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log+row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + with self.assertRaises(RuntimeError)as ctx: + 
craw_htmp.main(command.split()) + self.assertTrue(str(ctx.exception).strip().startswith( + "Colormap nimportnaoik is not recognized. Possible values are:" + )) + + @skipIf(os.getuid() == 0, 'root has always right write') + def test_bad_outdir(self): + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + os.chmod(self.out_dir, stat.S_IRUSR|stat.S_IXUSR|stat.S_IRGRP|stat.S_IROTH) + output_filename = 'htmp_raw_log+row.png' + test_result_path = os.path.join(self.out_dir, output_filename) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='log+row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + # print("\n@@@", command) + with self.catch_log() as log: + with self.assertRaises(RuntimeError) as ctx: + craw_htmp.main(command.split(), logger_out=False) + self.assertEqual(str(ctx.exception), "/tmp/craw_test is not writable") + + + def test_output_exists(self): + """ + """ + self.out_dir = os.path.join(self.tmp_dir, 'craw_test') + os.makedirs(self.out_dir) + output_filename = 'htmp_raw_lin+row' + ext = '.png' + test_result_path = os.path.join(self.out_dir, output_filename + ext) + command = "--size {size} " \ + "--norm {norm} " \ + "--out={out_file} " \ + "--quiet " \ + "{cov_file}".format( + size='raw', + norm='row', + out_file=test_result_path, + cov_file=os.path.join(self._data_dir, '4_htmp.cov') + ) + with open(os.path.join(self.out_dir, '{}.sense{}'.format(output_filename, ext)), 'w'): + pass + # print("\n@@@", command) + with self.assertRaises(RuntimeError) as ctx: + craw_htmp.main(command.split(), logger_out=False) + + + def get_expected_htmp_args(self, **kwargs): + default_args = argparse.Namespace(antisense_only=False, + cmap='Blues', + cov_file=None, + crop=None, + dpi=None, + mark=None, + norm='lin', + out=None, + quiet=0, + sense_on_bottom=False, + sense_on_left=False, + sense_on_right=False, + 
sense_on_top=False, + sense_only=False, + size=None, + sort_by_gene_size=None, + sort_using_col=False, + sort_using_file=None, + title=None, + verbose=0) + for k, v in kwargs.items(): + setattr(default_args, k, v) + return default_args \ No newline at end of file diff --git a/tests/unit/test_heatmap.py b/tests/test_heatmap.py similarity index 97% rename from tests/unit/test_heatmap.py rename to tests/test_heatmap.py index db6aba72ef53dc6a21e13f120cb16d2706c92c82..7b10081078a4d141ca8a45d4123b7ee611412502 100644 --- a/tests/unit/test_heatmap.py +++ b/tests/test_heatmap.py @@ -2,8 +2,8 @@ # # # This file is part of Counter RNAseq Window (craw) package. # # # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. # # # # craw is free software: you can redistribute it and/or modify # @@ -34,17 +34,13 @@ from pandas.util.testing import assert_frame_equal from PIL import Image -try: - from tests import CRAWTest -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - raise ImportError("Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err)) - +from tests import CRAWTest import craw.heatmap as htmp class TestHeatmap(CRAWTest): + @classmethod def setUpClass(cls): htmp._log.setLevel(logging.ERROR) @@ -147,6 +143,9 @@ class TestHeatmap(CRAWTest): received_data = htmp._sort_using_col(data, col='name') assert_frame_equal(expected_data, received_data) + with self.assertRaises(RuntimeError) as ctx: + htmp._sort_using_col(data) + self.assertEqual(str(ctx.exception), "You must specify the column used to sort.") def test_sort_using_file(self): data = pd.DataFrame([ diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000000000000000000000000000000000000..7b27c9614e5b66a6ca97f55602af191bee7eb386 --- 
/dev/null +++ b/tests/test_util.py @@ -0,0 +1,40 @@ +########################################################################### +# # +# This file is part of Counter RNAseq Window (craw) package. # +# # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # +# see COPYRIGHT file for details. # +# # +# craw is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# craw is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # +# See the GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with craw (see COPYING file). # +# If not, see <http://www.gnu.org/licenses/>. # +# # +########################################################################### + + + +from tests import CRAWTest +from craw.util import progress + + +class TestUtil(CRAWTest): + + + def test_progress(self): + import io + stream = io.StringIO() + progress(10, 100, status="", file=stream) + self.assertEqual('[======> ] 10.0% ... of 100 annotations\r', + stream.getvalue()) + diff --git a/tests/unit/test_wig.py b/tests/test_wig.py similarity index 98% rename from tests/unit/test_wig.py rename to tests/test_wig.py index d8904facaef853534aaef1d8a094a85aa6d81bda..3436bba6795b5c1b4b6b4c73091884bde460d76a 100644 --- a/tests/unit/test_wig.py +++ b/tests/test_wig.py @@ -2,8 +2,8 @@ # # # This file is part of Counter RNAseq Window (craw) package. # # # -# Authors: Bertrand Néron # -# Copyright © 2017 Institut Pasteur (Paris). # +# Authors: Bertrand Neron # +# Copyright (c) 2017-2019 Institut Pasteur (Paris). # # see COPYRIGHT file for details. 
# # # # craw is free software: you can redistribute it and/or modify # @@ -26,17 +26,10 @@ import logging import psutil import numpy as np -try: - from tests import CRAWTest -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - raise ImportError("Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err)) - +from tests import CRAWTest from craw.wig import WigError, FixedChunk, VariableChunk, Chromosome, Genome, WigParser, _log - - class TestFixedChunk(CRAWTest): @classmethod diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py deleted file mode 100644 index 36617356c3d8b5c4ebb7a1dc52cb4c82dbf2704b..0000000000000000000000000000000000000000 --- a/tests/unit/test_util.py +++ /dev/null @@ -1,19 +0,0 @@ -try: - from tests import CRAWTest -except ImportError as err: - msg = "Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err) - raise ImportError("Cannot import craw, check your installation or your CRAW_HOME variable : {0!s}".format(err)) - -from craw.util import progress - - -class TestUtil(CRAWTest): - - - def test_progress(self): - import io - stream = io.StringIO() - progress(10, 100, status="", file=stream) - self.assertEqual('[======> ] 10.0% ... of 100 annotations\r', - stream.getvalue()) -