diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a45d544827e115e66afd2bb8f96157ede1bf5e6e..8e3f12408c4595a16b1138a0cfdfed2e931b1232 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -107,6 +107,7 @@ func-test-ubuntu: - pip3 install -r requirements-dev.txt - ./make script: + - py.test test/test_functional/test_prepare* -vx - py.test test/test_functional/test_pangenome* -vx - py.test test/test_functional/test_corepers* -vx - py.test test/test_functional/test_align* -vx diff --git a/PanACoTA/subcommands/prepare.py b/PanACoTA/subcommands/prepare.py index ebb00dae3ca7168c3eb1082eb03778e466e9eccc..be807821a6b8110a720fb5f4dccad473ced53b10 100644 --- a/PanACoTA/subcommands/prepare.py +++ b/PanACoTA/subcommands/prepare.py @@ -325,16 +325,18 @@ def build_parser(parser): general.add_argument("--nbcont", dest="nbcont", type=utils_argparse.cont_num, default=999, help=("Maximum number of contigs allowed to keep a genome. " "Default is 999.")) - general.add_argument("--min", dest="min_dist", default=1e-4, type=float, + general.add_argument("--min_dist", dest="min_dist", default=1e-4, + type=utils_argparse.mash_dist, help="By default, genomes whose distance to the reference is not " "between 1e-4 and 0.06 are discarded. You can specify your own " "lower limit (instead of 1e-4) with this option.") - general.add_argument("--max_dist", dest="max_dist", default=0.06, type=float, - help="By default, genomes whose distance to the reference is not " - "between 1e-4 and 0.06 are discarded. You can specify your own " - "lower limit (instead of 0.06) with this option.") - general.add_argument("-p", dest="parallel", type=utils_argparse.thread_num, default=1, - help=("Run 'N' downloads in parallel (default=1). Put 0 if " + general.add_argument("--max_dist", dest="max_dist", default=0.06, + type=utils_argparse.mash_dist, + help="By default, genomes whose distance to the reference is not " + "between 1e-4 and 0.06 are discarded. 
You can specify your own " + "upper limit (instead of 0.06) with this option.") + general.add_argument("-p", "--threads", dest="parallel", type=utils_argparse.thread_num, + default=1, help=("Run 'N' downloads in parallel (default=1). Put 0 if " "you want to use all cores of your computer.")) optional = parser.add_argument_group('Alternatives') @@ -453,6 +455,11 @@ def check_args(parser, args): parser.error("Choose between a verbose output (-v) or a quiet output (-q)." " You cannot have both.") + # min_dist must be lower than max_dist + if args.min_dist >= args.max_dist: + parser.error(f"min_dist ({args.min_dist}) cannot be higher " + f"than max_dist ({args.max_dist})") + # WARNINGS # User did not specify a species name if not args.NCBI_species: diff --git a/PanACoTA/utils_argparse.py b/PanACoTA/utils_argparse.py index 5651d00a731bb7cca96348427e0af222f178606a..9f4d1e4b2eb63996fe03bcbea325dc41162300fd 100644 --- a/PanACoTA/utils_argparse.py +++ b/PanACoTA/utils_argparse.py @@ -111,3 +111,15 @@ def positive_int(param): msg = f"error: argument --cutn must be a positive integer: invalid int value: '{param}'" raise argparse.ArgumentTypeError(msg) return param + + +def mash_dist(param): + try: + param = float(param) + except ValueError: + msg = f"error: mash distance: invalid float value: '{param}'" + raise argparse.ArgumentTypeError(msg) + if param < 0 or param > 1: + msg = f"error: mash_distance must be between 0 and 1: invalid value: '{param}'" + raise argparse.ArgumentTypeError(msg) + return param diff --git a/test/test_functional/test_prepare-parser.py b/test/test_functional/test_prepare-parser.py new file mode 100644 index 0000000000000000000000000000000000000000..e9caf50b94a0dfbf3eba9f456fcd06049945cc7e --- /dev/null +++ b/test/test_functional/test_prepare-parser.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +""" +Functional tests for the parser of prepare subcommand +""" +import argparse +import pytest + +from PanACoTA.subcommands import prepare + 
+ +def test_parser_noarg(capsys): + """ + Test that when the script is called without any argument, an error message appears, + indicating the required arguments. + """ + parser = argparse.ArgumentParser(description="Prepare genomes", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "".split()) + _, err = capsys.readouterr() + assert "error: " in err + assert ("As you did not put the '--norefseq' nor the '-M' option, it means that you want " + "to download refseq genomes. But you did not provide any information, so PanACoTA " + "cannot guess which species you want to download. Specify NCBI_taxid and/or " + "NCBI_species to download, or add one of the 2 options (--norefseq or -M) " + "if you want to skip the 'download step'.") in err + + +def test_cutn_noint(capsys): + """ + Test that when user is giving a number of 'N' from which to cut which is: + - not a number + - not an int + - <0 + it gives error message + """ + parser = argparse.ArgumentParser(description="Prepare genomes", add_help=False) + prepare.build_parser(parser) + # Not a number + with pytest.raises(SystemExit): + prepare.parse(parser, "--cutn ten".split()) + _, err = capsys.readouterr() + assert "error: argument --cutn: invalid int value: 'ten'" in err + # Not an int + with pytest.raises(SystemExit): + prepare.parse(parser, "--cutn 1.5".split()) + _, err = capsys.readouterr() + assert "error: argument --cutn: invalid int value: '1.5'" in err + # Negative number + with pytest.raises(SystemExit): + prepare.parse(parser, "--cutn -5".split()) + _, err = capsys.readouterr() + assert "error: argument --cutn must be a positive integer: invalid int value: '-5'" in err + + +def test_l90_noint(capsys): + """ + Test that when user is giving a number for max L90 which is not valid: + - not a number + - not an int + it gives error message + """ + parser = argparse.ArgumentParser(description="Prepare genomes", add_help=False) + prepare.build_parser(parser) + 
# Not a number + with pytest.raises(SystemExit): + prepare.parse(parser, "--l90 ten".split()) + _, err = capsys.readouterr() + assert "error: argument --l90: invalid int value: 'ten'" in err + # Not an int + with pytest.raises(SystemExit): + prepare.parse(parser, "--l90 1.5".split()) + _, err = capsys.readouterr() + assert "error: argument --l90: invalid int value: '1.5'" in err + + +def test_parser_negative_cont(capsys): + """ + Test that when the script is called with a limit of contig number <0, + it returns an error message + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "--nbcont -5".split()) + _, err = capsys.readouterr() + assert "The maximum number of contigs allowed must be a positive number." in err + + +def test_parser_high_cont(capsys): + """ + Test that when the script is called with a limit of contig number higher than 9999, + it returns an error message + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "--nbcont 10005".split()) + _, err = capsys.readouterr() + assert "We do not support genomes with more than 9999 contigs." 
in err + + +def test_parser_wrong_cont(capsys): + """ + Test that when the script is called with a non integer limit of contig number, + it returns an error message + """ + parser = argparse.ArgumentParser(description="prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "--nbcont 10.5".split()) + _, err = capsys.readouterr() + assert "argument --nbcont: invalid int value: 10.5" in err + + +def test_max_mash_dist(capsys): + """ + Test that when user is giving a number for max_dist which is not valid: + - not a number + - > 1 + - <0 + it gives error message + """ + parser = argparse.ArgumentParser(description="Prepare genomes", add_help=False) + prepare.build_parser(parser) + # Not a number + with pytest.raises(SystemExit): + prepare.parse(parser, "--max_dist ten".split()) + _, err = capsys.readouterr() + assert "error: mash distance: invalid float value: 'ten'" in err + # > 1 + with pytest.raises(SystemExit): + prepare.parse(parser, "--max_dist 1.5".split()) + _, err = capsys.readouterr() + assert "error: mash_distance must be between 0 and 1: invalid value: '1.5'" in err + # < 0 + with pytest.raises(SystemExit): + prepare.parse(parser, "--max_dist -0.5".split()) + _, err = capsys.readouterr() + assert "error: mash_distance must be between 0 and 1: invalid value: '-0.5'" in err + + +def test_min_mash_dist(capsys): + """ + Test that when user is giving a number for min_dist which is not valid: + - not a number + - > 1 + - <0 + it gives error message + """ + parser = argparse.ArgumentParser(description="Prepare genomes", add_help=False) + prepare.build_parser(parser) + # Not a number + with pytest.raises(SystemExit): + prepare.parse(parser, "--min_dist ten".split()) + _, err = capsys.readouterr() + assert "error: mash distance: invalid float value: 'ten'" in err + # > 1 + with pytest.raises(SystemExit): + prepare.parse(parser, "--min_dist 1.5".split()) + _, err = capsys.readouterr() + assert "error: 
mash_distance must be between 0 and 1: invalid value: '1.5'" in err + # < 0 + with pytest.raises(SystemExit): + prepare.parse(parser, "--min_dist -0.5".split()) + _, err = capsys.readouterr() + assert "error: mash_distance must be between 0 and 1: invalid value: '-0.5'" in err + + +def test_min_sup_max(capsys): + ''' + Test that we get an error message if min_dist > max_dist + ''' + parser = argparse.ArgumentParser(description="Prepare genomes", add_help=False) + prepare.build_parser(parser) + # min_dist is higher than max_dist + with pytest.raises(SystemExit): + prepare.parse(parser, "--min_dist 0.9 --max_dist=0.8 --norefseq -o toto".split()) + _, err = capsys.readouterr() + assert "min_dist (0.9) cannot be higher than max_dist (0.8)" in err + + +def test_parser_wrong_thread(capsys): + """ + Test that when the user does not give an int for the threads value, it returns an + error message. + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "-p 10.5".split()) + _, err = capsys.readouterr() + assert "argument --threads threads: invalid int value: 10.5" in err + # Negative number of threads + with pytest.raises(SystemExit): + prepare.parse(parser, "-p -1".split()) + _, err = capsys.readouterr() + assert ("Please provide a positive number of threads (or 0 for all threads): " + "Invalid value: -1") in err + + +def test_parser_more_threads(capsys): + """ + Test that when the user asks for more threads than available on the computer, it returns an + error message. 
+ """ + import multiprocessing + nb_cpu = multiprocessing.cpu_count() + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "-p 50".split()) + _, err = capsys.readouterr() + assert (f"You have {nb_cpu} threads on your computer, you cannot ask for more: " + "invalid value: 50") in err + + +def test_parser_all_threads(capsys): + """ + Test that when the user does not give an int for the threads value, it returns an + error message. + """ + import multiprocessing + nb_cpu = multiprocessing.cpu_count() + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + options = prepare.parse(parser, "-p 0 --norefseq -o toto".split()) + assert options.parallel == nb_cpu + assert options.no_refseq == True + assert options.only_mash == False + + +def test_parse_missing_arg(capsys): + """ + running prepare without NCBI info nor mash_only nor norefseq -> error asking one of those + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "-p 1".split()) + _, err = capsys.readouterr() + assert ("As you did not put the '--norefseq' nor the '-M' option, it means that " + "you want to download refseq genomes. But you did not provide any " + "information, so PanACoTA cannot guess which species you want to download. 
" + "Specify NCBI_taxid and/or NCBI_species to download, or add one of " + "the 2 options (--norefseq or -M) if you want to skip the 'download step'.") in err + + +def test_norefseq_nooutdir(capsys): + """ + Try running without refseq, but not giving an output directory + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "--norefseq".split()) + _, err = capsys.readouterr() + assert ("You must provide an output directory, where your results will be saved.") in err + + +def test_onlymash_noinfo(capsys): + """ + Try running without refseq, but not giving an output directory + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "-M".split()) + _, err = capsys.readouterr() + assert ("If you want to run only Mash filtering steps, please give the info file with " + "the required information (see '--info' option") in err + + +def test_onlymash_nooutdir(capsys): + """ + Try running without refseq, but not giving an output directory + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "-M --info toto ".split()) + _, err = capsys.readouterr() + assert ("If you want to run only Mash filtering steps, please give the output " + "directory where you want to save your results (see '-o' option)") in err + + +def test_verbose_quiet(capsys): + """ + Try running without refseq, but not giving an output directory + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + with pytest.raises(SystemExit): + prepare.parse(parser, "-q -vv -M --info toto -o outdir".split()) + _, err = capsys.readouterr() + assert ("Choose between a verbose output (-v) or a quiet output (-q). 
" + "You cannot have both.") in err + + +def test_parser_nospecies(capsys): + """ + Test that when the user does not give an int for the threads value, it returns an + error message. + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + options = prepare.parse(parser, "-t 1234 -o outdir".split()) + assert not options.no_refseq + assert not options.only_mash + assert options.NCBI_species_taxid == "1234" + assert options.NCBI_species == "" + out, err = capsys.readouterr() + assert ("WARNING: you did not provide a species name ('-s species' option'). " + "All files will be downloaded in a folder called with the NCBI species " + "taxid 1234 instead of the species name.") in out + + +def test_parser_default_cutn(capsys): + """ + Test that when the user does not give an int for the threads value, it returns an + error message. + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + options = prepare.parse(parser, "-t 1234 -o outdir -s species".split()) + assert not options.no_refseq + assert not options.only_mash + assert options.NCBI_species_taxid == "1234" + assert options.NCBI_species == "species" + out, err = capsys.readouterr() + assert ("!! Your genomes will be split when sequence contains at " + "least 5'N' in a row. If you want to change this threshold, use " + "'--cutn n' option (n=0 if you do not want to cut)") in out + + +def test_parser_default_l90_nb_cont(capsys): + """ + Test that when the user does not give an int for the threads value, it returns an + error message. 
+ """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + options = prepare.parse(parser, "-t 1234 -o outdir -s species --cutn 1".split()) + assert not options.no_refseq + assert not options.only_mash + assert options.NCBI_species_taxid == "1234" + assert options.NCBI_species == "species" + out, err = capsys.readouterr() + assert ("!! Your genomes will be filtered, and only the ones with 'L90' <= 100 " + "and 'number of contigs' < 999 will be kept. If you want to change those " + "thresholds, use '--l90' and '--nbcont' options.") in out + + +def test_parser_info_notonlymash(capsys): + """ + Giving an info file, but not asking for only_mash -> useless info file + """ + parser = argparse.ArgumentParser(description="Prepare", add_help=False) + prepare.build_parser(parser) + options = prepare.parse(parser, "-t 1234 -o outdir -s species --cutn 1 --info toto".split()) + assert not options.no_refseq + assert not options.only_mash + assert options.NCBI_species_taxid == "1234" + assert options.NCBI_species == "species" + out, err = capsys.readouterr() + assert ("!! You gave an info file (--info option), but did not ask to run only Mash " + "step (-M option). Your info file will be ignored (and renamed with '.back' " + "at the end), and another one will be created with the new calculated values.") in out