Snakefile 2.97 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Load the workflow settings from config.yaml; Snakemake exposes them as the
# global `config` dict that the rest of this file reads.
configfile: "config.yaml"

def count_sequences(fasta_file):
    """Return the number of records in a FASTA file.

    A record is counted for every line that starts with '>' (the FASTA
    header marker). Lines that only contain '>' somewhere after the first
    character are not headers and are ignored.

    Args:
        fasta_file: Path of the FASTA file to read.

    Returns:
        The number of header lines found; 0 for an empty file.
    """
    with open(fasta_file, 'r') as handle:
        # Stream line by line so large FASTA files are never held in memory.
        return sum(1 for line in handle if line.startswith('>'))

# ==== Snakefile path ====
# Path of each included rule file, looked up in the optional "snakefiles"
# section of the config. A path is None when its key is absent.
(
    __split_fasta_rules,
    __eggnogmapper2_diamond_rules,
    __cat_rules,
    __eggnogmapper2_annotate_rules,
) = (
    config.get("snakefiles", {}).get(rules_key)
    for rules_key in (
        "split_fasta",
        "eggnogmapper2_diamond",
        "cat",
        "eggnogmapper2_annotate",
    )
)

# ---- Main config ----
# Root directory for every output of the workflow (defaults to "output").
__main_output_dir = config.get("output_dir", "output")
# Filename prefix of the FASTA chunks; this key has no default and must be set.
__prefix = config["split_fasta"]["prefix"]


# ==== Split FASTA ====
__split_fasta_output_dir = __main_output_dir +  "/split_fasta"

__split_fasta_input = config['input_fasta']
# Number of sequences per chunk (defaults to 1,000,000).
__split_fasta_number_sequences = config.get('split_fasta', {}).get('number_sequences', 1000000)
total_number_sequences = count_sequences(__split_fasta_input)
# Number of chunks = ceil(total / chunk size), computed with integer floor
# division to avoid float rounding. The former `int(total / size) + 1`
# declared one chunk too many whenever the total was an exact multiple of the
# chunk size; that extra chunk would hold no sequences (presumably the split
# rule never writes it -- verify against the split_fasta rule file).
# At least one chunk is always declared, so an empty input still maps to
# chunk 00000 as before.
SPLIT_NB = [
    f"{i:05d}"
    for i in range(max(1, int(-(-total_number_sequences // __split_fasta_number_sequences))))
]
# Chunk base names (prefix + zero-padded index), reused further down as the
# values of the {chunk} wildcard.
CHUNKS = [f"{__prefix}{i}" for i in SPLIT_NB]
__split_fasta_prefix = "/".join([__split_fasta_output_dir, __prefix])
__split_fasta_output = expand(__split_fasta_prefix + "{split_nb}.fa", split_nb=SPLIT_NB)

include: __split_fasta_rules


# ==== EggNOGmapper2 Diamond ====
# Diamond reads its input from the split-FASTA directory and writes under
# <output_dir>/eggnogmapper2/diamond.
__eggnogmapper2_output_dir = "/".join([__main_output_dir, "eggnogmapper2"])
__eggnogmapper2_diamond_input_dir = __split_fasta_output_dir
__eggnogmapper2_diamond_output_dir = "/".join([__eggnogmapper2_output_dir, "diamond"])

# "{chunk}" is kept as a literal placeholder in these patterns (presumably
# resolved as a wildcard by the included rules).
__eggnogmapper2_diamond_input = __eggnogmapper2_diamond_input_dir + "/{chunk}.fa"
__eggnogmapper2_diamond_output_prefix = __eggnogmapper2_diamond_output_dir + "/{chunk}"
__eggnogmapper2_diamond_output = __eggnogmapper2_diamond_output_dir + "/{chunk}.emapper.seed_orthologs"

include: __eggnogmapper2_diamond_rules


# ==== Cat ====
# The merged file is written next to the per-chunk Diamond results.
__cat_dir = __eggnogmapper2_diamond_output_dir
__cat_merged_name = config.get('cat', {}).get('name_merge', 'cat_file')

# One seed_orthologs file per chunk, in chunk order.
__cat_input = expand(__cat_dir + "/{chunk}.emapper.seed_orthologs", chunk=CHUNKS)
__cat_output = f"{__cat_dir}/{__cat_merged_name}"
include: __cat_rules


# ==== EggNOGmapper2 Annotate ====
__eggnogmapper2_annotate_output_dir = "/".join([__eggnogmapper2_output_dir, "annotate"])

# The annotation step takes the merged seed_orthologs file as input.
__eggnogmapper2_annotate_input = __cat_output
# Base name of the annotation outputs (defaults to "all").
__eggnogmapper2_annotate_outname_prefix = (
    config.get('eggnogmapper2', {}).get('annotate', {}).get('outname_prefix', 'all')
)
__eggnogmapper2_annotate_output_prefix = f"{__eggnogmapper2_annotate_output_dir}/{__eggnogmapper2_annotate_outname_prefix}"
# Final table path: <annotate dir>/<prefix>.emapper.annotations
__eggnogmapper2_annotate_output = __eggnogmapper2_annotate_output_prefix + ".emapper.annotations"
include: __eggnogmapper2_annotate_rules

# Final target of the workflow: the EggNOG-mapper annotation table.
# Reuses __eggnogmapper2_annotate_output instead of rebuilding the same path,
# so the target cannot drift from the path configured for the annotate step.
rule all:
    input: __eggnogmapper2_annotate_output