Snakefile 1009 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Load the workflow settings from config.yaml; Snakemake exposes them through
# the global `config` dictionary used throughout this file.
configfile: "config.yaml"

def count_sequences(fasta_file):
    """Return the number of sequence records in a FASTA file.

    A record is counted for every line that *starts* with ``>`` (the FASTA
    header marker). Lines that merely contain ``>`` somewhere else, such as
    comment lines, are not counted.

    Args:
        fasta_file: Path of the FASTA file to read (opened in text mode).

    Returns:
        The number of header lines found; 0 for an empty file.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(fasta_file, 'r') as file:
        # Stream the file line by line so large inputs are never held in memory.
        return sum(1 for line in file if line.startswith('>'))

# ==== Snakefile path ====
# Path of the snakefile holding the split_fasta rules, read from the
# `snakefiles.split_fasta` config entry (None when the entry is absent).
__split_fasta_rules = config.get("snakefiles", {}).get("split_fasta")

# Root directory of the workflow outputs; falls back to "output".
__main_output_dir = config.get('output_dir', 'output')

# ==== Split FASTA ====
__split_fasta_output_dir = __main_output_dir +  "/split_fasta"

# `input_fasta` has no default: a missing entry raises KeyError.
__split_fasta_input = config['input_fasta']
# Presumably the maximum number of sequences per chunk -- confirm against the
# included split_fasta rules.
__split_fasta_number_sequences = config.get('split_fasta', {}).get('number_sequences', 1000000)
total_number_sequences = count_sequences(__split_fasta_input)
# One zero-padded, 5-digit extension per expected chunk. The chunk count is the
# ceiling of total / chunk size, so a total that is an exact multiple of the
# chunk size does not request an extra file; at least one chunk is always
# expected, even for an empty input.
# NOTE(review): assumes the split rule writes ceil(total / size) files numbered
# from 00000 -- confirm against the included snakefile.
EXTENSIONS = [
    f"{i:05d}"
    for i in range(max(1, int(-(-total_number_sequences // __split_fasta_number_sequences))))
]
# `split_fasta.prefix` has no default: a missing entry raises KeyError.
__split_fasta_prefix = "/".join([__split_fasta_output_dir, config['split_fasta']['prefix']])
# Full list of expected chunk files: <output_dir>/split_fasta/<prefix><ext>.fa
__split_fasta_output = expand(__split_fasta_prefix + "{ext}.fa", ext=EXTENSIONS)
include: __split_fasta_rules

# Target rule: requests every chunk file listed in __split_fasta_output.
rule all:
    input:
        __split_fasta_output