Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
RNAflow
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
hub
RNAflow
Commits
bb0461a3
Commit
bb0461a3
authored
4 years ago
by
Rachel LEGENDRE
Browse files
Options
Downloads
Patches
Plain Diff
Add new file
parent
c98b480b
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Snakefile
+215
-0
215 additions, 0 deletions
Snakefile
with
215 additions
and
0 deletions
Snakefile
0 → 100644
+
215
−
0
View file @
bb0461a3
#########################################################################
# RNAflow: an automated pipeline to analyse transcriptomic data #
# #
# Authors: Rachel Legendre #
# Copyright (c) 2021-2022 Institut Pasteur (Paris). #
# #
# This file is part of RNAflow workflow. #
# #
# RNAflow is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# RNAflow is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details . #
# #
# You should have received a copy of the GNU General Public License #
# along with RNAflow (LICENSE). #
# If not, see <https://www.gnu.org/licenses/>. #
#########################################################################
import pandas as pd
from fnmatch import fnmatch
from re import sub, match
from itertools import repeat, chain
import os
#-------------------------------------------------------
# read config files
configfile: "config/config.yaml"
RULES = os.path.join("workflow", "rules")
#-------------------------------------------------------
# list of all files in the directory 'input_dir'
filenames = [f for f in os.listdir(config["input_dir"]) if match(r'.*'+config["input_readtag"]+config["input_extension"]+'', f)]
samples = [sub(config["input_readtag"]+config["input_extension"], '', file) for file in filenames]
receptor = [ x.strip() for x in (config["design"]["receptor"]).split(",")]
conds = [ x.strip() for x in (config["design"]["condition"]).split(",")]
#-------------------------------------------------------
# paired-end data gestion
read_tag = config["input_readtag"]
rt1 = read_tag.replace("[12]", "1")
rt2 = read_tag.replace("[12]", "2")
R1 = [1 for this in filenames if rt1 in this]
R2 = [1 for this in filenames if rt2 in this]
if len(R2) == 0:
paired = False
else:
if R1 == R2:
paired = True
else:
raise ValueError("Mix of paired and single-end data: please provide a homogenenous dataset")
# -----------------------------------------------
#initialize global variables
__sample_dir = config["input_dir"]
__analysis_dir = config["analysis_dir"]
__input_data = [__sample_dir + "/{{SAMPLE}}{}.fastq.gz".format(rt1)]
if paired:
__input_data += [__sample_dir + "/{{SAMPLE}}{}.fastq.gz".format(rt2)]
# global variable for all output files
expected_output = []
# -----------------------------------------------
#begin of the rules
## quality control
__fastqc__input_fastq = __input_data
__fastqc__output_done = "00-Fastqc/{SAMPLE}_R1_fastqc.fastq.gz"
__fastqc__wkdir = "00-Fastqc"
__fastqc__log = "00-Fastqc/logs/{SAMPLE}_R1_fastqc_raw.log"
expected_output.extend(expand(__fastqc__output_done, SAMPLE=samples))
include: os.path.join(RULES, "fastqc.rules")
## remove adapters
if config["adapters"]["remove"] :
adapter_tool = config["adapters"]["tool_choice"]
## TODO add AlienTrimmer
if adapter_tool in ["cutadapt", "atropos"]:
adapter_tool = "adapters"
__adapters__input_fastq = __input_data
__adapters__wkdir = "01-Trimming"
__adapters__output = ["01-Trimming/{SAMPLE}_R1_trim.fastq.gz"]
if paired:
__adapters__output += ["01-Trimming/{SAMPLE}_R2_trim.fastq.gz"]
# Set parameters
__adapters__adapt_list = config["adapters"]["adapter_list"]
__adapters__options = config["adapters"]["options"]
__adapters__mode = config["adapters"]["mode"]
__adapters_min = config["adapters"]["m"]
__adapters_qual = config["adapters"]["quality"]
__adapters__log = "01-Trimming/logs/{SAMPLE}_trim.txt"
expected_output.extend(expand(__adapters__output, SAMPLE=samples))
include: os.path.join(RULES, "adapters.rules")
else:
raise ValueError("Invalid choice of tool using for trimming in config file. Use either cutadapt or atropos or AlienTrimmer")
else:
__adapters__output = __input_data
## Indexing genome
ref = [ config["genome"]["name"] ]
if config["genome"]["host_mapping"]:
ref += [ config["genome"]["host_name"] ]
def mapping_index(wildcards):
if (wildcards.REF == config["genome"]["name"]):
input = config["genome"]["fasta_file"]
elif (wildcards.REF == config["genome"]["host_name"]):
input = config["genome"]["host_fasta_file"]
return(input)
if config["genome"]["index"]:
# indexing for bowtie2
__bowtie2_index__fasta = mapping_index
__bowtie2_index__output_done = os.path.join(config["genome"]["genome_directory"],"{REF}.1.bt2")
__bowtie2_index__output_prefix = os.path.join(config["genome"]["genome_directory"],"{REF}")
__bowtie2_index__log = "02-Mapping/logs/bowtie2_{REF}_indexing.log"
include: os.path.join(RULES, "bowtie2_index.rules")
else:
__bowtie2_index__output_done = os.path.join(config["genome"]["genome_directory"],"{REF}.1.bt2")
__bowtie2_index__output_prefix = os.path.join(config["genome"]["genome_directory"],"{REF}")
# Mapping step
__bowtie2_mapping__input = __adapters__output
__bowtie2_mapping__index_done = __bowtie2_index__output_done
__bowtie2_mapping__sort = "02-Mapping/{SAMPLE}_{REF}_sort.bam"
__bowtie2_mapping__bam = "02-Mapping/{SAMPLE}_{REF}.bam"
__bowtie2_mapping__sortprefix = "02-Mapping/{SAMPLE}_{REF}_sort"
__bowtie2_mapping__logs_err = "02-Mapping/logs/{SAMPLE}_{REF}_mapping.err"
__bowtie2_mapping__logs_out = "02-Mapping/logs/{SAMPLE}_{REF}_mapping.out"
__bowtie2_mapping__prefix_index = __bowtie2_index__output_prefix
__bowtie2_mapping__options = config["bowtie2_mapping"]["options"]
expected_output.extend(expand(__bowtie2_mapping__sort, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "bowtie2_mapping.rules")
# then we performed peak calling only in files against reference genome and not spike genome !
ref = config["genome"]["name"]
# Genome coverage for plus and minus strand
strand = ["minus", "plus"]
__genomecov__input = "02-Mapping/{{SAMPLE}}_{}_sort.bam".format(ref)
__genomecov__logs = "03-Coverage/logs/{SAMPLE}_{STRAND}_coverage.out"
__genomecov__output = "03-Coverage/{SAMPLE}_{STRAND}_coverage.csv"
__genomecov__genome = config["genome"]["fasta_file"]
__genomecov__options = ""
expected_output.extend(expand(__genomecov__output, SAMPLE=samples, STRAND=strand))
include: os.path.join(RULES, "genomecov.rules")
# RNAsig R script
__rnasig__target = "config/design_strand.txt"
__rnasig__output_dir = "."
__rnasig__covdir = "03-Coverage/"
__rnasig__gfffile = config['genome']['gff_file']
__rnasig__report = "04-RNAsig/finalplot.pdf"
__rnasig__logs_out = "04-RNAsig/logs/rnasig.out"
__rnasig__logs_err = "04-RNAsig/logs/rnasig.err"
expected_output.extend([__rnasig__report])
include: os.path.join(RULES, "rnasig.rules")
# Multiqc rule
config['multiqc']['options'] += " -c config/multiqc_config.yaml"
__multiqc__input = expected_output
__multiqc__input_dir = "."
__multiqc__logs = "04-Multiqc/multiqc.log"
__multiqc__output = "04-Multiqc"
__multiqc__options = config['multiqc']['options']
__multiqc__output_dir = "04-Multiqc"
expected_output = [__multiqc__output]
include: os.path.join(RULES, "multiqc.rules")
rule rnasig:
input: expected_output
onsuccess:
# copy metrics json in the corresponding multiQC output when you are in exploratory mode
import shutil
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment