diff --git a/README.md b/README.md index a8f53e1010678fe48afaab9e98bc633815c0d540..defb93da3bff6d2b046ec9c9e5fa71e614627e05 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,8 @@ You need to copy the singularity image in the cloned ChIPflow directory and rena `mv chipflow_latest.sif chipflow/chipflow.img` +**Note that you can use public datasets to test the pipeline, as described [here](#run-the-pipeline-on-test-data).** + * Step 3: Execute workflow @@ -86,7 +88,7 @@ Execute the workflow locally via using $N cores or run it in a cluster environment via -`snakemake --use-singularity --singularity-args "-B '$HOME'" --cluster-config config/cluster_config.json --cluster "sbatch --mem={cluster.ram} --cpus-per-task={threads} " -j 200 --nolock --cores $SLURM_JOB_CPUS_PER_NODE` +`snakemake --use-singularity --singularity-args "-B '$HOME'" --cluster-config config/cluster_config.json --cluster "sbatch --mem={cluster.ram} --cpus-per-task={threads} " -j 200 --nolock --cores 1` Visualize how the rules are connected via @@ -206,6 +208,53 @@ report_header_info: ``` <img src="images/multiqc_header.png" width="600"> + + +### Run the pipeline on test data + +You need to have sra-toolkit (which provides `prefetch` and `fastq-dump`) installed before downloading the test data; the commands below also use `wget` and `pigz`. 
+ + +``` +# Download genome references +wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.26_GRCm38.p6/GCF_000001635.26_GRCm38.p6_genomic.fna.gz +gunzip GCF_000001635.26_GRCm38.p6_genomic.fna.gz + + +# get blacklisted regions +wget http://mitra.stanford.edu/kundaje/akundaje/release/blacklists/mm10-mouse/mm10.blacklist.bed.gz +gunzip mm10.blacklist.bed.gz + +# create genome directory +mkdir genome +mv GCF_000001635.26_GRCm38.p6_genomic.fna genome/mm10.fa +mv mm10.blacklist.bed genome/mm10.blacklist.bed + +# copy config file +cp test/config.yaml config/config.yaml +cp test/design.txt config/design.txt + +# Download FastQ files from GEO (GSE99009) https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE99009 +# Only include the H3K27ac histone mark and Klf4 transcription factor with their associated inputs for the shUbc9 and shCtrl conditions +SRR=("SRR5572646" "SRR5572647" "SRR5572658" "SRR5572659" "SRR5572668" "SRR5572669" "SRR5572676" "SRR5572677" "SRR5572652" "SRR5572653" "SRR5572664" "SRR5572665") +sample=("H3K27ac_shCtrl_Rep1_R1" "H3K27ac_shCtrl_Rep2_R1" "H3K27ac_shUbc9_Rep1_R1" "H3K27ac_shUbc9_Rep2_R1" "Klf4_shCtrl_Rep1_R1" "Klf4_shCtrl_Rep2_R1" "Klf4_shUbc9_Rep1_R1" "Klf4_shUbc9_Rep2_R1" "INPUT_shCtrl_Rep1_R1" "INPUT_shCtrl_Rep2_R1" "INPUT_shUbc9_Rep1_R1" "INPUT_shUbc9_Rep2_R1") + +mkdir data +cd data +for i in ${!SRR[*]} ; do + echo ${SRR[$i]}, ${sample[$i]} + prefetch ${SRR[$i]} -o ${sample[$i]}.sra + fastq-dump ${sample[$i]}.sra +done + +rm *.sra +for file in *.fastq ; do + pigz $file ; +done +``` + + + ## How to cite ChIPflow ? https://doi.org/10.1101/2021.02.02.429342