Add container-based MaggotUBA embedding extraction: new Makefile targets,
a new embeddings.sh worker script and an "embeddings" mode in
process_multiple_conditions.sh.

Note: the blob ids on the "index" lines are those of the original patch and
do not reflect the reviewed post-image of the edited files.

diff --git a/Makefile b/Makefile
index a5db487b8c12ecce2d8dd6631f33a4eefb906257..fece0b3dff66c057aef0495e8a01c1ac153b9b06 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,8 @@ DATE:=$(shell date +%d_%m_%Y)
 
 # to be adjusted in src/main_prediction.py as well
 CLASSIFIERS:=CLF_BBH_19_07_2023 CLF_BBH_19_07_2023_no_weak CLF_BBH_sNPFR_07_01_2025 CLF_BBH_NPFR_20_02_2025 CLF_BBH_NPF_10_03_2025
+# overrides the assignment above to leave out CLF_BBH_19_07_2023_no_weak
+CLASSIFIERS:=CLF_BBH_19_07_2023 CLF_BBH_sNPFR_07_01_2025 CLF_BBH_NPFR_20_02_2025 CLF_BBH_NPF_10_03_2025
 CLF_PATHS:=$(foreach clf,$(CLASSIFIERS),src/$(clf).pkl)
 
 ifneq (,$(wildcard bin/python))
@@ -24,6 +26,7 @@ src/CLF_BBH_sNPFR_21_01_2025.pkl: HASH = dPl6WZOE
 src/CLF_BBH_NPFR_22_01_2025.pkl: HASH = iPIbwqtk
 src/CLF_BBH_NPFR_20_02_2025.pkl: HASH = tV359AdB
 src/CLF_BBH_NPF_04_03_2025.pkl: HASH = MwQEHvB7
+src/CLF_BBH_NPF_10_03_2025.pkl: HASH = lo8kzXSK
 
 src/CLF_BBH_%.pkl:
 	@echo "the download link might have expired;"
@@ -46,6 +49,7 @@ bin/python:
 .PHONY: clean
 clean:
 	rm -rf bin include lib lib64 log pyvenv.cfg share src/__pycache__
+	rm -rf data/apptainer
 
 .PHONY: export
 export:
@@ -74,3 +78,42 @@ sNPFR:
 	sed -i -E -e "s/clf_snpfr_suffix = '.*'/clf_snpfr_suffix = 'sNPFR_$(DATE)'/" src/main_prediction.py
 	sbatch --mem-per-cpu=$(MEM)G --array=25-42 process_multiple_conditions.sh
 
+# Build the LarvaTagger container image; the build itself runs through srun.
+bin/larvatagger.sif:
+	mkdir -p bin
+	module load apptainer && srun $(SLURM_OPTS) apptainer build $@ docker://flaur/larvatagger:0.18.4-20230311
+
+# Submit a 24-task job array; process_multiple_conditions.sh dispatches each
+# task to embeddings.sh when given the "embeddings" argument.
+.PHONY: embeddings
+embeddings: bin/larvatagger.sif data/maggotuba/20230129
+	sbatch -J embeddings --mem-per-cpu=$(MEM)G --array=1-24 process_multiple_conditions.sh embeddings
+
+# Pretrained MaggotUBA model files, bind-mounted into the container by
+# embeddings.sh. Both files are downloaded into a scratch directory that is
+# renamed only after every download succeeded; otherwise a failed wget would
+# leave a directory that make considers up to date.
+# The encoder weights are renamed in a misleading way (retrained_encoder.pt)
+# to ensure the tagging backend picks the file.
+data/maggotuba/20230129:
+	rm -rf $@.tmp
+	mkdir -p $@.tmp
+	wget -O $@.tmp/autoencoder_config.json 'https://gitlab.pasteur.fr/nyx/MaggotUBA-adapter/-/raw/main/pretrained_models/20230129/autoencoder_config.json?ref_type=heads&inline=false'
+	wget -O $@.tmp/retrained_encoder.pt 'https://gitlab.pasteur.fr/nyx/MaggotUBA-adapter/-/raw/main/pretrained_models/20230129/best_validated_encoder.pt?ref_type=heads&inline=false'
+	rm -rf $@ && mv $@.tmp $@
+
+## does not work
+.PHONY: larvatagger
+larvatagger: install.sh
+	module load Python/3.11.5 && ./install.sh --install --with-backend --experimental
+
+# Download to a temporary name first so that a failed wget does not leave an
+# empty install.sh that make would consider up to date.
+install.sh:
+	wget -O $@.tmp 'https://gitlab.pasteur.fr/nyx/larvatagger.jl/-/raw/main/scripts/install.sh?ref_type=heads&inline=false'
+	chmod +x $@.tmp
+	mv -f $@.tmp $@
+
+.PHONY: mmd
+mmd:
+	cd src/Latent.jl && $(MAKE)
diff --git a/embeddings.sh b/embeddings.sh
new file mode 100755
index 0000000000000000000000000000000000000000..3fa3c805cabc94a602260e982f93111c90955de4
--- /dev/null
+++ b/embeddings.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+# Compute MaggotUBA embeddings for every assay of one condition.
+#
+# Usage: embeddings.sh INDEX
+#
+# INDEX is passed to groups.py, which prints the condition to process.
+# A condition prefixed with "t5/" selects the t5 rig (screens/t5 repository);
+# any other condition is looked up in the t2 repository (tihana/t2).
+
+if [ $# -lt 1 ]; then
+  echo "usage: embeddings.sh INDEX" >&2
+  exit 2
+fi
+
+COND=$(python groups.py "$1")
+if [ -z "$COND" ]; then
+  echo "embeddings.sh: no condition for index '$1'" >&2
+  exit 1
+fi
+
+# POSIX prefix match; the ${COND:0:3} substring syntax is not available in sh
+case $COND in
+  t5/*)
+    rig=t5
+    COND=${COND#t5/}
+    ;;
+  *)
+    rig=t2
+esac
+
+echo "Processing condition ${COND} in ${rig}..."
+
+if [ "$rig" = "t2" ]; then
+  date_file=src/Date_francesca.csv
+  repository=tihana/t2
+else
+  date_file=src/Date_francesca_$rig.csv
+  repository=screens/t5
+fi
+
+ENCODER=20230129
+
+# NOTE(review): 20230311 gets no bind mount, presumably because the image
+# (larvatagger:0.18.4-20230311) already ships that model -- confirm.
+if [ "$ENCODER" = "20230311" ]; then
+  ENCODER_MOUNT=
+else
+  ENCODER_MOUNT="--mount type=bind,source=data/maggotuba/$ENCODER,destination=/app/MaggotUBA/models/$ENCODER"
+fi
+
+mkdir -p "data/embeddings/$COND"
+
+# Only conditions listed in the date file are processed.
+if ! grep -q -e "$COND" "$date_file"; then
+  echo "embeddings.sh: ${COND} not found in ${date_file}; nothing to do" >&2
+  exit 0
+fi
+
+status=0
+for date_time in "/pasteur/helix/projects/hecatonchire/$repository/$COND"/*; do
+  # an unmatched glob is passed through literally; skip it
+  [ -e "$date_time" ] || continue
+  assay=$COND/$(basename "$date_time")
+  dest=data/embeddings/$assay
+  mkdir -p "$dest"
+  [ -h "$dest/trx.mat" ] || ln -s "$date_time/trx.mat" "$dest/"
+  if ! [ -f "$dest/embeddings-$ENCODER.h5" ]; then
+    # $ENCODER_MOUNT is deliberately unquoted: it holds zero or two arguments
+    if apptainer run --cleanenv --no-home \
+      --writable-tmpfs --env JULIA_DEPOT_PATH=/tmp:/usr/local/share/julia \
+      --bind /pasteur --mount 'type=bind,source=data/embeddings,destination=/data' \
+      --mount 'type=bind,source=data/apptainer,destination=/app/MaggotUBA/data' $ENCODER_MOUNT \
+      bin/larvatagger.sif predict /app/MaggotUBA $ENCODER "/data/$assay" --embeddings \
+      --data-isolation
+    then
+      # --output not implemented?
+      mv "$dest/embeddings.h5" "$dest/embeddings-$ENCODER.h5" || status=1
+    else
+      echo "embeddings.sh: apptainer failed for ${assay}" >&2
+      status=1
+    fi
+  fi
+done
+
+exit $status
diff --git a/process_multiple_conditions.sh b/process_multiple_conditions.sh
index 34d0cc2798f0a1e5db9651b206327097f9953072..71fff843ad80cedd90bd69718ea196b851877fed 100755
--- a/process_multiple_conditions.sh
+++ b/process_multiple_conditions.sh
@@ -7,4 +7,17 @@ mkdir -p log
 
 source bin/activate
 
+
+# with the "embeddings" argument, each array task runs embeddings.sh
+if [ "$1" = "embeddings" ]; then
+
+# bind-mounted into the container by embeddings.sh
+mkdir -p data/apptainer
+module load apptainer
+srun embeddings.sh $SLURM_ARRAY_TASK_ID
+
+else
+
 srun process_single_condition.sh $SLURM_ARRAY_TASK_ID
+
+fi