Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • BEBP/diphtoscan
1 result
Show changes
Commits on Source (7)
...@@ -52,8 +52,8 @@ Usage: ...@@ -52,8 +52,8 @@ Usage:
""" """
__authors__ = ("Melanie HENNART; Martin RETHORET-PASTY") __authors__ = ("Melanie HENNART; Martin RETHORET-PASTY")
__contact__ = ("melanie.hennart@pasteur.fr; martin.rethoret-pasty@pasteur.fr") __contact__ = ("martin.rethoret-pasty@pasteur.fr")
__version__ = "1.6.0" __version__ = "1.6.1"
__copyright__ = "copyleft" __copyright__ = "copyleft"
__date__ = "2024/03/04" __date__ = "2024/03/04"
...@@ -125,13 +125,13 @@ from module.utils import ( ...@@ -125,13 +125,13 @@ from module.utils import (
) )
def test_unique_dependency(name:str): def test_unique_dependency(name:str):
return subprocess.call(["command", "-v", name]) return shutil.which(name) is not None
def test_multiple_dependencies(dependencies:List[str]): def test_multiple_dependencies(dependencies:List[str]):
for dependency in dependencies: for dependency in dependencies:
rc = test_unique_dependency(dependency) presence = test_unique_dependency(dependency)
if rc == 1: if presence is not True:
print(f'/!\\ Warning /!\\ : {dependency} missing in path!') print(f'/!\\ Warning /!\\ : {dependency} missing in path!')
sys.exit(-1) sys.exit(-1)
...@@ -400,7 +400,7 @@ if __name__ == "__main__": ...@@ -400,7 +400,7 @@ if __name__ == "__main__":
else : else :
results = table_results results = table_results
results = results.fillna("-") results = results.infer_objects().fillna("-")
spuA(results, args) spuA(results, args)
narG(results, args) narG(results, args)
......
#!/bin/bash
# Build the HMM and BLAST indexes for the resistance database stored in the
# sub-directory of this script's folder that is named after today's date
# (YYYY-MM-DD), then append the Corynebacterium_diphtheriae row to taxgroup.tab.
#
# Every path expansion is double-quoted so that an installation directory
# containing spaces or glob characters is not word-split by the shell.

PATH_DB=$(dirname "$0")
DATE=$(date "+%Y-%m-%d")
# All files handled below live in the dated directory.
DB_DIR="$PATH_DB/$DATE"

echo "Indexing" ;

# Tool output is discarded; only the summary lines at the end are printed.
hmmpress -f "$DB_DIR/AMR.LIB" > /dev/null 2> /dev/null
makeblastdb -in "$DB_DIR/AMRProt" -dbtype prot -logfile /dev/null
makeblastdb -in "$DB_DIR/AMR_CDS" -dbtype nucl -logfile /dev/null

# Names (column 1) of the taxgroups whose third column is > 0, header row excluded.
taxgroups=$(awk '{if ($3>0 && $1!="#taxgroup") print $1}' "$DB_DIR/taxgroup.tab")

# $taxgroups is deliberately left unquoted: the loop relies on word splitting
# to visit one taxgroup name per iteration.
for taxgroup in $taxgroups
do
    makeblastdb -in "$DB_DIR/AMR_DNA-$taxgroup" -dbtype nucl -logfile /dev/null
done

# Appended after the loop; its third column is 0, so it would not pass the
# awk filter above either.
echo -e "Corynebacterium_diphtheriae\tCorynebacterium_diphtheriae\t0" >> "$DB_DIR/taxgroup.tab"

PATH_DB="$DB_DIR"
VERSION="$DATE"
echo "Database directory: '$PATH_DB'"
echo "Database version: $DATE.1"
\ No newline at end of file
...@@ -18,24 +18,4 @@ mv version.txt $PATH_DB/$DATE/ ...@@ -18,24 +18,4 @@ mv version.txt $PATH_DB/$DATE/
cat $PATH_DB/Corynebacterium_diphtheriae/AMRProt_Cd >> $PATH_DB/$DATE/AMRProt cat $PATH_DB/Corynebacterium_diphtheriae/AMRProt_Cd >> $PATH_DB/$DATE/AMRProt
sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-mutation_Cd.tab >> $PATH_DB/$DATE/AMRProt-mutation.tab sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-mutation_Cd.tab >> $PATH_DB/$DATE/AMRProt-mutation.tab
#sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-susceptible_Cd.tab >> $PATH_DB/$DATE/AMRProt-susceptible.tab #sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-susceptible_Cd.tab >> $PATH_DB/$DATE/AMRProt-susceptible.tab
sed '1d' $PATH_DB/Corynebacterium_diphtheriae/fam_Cd.tab >> $PATH_DB/$DATE/fam.tab sed '1d' $PATH_DB/Corynebacterium_diphtheriae/fam_Cd.tab >> $PATH_DB/$DATE/fam.tab
\ No newline at end of file
echo "Indexing" ;
hmmpress -f $PATH_DB/$DATE/AMR.LIB > /dev/null 2> /dev/null
makeblastdb -in $PATH_DB/$DATE/AMRProt -dbtype prot -logfile /dev/null
makeblastdb -in $PATH_DB/$DATE/AMR_CDS -dbtype nucl -logfile /dev/null
taxgroups=$(awk '{if ($3>0 && $1!="#taxgroup") print $1}' $PATH_DB/$DATE/taxgroup.tab)
for taxgroup in $taxgroups
do makeblastdb -in $PATH_DB/$DATE/AMR_DNA-$taxgroup -dbtype nucl -logfile /dev/null
done
echo -e "Corynebacterium_diphtheriae\tCorynebacterium_diphtheriae\t0" >> $PATH_DB/$DATE/taxgroup.tab
PATH_DB="$PATH_DB/$DATE"
VERSION="$DATE"
echo "Database directory: '$PATH_DB'"
echo "Database version: $DATE.1"
\ No newline at end of file
import sys import datetime
import os import os
import sys
import pandas as pd
from module.download_alleles_st import create_db, download_profiles_st, download_profiles_tox from module.download_alleles_st import create_db, download_profiles_st, download_profiles_tox
# Default value written into both the 'class' and 'subclass' columns for the
# nodes listed here, keyed by the '#node_id' column.
node_class = {
    'pld': 'OTHER_TOXINS',
    'spaA': 'SpaA-type_pili_diphtheriae',
    'spaB': 'SpaA-type_pili_diphtheriae',
    'spaC': 'SpaA-type_pili_diphtheriae',
    'srtA': 'SpaA-type_pili_diphtheriae',
    'spaD': 'SpaD-type_pili_diphtheriae',
    'spaE': 'SpaD-type_pili_diphtheriae',
    'spaF': 'SpaD-type_pili_diphtheriae',
    'srtB': 'SpaD-type_pili_diphtheriae',
    'srtC': 'SpaD-type_pili_diphtheriae',
    'spaG': 'SpaH-type_pili_diphtheriae',
    'spaH': 'SpaH-type_pili_diphtheriae',
    'spaI': 'SpaH-type_pili_diphtheriae',
    'srtD': 'SpaH-type_pili_diphtheriae',
    'srtE': 'SpaH-type_pili_diphtheriae',
    'tox': 'TOXIN',
    'cbpA': 'VIRULENCE/ADHESIN',
    'nanH': 'VIRULENCE/ADHESIN',
}


def complete_missing_classification(path:str):
    """Fill empty 'class'/'subclass' cells of a tab-separated table in place.

    The file at *path* is read, and for every row whose 'parent_node_id' is
    'VIRULENCE_Cdiphth' each empty 'class' or 'subclass' cell receives the
    value that ``node_class`` associates with the row's '#node_id'. Cells
    that already hold a value and rows with another parent are not modified.
    The table is then written back to the same *path*.

    A row whose '#node_id' is absent from ``node_class`` is reported with a
    warning and left empty instead of aborting with ``KeyError``, so a node
    that is not yet listed in ``node_class`` does not interrupt the rewrite.

    Args:
        path: Location of the tab-separated file; it must provide the
            columns '#node_id', 'parent_node_id', 'class' and 'subclass'.

    Returns:
        None. The file at *path* is overwritten.
    """
    df = pd.read_csv(path, sep="\t", escapechar="\\", engine="python")

    fields = ['class', 'subclass']
    is_virulence_child = df['parent_node_id'] == 'VIRULENCE_Cdiphth'
    # Node ids without an entry in node_class map to NaN rather than raising.
    default_class = df['#node_id'].map(node_class)

    # Report the rows that need a value but have no default available.
    has_gap = df[fields].isna().any(axis=1)
    unresolved = df.loc[is_virulence_child & has_gap & default_class.isna(), '#node_id']
    for node_id in unresolved:
        print(f'/!\\ Warning /!\\ : no default classification for {node_id}, left empty')

    for field in fields:
        to_fill = is_virulence_child & df[field].isna() & default_class.notna()
        if to_fill.any():
            # A column that is entirely empty is parsed as float64; cast it so
            # that storing strings is not an incompatible-dtype assignment.
            df[field] = df[field].astype(object)
            # Boolean masks are label-aligned, so this does not depend on the
            # index labels matching row positions.
            df.loc[to_fill, field] = default_class[to_fill]

    df.to_csv(path, sep="\t", escapechar="\\", index=False)
    return
def update_database(arguments, mlst_database:tuple, tox_database:tuple): def update_database(arguments, mlst_database:tuple, tox_database:tuple):
if arguments.update : if arguments.update :
date = datetime.datetime.today().strftime('%Y-%m-%d')
os.system("rm "+ mlst_database[1] + "* " + mlst_database[2] + "* ") os.system("rm "+ mlst_database[1] + "* " + mlst_database[2] + "* ")
print("Downloading MLST database") print("Downloading MLST database")
path_mlst_sequences, loci_mlst = create_db("pubmlst_diphtheria_seqdef", "3", arguments.path +"/data/mlst") path_mlst_sequences, loci_mlst = create_db("pubmlst_diphtheria_seqdef", "3", arguments.path +"/data/mlst")
...@@ -18,4 +54,6 @@ def update_database(arguments, mlst_database:tuple, tox_database:tuple): ...@@ -18,4 +54,6 @@ def update_database(arguments, mlst_database:tuple, tox_database:tuple):
print(" ... done \n") print(" ... done \n")
os.system('bash ' + arguments.path + '/data/resistance/update_database_resistance.sh') os.system('bash ' + arguments.path + '/data/resistance/update_database_resistance.sh')
complete_missing_classification(arguments.path + '/data/resistance/' + date + '/fam.tab')
os.system('bash ' + arguments.path + '/data/resistance/making_blastdb.sh')
print(" ... done \n\n\n") print(" ... done \n\n\n")
\ No newline at end of file
...@@ -153,7 +153,7 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame: ...@@ -153,7 +153,7 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame:
gene = data_resistance['Gene symbol'][res] + dico_Method[data_resistance['Method'][res]] gene = data_resistance['Gene symbol'][res] + dico_Method[data_resistance['Method'][res]]
# Search for certain cases of interruption due to a contig end that AMRfinder is unable to find. # Search for certain cases of interruption due to a contig end that AMRfinder is unable to find.
if is_contig_edge(data_resistance.iloc[res]) : if is_contig_edge(data_resistance.iloc[res]) :
data_resistance['Method'][res] = "CTRL_CONTIG_END" data_resistance.loc[res, 'Method'] = "CTRL_CONTIG_END"
if ('tox' in data_resistance['Gene symbol'][res]) and \ if ('tox' in data_resistance['Gene symbol'][res]) and \
(float(data_resistance['% Coverage of reference sequence'][res]) != 100.00) and \ (float(data_resistance['% Coverage of reference sequence'][res]) != 100.00) and \
...@@ -173,8 +173,8 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame: ...@@ -173,8 +173,8 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame:
family = data_resistance['Class'][res] family = data_resistance['Class'][res]
if table[family][strain] != '' : if table[family][strain] != '' :
table[family][strain] += ";" table.loc[strain, family] += ";"
table[family][strain] += gene table.loc[strain, family] += gene
return table return table
......