Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • BEBP/diphtoscan
1 result
Show changes
Commits on Source (7)
......@@ -52,8 +52,8 @@ Usage:
"""
# Module metadata. Each name is bound exactly once, to the value that was
# previously in effect after the last of the repeated assignments.
__authors__ = "Melanie HENNART; Martin RETHORET-PASTY"
__contact__ = "martin.rethoret-pasty@pasteur.fr"
__version__ = "1.6.1"
__copyright__ = "copyleft"
__date__ = "2024/03/04"
......@@ -125,13 +125,13 @@ from module.utils import (
)
def test_unique_dependency(name:str) -> bool:
    """Tell whether an executable called ``name`` can be found on the PATH.

    The lookup is done with ``shutil.which`` rather than by spawning
    ``command -v``: ``command`` is a shell builtin and is not guaranteed to
    exist as a standalone executable that ``subprocess`` could launch.

    Parameters
    ----------
    name : str
        Name (or path) of the executable to look for.

    Returns
    -------
    bool
        ``True`` when the executable is found, ``False`` otherwise.
    """
    # Imported here so the function does not depend on a module-level import.
    import shutil

    return shutil.which(name) is not None


def test_multiple_dependencies(dependencies:List[str]) -> None:
    """Stop the program as soon as one of ``dependencies`` is missing.

    Each name is checked in order with ``test_unique_dependency``. For the
    first one that is not found, a warning is printed and the interpreter
    exits with status ``-1``.

    Parameters
    ----------
    dependencies : List[str]
        Names of the executables that must be available.

    Raises
    ------
    SystemExit
        When a dependency is not found.
    """
    for dependency in dependencies:
        if not test_unique_dependency(dependency):
            print(f'/!\\ Warning /!\\ : {dependency} missing in path!')
            sys.exit(-1)


# Despite the ``test_`` prefix these are runtime helpers, not test cases;
# this attribute keeps pytest from collecting them.
test_unique_dependency.__test__ = False
test_multiple_dependencies.__test__ = False
......@@ -400,7 +400,7 @@ if __name__ == "__main__":
else :
results = table_results
results = results.fillna("-")
results = results.infer_objects().fillna("-")
spuA(results, args)
narG(results, args)
......
#!/bin/bash
# Index the resistance database located in the sub-directory named after
# today's date (YYYY-MM-DD), next to this script.

PATH_DB=$(dirname "$0")
DATE=$(date "+%Y-%m-%d")
DB_DIR="$PATH_DB/$DATE"

# Without today's directory every command below would fail one by one;
# stop early with a single explicit message instead.
if [ ! -d "$DB_DIR" ]; then
    echo "Database directory '$DB_DIR' not found" >&2
    exit 1
fi

echo "Indexing" ;

# All paths are quoted so that a script location containing spaces works.
# Tool output is discarded.
hmmpress -f "$DB_DIR/AMR.LIB" > /dev/null 2>&1
makeblastdb -in "$DB_DIR/AMRProt" -dbtype prot -logfile /dev/null
makeblastdb -in "$DB_DIR/AMR_CDS" -dbtype nucl -logfile /dev/null

# Taxgroups whose third column is greater than 0, skipping the "#taxgroup"
# header line.
taxgroups=$(awk '{if ($3>0 && $1!="#taxgroup") print $1}' "$DB_DIR/taxgroup.tab")

# $taxgroups is left unquoted on purpose: word splitting yields one name
# per iteration.
for taxgroup in $taxgroups
do makeblastdb -in "$DB_DIR/AMR_DNA-$taxgroup" -dbtype nucl -logfile /dev/null
done

# Append the Corynebacterium_diphtheriae entry once the loop above has run.
printf 'Corynebacterium_diphtheriae\tCorynebacterium_diphtheriae\t0\n' >> "$DB_DIR/taxgroup.tab"

PATH_DB="$DB_DIR"
VERSION="$DATE"
echo "Database directory: '$PATH_DB'"
echo "Database version: $VERSION.1"
\ No newline at end of file
......@@ -18,24 +18,4 @@ mv version.txt $PATH_DB/$DATE/
cat $PATH_DB/Corynebacterium_diphtheriae/AMRProt_Cd >> $PATH_DB/$DATE/AMRProt
sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-mutation_Cd.tab >> $PATH_DB/$DATE/AMRProt-mutation.tab
#sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-susceptible_Cd.tab >> $PATH_DB/$DATE/AMRProt-susceptible.tab
sed '1d' $PATH_DB/Corynebacterium_diphtheriae/fam_Cd.tab >> $PATH_DB/$DATE/fam.tab
echo "Indexing" ;
hmmpress -f $PATH_DB/$DATE/AMR.LIB > /dev/null 2> /dev/null
makeblastdb -in $PATH_DB/$DATE/AMRProt -dbtype prot -logfile /dev/null
makeblastdb -in $PATH_DB/$DATE/AMR_CDS -dbtype nucl -logfile /dev/null
taxgroups=$(awk '{if ($3>0 && $1!="#taxgroup") print $1}' $PATH_DB/$DATE/taxgroup.tab)
for taxgroup in $taxgroups
do makeblastdb -in $PATH_DB/$DATE/AMR_DNA-$taxgroup -dbtype nucl -logfile /dev/null
done
echo -e "Corynebacterium_diphtheriae\tCorynebacterium_diphtheriae\t0" >> $PATH_DB/$DATE/taxgroup.tab
PATH_DB="$PATH_DB/$DATE"
VERSION="$DATE"
echo "Database directory: '$PATH_DB'"
echo "Database version: $DATE.1"
\ No newline at end of file
sed '1d' $PATH_DB/Corynebacterium_diphtheriae/fam_Cd.tab >> $PATH_DB/$DATE/fam.tab
\ No newline at end of file
import sys
import datetime
import os
import sys
import pandas as pd
from module.download_alleles_st import create_db, download_profiles_st, download_profiles_tox
# Label associated with each '#node_id'; used by
# complete_missing_classification to fill empty 'class' / 'subclass' cells.
# The mapping is spelled as (label, genes) groups and flattened into a
# gene -> label dictionary.
node_class = {
    gene: label
    for label, genes in (
        ('OTHER_TOXINS', ('pld',)),
        ('SpaA-type_pili_diphtheriae', ('spaA', 'spaB', 'spaC', 'srtA')),
        ('SpaD-type_pili_diphtheriae', ('spaD', 'spaE', 'spaF', 'srtB', 'srtC')),
        ('SpaH-type_pili_diphtheriae', ('spaG', 'spaH', 'spaI', 'srtD', 'srtE')),
        ('TOXIN', ('tox',)),
        ('VIRULENCE/ADHESIN', ('cbpA', 'nanH')),
    )
    for gene in genes
}
def complete_missing_classification(path:str, classification:"dict[str, str] | None"=None) -> None:
    """Fill the empty ``class`` / ``subclass`` cells of a tab-separated table.

    The file at ``path`` is read, and every row whose ``parent_node_id`` is
    ``VIRULENCE_Cdiphth`` and whose ``class`` or ``subclass`` cell is empty
    receives the label registered for its ``#node_id``. The table is then
    written back to the same path.

    Parameters
    ----------
    path : str
        Tab-separated file to complete; it is rewritten in place.
    classification : dict, optional
        Mapping from ``#node_id`` to the label to write. Defaults to the
        module-level ``node_class`` mapping.

    Notes
    -----
    A node id that has no entry in the mapping is left empty and reported
    with a warning, instead of aborting the whole update with ``KeyError``.
    """
    labels = node_class if classification is None else classification
    df = pd.read_csv(path, sep="\t", escapechar="\\", engine="python")

    is_virulence = df['parent_node_id'] == 'VIRULENCE_Cdiphth'
    # Label of every row, NaN where the node id is not in the mapping.
    label_for_row = df['#node_id'].map(labels)

    unresolved = pd.Series(False, index=df.index)
    for field in ('class', 'subclass'):
        empty = is_virulence & df[field].isna()
        unresolved |= empty & label_for_row.isna()
        fillable = empty & label_for_row.notna()
        if fillable.any():
            # A column without any value is read as float; make it able to
            # hold text before writing labels into it.
            df[field] = df[field].astype(object)
            df.loc[fillable, field] = label_for_row[fillable]

    if unresolved.any():
        unknown = sorted(set(df.loc[unresolved, '#node_id'].astype(str)))
        print(f"/!\\ Warning /!\\ : no classification known for {', '.join(unknown)}")

    df.to_csv(path, sep="\t", escapechar="\\", index=False)
def update_database(arguments, mlst_database:tuple, tox_database:tuple):
if arguments.update :
if arguments.update :
date = datetime.datetime.today().strftime('%Y-%m-%d')
os.system("rm "+ mlst_database[1] + "* " + mlst_database[2] + "* ")
print("Downloading MLST database")
path_mlst_sequences, loci_mlst = create_db("pubmlst_diphtheria_seqdef", "3", arguments.path +"/data/mlst")
......@@ -18,4 +54,6 @@ def update_database(arguments, mlst_database:tuple, tox_database:tuple):
print(" ... done \n")
os.system('bash ' + arguments.path + '/data/resistance/update_database_resistance.sh')
complete_missing_classification(arguments.path + '/data/resistance/' + date + '/fam.tab')
os.system('bash ' + arguments.path + '/data/resistance/making_blastdb.sh')
print(" ... done \n\n\n")
\ No newline at end of file
......@@ -153,7 +153,7 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame:
gene = data_resistance['Gene symbol'][res] + dico_Method[data_resistance['Method'][res]]
# Search for certain cases of interruption due to a contig end that AMRfinder is unable to find.
if is_contig_edge(data_resistance.iloc[res]) :
data_resistance['Method'][res] = "CTRL_CONTIG_END"
data_resistance.loc[res, 'Method'] = "CTRL_CONTIG_END"
if ('tox' in data_resistance['Gene symbol'][res]) and \
(float(data_resistance['% Coverage of reference sequence'][res]) != 100.00) and \
......@@ -173,8 +173,8 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame:
family = data_resistance['Class'][res]
if table[family][strain] != '' :
table[family][strain] += ";"
table[family][strain] += gene
table.loc[strain, family] += ";"
table.loc[strain, family] += gene
return table
......