Compare revisions

mrethore · mrethore · mrethore · mrethore · mrethore · mrethore
--- a/__main__.py
+++ b/__main__.py
@@ -52,8 +52,8 @@ Usage:
 """

 __authors__ = ("Melanie HENNART; Martin RETHORET-PASTY")
-__contact__ = ("melanie.hennart@pasteur.fr; martin.rethoret-pasty@pasteur.fr")
-__version__ = "1.6.0" 
+__contact__ = ("martin.rethoret-pasty@pasteur.fr")
+__version__ = "1.6.1" 
 __copyright__ = "copyleft"
 __date__ = "2024/03/04"

@@ -125,13 +125,13 @@ from module.utils import (
    )

 def test_unique_dependency(name:str):
-    return subprocess.call(["command", "-v", name])
+    return shutil.which(name) is not None


 def test_multiple_dependencies(dependencies:List[str]):
    for dependency in dependencies:
-        rc = test_unique_dependency(dependency)
-        if rc == 1:
+        presence = test_unique_dependency(dependency)
+        if presence is not True:
            print(f'/!\\ Warning /!\\ : {dependency} missing in path!')
            sys.exit(-1)

@@ -400,7 +400,7 @@ if __name__ == "__main__":
    else : 
        results = table_results
        
-    results = results.fillna("-")
+    results = results.infer_objects().fillna("-")
        
    spuA(results, args)
    narG(results, args)

--- a/data/resistance/making_blastdb.sh
+++ b/data/resistance/making_blastdb.sh
+#!/bin/bash
+#
+# Index the dated AMR resistance database located next to this script:
+#   - press the HMM library (hmmpress),
+#   - build the protein and nucleotide BLAST databases (makeblastdb),
+#   - build one nucleotide BLAST database per taxgroup listed in taxgroup.tab,
+#   - register the Corynebacterium_diphtheriae taxgroup.
+#
+# The database is read from "<script dir>/<today, YYYY-MM-DD>".
+# NOTE(review): presumably this directory was created earlier the same day by
+# update_database_resistance.sh -- confirm the two scripts always agree on DATE.
+
+PATH_DB=$(dirname "$0")
+DATE=$(date "+%Y-%m-%d")
+
+echo "Indexing" ;
+
+# All path expansions are quoted so that an installation directory containing
+# spaces or glob characters does not get split into several arguments.
+hmmpress -f "$PATH_DB/$DATE/AMR.LIB" > /dev/null 2> /dev/null
+
+makeblastdb -in "$PATH_DB/$DATE/AMRProt" -dbtype prot  -logfile /dev/null
+makeblastdb -in "$PATH_DB/$DATE/AMR_CDS" -dbtype nucl  -logfile /dev/null
+
+# Keep only taxgroups whose third column is > 0, skipping the header row.
+taxgroups=$(awk '{if ($3>0 && $1!="#taxgroup") print $1}' "$PATH_DB/$DATE/taxgroup.tab")
+# $taxgroups is intentionally left unquoted: word splitting yields one name per iteration.
+for taxgroup in $taxgroups  
+do makeblastdb -in "$PATH_DB/$DATE/AMR_DNA-$taxgroup" -dbtype nucl  -logfile /dev/null
+done
+
+# Appended after the loop above, so no AMR_DNA database is built for this entry.
+echo -e "Corynebacterium_diphtheriae\tCorynebacterium_diphtheriae\t0" >> "$PATH_DB/$DATE/taxgroup.tab"
+
+PATH_DB="$PATH_DB/$DATE" 
+VERSION="$DATE"
+echo "Database directory: '$PATH_DB'"
+echo "Database version: $VERSION.1"
\ No newline at end of file
--- a/data/resistance/update_database_resistance.sh
+++ b/data/resistance/update_database_resistance.sh
@@ -18,24 +18,4 @@ mv version.txt $PATH_DB/$DATE/
 cat $PATH_DB/Corynebacterium_diphtheriae/AMRProt_Cd >> $PATH_DB/$DATE/AMRProt 
 sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-mutation_Cd.tab >> $PATH_DB/$DATE/AMRProt-mutation.tab
 #sed '1d' $PATH_DB/Corynebacterium_diphtheriae/AMRProt-susceptible_Cd.tab >> $PATH_DB/$DATE/AMRProt-susceptible.tab
-sed '1d' $PATH_DB/Corynebacterium_diphtheriae/fam_Cd.tab >> $PATH_DB/$DATE/fam.tab
-
-echo "Indexing" ;
-
-hmmpress -f $PATH_DB/$DATE/AMR.LIB > /dev/null 2> /dev/null
-
-makeblastdb -in $PATH_DB/$DATE/AMRProt -dbtype prot  -logfile /dev/null
-makeblastdb -in $PATH_DB/$DATE/AMR_CDS -dbtype nucl  -logfile /dev/null
-
-taxgroups=$(awk '{if ($3>0 && $1!="#taxgroup") print $1}' $PATH_DB/$DATE/taxgroup.tab)
-for taxgroup in $taxgroups  
-do makeblastdb -in $PATH_DB/$DATE/AMR_DNA-$taxgroup -dbtype nucl  -logfile /dev/null
-done
-
-
-echo -e "Corynebacterium_diphtheriae\tCorynebacterium_diphtheriae\t0" >> $PATH_DB/$DATE/taxgroup.tab
-
-PATH_DB="$PATH_DB/$DATE" 
-VERSION="$DATE"
-echo "Database directory: '$PATH_DB'"
-echo "Database version: $DATE.1"
\ No newline at end of file
+sed '1d' $PATH_DB/Corynebacterium_diphtheriae/fam_Cd.tab >> $PATH_DB/$DATE/fam.tab
\ No newline at end of file
--- a/module/updating_database.py
+++ b/module/updating_database.py
-import sys
+import datetime
 import os
+import sys
+
+import pandas as pd

 from module.download_alleles_st import create_db, download_profiles_st, download_profiles_tox

+node_class = {'pld':'OTHER_TOXINS',  # '#node_id' -> value written into empty 'class'/'subclass' cells by complete_missing_classification
+'spaA' : 'SpaA-type_pili_diphtheriae',
+'spaB' : 'SpaA-type_pili_diphtheriae',
+'spaC' : 'SpaA-type_pili_diphtheriae',
+'srtA' : 'SpaA-type_pili_diphtheriae',
+'spaD' : 'SpaD-type_pili_diphtheriae',
+'spaE' : 'SpaD-type_pili_diphtheriae',
+'spaF' : 'SpaD-type_pili_diphtheriae',
+'srtB' : 'SpaD-type_pili_diphtheriae',
+'srtC' : 'SpaD-type_pili_diphtheriae',
+'spaG' : 'SpaH-type_pili_diphtheriae',
+'spaH' : 'SpaH-type_pili_diphtheriae',
+'spaI' : 'SpaH-type_pili_diphtheriae',
+'srtD' : 'SpaH-type_pili_diphtheriae',
+'srtE' : 'SpaH-type_pili_diphtheriae',
+'tox' : 'TOXIN',
+'cbpA' : 'VIRULENCE/ADHESIN',
+'nanH' : 'VIRULENCE/ADHESIN',
+}
+
+def complete_missing_classification(path:str):
+    """Rewrite the TSV at `path`, filling empty 'class'/'subclass' of 'VIRULENCE_Cdiphth' children from node_class."""
+    df = pd.read_csv(path, sep="\t", escapechar="\\", engine="python")
+    mask = df['parent_node_id'] == 'VIRULENCE_Cdiphth'
+    fallback = df.loc[mask, '#node_id'].map(node_class)  # NaN for node ids absent from node_class
+    for field in ['class', 'subclass']:
+        # Label-aligned fill (no positional .iloc on index labels); unknown node ids stay empty instead of raising KeyError.
+        df[field] = df[field].astype(object).where(~(mask & df[field].isna()), fallback)
+    df.to_csv(path, sep="\t", escapechar="\\", index=False)
+
+
 def update_database(arguments, mlst_database:tuple, tox_database:tuple):
-    if arguments.update : 
+    if arguments.update :
+        date = datetime.datetime.today().strftime('%Y-%m-%d') 
+
        os.system("rm "+ mlst_database[1] + "* " + mlst_database[2] + "* ")                  
        print("Downloading MLST database")
        path_mlst_sequences, loci_mlst = create_db("pubmlst_diphtheria_seqdef", "3", arguments.path +"/data/mlst")
@@ -18,4 +54,6 @@ def update_database(arguments, mlst_database:tuple, tox_database:tuple):
        print("   ... done \n")
        
        os.system('bash ' + arguments.path + '/data/resistance/update_database_resistance.sh')
+        complete_missing_classification(arguments.path + '/data/resistance/' + date + '/fam.tab')
+        os.system('bash ' + arguments.path + '/data/resistance/making_blastdb.sh')
        print("   ... done \n\n\n")
\ No newline at end of file
--- a/module/utils.py
+++ b/module/utils.py
@@ -153,7 +153,7 @@ def armfinder_to_table(data_resistance:pd.DataFrame) ->  pd.DataFrame:
        gene = data_resistance['Gene symbol'][res] + dico_Method[data_resistance['Method'][res]]
        # Search for certain cases of interruption due to a contig end that AMRfinder is unable to find.    
        if is_contig_edge(data_resistance.iloc[res]) : 
-            data_resistance['Method'][res] = "CTRL_CONTIG_END" 
+            data_resistance.loc[res, 'Method'] = "CTRL_CONTIG_END" 

        if ('tox' in data_resistance['Gene symbol'][res]) and \
           (float(data_resistance['% Coverage of reference sequence'][res]) != 100.00) and \
@@ -173,8 +173,8 @@ def armfinder_to_table(data_resistance:pd.DataFrame) ->  pd.DataFrame:
        family = data_resistance['Class'][res]
        
        if table[family][strain] != '' :
-               table[family][strain] += ";"
-        table[family][strain] += gene
+               table.loc[strain, family] += ";"
+        table.loc[strain, family] += gene
    return table
No results found