diff --git a/__main__.py b/__main__.py index b5d478939f3fa8803ded4d2592b38b6e3eafd127..eece02f45aa92e379b6e400243a330729395b94f 100644 --- a/__main__.py +++ b/__main__.py @@ -338,10 +338,11 @@ if __name__ == "__main__": dict_results = {} data_resistance = pd.DataFrame() for genome in args.assemblies : + basename = os.path.basename(genome) strain = os.path.splitext(basename)[0] - - fasta = get_path +'/'+genome + + fasta = f"{get_path}/{genome}" dict_genome = get_species_results(fasta, args.path + '/data/species', str(args.threads)) if args.mlst : @@ -366,6 +367,7 @@ if __name__ == "__main__": ' --translation_table 11 --plus --quiet ') if is_non_zero_file(args.outdir +'/' +strain + ".prot.fa"): data = pd.read_csv(args.outdir +'/' + strain + ".blast.out",sep="\t", dtype='str') + data['File'] = genome data_resistance = pd.concat([data_resistance, data], axis = 0, ignore_index=True) dict_genome.update({"GENOMIC_CONTEXT" : get_genomic_context (args.outdir, data)}) else : @@ -387,7 +389,7 @@ if __name__ == "__main__": table_results = table_results.T if len(data_resistance.index) != 0 : - table_resistance = armfinder_to_table(data_resistance, fasta) + table_resistance = armfinder_to_table(data_resistance) for family in table_resistance.columns: table_resistance[family] = table_resistance[family].apply(lambda x : ";".join(sorted(x.split(';')))) diff --git a/module/utils.py b/module/utils.py index 36de7834ffa7073d0a0b8b7a36e0d88f466a8056..0438cd4e2b917fdd66d00a7ea9e6d853f57677f1 100644 --- a/module/utils.py +++ b/module/utils.py @@ -91,7 +91,7 @@ def get_tox_results(infoTOX:tuple, contigs:str, args) -> dict: #results.update(dict(zip(infoTOX[0], chr_st_detail))) return results -def is_contig_edge(data_resistance:pd.DataFrame, file:str) -> bool: +def is_contig_edge(data_resistance:pd.DataFrame) -> bool: len_seq_ref = int(data_resistance['Reference sequence length'])*3 pos_start = int(data_resistance['Start']) @@ -101,7 +101,7 @@ def is_contig_edge(data_resistance:pd.DataFrame, file:str) -> bool: if len_seq_found < len_seq_ref : missing_nucleotides = len_seq_ref - len_seq_found over_start = (pos_start-missing_nucleotides) < 0 - over_stop = (find_len_contig(file, data_resistance['Contig id']) - (pos_stop + missing_nucleotides)) < 0 + over_stop = (find_len_contig(data_resistance['File'], data_resistance['Contig id']) - (pos_stop + missing_nucleotides)) < 0 if over_start or over_stop : return True @@ -128,10 +128,10 @@ def find_len_contig(file:str, contig :str): return length else: line = fichier.readline() - return None + return None #TODO to change -def armfinder_to_table(data_resistance:pd.DataFrame, fasta:str) -> pd.DataFrame: +def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame: dico_Method = {'ALLELEX' : "", 'EXACTX' : "", 'POINTX' : "!", @@ -154,7 +154,7 @@ def armfinder_to_table(data_resistance:pd.DataFrame, fasta:str) -> pd.DataFrame else : gene = data_resistance['Gene symbol'][res] + "-NTTB" + dico_Method[data_resistance['Method'][res]] - if is_contig_edge(data_resistance.iloc[res], fasta) : # Used to find certain cases of interruption due to a contig end that AMRfinder is unable to find. + if is_contig_edge(data_resistance.iloc[res]) : # Used to find certain cases of interruption due to a contig end that AMRfinder is unable to find. gene = f"{data_resistance['Gene symbol'][res]}_end_of_contig" if (data_resistance['Method'][res] == 'PARTIALX') or \