Skip to content
Snippets Groups Projects
Commit 625380aa authored by mrethore's avatar mrethore
Browse files

Change armfinder_to_tables

parent b51698d6
No related branches found
No related tags found
1 merge request: !11 Update nomenclature from amrfinder
......@@ -137,32 +137,39 @@ def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame:
'POINTX' : "!",
'BLASTX' : "*",
'PARTIALX' : "?",
'PARTIAL_CONTIG_ENDX' : "?$", #The PARTIAL_CONTIG_ENDX method is only assigned when the start or end position of the sequence being searched coincides exactly with the start or end of the contig.
'PARTIAL_CONTIG_ENDX' : "_end_of_contig", #The PARTIAL_CONTIG_ENDX method is only assigned when the start or end position of the sequence being searched coincides exactly with the start or end of the contig.
'CTRL_CONTIG_END' : "_end_of_contig",
'INTERNAL_STOP' : "#"}
avoid_NTTB_prediction = ['PARTIAL_CONTIG_ENDX',
'CTRL_CONTIG_END']
data_resistance['Class'] = data_resistance['Class'].fillna ('NoClass')
Class = data_resistance['Class'].value_counts().keys()
Strains = data_resistance['Name'].value_counts().keys()
table = pd.DataFrame('',index=Strains, columns=Class)
for res in data_resistance.index :
gene = data_resistance['Gene symbol'][res] + dico_Method[data_resistance['Method'][res]]
if 'tox' in data_resistance['Gene symbol'][res] :
if float(data_resistance['% Coverage of reference sequence'][res]) != 100.00 :
if (data_resistance['Method'][res] == 'BLASTX') :
gene = data_resistance['Gene symbol'][res] + "-NTTB?-"+str(round(100-float(data_resistance['% Coverage of reference sequence'][res])))+"%"
else :
gene = data_resistance['Gene symbol'][res] + "-NTTB" + dico_Method[data_resistance['Method'][res]]
if is_contig_edge(data_resistance.iloc[res]) : # Used to find certain cases of interruption due to a contig end that AMRfinder is unable to find.
gene = f"{data_resistance['Gene symbol'][res]}_end_of_contig"
gene = data_resistance['Gene symbol'][res] + dico_Method[data_resistance['Method'][res]]
# Search for certain cases of interruption due to a contig end that AMRfinder is unable to find.
if is_contig_edge(data_resistance.iloc[res]) :
data_resistance['Method'][res] = "CTRL_CONTIG_END"
if ('tox' in data_resistance['Gene symbol'][res]) and \
(float(data_resistance['% Coverage of reference sequence'][res]) != 100.00) and \
(data_resistance['Method'][res] not in avoid_NTTB_prediction) :
gene = data_resistance['Gene symbol'][res] + "-NTTB"
# For all methods where coverage can be < 100%, display the %age of missing coverage
if (data_resistance['Method'][res] == 'PARTIALX') or \
(data_resistance['Method'][res] == 'BLASTX') or \
(data_resistance['Method'][res] == 'PARTIAL_CONTIG_ENDX') or \
("end_of_contig" in gene) or \
(data_resistance['Method'][res] == 'INTERNAL_STOP') :
gene += "-"+str(round(100-float(data_resistance['% Coverage of reference sequence'][res])))+"%"
(data_resistance['Method'][res] == 'CTRL_CONTIG_END') or \
(data_resistance['Method'][res] == 'INTERNAL_STOP') :
missing_coverage = round(100-float(data_resistance['% Coverage of reference sequence'][res]),1)
if (100 - missing_coverage) < 100 :
gene = f"{gene}-{missing_coverage}%"
print(gene)
strain = data_resistance['Name'][res]
family = data_resistance['Class'][res]
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment