Skip to content
Snippets Groups Projects
Commit 95ce8aaa authored by mrethore
Browse files

Merge branch 'multi_file_contig_end_detection' into 'main'

Multi_file_contig_end_detection

See merge request !9
parents 7c9b33d4 010e1072
No related branches found
No related tags found
1 merge request: !9 Multi_file_contig_end_detection
......@@ -338,10 +338,11 @@ if __name__ == "__main__":
dict_results = {}
data_resistance = pd.DataFrame()
for genome in args.assemblies :
basename = os.path.basename(genome)
strain = os.path.splitext(basename)[0]
fasta = get_path +'/'+genome
fasta = f"{get_path}/{genome}"
dict_genome = get_species_results(fasta, args.path + '/data/species', str(args.threads))
if args.mlst :
......@@ -366,6 +367,7 @@ if __name__ == "__main__":
' --translation_table 11 --plus --quiet ')
if is_non_zero_file(args.outdir +'/' +strain + ".prot.fa"):
data = pd.read_csv(args.outdir +'/' + strain + ".blast.out",sep="\t", dtype='str')
data['File'] = genome
data_resistance = pd.concat([data_resistance, data], axis = 0, ignore_index=True)
dict_genome.update({"GENOMIC_CONTEXT" : get_genomic_context (args.outdir, data)})
else :
......@@ -387,7 +389,7 @@ if __name__ == "__main__":
table_results = table_results.T
if len(data_resistance.index) != 0 :
table_resistance = armfinder_to_table(data_resistance, fasta)
table_resistance = armfinder_to_table(data_resistance)
for family in table_resistance.columns:
table_resistance[family] = table_resistance[family].apply(lambda x : ";".join(sorted(x.split(';'))))
......
......@@ -91,7 +91,7 @@ def get_tox_results(infoTOX:tuple, contigs:str, args) -> dict:
#results.update(dict(zip(infoTOX[0], chr_st_detail)))
return results
def is_contig_edge(data_resistance:pd.DataFrame, file:str) -> bool:
def is_contig_edge(data_resistance:pd.DataFrame) -> bool:
len_seq_ref = int(data_resistance['Reference sequence length'])*3
pos_start = int(data_resistance['Start'])
......@@ -101,7 +101,7 @@ def is_contig_edge(data_resistance:pd.DataFrame, file:str) -> bool:
if len_seq_found < len_seq_ref :
missing_nucleotides = len_seq_ref - len_seq_found
over_start = (pos_start-missing_nucleotides) < 0
over_stop = (find_len_contig(file, data_resistance['Contig id']) - (pos_stop + missing_nucleotides)) < 0
over_stop = (find_len_contig(data_resistance['File'], data_resistance['Contig id']) - (pos_stop + missing_nucleotides)) < 0
if over_start or over_stop :
return True
......@@ -128,10 +128,10 @@ def find_len_contig(file:str, contig :str):
return length
else:
line = fichier.readline()
return None
return None #TODO to change
def armfinder_to_table(data_resistance:pd.DataFrame, fasta:str) -> pd.DataFrame:
def armfinder_to_table(data_resistance:pd.DataFrame) -> pd.DataFrame:
dico_Method = {'ALLELEX' : "",
'EXACTX' : "",
'POINTX' : "!",
......@@ -154,7 +154,7 @@ def armfinder_to_table(data_resistance:pd.DataFrame, fasta:str) -> pd.DataFrame
else :
gene = data_resistance['Gene symbol'][res] + "-NTTB" + dico_Method[data_resistance['Method'][res]]
if is_contig_edge(data_resistance.iloc[res], fasta) : # Used to find certain cases of interruption due to a contig end that AMRfinder is unable to find.
if is_contig_edge(data_resistance.iloc[res]) : # Used to find certain cases of interruption due to a contig end that AMRfinder is unable to find.
gene = f"{data_resistance['Gene symbol'][res]}_end_of_contig"
if (data_resistance['Method'][res] == 'PARTIALX') or \
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment