From 10fdccc3f9879405000ce36c504194d8a9848f90 Mon Sep 17 00:00:00 2001 From: hjulienn <hanna.julienne@pasteur.fr> Date: Mon, 18 Sep 2023 13:15:31 +0200 Subject: [PATCH] remove duplicated index in imputed SNPs --- raiss/windows.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/raiss/windows.py b/raiss/windows.py index e583bb4..8452055 100644 --- a/raiss/windows.py +++ b/raiss/windows.py @@ -32,8 +32,15 @@ def realigned_zfiles_on_panel(ref_panel, zscore): - ref_panel (pd.dataframe) : snp of reference on the imputed chromosome - zscore (pd.dataframe): """ - zscore.drop_duplicates(keep='first', inplace=True) - ref_panel.drop_duplicates(keep='first', inplace=True) + + # Remove SNPs with duplicated index + print("Duplicated SNPs to be removed") + print(ref_panel[ref_panel.index.duplicated(keep='first')]) + print(zscore[zscore.index.duplicated(keep='first')]) + + zscore = zscore[~zscore.index.duplicated(keep='first')] + ref_panel = ref_panel[~ref_panel.index.duplicated(keep='first')] + inter_id = zscore.index.intersection(ref_panel.index).drop_duplicates(keep='first') zscore = zscore.loc[inter_id] @@ -41,7 +48,6 @@ def realigned_zfiles_on_panel(ref_panel, zscore): zscore.sort_index(inplace=True) sub_ref_panel.sort_index(inplace=True) - allele_inverted = (sub_ref_panel.Ref_all != zscore.A0) zscore.loc[allele_inverted, "A0"] = sub_ref_panel.loc[allele_inverted].Ref_all -- GitLab