import sys
from itertools import combinations

import pandas as pd

# Relax pandas' display limits so the tables printed by main() are shown
# without row/column truncation or shortened cell contents.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)


def _load_table(fname):
    """Read one headerless tab-separated file, indexed by its third column."""
    return pd.read_table(fname, sep="\t", header=None, index_col=2)


def main(argv=None):
    """Report index values shared between every pair of input tables.

    Each input file is read as a headerless tab-separated table whose third
    column (position 2) becomes the index; going by the report message,
    that column presumably holds md5sums.  For every pair of files with at
    least one index value in common, this prints the number of shared
    values, the shared index itself, and the inner join of the two tables.
    Pairs with nothing in common print nothing.

    Args:
        argv: Paths of the tables to compare.  Defaults to ``sys.argv[1:]``,
            so calling ``main()`` with no argument behaves as a script entry
            point.  A path given more than once is only loaded once.

    Returns:
        0, also when there were fewer than two tables to compare (a note is
        then written to stderr).
    """
    if argv is None:
        argv = sys.argv[1:]
    tabs = {fname: _load_table(fname) for fname in argv}
    if len(tabs) < 2:
        print(
            "Need at least two distinct tables to compare; nothing to do.",
            file=sys.stderr,
        )
        return 0
    for (fname1, tab1), (fname2, tab2) in combinations(tabs.items(), 2):
        common_idx = tab1.index.intersection(tab2.index)
        # A pandas Index has no unambiguous truth value, hence `.empty`.
        if common_idx.empty:
            continue
        print(f"{len(common_idx)} common md5sums between {fname1} and {fname2}.")
        print(common_idx)
        # The file names serve as suffixes so that columns with the same
        # label in both tables can be told apart in the joined output.
        common = tab1.join(
            tab2, how="inner", lsuffix=f"_{fname1}", rsuffix=f"_{fname2}")
        print(common)
    return 0