Skip to content
Snippets Groups Projects
Select Git revision
  • f3355b10bf5f457d5fa9a345793e808e9c104490
  • master default protected
2 results

RunSlurmExample.sh

Blame
  • parse_correlation_results.py 6.49 KiB
    import re
    import pandas as pd
    import glob
    import numpy as np
    import sys
    
    # Positional command-line arguments: both are later embedded in the names
    # of the CSV files this script writes.
    ancestry, current_date = sys.argv[1], sys.argv[2]

    print("Parsing_correlation")

    # Pairwise logs are named "<trait1>-_-<trait2>.log"; any other ".log" file
    # in the working directory is treated as a single-trait log.
    file_pairs = set(glob.glob("*-_-*.log"))
    file_h2 = set(glob.glob("*.log")).difference(file_pairs)
    
    def get_trait(fi):
        """Extract the trait identifier from a log file name.

        The identifier is the leftmost substring made of two runs of
        uppercase letters, digits or hyphens joined by a single underscore
        and immediately followed by the literal suffix ``.log``.

        Parameters:
            fi: file name (or path) of a log file.

        Returns:
            A one-element list containing the trait identifier.

        Raises:
            ValueError: if ``fi`` contains no such identifier.
        """
        # Raw string with an escaped dot: the suffix must be a real ".log",
        # not "any character followed by log".
        match = re.search(r'([-0-9A-Z]+_[-0-9A-Z-]+)\.log', fi)
        if match is None:
            raise ValueError(
                "Cannot extract a trait name from log file {0!r}".format(fi)
            )
        return [match.group(1)]
    
    print(file_pairs)
    print(file_h2)

    # get_trait returns a one-element list per single-trait log, so flatten
    # while collecting and de-duplicate with a set.
    # The result is sorted because iteration order of a set of strings can
    # differ between interpreter runs (string hash randomization); without
    # sorting, the row/column order of every output matrix was not
    # reproducible from one run to the next.
    traits = sorted({trait for fi in file_h2 for trait in get_trait(fi)})

    # Row/column labels used by all matrices: the trait name prefixed with "z_".
    traits_ind = 'z_' + pd.Series(traits)
    
    ### Create matrices:
    def _empty_trait_matrix():
        """Return a new, unfilled DataFrame labelled by traits_ind on both axes."""
        return pd.DataFrame(index=traits_ind, columns=traits_ind)

    # Filled from the "Intercept:" lines of the logs (estimate / bracketed value).
    Covariance_matrix_H0 = _empty_trait_matrix()
    Sd_matrix_H0 = _empty_trait_matrix()

    # Filled from the "Total Observed scale gencov" / "Total Observed scale h2" lines.
    Covariance_matrix_genetic = _empty_trait_matrix()
    Sd_cov_matrix_genetic = _empty_trait_matrix()

    # Filled from the "Genetic Correlation" and "P: " lines of the pairwise logs.
    Correlation_matrix_genetic = _empty_trait_matrix()
    Sd_cor_matrix_genetic = _empty_trait_matrix()
    Pval_matrix_genetic = _empty_trait_matrix()
    
    def _open_pair_log(t1, t2):
        """Open the pairwise log of two traits, trying both name orders.

        Returns a tuple ``(handle, path, flipped)``. ``flipped`` is True when
        the file was found as ``<t2>-_-<t1>.log`` rather than
        ``<t1>-_-<t2>.log``. When neither file exists the tuple is
        ``(None, None, False)``.
        """
        for first, second, flipped in ((t1, t2, False), (t2, t1, True)):
            path = "{0}-_-{1}.log".format(first, second)
            try:
                return open(path, "r"), path, flipped
            except FileNotFoundError:
                continue
        return None, None, False

    def _lines_starting_with(prefix, lines):
        """Return the entries of *lines* that begin with the literal *prefix*."""
        return [line for line in lines if line.startswith(prefix)]

    def _after_colon(line):
        """Return the text between the first and second ':' of *line*."""
        return line.split(":")[1]

    def _estimate(field):
        """Parse the second space-separated token of *field* as a float."""
        return float(field.split(" ")[1])

    def _std_error(field):
        """Parse the third space-separated token of *field*, minus brackets, as a float."""
        return float(field.split(" ")[2].strip("()\n"))

    def _set_symmetric(matrix, row, col, value):
        """Write *value* at both (row, col) and (col, row) of *matrix*."""
        matrix.loc[row, col] = value
        matrix.loc[col, row] = value

    # Visit every unordered pair of traits once and fill the matrices from the
    # pairwise log (off-diagonal cells) and the two single-trait logs
    # (diagonal cells). All files are opened through context managers so the
    # handles are closed as soon as their lines have been read.
    for i1, t1 in enumerate(traits):
        for t2 in traits[(i1+1):]:
            print(t1,t2)

            h2_path_t1 = "{0}.log".format(t1)
            h2_path_t2 = "{0}.log".format(t2)

            # The single-trait logs are opened before looking for the pairwise
            # log, so a missing single-trait log still raises FileNotFoundError
            # whether or not the pair was analysed.
            with open(h2_path_t1, "r") as h2_fi_t1, open(h2_path_t2, "r") as h2_fi_t2:
                fi, cov_fi, flip = _open_pair_log(t1, t2)
                if fi is None:
                    print("Not found")
                    print(t1, t2)
                    continue
                with fi:
                    print("PARSING")
                    print(cov_fi)
                    pair_lines = fi.readlines()
                    h2_lines_t1 = h2_fi_t1.readlines()
                    h2_lines_t2 = h2_fi_t2.readlines()

            # Retrieve the informative lines of each log.
            L_intercept = _lines_starting_with("Intercept:", pair_lines)
            L_gencov = _lines_starting_with('Total Observed scale gencov', pair_lines)
            L_gencor = _lines_starting_with('Genetic Correlation', pair_lines)
            L_P = _lines_starting_with('P: ', pair_lines)

            L_h2_t1 = _lines_starting_with('Total Observed scale h2', h2_lines_t1)
            L_h2_t2 = _lines_starting_with('Total Observed scale h2', h2_lines_t2)
            L_inter_t1 = _lines_starting_with("Intercept:", h2_lines_t1)
            L_inter_t2 = _lines_starting_with("Intercept:", h2_lines_t2)

            # Only pairwise logs with exactly three "Intercept:" lines are
            # used; the third one is read as the cross-trait value below.
            # Any other log is left out of the matrices.
            if len(L_intercept) != 3:
                continue

            t1_col = "z_" + t1
            t2_col = "z_" + t2

            if flip:
                # The log was written for (t2, t1): swap the labels together
                # with the matching single-trait lines so each label keeps
                # its own trait's values.
                print("IS FLIP")
                t1_col, t2_col = t2_col, t1_col
                L_h2_t1, L_h2_t2 = L_h2_t2, L_h2_t1
                L_inter_t1, L_inter_t2 = L_inter_t2, L_inter_t1

            print(L_intercept)
            cross_intercept = L_intercept[2]
            _set_symmetric(Covariance_matrix_H0, t1_col, t2_col, _estimate(cross_intercept))
            _set_symmetric(Sd_matrix_H0, t1_col, t2_col, _std_error(cross_intercept))

            print(L_gencov)
            gencov_field = _after_colon(L_gencov[0])
            _set_symmetric(Covariance_matrix_genetic, t1_col, t2_col, _estimate(gencov_field))
            _set_symmetric(Sd_cov_matrix_genetic, t1_col, t2_col, _std_error(gencov_field))

            Correlation_matrix_genetic.loc[t1_col, t1_col] = 1
            Correlation_matrix_genetic.loc[t2_col, t2_col] = 1

            print(L_gencor)
            # Index 1: the second line starting with "Genetic Correlation" is
            # the one parsed (presumably the first is a section header -
            # TODO confirm against a real log).
            gencor_field = _after_colon(L_gencor[1])
            _set_symmetric(Correlation_matrix_genetic, t1_col, t2_col, _estimate(gencor_field))

            print(L_P)
            _set_symmetric(Pval_matrix_genetic, t1_col, t2_col, _estimate(_after_colon(L_P[0])))

            _set_symmetric(Sd_cor_matrix_genetic, t1_col, t2_col, _std_error(gencor_field))

            # Diagonal cells come from each trait's own log.
            Covariance_matrix_H0.loc[t1_col, t1_col] = _estimate(L_inter_t1[0])
            Covariance_matrix_H0.loc[t2_col, t2_col] = _estimate(L_inter_t2[0])

            Sd_matrix_H0.loc[t1_col, t1_col] = _std_error(L_inter_t1[0])
            Sd_matrix_H0.loc[t2_col, t2_col] = _std_error(L_inter_t2[0])

            print(L_h2_t1)
            print(L_h2_t2)

            h2_field_t1 = _after_colon(L_h2_t1[0])
            h2_field_t2 = _after_colon(L_h2_t2[0])

            Covariance_matrix_genetic.loc[t1_col, t1_col] = _estimate(h2_field_t1)
            Covariance_matrix_genetic.loc[t2_col, t2_col] = _estimate(h2_field_t2)

            Sd_cov_matrix_genetic.loc[t1_col, t1_col] = _std_error(h2_field_t1)
            Sd_cov_matrix_genetic.loc[t2_col, t2_col] = _std_error(h2_field_t2)
    
    
    # Write each matrix as a tab-separated file named
    # "<prefix>_<ancestry>_<current_date>.csv".
    _output_suffix = "_" + ancestry + "_" + current_date + ".csv"
    _outputs = (
        ("Covariance_matrix_genetic", Covariance_matrix_genetic),
        ("Covariance_matrix_H0", Covariance_matrix_H0),
        ("Correlation_matrix_genetic", Correlation_matrix_genetic),
        ("Sd_cov_matrix_genetic", Sd_cov_matrix_genetic),
        ("Sd_matrix_H0", Sd_matrix_H0),
        ("Sd_cor_matrix_genetic", Sd_cor_matrix_genetic),
        # The p-value file keeps its "Pval_cor_" prefix, which differs from
        # the variable name.
        ("Pval_cor_matrix_genetic", Pval_matrix_genetic),
    )
    for _prefix, _matrix in _outputs:
        _matrix.to_csv(_prefix + _output_suffix, sep="\t")

    print("Parsing_correlation")