# Select Git revision
# RunSlurmExample.sh
# parse_correlation_results.py 6.49 KiB
import re
import pandas as pd
import glob
import numpy as np
import sys
# Positional command-line arguments: an ancestry label and a date string.
# Both are used further down to build the names of the output files.
ancestry, current_date = sys.argv[1], sys.argv[2]
print("Parsing_correlation")

# Logs named "<x>-_-<y>.log" in the working directory are the pairwise logs;
# every other "*.log" file is treated as a single-trait log.
file_pairs = set(glob.glob("*-_-*.log"))
file_h2 = set(glob.glob("*.log")).difference(file_pairs)
def get_trait(fi):
    """Extract the trait identifier from a log file name.

    The identifier is the leftmost substring of the form ``<A>_<B>`` that is
    immediately followed by the literal suffix ``.log``, where ``<A>`` and
    ``<B>`` are made of upper-case letters, digits and hyphens.

    Args:
        fi: File name (or path) of a log file.

    Returns:
        A one-element list containing the trait identifier. The list wrapper
        is kept because the caller flattens the results of this function.

    Raises:
        ValueError: If ``fi`` contains no such identifier.
    """
    # The dot is escaped so that only a real ".log" suffix is accepted; an
    # unescaped "." would let any character stand in front of "log".
    found = re.search(r'([-0-9A-Z]+_[-0-9A-Z-]+)\.log', fi)
    if found is None:
        raise ValueError(
            "Cannot extract a trait identifier from log file name: {0!r}".format(fi)
        )
    return [found.group(1)]
# Debug output: the pairwise and the single-trait log files that were found.
print(file_pairs)
print(file_h2)

# get_trait returns a one-element list per file: flatten and de-duplicate.
# The result is sorted so that the row/column order of every output matrix is
# the same on every run; the iteration order of a set of strings can differ
# from one interpreter invocation to the next.
traits = sorted({trait for fi in file_h2 for trait in get_trait(fi)})

# Row and column labels: each trait identifier prefixed with "z_".
traits_ind = 'z_' + pd.Series(traits)

### Create matrices:
# Seven independent trait-by-trait tables, labelled with traits_ind on both
# axes and initially empty; cells are filled in by the parsing loop below.
Covariance_matrix_H0 = pd.DataFrame(index=traits_ind, columns=traits_ind)
Sd_matrix_H0 = pd.DataFrame(index=traits_ind, columns=traits_ind)
Covariance_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
Sd_cov_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
Correlation_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
Sd_cor_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
Pval_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
def _read_lines(path):
    """Return every line of the text file *path*; the file is closed on exit."""
    with open(path, "r") as handle:
        return handle.readlines()


def _lines_with_prefix(lines, prefix):
    """Return the entries of *lines* that start with the literal *prefix*."""
    return [line for line in lines if line.startswith(prefix)]


def _after_colon(line):
    """Split on single spaces the text between the first and second ':' of *line*."""
    return line.split(":")[1].split(" ")


def _estimate_and_sd(tokens):
    """Parse tokens[1] as the estimate and tokens[2] as its parenthesised value."""
    return float(tokens[1]), float(tokens[2].strip("()\n"))


def _set_symmetric(matrix, label_a, label_b, value):
    """Write *value* into both (a, b) and (b, a) cells of *matrix*."""
    matrix.loc[label_a, label_b] = value
    matrix.loc[label_b, label_a] = value


# Visit every unordered pair of traits once and copy the figures found in the
# pairwise log and in the two single-trait logs into the result matrices.
for i1, t1 in enumerate(traits):
    for t2 in traits[(i1+1):]:
        print(t1, t2)

        # Single-trait logs; a missing file raises FileNotFoundError here.
        L_t1 = _read_lines("{0}.log".format(t1))
        L_t2 = _read_lines("{0}.log".format(t2))

        # The pairwise log may be named with the two traits in either order.
        flip = False
        cov_fi = "{0}-_-{1}.log".format(t1, t2)
        try:
            L = _read_lines(cov_fi)
        except FileNotFoundError:
            cov_fi = "{0}-_-{1}.log".format(t2, t1)
            try:
                L = _read_lines(cov_fi)
            except FileNotFoundError:
                # No log for this pair: its cells stay empty.
                print("Not found")
                print(t1, t2)
                continue
            flip = True

        print("PARSING")
        print(cov_fi)

        # Retrieve the informative lines.
        L_intercept = _lines_with_prefix(L, "Intercept:")
        L_gencov = _lines_with_prefix(L, "Total Observed scale gencov")
        L_gencor = _lines_with_prefix(L, "Genetic Correlation")
        L_P = _lines_with_prefix(L, "P: ")
        L_h2_t1 = _lines_with_prefix(L_t1, "Total Observed scale h2")
        L_h2_t2 = _lines_with_prefix(L_t2, "Total Observed scale h2")
        L_inter_t1 = _lines_with_prefix(L_t1, "Intercept:")
        L_inter_t2 = _lines_with_prefix(L_t2, "Intercept:")

        # Only logs with exactly three "Intercept:" lines are used; the third
        # one is read as the cross-trait intercept. Other logs are skipped.
        if len(L_intercept) != 3:
            continue

        t1_col = "z_" + t1
        t2_col = "z_" + t2
        if flip:
            print("IS FLIP")
            # Swap labels and per-trait lines together so that each label
            # stays paired with the lines of its own trait.
            t1_col, t2_col = t2_col, t1_col
            L_h2_t1, L_h2_t2 = L_h2_t2, L_h2_t1
            L_inter_t1, L_inter_t2 = L_inter_t2, L_inter_t1

        # Off-diagonal cells, written symmetrically.
        print(L_intercept)
        h0_cov, h0_sd = _estimate_and_sd(L_intercept[2].split(" "))
        _set_symmetric(Covariance_matrix_H0, t1_col, t2_col, h0_cov)
        _set_symmetric(Sd_matrix_H0, t1_col, t2_col, h0_sd)

        print(L_gencov)
        gencov, gencov_sd = _estimate_and_sd(_after_colon(L_gencov[0]))
        _set_symmetric(Covariance_matrix_genetic, t1_col, t2_col, gencov)
        _set_symmetric(Sd_cov_matrix_genetic, t1_col, t2_col, gencov_sd)

        Correlation_matrix_genetic.loc[t1_col, t1_col] = 1
        Correlation_matrix_genetic.loc[t2_col, t2_col] = 1

        # Index 1 is used: the second line starting with "Genetic Correlation"
        # is the one parsed as "<label>: <estimate> (<value>)".
        print(L_gencor)
        gencor, gencor_sd = _estimate_and_sd(_after_colon(L_gencor[1]))
        _set_symmetric(Correlation_matrix_genetic, t1_col, t2_col, gencor)

        print(L_P)
        pval = float(_after_colon(L_P[0])[1])
        _set_symmetric(Pval_matrix_genetic, t1_col, t2_col, pval)
        _set_symmetric(Sd_cor_matrix_genetic, t1_col, t2_col, gencor_sd)

        # Diagonal cells, taken from the single-trait logs.
        inter_t1, inter_t1_sd = _estimate_and_sd(L_inter_t1[0].split(" "))
        inter_t2, inter_t2_sd = _estimate_and_sd(L_inter_t2[0].split(" "))
        Covariance_matrix_H0.loc[t1_col, t1_col] = inter_t1
        Covariance_matrix_H0.loc[t2_col, t2_col] = inter_t2
        Sd_matrix_H0.loc[t1_col, t1_col] = inter_t1_sd
        Sd_matrix_H0.loc[t2_col, t2_col] = inter_t2_sd

        print(L_h2_t1)
        print(L_h2_t2)
        h2_t1, h2_t1_sd = _estimate_and_sd(_after_colon(L_h2_t1[0]))
        h2_t2, h2_t2_sd = _estimate_and_sd(_after_colon(L_h2_t2[0]))
        Covariance_matrix_genetic.loc[t1_col, t1_col] = h2_t1
        Covariance_matrix_genetic.loc[t2_col, t2_col] = h2_t2
        Sd_cov_matrix_genetic.loc[t1_col, t1_col] = h2_t1_sd
        Sd_cov_matrix_genetic.loc[t2_col, t2_col] = h2_t2_sd
# Write every matrix as a tab-separated file named
# "<prefix><ancestry>_<current_date>.csv", in a fixed order.
_outputs = (
    ("Covariance_matrix_genetic_", Covariance_matrix_genetic),
    ("Covariance_matrix_H0_", Covariance_matrix_H0),
    ("Correlation_matrix_genetic_", Correlation_matrix_genetic),
    ("Sd_cov_matrix_genetic_", Sd_cov_matrix_genetic),
    ("Sd_matrix_H0_", Sd_matrix_H0),
    ("Sd_cor_matrix_genetic_", Sd_cor_matrix_genetic),
    # The p-value file uses the "Pval_cor_" prefix, unlike its variable name.
    ("Pval_cor_matrix_genetic_", Pval_matrix_genetic),
)
for _prefix, _matrix in _outputs:
    _matrix.to_csv(_prefix + ancestry + "_" + current_date + ".csv", sep="\t")
print("Parsing_correlation")