Select Git revision
segment-checkpoint.ipynb
parse_correlation_results.py 3.70 KiB
import re
import pandas as pd
import glob
import numpy as np
# Announce the step on stdout.
print("Parsing_correlation")
# Collect every "*.log" file matched relative to the current working directory.
file_input = list(glob.iglob("*.log"))
print(file_input)
def get_trait(fi):
    """Extract the two trait names encoded in a pairwise log file name.

    Parameters
    ----------
    fi : str
        File name (or path) containing ``<TRAIT1>-_-<TRAIT2>.log``, where each
        trait has the form ``<PART>_<PART>`` built from upper-case letters,
        digits and hyphens.

    Returns
    -------
    list of str
        ``[trait1, trait2]`` in the order they appear in the name.

    Raises
    ------
    ValueError
        If ``fi`` does not contain the expected pattern.
    """
    # Raw string with an escaped dot: only a literal ".log" suffix is accepted.
    traits = re.search(
        r'([-0-9A-Z]+_[-0-9A-Z-]+)-_-([-0-9A-Z]+_[-0-9A-Z-]+)\.log', fi
    )
    if traits is None:
        raise ValueError(
            "Cannot extract a trait pair from log name: {0!r}".format(fi)
        )
    return [traits.group(1), traits.group(2)]
# Parse every log name into its [trait1, trait2] pair.
trait_pairs = [get_trait(fi) for fi in file_input]
# Flatten the pairs and de-duplicate. Sorting makes the row/column order of the
# matrices reproducible between runs (plain set iteration order is not).
traits = sorted({trait for trait_pair in trait_pairs for trait in trait_pair})
print(traits)
# Matrix labels are the trait names prefixed with "z_". Built in plain Python so
# the result is also well-defined when no trait was found.
traits_ind = pd.Series(['z_' + trait for trait in traits], dtype=object)

### Create matrices:
# Square trait-by-trait tables, initially empty (all NaN); they are filled in
# pair by pair while the logs are parsed.
Covariance_matrix_H0 = pd.DataFrame(index=traits_ind, columns=traits_ind)
Covariance_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
Correlation_matrix_genetic = pd.DataFrame(index=traits_ind, columns=traits_ind)
def _read_pair_log(first, second):
    """Read the log of a trait pair, trying both file-name orderings.

    Returns ``(lines, path, flipped)``. ``flipped`` is True when the file was
    found as "<second>-_-<first>.log". ``lines`` and ``path`` are None when
    neither ordering exists.
    """
    for left, right, flipped in ((first, second, False), (second, first, True)):
        path = "{0}-_-{1}.log".format(left, right)
        try:
            # The context manager closes the handle as soon as the lines are read.
            with open(path, "r") as handle:
                return handle.readlines(), path, flipped
        except FileNotFoundError:
            continue
    return None, None, False


def _leading_value(line):
    """Return the first number after the colon of a "label: value ..." line."""
    return float(line.split(":", 1)[1].split()[0])


# Visit every unordered pair of traits once and copy the values found in its
# log into the three matrices.
# NOTE(review): the reviewed copy of this script had lost its indentation; the
# nesting below assumes every matrix assignment is guarded by the
# three-intercept check -- confirm against the original file.
for i1, t1 in enumerate(traits):
    for t2 in traits[(i1 + 1):]:
        print(t1, t2)
        L, cov_fi, flip = _read_pair_log(t1, t2)
        if L is None:
            print("Not found")
            print(t1, t2)
            continue

        print("PARSING")
        print(cov_fi)

        # Keep only the lines that carry the values of interest.
        L_intercept = [line for line in L if line.startswith("Intercept:")]
        L_gencov = [line for line in L if line.startswith('Total Observed scale gencov')]
        L_gencor = [line for line in L if line.startswith('Genetic Correlation')]
        L_h2 = [line for line in L if line.startswith('Total Observed scale h2')]

        # Only logs with exactly three intercept lines are used; the other
        # line groups are checked too so a truncated log is skipped instead of
        # raising IndexError.
        if len(L_intercept) != 3:
            continue
        if len(L_gencov) < 1 or len(L_gencor) < 2 or len(L_h2) < 2:
            print("Incomplete log, skipped")
            print(cov_fi)
            continue

        t1_col = "z_" + t1
        t2_col = "z_" + t2

        # Off-diagonal entries, mirrored to keep the matrices symmetric.
        # The third intercept line is the one stored for the pair.
        pair_intercept = _leading_value(L_intercept[2])
        Covariance_matrix_H0.loc[t1_col, t2_col] = pair_intercept
        Covariance_matrix_H0.loc[t2_col, t1_col] = pair_intercept

        pair_gencov = _leading_value(L_gencov[0])
        Covariance_matrix_genetic.loc[t1_col, t2_col] = pair_gencov
        Covariance_matrix_genetic.loc[t2_col, t1_col] = pair_gencov

        Correlation_matrix_genetic.loc[t1_col, t1_col] = 1
        Correlation_matrix_genetic.loc[t2_col, t2_col] = 1
        print(L_gencor)
        # The second matching line is the one parsed for the estimate.
        pair_gencor = _leading_value(L_gencor[1])
        Correlation_matrix_genetic.loc[t1_col, t2_col] = pair_gencor
        Correlation_matrix_genetic.loc[t2_col, t1_col] = pair_gencor

        # Diagonal entries: the first and second intercept / h2 lines are
        # assigned to the traits in file-name order, so swap them when the log
        # was found under the reversed name.
        idx_t1, idx_t2 = (1, 0) if flip else (0, 1)
        Covariance_matrix_H0.loc[t1_col, t1_col] = _leading_value(L_intercept[idx_t1])
        Covariance_matrix_H0.loc[t2_col, t2_col] = _leading_value(L_intercept[idx_t2])
        Covariance_matrix_genetic.loc[t1_col, t1_col] = _leading_value(L_h2[idx_t1])
        Covariance_matrix_genetic.loc[t2_col, t2_col] = _leading_value(L_h2[idx_t2])
# Write both covariance tables as tab-separated files in the working directory.
# NOTE(review): Correlation_matrix_genetic is not written anywhere in the code
# visible here -- confirm whether that is intended.
for matrix, out_name in (
    (Covariance_matrix_genetic, "Covariance_matrix_genetic.csv"),
    (Covariance_matrix_H0, "Covariance_matrix_H0.csv"),
):
    matrix.to_csv(out_name, sep="\t")
print("Parsing_correlation")