Skip to content
Snippets Groups Projects
Select Git revision
  • f85245c23e5ac9f616b15b542a4d93b2d557ebb1
  • master default protected
  • patch-upgrade-scm
  • Develop-multiwell-analysis
  • Nadia
  • 0.3.3
  • 0.3.2
  • 0.3.1
8 results

segment-checkpoint.ipynb

Blame
  • parse_correlation_results.py 3.70 KiB
    import re
    import pandas as pd
    import glob
    import numpy as np
    
    # Announce the start of the run, then list every "*.log" file found in the
    # current working directory; the list is echoed for traceability.
    print("Parsing_correlation")
    log_pattern = "*.log"
    file_input = glob.glob(log_pattern)
    print(file_input)
    
    def get_trait(fi):
        """Extract the two trait names encoded in a pairwise log file name.

        The name is searched for ``<trait1>-_-<trait2>.log`` where each trait
        is ``<chars>_<chars>`` and ``<chars>`` is one or more upper-case
        letters, digits or dashes.

        Parameters
        ----------
        fi : str
            File name (or path) of the log file.

        Returns
        -------
        list of str
            ``[trait1, trait2]`` in the order they appear in the file name.

        Raises
        ------
        ValueError
            If ``fi`` does not contain the expected pattern.
        """
        # Raw string with an escaped dot so that only a literal ".log"
        # suffix is accepted (a bare "." would match any character).
        traits = re.search(
            r'([-0-9A-Z]+_[-0-9A-Z-]+)-_-([-0-9A-Z]+_[-0-9A-Z-]+)\.log', fi
        )
        if traits is None:
            raise ValueError(
                "Cannot extract trait names from {0!r}: expected "
                "'<TRAIT1>-_-<TRAIT2>.log'".format(fi)
            )
        return [traits.group(1), traits.group(2)]
    
    # Collect the trait pair encoded in each log file name, flatten the pairs
    # and drop duplicates. The result is sorted because a plain set iterates
    # strings in an order that depends on the interpreter's hash seed, which
    # would make the row/column order of the matrices differ between runs.
    trait_pairs = [get_trait(fi) for fi in file_input]
    traits = sorted({trait for trait_pair in trait_pairs for trait in trait_pair})
    print(traits)
    # Row/column labels of the matrices: each trait name prefixed with "z_".
    traits_ind = 'z_' + pd.Series(traits)
    
    ### Create matrices:
    # Three independent square frames labelled by the "z_"-prefixed trait names
    # on both axes; no data is supplied, the cells are filled in later.
    Covariance_matrix_H0, Covariance_matrix_genetic, Correlation_matrix_genetic = (
        pd.DataFrame(index=traits_ind, columns=traits_ind) for _ in range(3)
    )
    
    # Fill the three matrices from the pairwise log files.
    #
    # For each unordered pair of traits the log may be named either
    # "<t1>-_-<t2>.log" or "<t2>-_-<t1>.log". `flip` records that the second
    # naming was the one found, in which case the per-trait lines of the file
    # are taken in (t2, t1) order instead of (t1, t2).
    # NOTE(review): the matched line prefixes look like LDSC genetic
    # correlation output -- confirm against the tool that produces the logs.
    for i1, t1 in enumerate(traits):
        for t2 in traits[(i1 + 1):]:
            print(t1, t2)

            L = None
            flip = False
            for first, second, flipped in ((t1, t2, False), (t2, t1, True)):
                candidate = "{0}-_-{1}.log".format(first, second)
                try:
                    # The context manager closes the handle as soon as the
                    # lines have been read.
                    with open(candidate, "r") as fi:
                        L = fi.readlines()
                except FileNotFoundError:
                    continue
                cov_fi = candidate
                flip = flipped
                break

            if L is None:
                print("Not found")
                print(t1, t2)
                continue

            print("PARSING")
            print(cov_fi)

            # Keep only the lines that start with the fields of interest.
            L_intercept = [x for x in L if x.startswith("Intercept:")]
            L_gencov = [x for x in L if x.startswith('Total Observed scale gencov')]
            L_gencor = [x for x in L if x.startswith('Genetic Correlation')]
            L_h2 = [x for x in L if x.startswith('Total Observed scale h2')]

            t1_col = "z_" + t1
            t2_col = "z_" + t2

            # Exactly three "Intercept:" lines are required: the first two are
            # used for the two traits and the third for the pair. Any other
            # count leaves the cells of this pair untouched.
            if len(L_intercept) != 3:
                print("Skipping {0}: expected 3 'Intercept:' lines, found {1}".format(
                    cov_fi, len(L_intercept)))
                continue

            # Off-diagonal cells (symmetric): cross-trait intercept.
            cross_intercept = float(L_intercept[2].split(" ")[1])
            Covariance_matrix_H0.loc[t1_col, t2_col] = cross_intercept
            Covariance_matrix_H0.loc[t2_col, t1_col] = cross_intercept

            # Off-diagonal cells (symmetric): genetic covariance.
            gencov = float(L_gencov[0].split(":")[1].split(" ")[1])
            Covariance_matrix_genetic.loc[t1_col, t2_col] = gencov
            Covariance_matrix_genetic.loc[t2_col, t1_col] = gencov

            Correlation_matrix_genetic.loc[t1_col, t1_col] = 1
            Correlation_matrix_genetic.loc[t2_col, t2_col] = 1
            print(L_gencor)
            # The second matching line carries the value; presumably the first
            # one is a section header -- confirm against a sample log.
            gencor = float(L_gencor[1].split(":")[1].split(" ")[1])
            Correlation_matrix_genetic.loc[t1_col, t2_col] = gencor
            Correlation_matrix_genetic.loc[t2_col, t1_col] = gencor

            # Diagonal cells: per-trait intercept and h2. When the file name
            # has the traits swapped, the first per-trait line belongs to t2.
            i_t1, i_t2 = (1, 0) if flip else (0, 1)
            Covariance_matrix_H0.loc[t1_col, t1_col] = float(L_intercept[i_t1].split(" ")[1])
            Covariance_matrix_H0.loc[t2_col, t2_col] = float(L_intercept[i_t2].split(" ")[1])
            Covariance_matrix_genetic.loc[t1_col, t1_col] = float(L_h2[i_t1].split(":")[1].split(" ")[1])
            Covariance_matrix_genetic.loc[t2_col, t2_col] = float(L_h2[i_t2].split(":")[1].split(" ")[1])
    
    # Write the two covariance matrices as tab-separated files (despite the
    # ".csv" extension) in the current working directory.
    # NOTE(review): Correlation_matrix_genetic is filled by the loop above but
    # is not written here -- confirm whether that is intentional.
    for out_name, matrix in (
        ("Covariance_matrix_genetic.csv", Covariance_matrix_genetic),
        ("Covariance_matrix_H0.csv", Covariance_matrix_H0),
    ):
        matrix.to_csv(out_name, sep="\t")

    print("Parsing_correlation")