Skip to content
Snippets Groups Projects
Commit fe5f676a authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

command line tool to compute gain

parent 18f81184
No related branches found
No related tags found
2 merge requests!97Newmain gain,!92Predict gain
0
k_coef_mv 0.07740334977380119
log10_avg_distance_cor_coef_mv -0.6999110771883902
log10_mean_gencov_coef_mv 0.746794584985343
avg_Neff_coef_mv 0.07289261717080556
avg_h2_mixer_coef_mv -0.516496395500929
avg_perc_h2_diff_region_coef_mv 0.15727591593399
This diff is collapsed.
minimum_value maximum_value
k 2.0 12.0
log10_avg_distance_cor -4.675617219570908 0.20864138105896807
log10_mean_gencov -4.4093921991254446 -0.46117501106209624
avg_Neff 6730.5 697828.0
avg_h2_mixer 0.014033707225812 0.4361454950334251
avg_perc_h2_diff_region 0.0906544694784672 0.9831222899777692
......@@ -21,6 +21,8 @@ from jass.models.plots import (
create_local_plot,
create_qq_plot,
)
from jass.models.gain import compute_gain
from pandas import read_hdf
def absolute_path_of_the_file(fileName, output_file=False):
......@@ -279,6 +281,14 @@ def w_gene_annotation(args):
gene_data_path, initTable_path, df_gene_csv_path, df_exon_csv_path
)
def w_compute_gain(args):
    """Command-line handler that runs the gain prediction.

    Reads ``args.combination_path`` (input) and ``args.gain_path`` (output),
    resolves both through ``absolute_path_of_the_file`` (the output path is
    resolved with the second argument set to ``True``), and forwards the two
    resolved paths to ``compute_gain``.

    Args:
        args: Parsed command-line namespace exposing ``combination_path``
            and ``gain_path``.
    """
    source_path = absolute_path_of_the_file(args.combination_path)
    destination_path = absolute_path_of_the_file(args.gain_path, True)
    compute_gain(source_path, destination_path)
def get_parser():
parser = argparse.ArgumentParser(prog="jass")
......@@ -619,6 +629,22 @@ def get_parser():
help="Existing key are 'SumStatTab' : The results of the joint analysis by SNPs - 'PhenoList' : the meta data of analysed GWAS - 'COV' : The H0 covariance used to perform joint analysis - 'GENCOV' (If present in the initTable): The genetic covariance as computed by the LDscore. Uniquely for the worktable: 'Regions' : Results of the joint analysis summarised by LD regions (Notably Lead SNPs by regions) - 'summaryTable': a double entry table summarizing the number of significant regions by test (univariate vs joint test)",
)
parser_create_mp.set_defaults(func=w_extract_tsv)
# ------- compute predicted gain -------#
parser_create_mp = subparsers.add_parser(
"predict-gain", help="predict gain based on the genetic architecture of the set of multi-trait"
)
parser_create_mp.add_argument(
"--combination-path",
required=True,
help="path to the worktable file containing the data",
)
parser_create_mp.add_argument(
"--gain-path", required=True, help="path to save predicted gain"
)
parser_create_mp.set_defaults(func=w_compute_gain)
return parser
......
import pandas as pd
import numpy as np
# Per-feature scaling bounds, indexed by feature name, with the
# "minimum_value" and "maximum_value" columns that scale_feature reads.
# NOTE(review): "./data/..." is resolved against the current working
# directory, not against this module's location, so loading presumably fails
# when the tool is launched from another directory -- confirm where the data
# files are installed and consider anchoring the path to the package.
X_range = pd.read_csv("./data/range_feature_gain_prediction.tsv", sep="\t", index_col=0)
# Coefficients of the gain model; compute_gain uses the column named "0".
# NOTE(review): same working-directory-relative path caveat as above.
model_coefficients = pd.read_csv("./data/coef_mean_model.tsv", sep="\t", index_col=0)
# Min-max scale a feature using its observed minimum and maximum values
def scale_feature(X, feature_name, feature_range=None):
    """Min-max scale ``X`` with the bounds recorded for ``feature_name``.

    Computes ``(X - minimum) / (maximum - minimum)``. Values lying outside
    the recorded bounds are not clipped, so they map outside ``[0, 1]``.

    Args:
        X: Value(s) to rescale; anything supporting subtraction and division
            by a scalar (number, NumPy array, pandas Series).
        feature_name: Row label of the feature in the bounds table.
        feature_range: Optional table indexed by feature name with
            "minimum_value" and "maximum_value" columns. When omitted, the
            module-level ``X_range`` table is used, which keeps existing
            two-argument calls unchanged.

    Returns:
        ``X`` rescaled, with the same type/shape the arithmetic produces.

    Raises:
        KeyError: If ``feature_name`` or a bounds column is missing from the
            table.
    """
    # Fall back to the table loaded at import time unless the caller
    # supplies its own bounds.
    bounds = X_range if feature_range is None else feature_range
    lower = bounds.loc[feature_name, "minimum_value"]
    upper = bounds.loc[feature_name, "maximum_value"]
    return (X - lower) / (upper - lower)
def preprocess_feature(df_combinations):
    """Derive and rescale the model features, modifying the frame in place.

    Adds the columns "log10_mean_gencov" and "log10_avg_distance_cor"
    (base-10 logarithms of ``mean_gencov`` and ``avg_distance_cor``), then
    replaces each of the six model feature columns with its min-max scaled
    version obtained from ``scale_feature``. The unscaled values of those six
    columns are overwritten.

    Args:
        df_combinations: DataFrame holding at least the columns
            ``mean_gencov``, ``avg_distance_cor``, ``k``, ``avg_Neff``,
            ``avg_h2_mixer`` and ``avg_perc_h2_diff_region``.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Log transforms come first: the scaling loop below reads these columns.
    gencov_log = np.log10(df_combinations.mean_gencov)
    df_combinations["log10_mean_gencov"] = gencov_log
    distance_log = np.log10(df_combinations.avg_distance_cor)
    df_combinations["log10_avg_distance_cor"] = distance_log

    model_features = (
        "k",
        "log10_avg_distance_cor",
        "log10_mean_gencov",
        "avg_Neff",
        "avg_h2_mixer",
        "avg_perc_h2_diff_region",
    )
    for feature in model_features:
        unscaled = df_combinations[feature]
        df_combinations[feature] = scale_feature(unscaled, feature)
    return df_combinations
def compute_gain(path_combi, path_output):
    """Predict the gain of each row of a combination table and save it.

    Loads the table at ``path_combi``, preprocesses its features in place
    with ``preprocess_feature``, stores the dot product of the six scaled
    features with the model coefficients in a new "gain" column, and writes
    the rows sorted by decreasing gain to ``path_output`` as a tab-separated
    file.

    NOTE(review): the input is parsed with pandas' default comma separator
    whereas the output is tab-separated -- confirm the expected input format.
    NOTE(review): coefficients are matched to features by position, so the
    row order of the coefficient table must follow the feature order below.

    Args:
        path_combi: Path of the input table.
        path_output: Path of the tab-separated file to write.
    """
    model_features = [
        "k",
        "log10_avg_distance_cor",
        "log10_mean_gencov",
        "avg_Neff",
        "avg_h2_mixer",
        "avg_perc_h2_diff_region",
    ]

    df_combinations = pd.read_csv(path_combi)
    # The frame is modified in place, so the return value is not needed.
    preprocess_feature(df_combinations)

    scaled_features = df_combinations[model_features]
    coefficients = model_coefficients["0"].values
    df_combinations["gain"] = scaled_features.dot(coefficients)

    ranked = df_combinations.sort_values(by="gain", ascending=False)
    ranked.to_csv(path_output, sep="\t")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment