From 57ee2dfbd0575d4cfd4cf95ef85b5f2b98318ec6 Mon Sep 17 00:00:00 2001 From: hjulienn <hanna.julienne@pasteur.fr> Date: Thu, 21 Nov 2024 18:31:25 +0100 Subject: [PATCH] making doc and code coherent for beta_or_Z column --- doc/source/index.rst | 4 ++-- jass_preprocessing/map_gwas.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/index.rst b/doc/source/index.rst index e4ea722..cf4cf8b 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -112,7 +112,7 @@ Input * n: name of the column storing the sample size by variants (optional, will be inferred from the MAF, genetic effect and standard deviation if absent) * ncas: For binary traits, name of the column storing the number of cases by variants (optional) * ncont: For binary traits, name of the column storing the number of controls by variants (optional) -* beta_or_z: name of the column storing the genetic effect (beta) in the gwas file. This column will be used only to retrieve the sign of the genetic effect with respect to the reference and effect allele. +* beta_or_Z: name of the column storing the genetic effect (beta) in the gwas file. This column will be used only to retrieve the sign of the genetic effect with respect to the reference and effect allele. * OR : For binary traits, Odd ratio when available. Not to be confounded with the genetic effect size or 'beta'. * index-type: precise the type of index * imputation_quality: (Optional) column containing individual-based imputation quality. Will be used to filter low quality imputation data from GWASs if the option --imputation-quality-treshold is used @@ -127,7 +127,7 @@ Some fields are optional like the imputation_quality. If not used they can be fi .. csv-table:: GWAS information table :header-rows: 1 - "filename","Consortium","Outcome","FullName","Type","Nsample","Ncase","Ncontrol","Nsnp","snpid", "POS", "a1","a2","freq","pval","n","beta_or_z","OR","se","code","imp","ncas","ncont","imputation_quality","index_type" + "filename","Consortium","Outcome","FullName","Type","Nsample","Ncase","Ncontrol","Nsnp","snpid", "POS", "a1","a2","freq","pval","n","beta_or_Z","OR","se","code","imp","ncas","ncont","imputation_quality","index_type" "GIANT_HEIGHT_Wood_et_al.txt","GIANT","HEIGHT","Height","Anthropometry",253288, na, na, 2550858, "MarkerName", "position","Allele1", "Allele2", "Freq.Allele1.HapMapCEU","p","N","b",na,"SE",na,na,na,na, "imputationInfo","rs-number" diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py index c9739b6..a80da26 100644 --- a/jass_preprocessing/map_gwas.py +++ b/jass_preprocessing/map_gwas.py @@ -85,7 +85,6 @@ def map_columns_position(gwas_internal_link, column_dict): Return: pandas Series with column position and column names as index """ - print(gwas_internal_link) gwas_file = gwas_internal_link.split('/')[-1] #Our standart labels: reference_label = column_dict.index.tolist() @@ -93,6 +92,7 @@ def map_columns_position(gwas_internal_link, column_dict): # labels in the GWAS files target_lab = pd.Index(column_dict.values.tolist()) is_gzipped = re.search(r".gz$", gwas_internal_link) + if is_gzipped: f = gzip.open(gwas_internal_link) line = f.readline() @@ -103,6 +103,7 @@ def map_columns_position(gwas_internal_link, column_dict): count_line = 0 header = pd.Index(line.split()) + def get_position(I,x): try: position_in_header = I.get_loc(x) @@ -117,7 +118,7 @@ def map_columns_position(gwas_internal_link, column_dict): mapgw = pd.Series(label_position, index=reference_label) mapgw = mapgw.loc[~mapgw.isna()].astype(int) mapgw.sort_values(inplace=True) - + f.close() return mapgw -- GitLab