making doc and code coherent for beta_or_Z column

57ee2dfb · Hanna JULIENNE · a27a77c6 · 57ee2dfb · 57ee2dfb
Commit 57ee2dfb authored 6 months ago by Hanna JULIENNE
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -112,7 +112,7 @@ Input
 * n: name of the column storing the sample size per variant (optional, will be inferred from the MAF, genetic effect and standard deviation if absent)
 * ncas: For binary traits, name of the column storing the number of cases per variant (optional)
 * ncont: For binary traits, name of the column storing the number of controls per variant (optional)
-* beta_or_z: name of the column storing the genetic effect (beta) in the gwas file. This column will be used only to retrieve the sign of the genetic effect with respect to the reference and effect allele.
+* beta_or_Z: name of the column storing the genetic effect (beta) in the gwas file. This column will be used only to retrieve the sign of the genetic effect with respect to the reference and effect allele.
 * OR: For binary traits, odds ratio when available. Not to be confused with the genetic effect size or 'beta'.
 * index-type: specifies the type of index
 * imputation_quality: (Optional) column containing individual-based imputation quality. Will be used to filter low quality imputation data from GWASs if the option --imputation-quality-treshold is used
@@ -127,7 +127,7 @@ Some fields are optional like the imputation_quality. If not used they can be fi
 .. csv-table:: GWAS information table
  :header-rows: 1

-  "filename","Consortium","Outcome","FullName","Type","Nsample","Ncase","Ncontrol","Nsnp","snpid", "POS", "a1","a2","freq","pval","n","beta_or_z","OR","se","code","imp","ncas","ncont","imputation_quality","index_type"
+  "filename","Consortium","Outcome","FullName","Type","Nsample","Ncase","Ncontrol","Nsnp","snpid", "POS", "a1","a2","freq","pval","n","beta_or_Z","OR","se","code","imp","ncas","ncont","imputation_quality","index_type"
  "GIANT_HEIGHT_Wood_et_al.txt","GIANT","HEIGHT","Height","Anthropometry",253288,	na,	na, 2550858,	"MarkerName",	"position","Allele1", "Allele2", "Freq.Allele1.HapMapCEU","p","N","b",na,"SE",na,na,na,na, "imputationInfo","rs-number"



--- a/jass_preprocessing/map_gwas.py
+++ b/jass_preprocessing/map_gwas.py
@@ -85,7 +85,6 @@ def map_columns_position(gwas_internal_link,  column_dict):
    Return:
        pandas Series with column position and column names as index
    """
-    print(gwas_internal_link)
    gwas_file = gwas_internal_link.split('/')[-1]
    # Our standard labels:
    reference_label = column_dict.index.tolist()
@@ -93,6 +92,7 @@ def map_columns_position(gwas_internal_link,  column_dict):
    # labels in the GWAS files
    target_lab = pd.Index(column_dict.values.tolist())
    is_gzipped = re.search(r".gz$", gwas_internal_link)
+
    if is_gzipped:
        f = gzip.open(gwas_internal_link)
        line = f.readline()
@@ -103,6 +103,7 @@ def map_columns_position(gwas_internal_link,  column_dict):
    count_line = 0

    header = pd.Index(line.split())
+
    def get_position(I,x):
        try:
            position_in_header = I.get_loc(x)
@@ -117,7 +118,7 @@ def map_columns_position(gwas_internal_link,  column_dict):
    mapgw = pd.Series(label_position, index=reference_label)
    mapgw = mapgw.loc[~mapgw.isna()].astype(int)
    mapgw.sort_values(inplace=True)
-
+    
    f.close()
    return mapgw