updated requirements

4018e364 · Hanna JULIENNE · 28fe2b23 · 4018e364 · 4018e364 · 4018e364
Commit 4018e364 authored 5 years ago by Hanna JULIENNE
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,7 +14,7 @@ pages:
    - yum install -y make
    - pip3 install sphinx
    - pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse
-    - pip3 install -r jass_preprocessing/requirements.txt
+    - pip3 install -r requirements.txt
    - cd doc
    - sphinx-apidoc -f -o ./source/_autosummary/ ../jass_preprocessing/
    - make html

--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -22,24 +22,20 @@ import argparse
 #| ImpG_output_Folder | main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |


-
 def launch_preprocessing(args):
    """
    Preprocessing GWAS dataset
    """
-    gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
-    print(gwas_map.head())
+    gwas_map = pd.read_csv(args.gwas_info, sep="\t")
+    gwas_map.set_index("filename", inplace=True)

    for gwas_filename in gwas_map.index:
-        print(gwas_filename)
-        print(gwas_map.columns)
-        tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
-                               gwas_map.loc[gwas_filename, 'outcome'])
+        tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'Consortium'],
+                               gwas_map.loc[gwas_filename, 'Outcome'])

        print('processing GWAS: {}'.format(tag))
        start = time.time()
        GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
-
        mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)

        gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)

--- a/jass_preprocessing/map_gwas.py
+++ b/jass_preprocessing/map_gwas.py
@@ -84,23 +84,22 @@ def map_columns_position(gwas_internal_link,  GWAS_labels):
    Return:
        pandas Series with column position and column names as index
    """
-    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na', index_col=0)

-    gwas_file = gwas_internal_link.split('/')[-1]
+    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
+
+    column_dict.set_index("filename", inplace=True)

+    gwas_file = gwas_internal_link.split('/')[-1]
    my_labels = column_dict.loc[gwas_file]

    #Our standard labels:
    reference_label = column_dict.columns.tolist()
    # labels in the GWAS files
    target_lab = pd.Index(my_labels.values.tolist())
-
    f = open(gwas_internal_link)
    count_line = 0
    line = f.readline()
-    print(line)
    header = pd.Index(line.split())
-
    def get_position(I,x):
        try:
            return I.get_loc(x)
@@ -108,7 +107,6 @@ def map_columns_position(gwas_internal_link,  GWAS_labels):
            return np.nan
    label_position = [get_position(header,i) for i in target_lab]

-
    mapgw = pd.Series(label_position, index=reference_label)
    mapgw = mapgw.loc[~mapgw.isna()].astype(int)
    mapgw.sort_values(inplace=True)
@@ -137,8 +135,7 @@ def read_gwas( gwas_internal_link, column_map):
                                index_col=0,
                                 header=0, na_values= ['', '#N/A', '#N/A', 'N/A',
                                                       '#NA', '-1.#IND', '-1.#QNAN',
-                                                 '-NaN',
-                                                 '-nan', '1.#IND', '1.#QNAN', 'N/A',
+                                                 '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
                                                 'NA', 'NULL', 'NaN',
                                                 'nan', 'na', '.'])


--- a/jass_preprocessing/requirements.txt
+++ b/jass_preprocessing/requirements.txt
-scipy
-pandas
-numpy
-seaborn
-matplotlib