Skip to content
Snippets Groups Projects
Commit 21dc444c authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

Merge branch 'add_inittable_attribute' into 'master'

Add inittable attribute

See merge request statistical-genetics/jass!86
parents 940d3f58 48486443
No related branches found
No related tags found
No related merge requests found
...@@ -5,7 +5,7 @@ import os ...@@ -5,7 +5,7 @@ import os
import shutil import shutil
import sys import sys
import argparse import argparse
from datetime import timedelta, datetime from datetime import timedelta, datetime, date
from json import JSONDecodeError from json import JSONDecodeError
import uvicorn import uvicorn
...@@ -231,6 +231,7 @@ def w_create_inittable(args): ...@@ -231,6 +231,7 @@ def w_create_inittable(args):
regions_map_path = absolute_path_of_the_file(args.regions_map_path) regions_map_path = absolute_path_of_the_file(args.regions_map_path)
description_file_path = absolute_path_of_the_file(args.description_file_path) description_file_path = absolute_path_of_the_file(args.description_file_path)
init_table_metadata_path = absolute_path_of_the_file(args.init_table_metadata_path)
init_table_path = absolute_path_of_the_file(args.init_table_path, True) init_table_path = absolute_path_of_the_file(args.init_table_path, True)
create_inittable_file( create_inittable_file(
...@@ -240,6 +241,7 @@ def w_create_inittable(args): ...@@ -240,6 +241,7 @@ def w_create_inittable(args):
init_table_path, init_table_path,
init_covariance_path, init_covariance_path,
init_genetic_covariance_path, init_genetic_covariance_path,
init_table_metadata_path=init_table_metadata_path,
) )
...@@ -438,6 +440,13 @@ def get_parser(): ...@@ -438,6 +440,13 @@ def get_parser():
default=None, default=None,
help="path to the genetic covariance file to import. Used only for display on Jass web application", help="path to the genetic covariance file to import. Used only for display on Jass web application",
) )
parser_create_it.add_argument(
"--init-table-metadata-path",
required=False,
default=None,
help="path to metadata file to attach to the initial data file",
)
parser_create_it.set_defaults(func=w_create_inittable) parser_create_it.set_defaults(func=w_create_inittable)
# ------- create-worktable ------- # ------- create-worktable -------
......
...@@ -9,8 +9,6 @@ import re ...@@ -9,8 +9,6 @@ import re
import glob import glob
import logging import logging
from pandas import HDFStore, DataFrame, read_csv, concat, options, read_hdf from pandas import HDFStore, DataFrame, read_csv, concat, options, read_hdf
import pandas as pd
# create (or open) an hdf5 file and opens in append mode
import numpy as np import numpy as np
import tables import tables
import warnings import warnings
...@@ -29,17 +27,36 @@ class InitMeta(object): ...@@ -29,17 +27,36 @@ class InitMeta(object):
def get_inittable_meta(file_name): def get_inittable_meta(file_name):
init_store = HDFStore(file_name, mode='r') init_store = HDFStore(file_name, mode='r')
nb_snps = init_store.get_storer("SumStatTab").nrows nb_snps = init_store.get_storer("SumStatTab").nrows
name=f"Name of {file_name.split('/')[-1]}" metadata = dict(
desc=f"Description {file_name.split('/')[-1]}" title=f"Filename: {file_name.split('/')[-1]}",
description="No description",
ancestry="??",
assembly="????",
)
try:
df = init_store.get('METADATA')
for i in range(len(df)):
metadata[df.iloc[i, 0]] = df.iloc[i, 1]
except KeyError:
pass
init_store.close() init_store.close()
nb_phenotypes = read_hdf(file_name, "PhenoList").shape[0] nb_phenotypes = read_hdf(file_name, "PhenoList").shape[0]
return dict( return dict(
nb_snps=int(nb_snps), nb_snps=int(nb_snps),
nb_phenotypes=int(nb_phenotypes), nb_phenotypes=int(nb_phenotypes),
name=name, name=metadata['title'],
desc=desc, desc=metadata['description'],
**dict(
(k, metadata[k])
for k in set(metadata.keys()) if k not in {
'title',
'description',
}
),
) )
def get_gwasname(file_name): def get_gwasname(file_name):
return "_".join(os.path.basename(file_name).split("_")[0:3]) return "_".join(os.path.basename(file_name).split("_")[0:3])
...@@ -51,7 +68,6 @@ def check_if_SNP_unique(z_gwas_chrom): ...@@ -51,7 +68,6 @@ def check_if_SNP_unique(z_gwas_chrom):
) )
raise IOError(msg) raise IOError(msg)
def get_gwas_dict(input_data_path): def get_gwas_dict(input_data_path):
gwas_dict = {} gwas_dict = {}
# retrieve all files corresponding to glob patterns # retrieve all files corresponding to glob patterns
...@@ -219,6 +235,7 @@ def create_inittable_file( ...@@ -219,6 +235,7 @@ def create_inittable_file(
init_table_path: str, init_table_path: str,
init_covariance_path=None, init_covariance_path=None,
init_genetic_covariance_path=None, init_genetic_covariance_path=None,
init_table_metadata_path=None,
): ):
# Read region file # Read region file
regions = read_csv(regions_map_path, sep="\s+", memory_map=True) regions = read_csv(regions_map_path, sep="\s+", memory_map=True)
...@@ -266,6 +283,11 @@ def create_inittable_file( ...@@ -266,6 +283,11 @@ def create_inittable_file(
GEN_COV = genetic_covariance.loc[pheno_select, pheno_select] GEN_COV = genetic_covariance.loc[pheno_select, pheno_select]
hdf_init.put("GEN_COV", GEN_COV, format="table", data_columns=True) hdf_init.put("GEN_COV", GEN_COV, format="table", data_columns=True)
# Read metadata from file and store it
if init_table_metadata_path is not None:
metadata = read_csv(init_table_metadata_path, sep='\t', quotechar='"', index_col=False, memory_map=True)
hdf_init.put("METADATA", metadata, format="table", data_columns=True)
which_cols = [ which_cols = [
"Region", "Region",
"CHR", "CHR",
......
No preview for this file type
information content
title Small subset of Curated GWAS data
description "lorem ipsum"
ancestry UNK
assembly hg99
foo bar
\ No newline at end of file
File mode changed from 100755 to 100644
No preview for this file type
information content
title Mock dataset with disney
description "lorem ipsum"
ancestry DIS
assembly dSNY
No preview for this file type
information content
title Mock dataset with car
description "lorem ipsum"
ancestry CAR
assembly car1
...@@ -68,7 +68,15 @@ class TestDefaultController(JassWebClientTestCase): ...@@ -68,7 +68,15 @@ class TestDefaultController(JassWebClientTestCase):
respT1 = json.loads(response.content.decode("utf-8")) respT1 = json.loads(response.content.decode("utf-8"))
self.assertNotEqual(respT1, respMain) self.assertNotEqual(respT1, respMain)
self.assertSetEqual(set(respMain.keys()), {'nb_phenotypes', 'nb_snps', 'name', 'desc'}) for key in {
'nb_phenotypes',
'nb_snps',
'name',
'desc',
'ancestry',
'assembly',
}:
self.assertIn(key , respMain)
def test_get_tables(self): def test_get_tables(self):
response = self.testing_client.get("/api/tables") response = self.testing_client.get("/api/tables")
......
...@@ -23,7 +23,7 @@ for DATA_DIR in $DATA_DIRS; do ...@@ -23,7 +23,7 @@ for DATA_DIR in $DATA_DIRS; do
fi fi
echo "Creating inittable" echo "Creating inittable"
jass create-inittable --input-data-path "./${DATA_DIR}/z*.txt" --init-covariance-path "./${DATA_DIR}/COV.csv" --init-genetic-covariance-path ${GEN_COV} --regions-map-path "./${DATA_DIR}/regions.txt" --description-file-path "./${DATA_DIR}/summary.csv" --init-table-path "./${DATA_DIR}/initTable.hdf5" jass create-inittable --input-data-path "./${DATA_DIR}/z*.txt" --init-covariance-path "./${DATA_DIR}/COV.csv" --init-genetic-covariance-path ${GEN_COV} --regions-map-path "./${DATA_DIR}/regions.txt" --description-file-path "./${DATA_DIR}/summary.csv" --init-table-metadata-path "./${DATA_DIR}/metadata.txt" --init-table-path "./${DATA_DIR}/initTable.hdf5"
echo "Creating worktable" echo "Creating worktable"
jass create-project-data --init-table-path "${DATA_DIR}/initTable.hdf5" --phenotype ${TRAITS} --worktable-path ./${DATA_DIR}/worktable.hdf5 jass create-project-data --init-table-path "${DATA_DIR}/initTable.hdf5" --phenotype ${TRAITS} --worktable-path ./${DATA_DIR}/worktable.hdf5
......
...@@ -17,3 +17,5 @@ httpx ...@@ -17,3 +17,5 @@ httpx
uvicorn[standard] uvicorn[standard]
typing_extensions; python_version < '3.8' typing_extensions; python_version < '3.8'
requests requests
h5py
wheel
\ No newline at end of file
import argparse
import csv
import json
import os
import tempfile

from pandas import HDFStore, read_csv

from jass.models.inittable import get_inittable_meta
def set_metadata_from_file(*, hdf5_file, init_table_metadata_path):
    """Replace the METADATA table of an inittable with the content of a TSV file.

    Parameters
    ----------
    hdf5_file : str
        Path to the inittable HDF5 file to edit (opened in pandas' default
        append mode, so other tables are preserved).
    init_table_metadata_path : str
        Path to a tab-separated file (double-quote quoting honored) whose
        rows become the METADATA table; any previous METADATA is overwritten.
    """
    # The original declared ``global init_store, metadata`` for no visible
    # reason; both names are plain locals here.  try/finally guarantees the
    # store is closed even if reading or writing fails.
    store = HDFStore(hdf5_file)
    try:
        metadata = read_csv(
            init_table_metadata_path,
            sep='\t',
            quotechar='"',
            index_col=False,
            memory_map=True,
        )
        store.put("METADATA", metadata, format="table", data_columns=True)
    finally:
        store.close()
if __name__ == "__main__":
    # CLI to edit the METADATA table of a JASS inittable, either wholesale
    # (--init-table-metadata-path) or field by field (--title, --ancestry, ...).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--init-table-path",
        default=None,
        help="path to the inittable to edit",
        required=True,
        dest="hdf5_file",
    )
    parser.add_argument(
        "--init-table-metadata-path",
        required=False,
        default=None,
        help="path to metadata file to attach to the inittable. "
             "Note that all previous metadata are purged.",
    )
    parser.add_argument(
        "--clean-metadata",
        action="store_true",
        default=False,
        help="Remove all information in metadata before adding new one",
    )
    # The four incremental override options share the same shape.
    for field in ("title", "description", "ancestry", "assembly"):
        parser.add_argument(
            f"--{field}",
            help=f"{field} to append to the metadata",
            default=None,
            required=False,
        )
    args = parser.parse_args()

    # --init-table-metadata-path replaces the metadata wholesale, so it is
    # incompatible with every incremental option.  The original enforced this
    # by appending one action into several mutually exclusive groups through
    # the private ``_group_actions`` attribute, which argparse does not
    # support; an explicit post-parse check is portable and equivalent
    # (parser.error() prints usage and exits with status 2, like argparse).
    if args.init_table_metadata_path is not None:
        conflicting = [
            option
            for option, value in [
                ("--clean-metadata", args.clean_metadata),
                ("--title", args.title),
                ("--description", args.description),
                ("--ancestry", args.ancestry),
                ("--assembly", args.assembly),
            ]
            if value
        ]
        if conflicting:
            parser.error(
                "argument --init-table-metadata-path: not allowed with "
                + ", ".join(conflicting)
            )

    if args.init_table_metadata_path:
        set_metadata_from_file(
            hdf5_file=args.hdf5_file,
            init_table_metadata_path=args.init_table_metadata_path,
        )
    else:
        # Start from the existing metadata unless a full reset was requested;
        # the store is only opened when we actually need to read it.
        if args.clean_metadata:
            metadata = {}
        else:
            init_store = HDFStore(args.hdf5_file, mode='r')
            try:
                df = init_store.get('METADATA')
                metadata = {df.iloc[i, 0]: df.iloc[i, 1] for i in range(len(df))}
            except KeyError:
                # No METADATA table stored yet.
                metadata = {}
            finally:
                init_store.close()
        # Apply the individual overrides given on the command line.
        for key in ('title', 'description', 'ancestry', 'assembly'):
            value = getattr(args, key)
            if value:
                metadata[key] = value
        # Round-trip through a temporary TSV so both update paths go through
        # the same loader.  A TemporaryDirectory + explicit path is used
        # because re-opening a NamedTemporaryFile by name fails on Windows.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, "metadata.csv")
            with open(tmp_path, 'w', newline='') as csvfile:
                csvwriter = csv.writer(
                    csvfile,
                    delimiter='\t',
                    quotechar='"',
                    quoting=csv.QUOTE_MINIMAL,
                )
                csvwriter.writerow(["information", "content"])
                csvwriter.writerows(metadata.items())
            set_metadata_from_file(
                hdf5_file=args.hdf5_file, init_table_metadata_path=tmp_path
            )
    print("Resulting metadata is:", json.dumps(get_inittable_meta(args.hdf5_file), indent=4))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment