# hdf5_add_attributes.py — edit the METADATA table attached to an inittable HDF5 file.
import argparse
import csv
import json
import tempfile
from pandas import HDFStore, read_csv
from jass.models.inittable import get_inittable_meta
def set_metadata_from_file(*, hdf5_file, init_table_metadata_path):
    """Replace the METADATA table of an inittable HDF5 file.

    The metadata file is read as a tab-separated table (typically with the
    columns ``information`` and ``content``) and stored under the ``METADATA``
    key, overwriting whatever was there before.

    Parameters
    ----------
    hdf5_file : str
        Path to the HDF5 inittable to edit.
    init_table_metadata_path : str
        Path to the tab-separated metadata file to load.
    """
    metadata = read_csv(
        init_table_metadata_path,
        sep='\t',
        quotechar='"',
        index_col=False,
        memory_map=True,
    )
    # Context manager guarantees the store is closed even if put() raises;
    # the previous revision also leaked both handles through module-level
    # globals, which nothing else read — dropped.
    with HDFStore(hdf5_file) as store:
        store.put("METADATA", metadata, format="table", data_columns=True)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--init-table-path",
        default=None,
        help="path to the inittable to edit",
        required=True,
        dest="hdf5_file",
    )
    # NOTE: argparse cannot express "one option mutually exclusive with each
    # of several others" through add_mutually_exclusive_group() without poking
    # at the private _group_actions attribute (as the previous revision did),
    # so the conflict is enforced by an explicit post-parse check below.
    parser.add_argument(
        "--init-table-metadata-path",
        required=False,
        default=None,
        help="path to metadata file to attach to the inittable. "
             "Note that all previous metadata are purged.",
    )
    parser.add_argument(
        "--clean-metadata",
        action="store_true",
        default=False,
        help="Remove all information in metadata before adding new one",
    )
    # The four single-value metadata fields share the same option shape.
    metadata_fields = ("title", "description", "ancestry", "assembly")
    for field in metadata_fields:
        parser.add_argument(
            f"--{field}",
            help=f"{field} to append to the metadata",
            default=None,
            required=False,
        )
    args = parser.parse_args()

    # --init-table-metadata-path replaces the metadata wholesale, so it cannot
    # be combined with any option that edits the metadata incrementally.
    if args.init_table_metadata_path is not None and (
        args.clean_metadata
        or any(getattr(args, field) for field in metadata_fields)
    ):
        parser.error(
            "--init-table-metadata-path is mutually exclusive with "
            "--clean-metadata, --title, --description, --ancestry and --assembly"
        )

    if args.init_table_metadata_path:
        set_metadata_from_file(
            hdf5_file=args.hdf5_file,
            init_table_metadata_path=args.init_table_metadata_path,
        )
    else:
        if args.clean_metadata:
            metadata = {}
        else:
            # Load the existing two-column METADATA table into a dict;
            # a missing table is treated as empty metadata.  The store is
            # only opened when actually needed, and the `with` block closes
            # it even if get() raises something other than KeyError.
            try:
                with HDFStore(args.hdf5_file, mode='r') as store:
                    df = store.get('METADATA')
                metadata = {df.iloc[i, 0]: df.iloc[i, 1] for i in range(len(df))}
            except KeyError:
                metadata = {}
        for field in metadata_fields:
            value = getattr(args, field)
            if value:
                metadata[field] = value
        # Serialize the merged metadata to a temporary TSV and reuse the
        # file-based loader so both code paths store metadata identically.
        # A single text-mode temp file replaces the previous create-then-
        # reopen-by-name pattern (which double-opened the file and does not
        # work on Windows).
        with tempfile.NamedTemporaryFile(mode='w', suffix=".csv", newline='') as tmp:
            csvwriter = csv.writer(
                tmp, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL
            )
            csvwriter.writerow(["information", "content"])
            csvwriter.writerows(metadata.items())
            tmp.flush()  # ensure read_csv sees the full content
            set_metadata_from_file(
                hdf5_file=args.hdf5_file, init_table_metadata_path=tmp.name
            )
    print("Resulting metadata is:", json.dumps(get_inittable_meta(args.hdf5_file), indent=4))