Skip to content
Snippets Groups Projects
Commit 455f5263 authored by Bryan BRANCOTTE's avatar Bryan BRANCOTTE
Browse files

don't delete project, only remove large files.

parent a2af9635
No related branches found
No related tags found
1 merge request: !58 "run computation when opening the project, not when creating it."
......@@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
import os
import shutil
import sys
import argparse
from datetime import timedelta, datetime
......@@ -14,7 +13,7 @@ from jass.config import config
from jass.models.phenotype import get_available_phenotypes
from jass.models.inittable import create_inittable_file, add_gene_annotation
from jass.models.worktable import create_worktable_file
from jass.models.project import get_projects_last_access
from jass.models.project import get_projects_last_access, load_project
from jass.models.plots import (
create_global_plot,
create_quadrant_plot,
......@@ -172,8 +171,8 @@ def w_clean_project_data(args):
for proj in get_projects_last_access():
print(f"Project {proj['project_id']} was last accessed on {proj['last_access']}, ", end='')
if proj['last_access'] + shift < datetime.now():
print("removing it")
shutil.rmtree(proj['path'])
print("removing its large files")
load_project(project_id=proj['project_id'], flag_as_visited=False).delete_large_files()
else:
print("keeping it")
......@@ -346,13 +345,13 @@ def get_parser():
parser_clean_pd = subparsers.add_parser(
"clean-project-data",
help="Remove old projects that haven't been accessed recently",
help="Remove old projects data that haven't been accessed recently",
)
parser_clean_pd.add_argument(
"--max-days-without-access",
type=int,
default=30,
help="A project is marked for deletion if the number of days elapsed since "
help="A project is marked for large file deletion if the number of days elapsed since "
"the last access is greater than the amount provided.",
)
parser_clean_pd.set_defaults(func=w_clean_project_data)
......
......@@ -38,6 +38,7 @@ from jass.models.worktable import (
get_worktable_phenolist,
create_worktable_file,
create_genome_full_csv,
create_genome_full_csv_lock_file,
)
from jass.config import config
......@@ -220,6 +221,27 @@ class Project(BaseModel, abc.ABC):
def create_worktable_file(self):
pass
def delete_large_files(self):
# Remove the files generated for this project (the worktable and, for delayed-CSV projects, the full csv)
# and update the saved project state; the project itself is not deleted here.
# NOTE(review): indentation was lost in this view, so the exact nesting of the steps below should be
# confirmed against the real file.
# delete the worktable file
project_hdf_path = self.get_worktable_path()
if os.path.exists(project_hdf_path):
os.remove(project_hdf_path)
# as a consequence, its creation will have to be redone, so set progress to 0 and remove its status.
self.progress = 0
try:
del self.status['worktable']
except KeyError:
# worktable not in status
pass
# persist the updated progress/status
self.save()
if self.delayed_gen_csv_file:
# Delete the full csv file if it exists, and re-set the lock file indicating that it should be generated
# the lock file is written in the same directory as the worktable path computed above
create_genome_full_csv_lock_file(project_hdf_path)
csv_path = self.get_csv_path()
if os.path.exists(csv_path):
os.remove(csv_path)
@call_with_tb('global_manhattan')
def create_global_manhattan_plot(self):
return create_global_plot(
......@@ -301,7 +323,7 @@ class LocalProject(Project):
)
def load_project(project_id):
def load_project(project_id, flag_as_visited: bool = True):
path = os.path.join(Project.get_folder_path_from_id(project_id), "meta.json")
with open(path, "r") as fp:
project = parse_raw_as(
......@@ -311,7 +333,8 @@ def load_project(project_id):
],
"".join(fp.readlines()),
)
flag_project_as_visited(project_id)
if flag_as_visited:
flag_project_as_visited(project_id)
return project
......
......@@ -394,14 +394,7 @@ def create_worktable_file(
os.remove(csv_file)
if delayed_gen_csv_file:
# setting a lock to generate the csv_file asynchronously
the_lock_path = os.path.join(
os.path.dirname(project_hdf_path), "the_lock.txt"
)
the_lock = "The lock is set on : workTable.csv is not yet available"
file_lock = open(the_lock_path, "w")
file_lock.write(the_lock)
file_lock.close()
create_genome_full_csv_lock_file(project_hdf_path)
# subset of phenotypes that have been selected
phenolist = read_hdf(init_file_path, "PhenoList")
......@@ -1023,7 +1016,7 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35)
"""
# path of the lock that indicates that the csv file is not available
the_lock_path = os.path.join(os.path.dirname(project_hdf_path), "the_lock.txt")
the_lock_path = get_genome_full_csv_lock_path(project_hdf_path)
if os.path.isfile(the_lock_path):
# The lock is set on
if os.path.isfile(csv_file):
......@@ -1056,3 +1049,17 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35)
The_file_is_available = False
return The_file_is_available
def get_genome_full_csv_lock_path(project_hdf_path):
    """Return the path of the lock file associated with a project worktable.

    The lock file is named ``the_lock.txt`` and is located in the same
    directory as ``project_hdf_path``.
    """
    project_dir = os.path.dirname(project_hdf_path)
    return os.path.join(project_dir, "the_lock.txt")
def create_genome_full_csv_lock_file(project_hdf_path):
    """Write the lock file signalling that the full csv is not yet available.

    The lock is created (or overwritten) at the path returned by
    ``get_genome_full_csv_lock_path(project_hdf_path)``, i.e. next to the
    project's worktable file.
    """
    # setting a lock to generate the csv_file asynchronously
    the_lock_path = get_genome_full_csv_lock_path(project_hdf_path)
    the_lock = "The lock is set on : workTable.csv is not yet available"
    # The context manager closes the handle even if the write raises,
    # which the previous manual open/write/close sequence did not guarantee.
    with open(the_lock_path, "w") as file_lock:
        file_lock.write(the_lock)
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment