From 9197b5c4d52c409e1ede2daaa37bd5f8e455f98a Mon Sep 17 00:00:00 2001 From: Lance-Drane Date: Wed, 6 Nov 2024 10:35:36 -0500 Subject: [PATCH] jupyter - simplify add_analysis_data_file api, save analysis data files as a list of files Signed-off-by: Lance-Drane --- .../004-time-loop/mymodule/components.py | 22 ++++++------- ipsframework/_jupyter/api_v1.py | 6 ++-- ipsframework/_jupyter/initializer.py | 30 ++++++----------- ipsframework/services.py | 33 +++++++------------ 4 files changed, 35 insertions(+), 56 deletions(-) diff --git a/examples-proposed/004-time-loop/mymodule/components.py b/examples-proposed/004-time-loop/mymodule/components.py index 6acc348..468e1be 100644 --- a/examples-proposed/004-time-loop/mymodule/components.py +++ b/examples-proposed/004-time-loop/mymodule/components.py @@ -70,12 +70,6 @@ def step(self, timestamp=0.0): print(msg, file=stderr) self.services.send_portal_event(event_comment=msg) - data = { - 'y1': float, - 'y2': float, - 'y3': float, - } - data = { 'y1': math.sin(self.start + timestamp / 50 * math.pi), 'y2': math.sin(self.start + timestamp / 50 * math.pi) ** 2, @@ -85,9 +79,13 @@ def step(self, timestamp=0.0): state_file = self.services.get_config_param('STATE_FILES') with open(state_file, 'w') as f: json.dump(data, f) - self.services.update_state() + # copy the state file to a unique path for the monitor + data_loc = os.path.join(self.services.get_config_param('SIM_ROOT'), f'{timestamp if not REPLACE else 0.0}_{state_file}') + with open(data_loc, 'w') as f: + json.dump(data, f) + class Monitor(Component): """ @@ -105,16 +103,16 @@ def step(self, timestamp=0.0, **keywords): self.services.stage_state() state_file = self.services.get_config_param('STATE_FILES') - with open(state_file, 'rb') as f: + data_loc = os.path.join(self.services.get_config_param('SIM_ROOT'), f'{timestamp if not REPLACE else 0.0}_{state_file}') + with open(data_loc, 'rb') as f: data = f.read() # stage the state file in the JupyterHub directory and update the module file to handle it if REPLACE: - self.services.add_analysis_data_file(state_file, os.path.basename(state_file), replace=True) + self.services.add_analysis_data_files([data_loc], replace=True) else: - self.services.add_analysis_data_file( - state_file, - f'{timestamp}_{os.path.basename(state_file)}', + self.services.add_analysis_data_files( + [data_loc], timestamp=timestamp, ) diff --git a/ipsframework/_jupyter/api_v1.py b/ipsframework/_jupyter/api_v1.py index 8cb5418..8f4fb54 100644 --- a/ipsframework/_jupyter/api_v1.py +++ b/ipsframework/_jupyter/api_v1.py @@ -5,12 +5,12 @@ import os import tarfile from pathlib import Path -from typing import Dict, Iterable, Union +from typing import Dict, Iterable, List, Union THIS_DIR = Path(__file__).resolve().parent -def get_data_from_runid(runid: int) -> Dict[float, str]: +def get_data_from_runid(runid: int) -> Dict[float, List[str]]: """Load all data associated with a single runid into a dictionary. Params: @@ -25,7 +25,7 @@ def get_data_from_runid(runid: int) -> Dict[float, str]: return module.DATA_FILES -def get_data_from_runids(runids: Iterable[int]) -> Dict[int, Dict[float, str]]: +def get_data_from_runids(runids: Iterable[int]) -> Dict[int, Dict[float, List[str]]]: """Load all data associated with multiple runids into a common data structure. Params: diff --git a/ipsframework/_jupyter/initializer.py b/ipsframework/_jupyter/initializer.py index 59a2aeb..f96bdf3 100644 --- a/ipsframework/_jupyter/initializer.py +++ b/ipsframework/_jupyter/initializer.py @@ -17,7 +17,6 @@ import re import shutil from pathlib import Path -from typing import Optional import nbformat as nbf @@ -172,11 +171,11 @@ def initialize_jupyter_import_module_file(dest: str): f.write(_initial_data_file_code()) -def update_module_file_with_data_file(dest: str, data_file: str, replace: bool, timestamp: float = 0.0) -> Optional[str]: +def update_module_file_with_data_files(dest: str, data_files: list[str], replace: bool, timestamp: float = 0.0) -> None: """ Params: - dest: directory of the module file which will be modified - - data_file: file which will be added to the module + - data_files: files which will be added to the module - replace: if True, we can update - timestamp: key we associate the data file with @@ -187,31 +186,22 @@ def update_module_file_with_data_file(dest: str, data_file: str, replace: bool, with open(dest, 'r') as f: old_module_code = f.read() - replaced_file_name = None + new_listing = ''.join(f"f'{{{DIRECTORY_VARIABLE_NAME}}}{val}'," for val in data_files) + new_str = f'{timestamp}: [{new_listing}],\n' timestamp_regex = str(timestamp).replace('.', '\\.') - directory_str = '\{' + DIRECTORY_VARIABLE_NAME + '\}' - - search_pattern = f"{timestamp_regex}: f'{directory_str}(.*)'," + search_pattern = f'^{timestamp_regex}: [(.*)],\n' found_match = re.search(search_pattern, old_module_code) - if found_match: # timestamp already exists + if found_match: if replace: - replaced_file_name = found_match.group(1) - if replaced_file_name == data_file: - # in this case, we're not actually removing an obsolete file, so no need to write to the module file - # return None because we've already directly replaced the file - return None - new_module_code = re.sub(search_pattern, f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',", old_module_code) + new_module_code = re.sub(search_pattern, new_str, old_module_code, count=1) else: raise ValueError( - f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_file}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call." + f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_files}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call." ) - else: # timestamp does not exist, so add it - # search from right of string for the '}' character, should work assuming user does not modify the cell past the variable definition - new_module_code = replace_last(old_module_code, '}', f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',\n" + '}') + else: + new_module_code = replace_last(old_module_code, '}', new_str + '}') with open(dest, 'w') as f: f.write(new_module_code) - - return replaced_file_name diff --git a/ipsframework/services.py b/ipsframework/services.py index 64b6ce6..4b67284 100644 --- a/ipsframework/services.py +++ b/ipsframework/services.py @@ -31,7 +31,7 @@ initialize_jupyter_import_module_file, initialize_jupyter_notebook, initialize_jupyter_python_api, - update_module_file_with_data_file, + update_module_file_with_data_files, ) from .cca_es_spec import initialize_event_service from .ips_es_spec import eventManager @@ -1948,12 +1948,12 @@ def initialize_jupyter_notebook( self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data) self._send_monitor_event('IPS_PORTAL_REGISTER_NOTEBOOK', f'URL = {url}') - def add_analysis_data_file(self, current_data_file_path: str, new_data_file_name: str, timestamp: float = 0.0, replace: bool = False): + # TODO REMOVE new_data_file_name, make current_data_file_path string or list of strings + def add_analysis_data_files(self, current_data_file_paths: list[str], timestamp: float = 0.0, replace: bool = False): """Add data file to the module file referenced by the Jupyter Notebook. Params: - - current_data_file_path: location of the current data file we want to copy to the Jupyter directory. This will usually be a state file. - - new_data_file_name: name of the new data file (relative to Jupyterhub data directory, should be unique per run) + - current_data_file_paths: location of the current data file we want to copy to the Jupyter directory. This will usually be a state file. - timestamp: label to assign to the data (currently must be a floating point value) - replace: If True, replace the last data file added with the new data file. If False, simply append the new data file. (default: False) Note that if replace is not True but you attempt to overwrite it, a ValueError will be thrown. @@ -1963,24 +1963,15 @@ def add_analysis_data_file(self, current_data_file_path: str, new_data_file_name # TODO generic exception raise Exception('Unable to initialize base JupyterHub dir') - # make sure we're working with a file, and not a directory, regarding the data file name - new_data_file_name = os.path.basename(new_data_file_name) + destination_paths = [os.path.basename(old_fname) for old_fname in current_data_file_paths] + for source, destination in zip(current_data_file_paths, destination_paths): + full_destination = os.path.join(self._jupyterhub_dir, 'data', destination) + if not replace and os.path.exists(full_destination): + raise ValueError(f'Replacing existing filename {destination}, set replace to equal True in add_analysis_data_files if this was intended.') + # this may raise an OSError, it is the responsibility of the caller to handle it. + shutil.copyfile(source, full_destination) - jupyter_data_file = os.path.join(self._jupyterhub_dir, 'data', new_data_file_name) - if not replace and os.path.exists(jupyter_data_file): - raise ValueError(f'Replacing existing filename {jupyter_data_file}, set replace to equal True in add_analysis_data_file if this was intended.') - # this may raise an OSError, it is the responsibility of the caller to handle it. - shutil.copyfile(current_data_file_path, jupyter_data_file) - - # update the module file - replaced_file_name = update_module_file_with_data_file(self._jupyterhub_dir, new_data_file_name, replace, timestamp) - if replaced_file_name: - # now remove the state file from the filesystem - file_to_remove = os.path.join(self._jupyterhub_dir, 'data', replaced_file_name) - try: - os.remove(file_to_remove) - except FileNotFoundError: - pass + update_module_file_with_data_files(self._jupyterhub_dir, destination_paths, replace, timestamp) def publish(self, topicName: str, eventName: str, eventBody: Any): """