jupyter - simplify add_analysis_data_file api, save analysis data files as a list of files

Signed-off-by: Lance-Drane <[email protected]>
HPC-SimTools · Nov 6, 2024 · 9197b5c · 9197b5c
1 parent e07f5b3
commit 9197b5c
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 56 deletions.
diff --git a/examples-proposed/004-time-loop/mymodule/components.py b/examples-proposed/004-time-loop/mymodule/components.py
@@ -70,12 +70,6 @@ def step(self, timestamp=0.0):
         print(msg, file=stderr)
         self.services.send_portal_event(event_comment=msg)
 
-        data = {
-            'y1': float,
-            'y2': float,
-            'y3': float,
-        }
-
         data = {
             'y1': math.sin(self.start + timestamp / 50 * math.pi),
             'y2': math.sin(self.start + timestamp / 50 * math.pi) ** 2,
@@ -85,9 +79,13 @@ def step(self, timestamp=0.0):
         state_file = self.services.get_config_param('STATE_FILES')
         with open(state_file, 'w') as f:
             json.dump(data, f)
-
         self.services.update_state()
 
+        # copy the state file to a unique path for the monitor
+        data_loc = os.path.join(self.services.get_config_param('SIM_ROOT'), f'{timestamp if not REPLACE else 0.0}_{state_file}')
+        with open(data_loc, 'w') as f:
+            json.dump(data, f)
+
 
 class Monitor(Component):
     """
@@ -105,16 +103,16 @@ def step(self, timestamp=0.0, **keywords):
         self.services.stage_state()
 
         state_file = self.services.get_config_param('STATE_FILES')
-        with open(state_file, 'rb') as f:
+        data_loc = os.path.join(self.services.get_config_param('SIM_ROOT'), f'{timestamp if not REPLACE else 0.0}_{state_file}')
+        with open(data_loc, 'rb') as f:
             data = f.read()
 
         # stage the state file in the JupyterHub directory and update the module file to handle it
         if REPLACE:
-            self.services.add_analysis_data_file(state_file, os.path.basename(state_file), replace=True)
+            self.services.add_analysis_data_files([data_loc], replace=True)
         else:
-            self.services.add_analysis_data_file(
-                state_file,
-                f'{timestamp}_{os.path.basename(state_file)}',
+            self.services.add_analysis_data_files(
+                [data_loc],
                 timestamp=timestamp,
             )
 

diff --git a/ipsframework/_jupyter/api_v1.py b/ipsframework/_jupyter/api_v1.py
@@ -5,12 +5,12 @@
 import os
 import tarfile
 from pathlib import Path
-from typing import Dict, Iterable, Union
+from typing import Dict, Iterable, List, Union
 
 THIS_DIR = Path(__file__).resolve().parent
 
 
-def get_data_from_runid(runid: int) -> Dict[float, str]:
+def get_data_from_runid(runid: int) -> Dict[float, List[str]]:
     """Load all data associated with a single runid into a dictionary.
 
     Params:
@@ -25,7 +25,7 @@ def get_data_from_runid(runid: int) -> Dict[float, str]:
     return module.DATA_FILES
 
 
-def get_data_from_runids(runids: Iterable[int]) -> Dict[int, Dict[float, str]]:
+def get_data_from_runids(runids: Iterable[int]) -> Dict[int, Dict[float, List[str]]]:
     """Load all data associated with multiple runids into a common data structure.
 
     Params:

diff --git a/ipsframework/_jupyter/initializer.py b/ipsframework/_jupyter/initializer.py
@@ -17,7 +17,6 @@
 import re
 import shutil
 from pathlib import Path
-from typing import Optional
 
 import nbformat as nbf
 
@@ -172,11 +171,11 @@ def initialize_jupyter_import_module_file(dest: str):
         f.write(_initial_data_file_code())
 
 
-def update_module_file_with_data_file(dest: str, data_file: str, replace: bool, timestamp: float = 0.0) -> Optional[str]:
+def update_module_file_with_data_files(dest: str, data_files: list[str], replace: bool, timestamp: float = 0.0) -> None:
     """
     Params:
       - dest: directory of the module file which will be modified
-      - data_file: file which will be added to the module
+      - data_files: files which will be added to the module
       - replace: if True, we can update
       - timestamp: key we associate the data file with
 
@@ -187,31 +186,22 @@ def update_module_file_with_data_file(dest: str, data_file: str, replace: bool,
     with open(dest, 'r') as f:
         old_module_code = f.read()
 
-    replaced_file_name = None
+    new_listing = ''.join(f"f'{{{DIRECTORY_VARIABLE_NAME}}}{val}'," for val in data_files)
+    new_str = f'{timestamp}: [{new_listing}],\n'
 
     timestamp_regex = str(timestamp).replace('.', '\\.')
-    directory_str = '\{' + DIRECTORY_VARIABLE_NAME + '\}'
-
-    search_pattern = f"{timestamp_regex}: f'{directory_str}(.*)',"
+    search_pattern = f'^{timestamp_regex}: [(.*)],\n'
 
     found_match = re.search(search_pattern, old_module_code)
-    if found_match:  # timestamp already exists
+    if found_match:
         if replace:
-            replaced_file_name = found_match.group(1)
-            if replaced_file_name == data_file:
-                # in this case, we're not actually removing an obsolete file, so no need to write to the module file
-                # return None because we've already directly replaced the file
-                return None
-            new_module_code = re.sub(search_pattern, f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',", old_module_code)
+            new_module_code = re.sub(search_pattern, new_str, old_module_code, count=1)
         else:
             raise ValueError(
-                f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_file}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call."
+                f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_files}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call."
             )
-    else:  # timestamp does not exist, so add it
-        # search from right of string for the '}' character, should work assuming user does not modify the cell past the variable definition
-        new_module_code = replace_last(old_module_code, '}', f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',\n" + '}')
+    else:
+        new_module_code = replace_last(old_module_code, '}', new_str + '}')
 
     with open(dest, 'w') as f:
         f.write(new_module_code)
-
-    return replaced_file_name
diff --git a/ipsframework/services.py b/ipsframework/services.py
@@ -31,7 +31,7 @@
     initialize_jupyter_import_module_file,
     initialize_jupyter_notebook,
     initialize_jupyter_python_api,
-    update_module_file_with_data_file,
+    update_module_file_with_data_files,
 )
 from .cca_es_spec import initialize_event_service
 from .ips_es_spec import eventManager
@@ -1948,12 +1948,12 @@ def initialize_jupyter_notebook(
         self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data)
         self._send_monitor_event('IPS_PORTAL_REGISTER_NOTEBOOK', f'URL = {url}')
 
-    def add_analysis_data_file(self, current_data_file_path: str, new_data_file_name: str, timestamp: float = 0.0, replace: bool = False):
+    # TODO allow current_data_file_paths to be either a single path string or a list of path strings
+    def add_analysis_data_files(self, current_data_file_paths: list[str], timestamp: float = 0.0, replace: bool = False):
         """Add data file to the module file referenced by the Jupyter Notebook.
 
         Params:
-        - current_data_file_path: location of the current data file we want to copy to the Jupyter directory. This will usually be a state file.
-        - new_data_file_name: name of the new data file (relative to Jupyterhub data directory, should be unique per run)
+        - current_data_file_paths: locations of the current data files we want to copy to the Jupyter directory. These will usually be state files.
         - timestamp: label to assign to the data (currently must be a floating point value)
         - replace: If True, replace the last data file added with the new data file. If False, simply append the new data file. (default: False)
               Note that if replace is not True but you attempt to overwrite it, a ValueError will be thrown.
@@ -1963,24 +1963,15 @@ def add_analysis_data_file(self, current_data_file_path: str, new_data_file_name
                 # TODO generic exception
                 raise Exception('Unable to initialize base JupyterHub dir')
 
-        # make sure we're working with a file, and not a directory, regarding the data file name
-        new_data_file_name = os.path.basename(new_data_file_name)
+        destination_paths = [os.path.basename(old_fname) for old_fname in current_data_file_paths]
+        for source, destination in zip(current_data_file_paths, destination_paths):
+            full_destination = os.path.join(self._jupyterhub_dir, 'data', destination)
+            if not replace and os.path.exists(full_destination):
+                raise ValueError(f'Replacing existing filename {destination}, set replace to equal True in add_analysis_data_files if this was intended.')
+            # this may raise an OSError, it is the responsibility of the caller to handle it.
+            shutil.copyfile(source, full_destination)
 
-        jupyter_data_file = os.path.join(self._jupyterhub_dir, 'data', new_data_file_name)
-        if not replace and os.path.exists(jupyter_data_file):
-            raise ValueError(f'Replacing existing filename {jupyter_data_file}, set replace to equal True in add_analysis_data_file if this was intended.')
-        # this may raise an OSError, it is the responsibility of the caller to handle it.
-        shutil.copyfile(current_data_file_path, jupyter_data_file)
-
-        # update the module file
-        replaced_file_name = update_module_file_with_data_file(self._jupyterhub_dir, new_data_file_name, replace, timestamp)
-        if replaced_file_name:
-            # now remove the state file from the filesystem
-            file_to_remove = os.path.join(self._jupyterhub_dir, 'data', replaced_file_name)
-            try:
-                os.remove(file_to_remove)
-            except FileNotFoundError:
-                pass
+        update_module_file_with_data_files(self._jupyterhub_dir, destination_paths, replace, timestamp)
 
     def publish(self, topicName: str, eventName: str, eventBody: Any):
         """