Skip to content

Commit

Permalink
jupyter - simplify add_analysis_data_file api, save analysis data fil…
Browse files Browse the repository at this point in the history
…es as a list of files

Signed-off-by: Lance-Drane <[email protected]>
  • Loading branch information
Lance-Drane committed Nov 6, 2024
1 parent e07f5b3 commit 9197b5c
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 56 deletions.
22 changes: 10 additions & 12 deletions examples-proposed/004-time-loop/mymodule/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,6 @@ def step(self, timestamp=0.0):
print(msg, file=stderr)
self.services.send_portal_event(event_comment=msg)

data = {
'y1': float,
'y2': float,
'y3': float,
}

data = {
'y1': math.sin(self.start + timestamp / 50 * math.pi),
'y2': math.sin(self.start + timestamp / 50 * math.pi) ** 2,
Expand All @@ -85,9 +79,13 @@ def step(self, timestamp=0.0):
state_file = self.services.get_config_param('STATE_FILES')
with open(state_file, 'w') as f:
json.dump(data, f)

self.services.update_state()

# copy the state file to a unique path for the monitor
data_loc = os.path.join(self.services.get_config_param('SIM_ROOT'), f'{timestamp if not REPLACE else 0.0}_{state_file}')
with open(data_loc, 'w') as f:
json.dump(data, f)


class Monitor(Component):
"""
Expand All @@ -105,16 +103,16 @@ def step(self, timestamp=0.0, **keywords):
self.services.stage_state()

state_file = self.services.get_config_param('STATE_FILES')
with open(state_file, 'rb') as f:
data_loc = os.path.join(self.services.get_config_param('SIM_ROOT'), f'{timestamp if not REPLACE else 0.0}_{state_file}')
with open(data_loc, 'rb') as f:
data = f.read()

# stage the state file in the JupyterHub directory and update the module file to handle it
if REPLACE:
self.services.add_analysis_data_file(state_file, os.path.basename(state_file), replace=True)
self.services.add_analysis_data_files([data_loc], replace=True)
else:
self.services.add_analysis_data_file(
state_file,
f'{timestamp}_{os.path.basename(state_file)}',
self.services.add_analysis_data_files(
[data_loc],
timestamp=timestamp,
)

Expand Down
6 changes: 3 additions & 3 deletions ipsframework/_jupyter/api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import os
import tarfile
from pathlib import Path
from typing import Dict, Iterable, Union
from typing import Dict, Iterable, List, Union

THIS_DIR = Path(__file__).resolve().parent


def get_data_from_runid(runid: int) -> Dict[float, str]:
def get_data_from_runid(runid: int) -> Dict[float, List[str]]:
"""Load all data associated with a single runid into a dictionary.
Params:
Expand All @@ -25,7 +25,7 @@ def get_data_from_runid(runid: int) -> Dict[float, str]:
return module.DATA_FILES


def get_data_from_runids(runids: Iterable[int]) -> Dict[int, Dict[float, str]]:
def get_data_from_runids(runids: Iterable[int]) -> Dict[int, Dict[float, List[str]]]:
"""Load all data associated with multiple runids into a common data structure.
Params:
Expand Down
30 changes: 10 additions & 20 deletions ipsframework/_jupyter/initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import re
import shutil
from pathlib import Path
from typing import Optional

import nbformat as nbf

Expand Down Expand Up @@ -172,11 +171,11 @@ def initialize_jupyter_import_module_file(dest: str):
f.write(_initial_data_file_code())


def update_module_file_with_data_file(dest: str, data_file: str, replace: bool, timestamp: float = 0.0) -> Optional[str]:
def update_module_file_with_data_files(dest: str, data_files: list[str], replace: bool, timestamp: float = 0.0) -> None:
"""
Params:
- dest: directory of the module file which will be modified
- data_file: file which will be added to the module
- data_files: files which will be added to the module
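- replace: if True, an existing entry for this timestamp is overwritten; if False and the timestamp already has an entry, a ValueError is raised
- timestamp: key the data files are associated with in the module file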
Expand All @@ -187,31 +186,22 @@ def update_module_file_with_data_file(dest: str, data_file: str, replace: bool,
with open(dest, 'r') as f:
old_module_code = f.read()

replaced_file_name = None
new_listing = ''.join(f"f'{{{DIRECTORY_VARIABLE_NAME}}}{val}'," for val in data_files)
new_str = f'{timestamp}: [{new_listing}],\n'

timestamp_regex = str(timestamp).replace('.', '\\.')
directory_str = '\{' + DIRECTORY_VARIABLE_NAME + '\}'

search_pattern = f"{timestamp_regex}: f'{directory_str}(.*)',"
search_pattern = f'^{timestamp_regex}: [(.*)],\n'

found_match = re.search(search_pattern, old_module_code)
if found_match: # timestamp already exists
if found_match:
if replace:
replaced_file_name = found_match.group(1)
if replaced_file_name == data_file:
# in this case, we're not actually removing an obsolete file, so no need to write to the module file
# return None because we've already directly replaced the file
return None
new_module_code = re.sub(search_pattern, f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',", old_module_code)
new_module_code = re.sub(search_pattern, new_str, old_module_code, count=1)
else:
raise ValueError(
f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_file}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call."
f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_files}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call."
)
else: # timestamp does not exist, so add it
# search from right of string for the '}' character, should work assuming user does not modify the cell past the variable definition
new_module_code = replace_last(old_module_code, '}', f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',\n" + '}')
else:
new_module_code = replace_last(old_module_code, '}', new_str + '}')

with open(dest, 'w') as f:
f.write(new_module_code)

return replaced_file_name
33 changes: 12 additions & 21 deletions ipsframework/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
initialize_jupyter_import_module_file,
initialize_jupyter_notebook,
initialize_jupyter_python_api,
update_module_file_with_data_file,
update_module_file_with_data_files,
)
from .cca_es_spec import initialize_event_service
from .ips_es_spec import eventManager
Expand Down Expand Up @@ -1948,12 +1948,12 @@ def initialize_jupyter_notebook(
self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data)
self._send_monitor_event('IPS_PORTAL_REGISTER_NOTEBOOK', f'URL = {url}')

def add_analysis_data_file(self, current_data_file_path: str, new_data_file_name: str, timestamp: float = 0.0, replace: bool = False):
# TODO REMOVE new_data_file_name, make current_data_file_path string or list of strings
def add_analysis_data_files(self, current_data_file_paths: list[str], timestamp: float = 0.0, replace: bool = False):
"""Add data file to the module file referenced by the Jupyter Notebook.
Params:
- current_data_file_path: location of the current data file we want to copy to the Jupyter directory. This will usually be a state file.
- new_data_file_name: name of the new data file (relative to Jupyterhub data directory, should be unique per run)
- current_data_file_paths: locations of the current data files we want to copy to the Jupyter directory. These will usually be state files. Each file keeps its base name in the Jupyter data directory.
- timestamp: label to assign to the data (currently must be a floating point value)
- replace: If True, replace the last data file added with the new data file. If False, simply append the new data file. (default: False)
Note that if replace is not True and a file with the same name already exists in the Jupyter data directory, a ValueError will be raised.
Expand All @@ -1963,24 +1963,15 @@ def add_analysis_data_file(self, current_data_file_path: str, new_data_file_name
# TODO generic exception
raise Exception('Unable to initialize base JupyterHub dir')

# make sure we're working with a file, and not a directory, regarding the data file name
new_data_file_name = os.path.basename(new_data_file_name)
destination_paths = [os.path.basename(old_fname) for old_fname in current_data_file_paths]
for source, destination in zip(current_data_file_paths, destination_paths):
full_destination = os.path.join(self._jupyterhub_dir, 'data', destination)
if not replace and os.path.exists(full_destination):
raise ValueError(f'Replacing existing filename {destination}, set replace to equal True in add_analysis_data_files if this was intended.')
# this may raise an OSError, it is the responsibility of the caller to handle it.
shutil.copyfile(source, full_destination)

jupyter_data_file = os.path.join(self._jupyterhub_dir, 'data', new_data_file_name)
if not replace and os.path.exists(jupyter_data_file):
raise ValueError(f'Replacing existing filename {jupyter_data_file}, set replace to equal True in add_analysis_data_file if this was intended.')
# this may raise an OSError, it is the responsibility of the caller to handle it.
shutil.copyfile(current_data_file_path, jupyter_data_file)

# update the module file
replaced_file_name = update_module_file_with_data_file(self._jupyterhub_dir, new_data_file_name, replace, timestamp)
if replaced_file_name:
# now remove the state file from the filesystem
file_to_remove = os.path.join(self._jupyterhub_dir, 'data', replaced_file_name)
try:
os.remove(file_to_remove)
except FileNotFoundError:
pass
update_module_file_with_data_files(self._jupyterhub_dir, destination_paths, replace, timestamp)

def publish(self, topicName: str, eventName: str, eventBody: Any):
"""
Expand Down

0 comments on commit 9197b5c

Please sign in to comment.