Skip to content

Commit 6c47475

Browse files
committed
simplify Jupyter API
Signed-off-by: Lance-Drane <[email protected]>
1 parent 11ef677 commit 6c47475

File tree

5 files changed

+41
-211
lines changed

5 files changed

+41
-211
lines changed

bokeh-plots.ipynb

-124
This file was deleted.

examples-proposed/004-time-loop/sim/input_dir/basic.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
"outputs": [],
99
"source": [
1010
"# Notebook template, the IPS Framework will add a cell before this one\n",
11-
"# defining IPS_STATE_FILES as a list of state file paths.\n",
11+
"# defining ANALYSIS_FILES as a list of state file paths.\n",
1212
"\n",
1313
"# In this example, this notebook is generated during the time loop.\n",
1414
"\n",
1515
"mapping = {}\n",
16-
"for file in IPS_STATE_FILES:\n",
16+
"for file in ANALYSIS_FILES:\n",
1717
" with open(file, 'rb') as f:\n",
1818
" mapping[file] = f.read()\n",
1919
"print(mapping)\n"

examples-proposed/004-time-loop/sim/input_dir/bokeh-plots.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"outputs": [],
99
"source": [
1010
"# Notebook template, the IPS Framework will add a cell before this one\n",
11-
"# defining IPS_STATE_FILES as a list of state file paths.\n",
11+
"# defining ANALYSIS_FILES as a list of state file paths.\n",
1212
"\n",
1313
"# In this example, this notebook is only generated at the end of the run.\n",
1414
"\n",
@@ -21,10 +21,10 @@
2121
"\n",
2222
"DATA = []\n",
2323
"# create DATA list, will depend on user input type (i.e. 'hdf5', 'json')\n",
24-
"for file in IPS_STATE_FILES:\n",
24+
"for file in ANALYSIS_FILES:\n",
2525
" with open(file, 'rb') as f:\n",
2626
" DATA.append(json.load(f))\n",
27-
"x = [float(f.rpartition('/')[2]) for f in IPS_STATE_FILES]\n",
27+
"x = [float(f.rpartition('/')[2]) for f in ANALYSIS_FILES]\n",
2828
"\n",
2929
"COLORS = ['red', 'green', 'blue']\n",
3030
"\n",

ipsframework/jupyter.py

+20-13
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
It is written to a notebook cell on initializing it, and is searched for when adding a data file to it.
2323
"""
2424

25+
DIRECTORY_VARIABLE_NAME = 'DATA_DIR'
26+
2527

2628
def replace_last(source_string: str, old: str, new: str) -> str:
2729
"""Attempt to replace the last occurrence of 'old' with 'new' in 'source_string', searching from the right.
@@ -32,17 +34,17 @@ def replace_last(source_string: str, old: str, new: str) -> str:
3234
return f'{head}{new}{tail}'
3335

3436

35-
def _initial_jupyter_file_notebook_cell(dest: str, variable: str) -> str:
37+
def _initial_jupyter_file_notebook_cell(dest: str, files_variable_name: str) -> str:
3638
return f"""{HOOK}
3739
3840
import os
3941
4042
# NOTE: directory should be sim_name plus the run id from the Portal
41-
IPS_DATA_DIR = '{str(Path(dest).parent / 'data') + sep}'
43+
{DIRECTORY_VARIABLE_NAME} = '{str(Path(dest).parent / 'data') + sep}'
4244
# Uncomment below line to implicitly use any state files saved in the data directory, note that the IPS framework explicitly lists out each file used
43-
#{variable} = os.listdir('data')
45+
#{files_variable_name} = os.listdir('data')
4446
# files created during the run
45-
{variable} = [
47+
{files_variable_name} = [
4648
]
4749
"""
4850

@@ -61,10 +63,15 @@ def initialize_jupyter_notebook(dest: str, src: str, variable_name: str, index:
6163
# to avoid conversion, use as_version=nbf.NO_CONVERT
6264
nb: nbf.NotebookNode = nbf.read(src, as_version=4)
6365

64-
header = '# Next cell generated by IPS Framework'
6566
nb['cells'] = (
66-
nb['cells'][:index]
67-
+ [nbf.v4.new_markdown_cell(header), nbf.v4.new_code_cell(_initial_jupyter_file_notebook_cell(dest, variable_name))]
67+
# warning notification for users inspecting the file, unused programmatically
68+
[nbf.v4.new_markdown_cell('# WARNING: Do not manually modify this file until the IPS simulation is complete.')]
69+
+ nb['cells'][:index]
70+
+ [
71+
# explicitly mark the IPS cell for users inspecting the file, unused programmatically
72+
nbf.v4.new_markdown_cell('## Next cell generated by IPS Framework'),
73+
nbf.v4.new_code_cell(_initial_jupyter_file_notebook_cell(dest, variable_name)),
74+
]
6875
+ nb['cells'][index:]
6976
)
7077

@@ -88,21 +95,21 @@ def add_data_file_to_notebook(dest: str, data_file: str, index: Optional[int] =
8895
raise Exception('Cannot find IPS notebook node')
8996
ips_cell: str = nb['cells'][index]['source']
9097

91-
if ips_cell.find(f"f'{{IPS_DATA_DIR}}{data_file}',\n]") != -1:
98+
if ips_cell.find(f"f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',\n]") != -1:
9299
# The data file is already referenced in the notebook, so there's nothing else to do
93100
return
94101

95102
# data file does not exist, so we need to add it
96103
# search from right of string for the ']' character, should work assuming user does not modify the cell past the variable definition
97-
result = replace_last(ips_cell, ']', f"f'{{IPS_DATA_DIR}}{data_file}',\n]")
104+
result = replace_last(ips_cell, ']', f"f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',\n]")
98105
nb['cells'][index]['source'] = result
99106

100107
with open(dest, 'w') as f:
101108
nbf.write(nb, f)
102109

103110

104111
def remove_data_file_from_notebook(dest: str, data_file: str, index: Optional[int] = None):
105-
"""Remove data file from the notebook list.
112+
"""Remove a specific data file from the notebook list.
106113
107114
Params:
108115
- dest: path to notebook which will be modified
@@ -116,7 +123,7 @@ def remove_data_file_from_notebook(dest: str, data_file: str, index: Optional[in
116123
raise Exception('Cannot find IPS notebook node')
117124
ips_cell: str = nb['cells'][index]['source']
118125

119-
head, sep, tail = ips_cell.rpartition(f"f'{{IPS_DATA_DIR}}{data_file}',\n")
126+
head, sep, tail = ips_cell.rpartition(f"f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',\n")
120127
if sep == '':
121128
# existing match not found, so there's nothing left to remove
122129
return
@@ -133,7 +140,7 @@ def remove_last_data_file_from_notebook(dest: str, index: Optional[int] = None)
133140
Note that this function assumes the notebook maintains a specific format.
134141
135142
Returns:
136-
- None if there were no data entries in the notebook,
143+
- None if there were no data entries in the notebook, the name of the file removed (without the directory) as a string if there was
137144
"""
138145
nb: nbf.NotebookNode = nbf.read(dest, as_version=4)
139146
if index is None:
@@ -142,7 +149,7 @@ def remove_last_data_file_from_notebook(dest: str, index: Optional[int] = None)
142149
raise Exception('Cannot find IPS notebook node')
143150
ips_cell: str = nb['cells'][index]['source']
144151

145-
search_hook = "f'{IPS_DATA_DIR}"
152+
search_hook = f"f'{{{DIRECTORY_VARIABLE_NAME}}}"
146153

147154
start_index = ips_cell.rfind(search_hook)
148155
if start_index == -1:

ipsframework/services.py

+16-69
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from . import ipsutil, messages
3030
from .cca_es_spec import initialize_event_service
3131
from .ips_es_spec import eventManager
32-
from .jupyter import add_data_file_to_notebook, initialize_jupyter_notebook, remove_data_file_from_notebook, remove_last_data_file_from_notebook
32+
from .jupyter import add_data_file_to_notebook, initialize_jupyter_notebook, remove_last_data_file_from_notebook
3333
from .taskManager import TaskInit
3434

3535
RunningTask = namedtuple('RunningTask', ['process', 'start_time', 'timeout', 'nproc', 'cores_allocated', 'command', 'binary', 'args'])
@@ -1903,7 +1903,7 @@ def initialize_jupyter_notebook(
19031903
self,
19041904
dest_notebook_name: str,
19051905
source_notebook_path: str,
1906-
variable_name: str = 'IPS_STATE_FILES',
1906+
variable_name: str = 'ANALYSIS_FILES',
19071907
cell_to_modify: int = 0,
19081908
) -> None:
19091909
"""Loads a notebook from source_notebook_path, adds a cell to load the data, and then saves it to source_notebook_path. Will also try to register the notebook with the IPS Portal, if available.
@@ -1913,7 +1913,7 @@ def initialize_jupyter_notebook(
19131913
Params:
19141914
- dest_notebook_name: name of the JupyterNotebook you want to write (do not include file paths).
19151915
- source_notebook_path: location you want to load the source notebook from
1916-
- variable_name: name of the variable you want to load files from (default: "IPS_STATE_FILES")
1916+
- variable_name: name of the variable you want to load files from (default: "ANALYSIS_FILES")
19171917
- cell_to_modify: which cell in the JupyterNotebook you want to add the data call to (0-indexed).
19181918
(This will not overwrite any cells, just appends.)
19191919
By default, the data listing will happen in the FIRST cell.
@@ -1942,7 +1942,7 @@ def initialize_jupyter_notebook(
19421942
self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data)
19431943
self._send_monitor_event('IPS_PORTAL_REGISTER_NOTEBOOK', f'URL = {url}')
19441944

1945-
def add_data_file_to_notebook(self, state_file_path: str, timestamp: float, notebook_name: str, index: Optional[int] = None):
1945+
def add_data_file_to_notebook(self, state_file_path: str, timestamp: float, notebook_name: str, replace: bool = False, index: Optional[int] = None):
19461946
"""Add data file to JupyterHub directory, and reference it in the notebook.
19471947
19481948
This function assumes that a notebook has already been created with initialize_jupyter_notebook. Using this function does not call the IPS Portal.
@@ -1951,83 +1951,30 @@ def add_data_file_to_notebook(self, state_file_path: str, timestamp: float, note
19511951
- state_file_path: location of the current state file we want to copy to the Jupyter directory
19521952
- timestamp: label to assign to the data (currently must be a floating point value)
19531953
- notebook_name: name of notebook which will be modified. Note that this path is relative to the JupyterHub directory.
1954+
- replace: If True, replace the last data file added with the new data file. If False, simply append the new data file.
19541955
- index: optional index of the IPS notebook cell. If not provided, the IPS Framework will attempt to automatically find the cell it created,
19551956
which should work for every usecase where you don't anticipate modifying the notebook until after the run is complete.
19561957
"""
1957-
19581958
if not self._jupyterhub_dir:
19591959
if not self._init_jupyter():
19601960
# TODO generic exception
19611961
raise Exception('Unable to initialize base JupyterHub dir')
19621962

1963-
file_parts = state_file_path.split('.')
1964-
if len(file_parts) > 2: # name of the file could just be a floating point value with no extension
1965-
extension = f'.{file_parts[-1]}'
1966-
else:
1967-
extension = ''
1968-
1969-
state_file_name = f'{timestamp}{extension}'
1970-
jupyter_data_dir = os.path.join(self._jupyterhub_dir, 'data', state_file_name)
1963+
data_file_name = f'{timestamp}_{os.path.basename(state_file_path)}'
1964+
jupyter_data_dir = os.path.join(self._jupyterhub_dir, 'data', data_file_name)
19711965
# this may raise an OSError, it is the responsibility of the caller to handle it.
19721966
shutil.copyfile(state_file_path, jupyter_data_dir)
19731967

1974-
# TODO - maybe add flag which allows us to replace old state files
1975-
add_data_file_to_notebook(f'{self._jupyterhub_dir}{notebook_name}', state_file_name, index)
1976-
1977-
def remove_data_file_from_notebook(self, state_file_path: str, timestamp: float, notebook_name: str, index: Optional[int] = None):
1978-
"""Remove data file from JupyterHub data directory and from being referenced in the notebook.
1979-
1980-
This function assumes that a notebook has already been created with intialize_jupyter_notebook. Using this function does not call the IPS Portal.
1981-
1982-
Params:
1983-
- state_file_path: location of the current state file we want to copy to the Jupyter directory
1984-
- timestamp: label to assign to the data (currently must be a floating point value)
1985-
- notebook_name: name of notebook which will be modified. Note that this path is relative to the JupyterHub directory.
1986-
- index: optional index of the IPS notebook cell. If not provided, the IPS Framework will attempt to automatically find the cell it created,
1987-
which should work for every usecase where you don't anticipate modifying the notebook until after the run is complete.
1988-
"""
1989-
1990-
if not self._jupyterhub_dir:
1991-
if not self._init_jupyter():
1992-
# TODO generic exception
1993-
raise Exception('Unable to initialize base JupyterHub dir')
1968+
if replace:
1969+
# first try to remove the reference from the Jupyter Notebook
1970+
filename_to_remove = remove_last_data_file_from_notebook(f'{self._jupyterhub_dir}{notebook_name}', index)
1971+
if filename_to_remove is not None:
1972+
# now remove the state file from the filesystem
1973+
file_to_remove = os.path.join(self._jupyterhub_dir, 'data', filename_to_remove)
1974+
shutil.rmtree(file_to_remove, ignore_errors=True)
19941975

1995-
file_parts = state_file_path.split('.')
1996-
if len(file_parts) > 2: # name of the file could just be a floating point value with no extension
1997-
extension = f'.{file_parts[-1]}'
1998-
else:
1999-
extension = ''
2000-
2001-
state_file_name = f'{timestamp}{extension}'
2002-
jupyter_data_dir = os.path.join(self._jupyterhub_dir, 'data', state_file_name)
2003-
2004-
# if this errors out, we can safely ignore them
2005-
shutil.rmtree(jupyter_data_dir, ignore_errors=True)
2006-
2007-
# TODO - maybe add flag which allows us to replace old state files
2008-
remove_data_file_from_notebook(f'{self._jupyterhub_dir}{notebook_name}', state_file_name, index)
2009-
2010-
def remove_last_data_file_from_notebook(self, notebook_name: str, index: Optional[int] = None):
2011-
"""Remove the last added data file from a notebook and from the filesystem.
2012-
2013-
This function assumes that a notebook has already been created with intialize_jupyter_notebook. Using this function does not call the IPS Portal.
2014-
2015-
Params:
2016-
- notebook_name: name of notebook which will be modified. Note that this path is relative to the JupyterHub directory.
2017-
- index: optional index of the IPS notebook cell. If not provided, the IPS Framework will attempt to automatically find the cell it created,
2018-
which should work for every usecase where you don't anticipate modifying the notebook until after the run is complete.
2019-
"""
2020-
2021-
if not self._jupyterhub_dir:
2022-
if not self._init_jupyter():
2023-
# TODO generic exception
2024-
raise Exception('Unable to initialize base JupyterHub dir')
2025-
2026-
last_state_file = remove_last_data_file_from_notebook(notebook_name, index)
2027-
if last_state_file is None:
2028-
return
2029-
data_file = os.path.join(self._jupyterhub_dir, 'data', last_state_file)
2030-
shutil.rmtree(data_file, ignore_errors=True)
1976+
# add newest data file to notebook
1977+
add_data_file_to_notebook(f'{self._jupyterhub_dir}{notebook_name}', data_file_name, index)
20311978

20321979
def publish(self, topicName: str, eventName: str, eventBody: Any):
20331980
"""

0 commit comments

Comments
 (0)