Skip to content

Commit cf9c6b3

Browse files
committed
JUPYTER: create dictionary in module file separate from notebook
use simple append+replace Jupyter API Signed-off-by: Lance-Drane <[email protected]>
1 parent 995d5f8 commit cf9c6b3

File tree

6 files changed

+134
-144
lines changed

6 files changed

+134
-144
lines changed

examples-proposed/004-time-loop/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,11 @@ To run the code, run:
2121
```bash
2222
./run.sh
2323
```
24+
25+
By default, this example will always _append_ a state file. If you prefer to see an example of how to _replace_ a state file, run:
26+
27+
```bash
28+
EXAMPLE_REPLACE=1 ./run.sh
29+
```
30+
31+
There is also a script `run-delayed.sh` which you can use instead of `run.sh` if you would like to simulate a delay between monitor steps.

examples-proposed/004-time-loop/mymodule/components.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,15 @@
88
from ipsframework import Component
99

1010
DELAY = bool(os.environ.get('EXAMPLE_DELAY'))
11+
REPLACE = bool(os.environ.get('EXAMPLE_REPLACE'))
1112

13+
# templates are existing files from the input directory
14+
# names are what the notebook and the associated data file will be labeled with (you can leave off the .ipynb / .py)
1215
NOTEBOOK_1_TEMPLATE = 'basic.ipynb'
1316
NOTEBOOK_1_NAME = 'basic.ipynb'
1417
NOTEBOOK_2_TEMPLATE = 'bokeh-plots.ipynb'
1518
NOTEBOOK_2_NAME = 'bokeh-plots.ipynb'
19+
DATA_MODULE_NAME = 'data_files'
1620

1721

1822
class Init(Component):
@@ -37,10 +41,12 @@ def step(self, timestamp=0.0):
3741
self.services.initialize_jupyter_notebook(
3842
dest_notebook_name=NOTEBOOK_1_NAME, # path is relative to JupyterHub directory
3943
source_notebook_path=NOTEBOOK_1_TEMPLATE, # path is relative to input directory
44+
data_module_name=DATA_MODULE_NAME,
4045
)
4146
self.services.initialize_jupyter_notebook(
4247
dest_notebook_name=NOTEBOOK_2_NAME, # path is relative to JupyterHub directory
4348
source_notebook_path=NOTEBOOK_2_TEMPLATE, # path is relative to input directory
49+
data_module_name=DATA_MODULE_NAME,
4450
)
4551

4652
# The time loop is configured in its own section of sim.conf
@@ -105,9 +111,16 @@ def step(self, timestamp=0.0, **keywords):
105111
with open(state_file, 'rb') as f:
106112
data = f.read()
107113

108-
# stage the state file in the JupyterHub directory
109-
self.services.add_data_file_to_notebook(state_file, timestamp, NOTEBOOK_1_NAME)
110-
self.services.add_data_file_to_notebook(state_file, timestamp, NOTEBOOK_2_NAME)
114+
# stage the state file in the JupyterHub directory and update the module file to handle it
115+
if REPLACE:
116+
self.services.add_analysis_data_file(state_file, os.path.basename(state_file), DATA_MODULE_NAME, replace=True)
117+
else:
118+
self.services.add_analysis_data_file(
119+
state_file,
120+
f'{timestamp}_{os.path.basename(state_file)}',
121+
DATA_MODULE_NAME,
122+
timestamp=timestamp,
123+
)
111124

112125
print('SEND PORTAL DATA', timestamp, data, file=stderr)
113126
self.services.send_portal_data(timestamp, data)

examples-proposed/004-time-loop/sim/input_dir/basic.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"# In this example, this notebook is generated during the time loop.\n",
1414
"\n",
1515
"mapping = {}\n",
16-
"for file in DATA_FILES:\n",
16+
"for _timestep, file in DATA_FILES:\n",
1717
" with open(file, 'rb') as f:\n",
1818
" mapping[file] = f.read()\n",
1919
"print(mapping)\n"

examples-proposed/004-time-loop/sim/input_dir/bokeh-plots.ipynb

+5-3
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
"\n",
2222
"DATA = []\n",
2323
"# create DATA list, will depend on user input type (i.e. 'hdf5', 'json')\n",
24-
"for file in DATA_FILES:\n",
24+
"for _timestep, file in DATA_FILES:\n",
2525
" with open(file, 'rb') as f:\n",
2626
" DATA.append(json.load(f))\n",
27-
"x = [float(f.rpartition('/')[2]) for f in DATA_FILES]\n",
27+
"x = list(DATA_FILES.keys())\n",
2828
"\n",
2929
"COLORS = ['red', 'green', 'blue']\n",
3030
"\n",
@@ -51,7 +51,9 @@
5151
" for idx, prop in enumerate(paths):\n",
5252
" y = [get_data(d, prop) for d in DATA]\n",
5353
" graph.line(x, y, line_color=COLORS[idx % len(COLORS)], line_dash='solid', legend_label='_'.join(prop))\n",
54-
" show(graph)\n"
54+
" show(graph)\n",
55+
"\n",
56+
"# TODO - add cell at end which shows history as a simulation (do it frame by frame instead of a line graph)\n"
5557
]
5658
}
5759
],

ipsframework/jupyter.py

+64-110
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
"""
2+
NOTE: this module is not intended to be a public API for framework users. Instead, use:
3+
- "services.initialize_jupyter_notebook" (to set up the JupyterHub interaction for a notebook, done only once)
4+
- "services.add_analysis_data_file" (each time you want to add or remove a data file from JupyterHub)
5+
26
This module is designed to help generate JupyterNotebooks to be used with IPS Portal analysis.
37
Some parts of the script will need direction from users on the Framework side to generate.
48
@@ -10,18 +14,13 @@
1014
...in a shell on Jupyter NERSC.
1115
"""
1216

17+
import re
1318
from os.path import sep
1419
from pathlib import Path
1520
from typing import Optional
1621

1722
import nbformat as nbf
1823

19-
HOOK = '### This cell autogenerated by IPS Framework. DO NOT EDIT UNTIL IPS RUN IS FINALIZED. ###'
20-
"""This hook is used to determine which "cell" the IPS framework should work with.
21-
22-
It is written to a notebook cell on initializing it, and is searched for when adding a data file to it.
23-
"""
24-
2524
DIRECTORY_VARIABLE_NAME = 'DATA_DIR'
2625

2726

@@ -34,149 +33,104 @@ def replace_last(source_string: str, old: str, new: str) -> str:
3433
return f'{head}{new}{tail}'
3534

3635

37-
def _initial_jupyter_file_notebook_cell(dest: str, files_variable_name: str) -> str:
38-
return f"""{HOOK}
36+
def _initial_data_file_code(dest: str, files_variable_name: str) -> str:
    """Return the initial source code of the generated data module file.

    The generated code defines the data directory variable (the 'data' directory which is a
    sibling of 'dest', with a trailing path separator) and an empty dictionary which will
    hold the data file entries.

    The closing '}' of the dictionary must stay the last '}' in the generated code, because
    new entries are inserted by replacing the last '}' found in the file.

    Params:
      - dest - location of the module file on the filesystem, only used to derive the data directory
      - files_variable_name - what to call the dictionary variable holding the data files
    """
    data_dir = str(Path(dest).parent / 'data') + sep
    # '!r' writes a correctly quoted and escaped string literal, so paths containing
    # quotes or backslashes still produce an importable module
    return f"""# This file should be imported by a jupyter notebook. DO NOT EDIT UNTIL IPS RUN IS FINALIZED.

import os

# NOTE: directory should be sim_name plus the run id from the Portal
{DIRECTORY_VARIABLE_NAME} = {data_dir!r}
{files_variable_name} = {{
}}
"""
5046

5147

52-
def initialize_jupyter_notebook(dest: str, src: str, variable_name: str, index: int):
48+
def initialize_jupyter_notebook(notebook_dest: str, notebook_src: str, module_name: str, variable_name: str, index: int):
    """Create a new notebook from an old notebook, copying the result from 'notebook_src' to 'notebook_dest'.

    This adds an additional cell which will import the data files. The notebook should not be written again after this function.

    Params:
      - notebook_dest - location of notebook to create on filesystem (absolute file path)
      - notebook_src - location of source notebook on filesystem (is not overwritten unless notebook_src == notebook_dest)
      - module_name - name of the python module which will contain the data file list
      - variable_name - what to call the variable
      - index - insert new cells at position before this value (will not remove preexisting cells)
    """
    # to avoid conversion, use as_version=nbf.NO_CONVERT
    nb: nbf.NotebookNode = nbf.read(notebook_src, as_version=4)

    # importlib.reload() requires the module object itself (passing a string raises TypeError),
    # and a name bound with "from module import name" is not refreshed by a reload,
    # so import the module, reload it, and only then bind the variable.
    import_cell_source = f"""import importlib

import {module_name}

# reload so that re-running this cell picks up changes made to the module file
importlib.reload({module_name})
{variable_name} = {module_name}.{variable_name}
"""

    ips_cells = [
        # explicitly mark the IPS cell for users inspecting the file, unused programmatically
        nbf.v4.new_markdown_cell('## Next cell generated by IPS Framework'),
        nbf.v4.new_code_cell(import_cell_source),
    ]
    nb['cells'] = nb['cells'][:index] + ips_cells + nb['cells'][index:]

    nbf.validate(nb)
    # notebooks are JSON documents, write them as UTF-8 regardless of the locale
    with open(notebook_dest, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)
8181

8282

83-
def add_data_file_to_notebook(dest: str, data_file: str, index: Optional[int] = None):
84-
"""Add data file to notebook list.
83+
def initialize_jupyter_import_module_file(dest: str, variable_name: str):
    """Create the python module file which will contain the data file listing.

    Any existing file at 'dest' is overwritten with the initial module code.

    Params:
      - dest - location of the module file to create on filesystem
      - variable_name - what to call the variable holding the data files
    """
    # Python source files are UTF-8 by default, so do not depend on the locale encoding
    with open(dest, 'w', encoding='utf-8') as f:
        f.write(_initial_data_file_code(dest, variable_name))
11093

111-
def remove_data_file_from_notebook(dest: str, data_file: str, index: Optional[int] = None):
112-
"""Remove a specific data file from the notebook list.
11394

114-
Params:
115-
- dest: path to notebook which will be modified
116-
- data_file: data file we remove from the notebook
117-
- index: optional index of the IPS notebook cell. If not provided, search through the notebook via an expected string hook.
95+
def update_module_file_with_data_file(dest: str, data_file: str, replace: bool, timestamp: float = 0.0) -> Optional[str]:
    """Associate a data file with a timestamp in the module file, adding or replacing the entry.

    Each entry occupies its own line in the module file, in the form:
    <timestamp>: f'{<directory variable>}<data_file>',

    Params:
      - dest: path to module file which will be modified
      - data_file: file which will be added to the module
      - replace: if True, an existing entry for 'timestamp' is overwritten; if False, an existing entry is an error
      - timestamp: key we associate the data file with

    Returns:
      - if we replaced a file, the name of the file which was replaced; otherwise, None

    Raises:
      - ValueError: an entry for 'timestamp' already exists and 'replace' is False
    """
    with open(dest, 'r', encoding='utf-8') as f:
        old_module_code = f.read()

    entry_prefix = f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}"
    new_entry = f"{entry_prefix}{data_file}',"

    # - re.escape handles every regex metacharacter in the key (e.g. the '+' in '1e+20'), not just '.'
    # - anchoring to the start of a line stops the key '1.0' from matching inside the entry for '11.0'
    search_pattern = re.compile(
        rf"^(?P<indent>[ \t]*){re.escape(entry_prefix)}(?P<file_name>.*)',",
        re.MULTILINE,
    )

    found_match = search_pattern.search(old_module_code)
    if found_match is None:  # timestamp does not exist, so add it
        replaced_file_name = None
        # search from right of string for the '}' character, should work assuming user does not modify the file past the variable definition
        new_module_code = replace_last(old_module_code, '}', f'{new_entry}\n' + '}')
    else:  # timestamp already exists
        existing_file_name = found_match.group('file_name')
        if not replace:
            raise ValueError(
                f"For timestamp entry {timestamp}, you are trying to replace '{existing_file_name}' with '{data_file}' . "
                "If this was intended, you must explicitly set 'replace=True' on the IPS function call."
            )
        if existing_file_name == data_file:
            # in this case, we're not actually removing an obsolete file, so no need to write to the module file
            # return None because we've already directly replaced the file
            return None
        replaced_file_name = existing_file_name
        # a function replacement is inserted verbatim, so backslashes in 'data_file' are not treated as escapes
        new_module_code = search_pattern.sub(
            lambda entry: entry.group('indent') + new_entry,
            old_module_code,
            count=1,
        )

    with open(dest, 'w', encoding='utf-8') as f:
        f.write(new_module_code)

    return replaced_file_name

0 commit comments

Comments
 (0)