Skip to content

Commit 106f6dd

Browse files
committed
simplify jupyter notebook API
Signed-off-by: Lance-Drane <[email protected]>
1 parent 62f6d8d commit 106f6dd

File tree

6 files changed

+145
-43
lines changed

6 files changed

+145
-43
lines changed

examples-proposed/004-time-loop/mymodule/components.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55

66
from ipsframework import Component
77

8+
NOTEBOOK_1_TEMPLATE = 'base-notebook-iterative.ipynb'
9+
NOTEBOOK_1_NAME = 'full_state_iterative.ipynb'
10+
NOTEBOOK_2_TEMPLATE = 'base-notebook-one-pass.ipynb'
11+
NOTEBOOK_2_NAME = 'full_state_one_pass.ipynb'
12+
813

914
class Init(Component):
1015
"""Empty init component."""
@@ -16,14 +21,19 @@ class Driver(Component):
1621
"""In this example, the driver iterates through the time loop and calls both the worker and the monitor component on each timestep."""
1722

1823
def step(self, timestamp=0.0):
19-
NOTEBOOK_TEMPLATE = 'base-notebook.ipynb'
20-
2124
worker = self.services.get_port('WORKER')
2225
monitor = self.services.get_port('MONITOR')
2326

2427
self.services.call(worker, 'init', 0)
2528
# Needed for notebook template
26-
self.services.stage_input_files(NOTEBOOK_TEMPLATE)
29+
self.services.stage_input_files([NOTEBOOK_1_TEMPLATE, NOTEBOOK_2_TEMPLATE])
30+
31+
# Example of a notebook we want to initialize and then periodically append to during the run
32+
self.services.initialize_jupyter_notebook(
33+
dest_notebook_name=NOTEBOOK_1_NAME, # path is relative to JupyterHub directory
34+
source_notebook_path=NOTEBOOK_1_TEMPLATE, # path is relative to input directory
35+
)
36+
# Initialize second notebook
2737

2838
# The time loop is configured in its own section of sim.conf
2939
# It is shared across all components
@@ -33,15 +43,12 @@ def step(self, timestamp=0.0):
3343
# TODO - perhaps monitor timestep does not need to be called every step, but only every 20 steps?
3444
self.services.call(monitor, 'step', t)
3545

36-
# create notebook here
37-
NOTEBOOK_NAME = 'full_state.ipynb'
38-
jupyter_state_files = self.services.get_staged_jupyterhub_files()
39-
self.services.stage_jupyter_notebook(
40-
dest_notebook_name=NOTEBOOK_NAME, # path is relative to JupyterHub directory
41-
source_notebook_path='base-notebook.ipynb', # path is relative to input directory
42-
tags=jupyter_state_files,
46+
# With this second "example" notebook, we only create it once and only write to it once.
47+
self.services.initialize_jupyter_notebook(
48+
dest_notebook_name=NOTEBOOK_2_NAME, # path is relative to JupyterHub directory
49+
source_notebook_path=NOTEBOOK_2_TEMPLATE, # path is relative to input directory
50+
initial_data_files=self.services.get_staged_jupyterhub_files(),
4351
)
44-
self.services.portal_register_jupyter_notebook(NOTEBOOK_NAME)
4552

4653
self.services.call(worker, 'finalize', 0)
4754

@@ -95,7 +102,9 @@ def step(self, timestamp=0.0, **keywords):
95102
data = f.read()
96103

97104
# stage the state file in the JupyterHub directory
98-
self.services.jupyterhub_make_state(state_file, timestamp)
105+
data_file = self.services.jupyterhub_make_state(state_file, timestamp)
106+
print('ADD DATA FILE', data_file)
107+
self.services.add_data_file_to_notebook(NOTEBOOK_1_NAME, data_file)
99108

100109
print('SEND PORTAL DATA', timestamp, data, file=stderr)
101110
self.services.send_portal_data(timestamp, data)

examples-proposed/004-time-loop/sim/input_dir/base-notebook.ipynb renamed to examples-proposed/004-time-loop/sim/input_dir/base-notebook-iterative.ipynb

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
"# Notebook template, the IPS Framework will add a cell before this one\n",
1111
"# defining FILES as a list of state file paths.\n",
1212
"\n",
13+
"# In this example, this notebook is generated during the time loop.\n",
14+
"\n",
1315
"mapping = {}\n",
1416
"for file in FILES:\n",
1517
" with open(file, 'rb') as f:\n",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "5d75faa3",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"# Notebook template, the IPS Framework will add a cell before this one\n",
11+
"# defining FILES as a list of state file paths.\n",
12+
"\n",
13+
"# In this example, this notebook is only generated at the end of the run.\n",
14+
"\n",
15+
"mapping = {}\n",
16+
"for file in FILES:\n",
17+
" with open(file, 'rb') as f:\n",
18+
" mapping[file] = f.read()\n",
19+
"print(mapping)\n"
20+
]
21+
}
22+
],
23+
"metadata": {
24+
"language_info": {
25+
"name": "python"
26+
}
27+
},
28+
"nbformat": 4,
29+
"nbformat_minor": 5
30+
}

ipsframework/jupyter.py

+61-9
Original file line numberDiff line numberDiff line change
@@ -10,36 +10,88 @@
1010
...in a shell on Jupyter NERSC.
1111
"""
1212

13-
from os.path import sep
14-
from typing import List
13+
from typing import List, Optional
1514

1615
import nbformat as nbf
1716

17+
HOOK = '### This cell autogenerated by IPS Framework. DO NOT EDIT UNTIL IPS RUN IS FINALIZED. ###'
18+
"""This hook is used to determine which "cell" the IPS framework should work with.
19+
20+
It is written to a notebook cell on initializing it, and is searched for when adding a data file to it.
21+
"""
22+
23+
24+
def replace_last(source_string: str, old: str, new: str) -> str:
    """Replace the last occurrence of 'old' with 'new' in 'source_string', searching from the right.

    Params:
      - source_string: the string to search
      - old: the substring to look for (must be non-empty, str.rpartition rejects an empty separator)
      - new: the text which takes the place of the last occurrence of 'old'

    Returns:
      - the updated string, or 'source_string' unchanged if 'old' does not occur in it
    """
    head, found, tail = source_string.rpartition(old)
    if not found:
        # rpartition signals "not found" as ('', '', source_string); without this guard
        # 'new' would be prepended to an otherwise untouched string.
        return source_string
    return f'{head}{new}{tail}'
1828

19-
def _get_state_file_notebook_code_cell(variable: str, tags: List[str]):
20-
itemsep = ',\n'
21-
return f"""import os
2229

30+
def _initial_jupyter_file_notebook_cell(variable: str, initial_data_files: Optional[List[str]] = None) -> str:
31+
if not initial_data_files:
32+
initial = ''
33+
else:
34+
itemsep = '\n'
35+
initial = '\n' + itemsep.join([f"'{file}'," for file in initial_data_files])
36+
return f"""{HOOK}
37+
38+
import os
39+
40+
# NOTE: directory should be sim_name plus the run id from the Portal
41+
# NOTE: add absolute path as a comment to the notebook cell
2342
# Uncomment below line to use any state files saved
2443
#{variable} = os.listdir('data')
2544
# files created during the run
26-
{variable} = [{itemsep.join([f"'data{sep}{file}'" for file in tags])}]
45+
{variable} = [{initial}
46+
]
2747
"""
2848

2949

30-
def stage_jupyter_notebook(dest: str, src: str, tags: List[str], variable_name: str, index: int):
31-
""""""
50+
def initialize_jupyter_notebook(dest: str, src: str, variable_name: str, index: int, initial_data_files: Optional[List[str]] = None):
    """Create a new notebook from an old notebook, copying the result from 'src' to 'dest'.

    A markdown header cell and an IPS-generated code cell are inserted into the copy; no
    preexisting cells are removed.

    Params:
      - dest: location of notebook to create on filesystem
      - src: location of source notebook on filesystem (is not overwritten unless src == dest)
      - variable_name: what to call the variable defined in the generated code cell
      - index: insert new cells at position before this value (will not remove preexisting cells)
      - initial_data_files: optional list of files to initialize the notebook with
    """
    # to avoid conversion, use as_version=nbf.NO_CONVERT
    nb: nbf.NotebookNode = nbf.read(src, as_version=4)

    header = '# Next cell generated by IPS Framework'
    nb['cells'] = (
        nb['cells'][:index]
        + [nbf.v4.new_markdown_cell(header), nbf.v4.new_code_cell(_initial_jupyter_file_notebook_cell(variable_name, initial_data_files))]
        + nb['cells'][index:]
    )

    nbf.validate(nb)
    # Notebooks are UTF-8 JSON; do not depend on the locale's default encoding when writing.
    with open(dest, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)
75+
76+
77+
def add_data_file_to_notebook(dest: str, data_file: str, index: Optional[int] = None):
    """Add data file to notebook list.

    The notebook at 'dest' is read, the IPS-generated cell is updated, and the notebook is
    written back to the same path.

    Params:
      - dest: path to notebook which will be modified
      - data_file: data file we add to the notebook
      - index: optional index of the IPS notebook cell. If not provided, the first cell whose
        source contains HOOK is used.

    Raises:
      - Exception, if the IPS notebook cell cannot be found, or if it no longer contains the
        closing ']' of the data file list
    """
    nb: nbf.NotebookNode = nbf.read(dest, as_version=4)
    if index is None:
        index = next((i for i, e in enumerate(nb['cells']) if HOOK in e['source']), -1)
    if index < 0:
        raise Exception('Cannot find IPS notebook node')
    ips_cell = nb['cells'][index]['source']

    # search from right of string for the ']' character, should work assuming user does not modify the cell past the variable definition
    if ']' not in ips_cell:
        # without a closing bracket there is no list to append to; refuse rather than corrupt the cell
        raise Exception('Cannot find end of data file list in IPS notebook node')
    # repr() keeps the entry a valid Python string literal even if the path contains quotes or backslashes
    result = replace_last(ips_cell, ']', f'{str(data_file)!r},\n]')
    nb['cells'][index]['source'] = result

    # Notebooks are UTF-8 JSON; do not depend on the locale's default encoding when writing.
    with open(dest, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)

ipsframework/portalBridge.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def send_post_data(conn: Connection, stop: EventType, url: str):
9999
break
100100

101101

102-
def send_put_jupyter_url(conn: Connection, stop: EventType, url: str):
102+
def send_post_jupyter_url(conn: Connection, stop: EventType, url: str):
103103
fail_count = 0
104104

105105
http = urllib3.PoolManager(retries=urllib3.util.Retry(3, backoff_factor=0.25))
@@ -110,7 +110,7 @@ def send_put_jupyter_url(conn: Connection, stop: EventType, url: str):
110110
# TODO - consider using multipart/form-data instead
111111
try:
112112
resp = http.request(
113-
'PUT',
113+
'POST',
114114
url,
115115
body=json.dumps({'url': next_val['url'], 'portal_runid': next_val['portal_runid']}).encode(),
116116
headers={
@@ -430,7 +430,7 @@ def send_notebook_url(self, sim_data, event_data):
430430
self.dataurl_parent_conn, child_conn = Pipe()
431431
self.dataurl_childProcessStop = Event()
432432
self.dataurl_childProcess = Process(
433-
target=send_put_jupyter_url, args=(child_conn, self.dataurl_childProcessStop, self.portal_url + '/api/data/add_url')
433+
target=send_post_jupyter_url, args=(child_conn, self.dataurl_childProcessStop, self.portal_url + '/api/data/add_url')
434434
)
435435
self.dataurl_childProcess.start()
436436
self.dataurl_first_event = False

ipsframework/services.py

+28-19
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import logging
1111
import logging.handlers
1212
import os
13+
import pathlib
1314
import queue
1415
import shutil
1516
import signal
@@ -28,7 +29,7 @@
2829
from . import ipsutil, messages
2930
from .cca_es_spec import initialize_event_service
3031
from .ips_es_spec import eventManager
31-
from .jupyter import stage_jupyter_notebook
32+
from .jupyter import add_data_file_to_notebook, initialize_jupyter_notebook
3233
from .taskManager import TaskInit
3334

3435
RunningTask = namedtuple('RunningTask', ['process', 'start_time', 'timeout', 'nproc', 'cores_allocated', 'command', 'binary', 'args'])
@@ -1842,14 +1843,15 @@ def get_staged_jupyterhub_files(self) -> List[str]:
18421843
# TODO generic exception
18431844
raise Exception('Unable to initialize base JupyterHub dir')
18441845

1845-
return os.listdir(os.path.join(self._jupyterhub_dir, 'data'))
1846+
data_dir = pathlib.Path(pathlib.Path(self._jupyterhub_dir) / 'data')
1847+
return [str(p.resolve()) for p in data_dir.glob('*')]
18461848

18471849
def jupyterhub_make_state(self, state_file_path: str, timestamp: float) -> str:
18481850
"""
18491851
Move a state file into the JupyterHub directory.
18501852
18511853
Returns:
1852-
- the path to the state file in the JupyterHub directory
1854+
- the path to the state file in the JupyterHub directory. This will be an absolute path.
18531855
18541856
Raises:
18551857
- Exception, if unable to move file to the provided JUPYTERHUB_DIR
@@ -1885,22 +1887,21 @@ def _get_jupyterhub_url(self) -> Optional[str]:
18851887
url += f'ipsframework/runs/{runid}/'
18861888
return url
18871889

1888-
def stage_jupyter_notebook(
1890+
def initialize_jupyter_notebook(
18891891
self,
18901892
dest_notebook_name: str,
18911893
source_notebook_path: str,
1892-
tags: List[str],
1894+
initial_data_files: Optional[List[str]] = None,
18931895
variable_name: str = 'FILES',
18941896
cell_to_modify: int = 0,
18951897
) -> None:
1896-
"""Loads a notebook from source_notebook_path, adds a cell to load the data, and then saves it to source_notebook_path.
1898+
"""Loads a notebook from source_notebook_path, adds a cell to load the data, and then saves it as dest_notebook_name in the JupyterHub directory. Will also try to register the notebook with the IPS Portal, if available.
18971899
18981900
Does not modify the source notebook.
18991901
19001902
Params:
19011903
- dest_notebook_name: name of the JupyterNotebook you want to write (do not include file paths).
19021904
- source_notebook_path: location you want to load the source notebook from
1903-
- tags: list of state files you want to load in the notebook.
19041905
- variable_name: name of the variable you want to load files from (default: "FILES")
19051906
- cell_to_modify: which cell in the JupyterNotebook you want to add the data call to (0-indexed).
19061907
(This will not overwrite any cells, just appends.)
@@ -1910,22 +1911,14 @@ def stage_jupyter_notebook(
19101911
if not self._init_jupyter():
19111912
raise Exception('Unable to initialize base JupyterHub dir')
19121913

1913-
stage_jupyter_notebook(f'{self._jupyterhub_dir}{dest_notebook_name}', source_notebook_path, tags, variable_name, cell_to_modify)
1914+
# adds notebook to JupyterHub
1915+
initialize_jupyter_notebook(f'{self._jupyterhub_dir}{dest_notebook_name}', source_notebook_path, variable_name, cell_to_modify, initial_data_files)
19141916

1915-
def portal_register_jupyter_notebook(self, notebook_name: str) -> None:
1916-
"""Associate a JupyterNotebook with tags on the IPS Portal
1917-
1918-
NOTE: It's best to ONLY run this if you're wanting to associate multiple data files with a single notebook.
1919-
If you just want to save a single file, set the appropriate parameter on send_portal_data instead.
1920-
1921-
Params
1922-
- notebook_name: name of the notebook (do not provide any directories, use the config file for this)
1923-
- tags: list of tags to associate the notebook with
1924-
"""
1917+
# register notebook with IPS Portal
19251918
url = self._get_jupyterhub_url()
19261919
if not url:
19271920
return
1928-
url += notebook_name
1921+
url += dest_notebook_name
19291922

19301923
event_data = {}
19311924
event_data['sim_name'] = self.sim_conf['__PORTAL_SIM_NAME']
@@ -1938,6 +1931,22 @@ def portal_register_jupyter_notebook(self, notebook_name: str) -> None:
19381931
self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data)
19391932
self._send_monitor_event('IPS_PORTAL_REGISTER_NOTEBOOK', f'URL = {url}')
19401933

1934+
def add_data_file_to_notebook(self, notebook_name: str, state_file: str, index: Optional[int] = None):
    """Append a data file to the file list of an existing notebook.

    This function assumes that the notebook has already been created with initialize_jupyter_notebook. Using this function does not call the IPS Portal.

    Params:
      - notebook_name: name of notebook which will be modified. Note that this path is relative to the JupyterHub directory.
      - state_file: data file we add to the notebook (simple string). This value should almost always be the return value from "self.services.jupyterhub_make_state".
      - index: optional index of the IPS notebook cell. If not provided, the IPS Framework will attempt to automatically find the cell it created,
        which should work for every usecase where you don't anticipate modifying the notebook until after the run is complete.

    Raises:
      - Exception, if the JupyterHub directory is not set and cannot be initialized
    """
    # only attempt initialization when no JupyterHub directory is known yet
    jupyter_ready = bool(self._jupyterhub_dir) or self._init_jupyter()
    if not jupyter_ready:
        raise Exception('Unable to initialize base JupyterHub dir')
    notebook_path = f'{self._jupyterhub_dir}{notebook_name}'
    add_data_file_to_notebook(notebook_path, state_file, index)
1949+
19411950
def publish(self, topicName, eventName, eventBody):
19421951
"""
19431952
Publish event consisting of *eventName* and *eventBody* to topic *topicName* to the IPS event service.

0 commit comments

Comments
 (0)