Skip to content

Commit

Permalink
create jupyter notebook from existing notebook, simplify jupyter API
Browse files Browse the repository at this point in the history
Signed-off-by: Lance-Drane <[email protected]>
  • Loading branch information
Lance-Drane committed Jul 24, 2024
1 parent 42d01d7 commit 5dd8406
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 87 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# example code
simulation_log/
simulation_setup/
sim/
www/
log
resource_usage
Expand Down
2 changes: 2 additions & 0 deletions examples-proposed/004-time-loop/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sim/*
!sim/input_dir/
33 changes: 15 additions & 18 deletions examples-proposed/004-time-loop/mymodule/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ class Driver(Component):
"""In this example, the driver iterates through the time loop and calls both the worker and the monitor component on each timestep."""

def step(self, timestamp=0.0):
NOTEBOOK_TEMPLATE = 'base-notebook.ipynb'

worker = self.services.get_port('WORKER')
monitor = self.services.get_port('MONITOR')

self.services.call(worker, 'init', 0)
# Needed for notebook template
self.services.stage_input_files(NOTEBOOK_TEMPLATE)

# The time loop is configured in its own section of sim.conf
# It is shared across all components
Expand All @@ -32,11 +36,12 @@ def step(self, timestamp=0.0):
# create notebook here
NOTEBOOK_NAME = 'full_state.ipynb'
jupyter_state_files = self.services.get_staged_jupyterhub_files()
self.services.create_jupyterhub_notebook(jupyter_state_files, NOTEBOOK_NAME)
# NOTE: depending on the names of the files, you may have to use a custom mapping function to get the tag
# You MUST store the tag somewhere in the file name
tags = jupyter_state_files
self.services.portal_register_jupyter_notebook(NOTEBOOK_NAME, tags)
self.services.stage_jupyter_notebook(
dest_notebook_name=NOTEBOOK_NAME, # path is relative to JupyterHub directory
source_notebook_path='base-notebook.ipynb', # path is relative to input directory
tags=jupyter_state_files,
)
self.services.portal_register_jupyter_notebook(NOTEBOOK_NAME)

self.services.call(worker, 'finalize', 0)

Expand Down Expand Up @@ -89,16 +94,8 @@ def step(self, timestamp=0.0, **keywords):
with open(state_file, 'rb') as f:
data = f.read()

# example of updating Jupyter state
jupyterhub_state_file = self.services.jupyterhub_make_state(state_file, timestamp)
notebook_param = None
# create two notebooks on certain timestamps, create no notebooks otherwise
if int(timestamp) % 10 == 0:
notebook_param = []
for ident in (1, 2):
notebook_name = f'state_{timestamp}_{ident}.ipynb'
self.services.create_jupyterhub_notebook([jupyterhub_state_file], notebook_name)
notebook_param.append(notebook_name)

print('SEND PORTAL DATA', timestamp, data, notebook_param, file=stderr)
self.services.send_portal_data(timestamp, data, notebook_param)
# stage the state file in the JupyterHub directory
self.services.jupyterhub_make_state(state_file, timestamp)

print('SEND PORTAL DATA', timestamp, data, file=stderr)
self.services.send_portal_data(timestamp, data)
1 change: 0 additions & 1 deletion examples-proposed/004-time-loop/run.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
rm -rf sim
PYTHONPATH=$PWD PSCRATCH=${PSCRATCH:-/tmp} ips.py --config=sim.conf --platform=platform.conf --log=ips.log #--debug --verbose
4 changes: 3 additions & 1 deletion examples-proposed/004-time-loop/sim.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ RUN_ID = sim run id
TOKAMAK_ID = tokamak
SHOT_NUMBER = 1
TAG = tag
INPUT_DIR = $SIM_ROOT/input_dir/

USER_W3_DIR = $PWD/www
USER_W3_BASEURL =

PORTAL_URL = https://lb.ipsportal.development.svc.spin.nersc.org
PORTAL_URL = http://localhost:5000
#PORTAL_URL = https://lb.ipsportal.development.svc.spin.nersc.org

# OPTIONAL
# The BASE DIRECTORY of the JupyterHub web server on your machine. This is used strictly for moving files around on the machine itself.
Expand Down
28 changes: 28 additions & 0 deletions examples-proposed/004-time-loop/sim/input_dir/base-notebook.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "5d75faa3",
"metadata": {},
"outputs": [],
"source": [
"# Notebook template, the IPS Framework will add a cell before this one\n",
"# defining FILES as a list of state file paths.\n",
"\n",
"mapping = {}\n",
"for file in FILES:\n",
" with open(file, 'rb') as f:\n",
" mapping[file] = f.read()\n",
"print(mapping)\n"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
49 changes: 20 additions & 29 deletions ipsframework/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,28 @@
import nbformat as nbf


def _get_multi_state_file_notebook_code(state_file_paths: List[str]) -> str:
"""TODO this is currently just an example."""
return f"""FILES = [{','.join([f"'data{sep}{file}'" for file in state_file_paths])}]
mapping = {{}}
for file in FILES:
with open(file, 'rb') as f:
mapping[file] = f.read()
print(mapping)
def _get_state_file_notebook_code_cell(variable: str, tags: List[str]) -> str:
    """Return the source of the notebook code cell that lists the state files.

    The generated cell imports ``os``, keeps a commented-out alternative that
    lists the ``data`` directory, and assigns to ``variable`` a list holding
    one ``data<sep><tag>`` path per entry of ``tags``.

    Params:
      - variable: name of the variable assigned inside the generated cell
      - tags: state file names, each is prefixed with ``data`` and the path separator

    Returns:
      - the text of the code cell (always ends with a newline)
    """
    # repr() emits a correctly quoted and escaped string literal, so a file name
    # containing a quote or a backslash (e.g. a Windows path separator) cannot
    # break the generated code. For ordinary names the output is unchanged.
    entries = ',\n'.join(repr(f'data{sep}{file}') for file in tags)
    return (
        'import os\n'
        '# Uncomment below line to use any state files saved\n'
        f"#{variable} = os.listdir('data')\n"
        '# files created during the run\n'
        f'{variable} = [{entries}]\n'
    )

def stage_jupyter_notebook(dest: str, src: str, tags: List[str], variable_name: str, index: int):
""""""
# to avoid conversion, use as_version=nbf.NO_CONVERT
#
nb: nbf.NotebookNode = nbf.read(src, as_version=4)

def _get_nb_v4(code: str):
"""Returns an nbf.v4 object"""
nb = nbf.v4.new_notebook()
text = '# AUTOGENERATED from IPS Framework'
header = '# Next cell generated by IPS Framework'
nb['cells'] = nb['cells'][:index] + [
nbf.v4.new_markdown_cell(header),
nbf.v4.new_code_cell(_get_state_file_notebook_code_cell(variable_name, tags))
] + nb['cells'][index:]

nb['cells'] = [nbf.v4.new_markdown_cell(text), nbf.v4.new_code_cell(code)]
return nb


def create_multi_state_file_notebook(state_file_paths: List[str], notebook_path: str):
"""
TODO
Writes notebook which will try to load multiple state files
for now, will just store all data in a dictionary of filepath str to raw bytes
"""

code = _get_multi_state_file_notebook_code(state_file_paths)
nb = _get_nb_v4(code)

with open(notebook_path, 'w') as f:
nbf.validate(nb)
with open(dest, 'w') as f:
nbf.write(nb, f)
5 changes: 1 addition & 4 deletions ipsframework/portalBridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ def send_post_data(conn: Connection, stop: EventType, url: str):
'X-IPS-Tag': next_val['tag'],
'X-IPS-Portal-Runid': next_val['portal_runid'],
}
links = next_val.get('jupyter_links')
if links:
headers['X-IPS-Jupyter-Links'] = '\x01'.join(links)
resp = http.request(
'POST',
url,
Expand Down Expand Up @@ -115,7 +112,7 @@ def send_put_jupyter_url(conn: Connection, stop: EventType, url: str):
resp = http.request(
'PUT',
url,
body=json.dumps({'url': next_val['url'], 'tags': next_val['tags'], 'portal_runid': next_val['portal_runid']}).encode(),
body=json.dumps({'url': next_val['url'], 'portal_runid': next_val['portal_runid']}).encode(),
headers={
'Content-Type': 'application/json',
},
Expand Down
58 changes: 25 additions & 33 deletions ipsframework/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@
import weakref
from collections import namedtuple
from operator import iadd, itemgetter
from typing import Any, List, Optional
from typing import Any, Iterable, List, Optional, Union

from configobj import ConfigObj

from . import ipsutil, messages
from .cca_es_spec import initialize_event_service
from .ips_es_spec import eventManager
from .jupyter import create_multi_state_file_notebook
from .jupyter import stage_jupyter_notebook
from .taskManager import TaskInit

RunningTask = namedtuple('RunningTask', ['process', 'start_time', 'timeout', 'nproc', 'cores_allocated', 'command', 'binary', 'args'])
Expand Down Expand Up @@ -1353,7 +1353,7 @@ def get_working_dir(self):
return self.workdir

# DM stageInput
def stage_input_files(self, input_file_list):
def stage_input_files(self, input_file_list: Union[str, Iterable[str]]):
"""
Copy component input files to the component working directory
(as obtained via a call to :py:meth:`ServicesProxy.get_working_dir`). Input files
Expand Down Expand Up @@ -1763,15 +1763,13 @@ def update_time_stamp(self, new_time_stamp=-1):

# instead of explicit content_type_enum - parse file? Focus just on E2E for now
# TODO - change API to send a file path instead of raw data
def send_portal_data(self, tag: float, data: bytes, juypter_notebooks: Optional[List[str]] = None):
def send_portal_data(self, tag: float, data: bytes):
"""
Send data to the portal
Params:
- tag: currently, use the timestep for this
- data: raw data of statefile - must be in bytes format
- jupyter_notebooks: optional list of Jupyter notebooks.
If provided, associate these urls with this run
"""
if not isinstance(data, bytes):
self.error('Data argument passed to "services.send_portal_data" must be bytes')
Expand All @@ -1784,10 +1782,6 @@ def send_portal_data(self, tag: float, data: bytes, juypter_notebooks: Optional[
portal_data: dict[str, Any] = {}
portal_data['tag'] = str(tag)
portal_data['data'] = data
if juypter_notebooks:
url = self._get_jupyterhub_url()
if url:
portal_data['jupyter_links'] = [f'{url}{nb}' for nb in juypter_notebooks]
portal_data['eventtype'] = 'PORTAL_DATA'
event_data['portal_data'] = portal_data
self.publish('_IPS_MONITOR', 'PORTAL_DATA', event_data)
Expand Down Expand Up @@ -1891,34 +1885,33 @@ def _get_jupyterhub_url(self) -> Optional[str]:
url += f'ipsframework/runs/{runid}/'
return url

def create_jupyterhub_notebook(self, state_file_paths: List[str], name: str) -> str:
"""
Create a JupyterHub Notebook which involves several state file paths.
def stage_jupyter_notebook(self,
dest_notebook_name: str,
source_notebook_path: str,
tags: List[str],
variable_name: str = 'FILES',
cell_to_modify: int = 0,
) -> None:
"""Loads a notebook from source_notebook_path, inserts a generated cell listing the state files, and then saves the result as dest_notebook_name in the JupyterHub directory.
Does not modify the source notebook.
Params:
- state_file_paths: list of state files (state files should NOT include the directory)
- name: name you want to call the Jupyter Notebook (should NOT include the directory)
NOTE: state files and the JupyterNotebook are created in the same folder by default.
Returns:
- the path to the notebook file in the JupyterHub directory
Raises:
- Exception, if unable to create notebook in JUPYTERHUB_DIR
- dest_notebook_name: name of the JupyterNotebook you want to write (do not include file paths).
- source_notebook_path: location you want to load the source notebook from
- tags: list of state files you want to load in the notebook.
- variable_name: name of the variable you want to load files from (default: "FILES")
- cell_to_modify: index (0-indexed) in the notebook's cell list at which the generated cells are inserted.
(No existing cells are overwritten; the cells from this index onward are shifted after the inserted ones.)
By default, the generated cells are inserted at the very START of the notebook.
"""

if not self._jupyterhub_dir:
if not self._init_jupyter():
raise Exception('Unable to initialize base JupyterHub dir')

stage_jupyter_notebook(f'{self._jupyterhub_dir}{dest_notebook_name}', source_notebook_path, tags, variable_name, cell_to_modify)

jupyter_file = f'{self._jupyterhub_dir}{name}'
if not jupyter_file.endswith('.ipynb'):
jupyter_file = f'{jupyter_file}.ipynb'
create_multi_state_file_notebook(state_file_paths, jupyter_file)
return jupyter_file

def portal_register_jupyter_notebook(self, notebook_name: str, tags: List[str]) -> None:
def portal_register_jupyter_notebook(self, notebook_name: str) -> None:
"""Associate a JupyterNotebook with this run on the IPS Portal
NOTE: It's best to ONLY run this if you're wanting to associate multiple data files with a single notebook.
Expand All @@ -1939,7 +1932,6 @@ def portal_register_jupyter_notebook(self, notebook_name: str, tags: List[str])

portal_data: dict[str, Any] = {}
portal_data['url'] = url
portal_data['tags'] = tags
portal_data['eventtype'] = 'PORTAL_REGISTER_NOTEBOOK'
event_data['portal_data'] = portal_data
self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data)
Expand Down

0 comments on commit 5dd8406

Please sign in to comment.