1
1
"""
2
+ NOTE: this is not intended to be a public API for framework users, use instead:
3
+ - "services.initialize_jupyter_notebook" (to set up the JupyterHub interaction for a notebook, done only once)
4
+ - "services.add_analysis_data_file" (each time you want to add or remove a data file from JupyterHub)
5
+
2
6
This module is designed to help generate Jupyter notebooks to be used with IPS Portal analysis.
3
7
Some parts of the script will need direction from users on the Framework side to generate.
4
8
10
14
...in a shell on Jupyter NERSC.
11
15
"""
12
16
17
+ import re
13
18
from os .path import sep
14
19
from pathlib import Path
15
20
from typing import Optional
16
21
17
22
import nbformat as nbf
18
23
19
- HOOK = '### This cell autogenerated by IPS Framework. DO NOT EDIT UNTIL IPS RUN IS FINALIZED. ###'
20
- """This hook is used to determine which "cell" the IPS framework should work with.
21
-
22
- It is written to a notebook cell on initializing it, and is searched for when adding a data file to it.
23
- """
24
-
25
24
# Name of the variable, in the generated data-file module, which holds the data directory path.
DIRECTORY_VARIABLE_NAME = 'DATA_DIR'
26
25
27
26
@@ -34,149 +33,104 @@ def replace_last(source_string: str, old: str, new: str) -> str:
34
33
return f'{ head } { new } { tail } '
35
34
36
35
37
def _initial_data_file_code(dest: str, files_variable_name: str) -> str:
    """Return the initial source code of the data-file module which a notebook imports.

    The generated code assigns the data directory path to the variable named by
    DIRECTORY_VARIABLE_NAME and defines an empty dictionary named 'files_variable_name'.

    Params:
      - dest: path of the module file being generated; the data directory is the 'data' directory next to it
      - files_variable_name: name of the (initially empty) dictionary variable in the generated module

    Returns:
      - the module source code as a string
    """
    data_dir = str(Path(dest).parent / 'data') + sep
    # Emit the path through repr() so the generated assignment is always a valid
    # Python string literal, even when the path contains quotes or backslashes
    # (e.g. a trailing Windows separator directly before the closing quote).
    return f"""# This file should be imported by a jupyter notebook. DO NOT EDIT UNTIL IPS RUN IS FINALIZED.

import os

# NOTE: directory should be sim_name plus the run id from the Portal
{DIRECTORY_VARIABLE_NAME} = {data_dir!r}
{files_variable_name} = {{
}}
"""
50
46
51
47
52
def initialize_jupyter_notebook(notebook_dest: str, notebook_src: str, module_name: str, variable_name: str, index: int):
    """Create a new notebook from an old notebook, copying the result from 'notebook_src' to 'notebook_dest'.

    This adds an additional cell which will import the data files. The notebook should not be written again after this function.

    Params:
      - notebook_dest - location of notebook to create on filesystem (absolute file path)
      - notebook_src - location of source notebook on filesystem (is not overwritten unless notebook_src == notebook_dest)
      - module_name - name of the python module which will contain the data files variable
      - variable_name: what to call the variable
      - index: insert new cells at position before this value (will not remove preexisting cells)
    """
    # to avoid conversion, use as_version=nbf.NO_CONVERT
    nb: nbf.NotebookNode = nbf.read(notebook_src, as_version=4)

    # importlib.reload() only accepts a module object (passing a string raises TypeError),
    # so the generated cell imports the module itself, reloads it, and only then
    # (re-)imports the variable so that it reflects the reloaded module contents.
    import_cell_source = (
        'import importlib\n'
        '\n'
        f'import {module_name}\n'
        '\n'
        f'importlib.reload({module_name})\n'
        f'from {module_name} import {variable_name}\n'
    )

    nb['cells'] = (
        nb['cells'][:index]
        + [
            # explicitly mark the IPS cell for users inspecting the file, unused programmatically
            nbf.v4.new_markdown_cell('## Next cell generated by IPS Framework'),
            nbf.v4.new_code_cell(import_cell_source),
        ]
        + nb['cells'][index:]
    )

    nbf.validate(nb)
    # notebooks are JSON documents; write them as UTF-8 regardless of the platform's default encoding
    with open(notebook_dest, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)
81
81
82
82
83
def initialize_jupyter_import_module_file(dest: str, variable_name: str):
    """Create the python module file which holds the data files variable.

    The file at 'dest' is opened in write mode, so any existing content is
    replaced by the initial module code.

    Params:
      - dest - location of the module file to create on filesystem
      - variable_name: what to call the data files variable inside the module
    """
    with open(dest, 'w') as f:
        f.write(_initial_data_file_code(dest, variable_name))
110
93
111
def update_module_file_with_data_file(dest: str, data_file: str, replace: bool, timestamp: float = 0.0) -> Optional[str]:
    """Add a data file entry to the module file, or replace the entry stored under the same timestamp.

    Params:
      - dest: path to module file which will be modified
      - data_file: file which will be added to the module
      - replace: if True, an existing entry for 'timestamp' may be overwritten with 'data_file';
        if False, an existing entry for 'timestamp' is treated as an error
      - timestamp: key we associate the data file with

    Returns:
      - if we replaced a file, the name of the file which was replaced; otherwise, None

    Raises:
      - ValueError: an entry for 'timestamp' already exists and 'replace' is False
    """
    with open(dest, 'r') as f:
        old_module_code = f.read()

    # the line we want to end up with in the module's dictionary
    new_entry = f"{timestamp}: f'{{{DIRECTORY_VARIABLE_NAME}}}{data_file}',"

    # re.escape() handles every regex metacharacter in the timestamp (e.g. the '+' of '1e+16'),
    # not only '.'; the lookbehind keeps '1.0' from matching inside a longer key such as '11.0'.
    search_pattern = re.compile(
        r'(?<![\w.+-])'
        + re.escape(str(timestamp))
        + ": f'"
        + re.escape('{' + DIRECTORY_VARIABLE_NAME + '}')
        + "(.*)',"
    )

    found_match = search_pattern.search(old_module_code)
    if found_match is None:
        # timestamp does not exist, so add it
        # search from right of string for the '}' character, should work assuming user does not modify the file past the variable definition
        replaced_file_name = None
        new_module_code = replace_last(old_module_code, '}', new_entry + '\n' + '}')
    else:
        # timestamp already exists
        if not replace:
            raise ValueError(
                f"For timestamp entry {timestamp}, you are trying to replace '{found_match.group(1)}' with '{data_file}' . If this was intended, you must explicitly set 'replace=True' on the IPS function call."
            )
        replaced_file_name = found_match.group(1)
        if replaced_file_name == data_file:
            # in this case, we're not actually removing an obsolete file, so no need to write to the module file
            # return None because we've already directly replaced the file
            return None
        # use a function as the replacement so that backslashes in 'data_file' are written
        # literally instead of being interpreted as regex replacement escapes
        new_module_code = search_pattern.sub(lambda _match: new_entry, old_module_code)

    with open(dest, 'w') as f:
        f.write(new_module_code)

    return replaced_file_name
0 commit comments