Skip to content

Commit 7298cd1

Browse files
authored
docs(notebooks): automate data access in tutorials/examples (#2392)
First step towards #1872. Use pooch for data access. This is ugly, but it makes the notebooks runnable out of the box (provided the required executables and Python environment are available). Local files are used if detected; otherwise they are downloaded, following the pattern in the mf6 example models. An eventual models API could hide all the details of model access. Also mention the optional-dependencies requirement on the tutorials and examples gallery pages.
1 parent 11d9b7f commit 7298cd1

37 files changed

+1814
-276
lines changed

.docs/Notebooks/array_output_tutorial.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,15 @@
2929
# + pycharm={"name": "#%%\n"}
3030
import os
3131
import sys
32+
from pathlib import Path
3233
from pprint import pformat
3334
from tempfile import TemporaryDirectory
3435

36+
import git
3537
import matplotlib as mpl
3638
import matplotlib.pyplot as plt
3739
import numpy as np
40+
import pooch
3841

3942
import flopy
4043

@@ -44,8 +47,40 @@
4447
exe_name = "mf2005"
4548
mfexe = exe_name
4649

50+
# Check if we are in the repository and define the data path.
51+
52+
try:
53+
root = Path(git.Repo(".", search_parent_directories=True).working_dir)
54+
except:
55+
root = None
56+
57+
data_path = root / "examples" / "data" if root else Path.cwd()
58+
59+
sim_name = "freyberg"
60+
61+
file_names = {
62+
"freyberg.bas": "63266024019fef07306b8b639c6c67d5e4b22f73e42dcaa9db18b5e0f692c097",
63+
"freyberg.dis": "62d0163bf36c7ee9f7ee3683263e08a0abcdedf267beedce6dd181600380b0a2",
64+
"freyberg.githds": "abe92497b55e6f6c73306e81399209e1cada34cf794a7867d776cfd18303673b",
65+
"freyberg.gitlist": "aef02c664344a288264d5f21e08a748150e43bb721a16b0e3f423e6e3e293056",
66+
"freyberg.lpf": "06500bff979424f58e5e4fbd07a7bdeb0c78f31bd08640196044b6ccefa7a1fe",
67+
"freyberg.nam": "e66321007bb603ef55ed2ba41f4035ba6891da704a4cbd3967f0c66ef1532c8f",
68+
"freyberg.oc": "532905839ccbfce01184980c230b6305812610b537520bf5a4abbcd3bd703ef4",
69+
"freyberg.pcg": "0d1686fac4680219fffdb56909296c5031029974171e25d4304e70fa96ebfc38",
70+
"freyberg.rch": "37a1e113a7ec16b61417d1fa9710dd111a595de738a367bd34fd4a359c480906",
71+
"freyberg.riv": "7492a1d5eb23d6812ec7c8227d0ad4d1e1b35631a765c71182b71e3bd6a6d31d",
72+
"freyberg.wel": "00aa55f59797c02f0be5318a523b36b168fc6651f238f34e8b0938c04292d3e7",
73+
}
74+
for fname, fhash in file_names.items():
75+
pooch.retrieve(
76+
url=f"https://github.com/modflowpy/flopy/raw/develop/examples/data/{sim_name}/{fname}",
77+
fname=fname,
78+
path=data_path / sim_name,
79+
known_hash=fhash,
80+
)
81+
4782
# Set the paths
48-
loadpth = os.path.join("..", "..", "examples", "data", "freyberg")
83+
loadpth = data_path / sim_name
4984
temp_dir = TemporaryDirectory()
5085
modelpth = temp_dir.name
5186

.docs/Notebooks/export_tutorial.py

+39-1
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@
2020
# +
2121
import os
2222
import sys
23+
from pathlib import Path
2324
from tempfile import TemporaryDirectory
2425

26+
import git
27+
import pooch
28+
2529
import flopy
2630

2731
print(sys.version)
@@ -30,8 +34,42 @@
3034

3135
# Load our old friend...the Freyberg model
3236

37+
sim_name = "freyberg_multilayer_transient"
38+
39+
# Check if we are in the repository and define the data path.
40+
41+
try:
42+
root = Path(git.Repo(".", search_parent_directories=True).working_dir)
43+
except:
44+
root = None
45+
46+
data_path = root / "examples" / "data" if root else Path.cwd()
47+
48+
file_names = {
49+
"freyberg.bas": None,
50+
"freyberg.cbc": None,
51+
"freyberg.ddn": None,
52+
"freyberg.dis": None,
53+
"freyberg.drn": None,
54+
"freyberg.hds": None,
55+
"freyberg.list": None,
56+
"freyberg.nam": None,
57+
"freyberg.nwt": None,
58+
"freyberg.oc": None,
59+
"freyberg.rch": None,
60+
"freyberg.upw": None,
61+
"freyberg.wel": None,
62+
}
63+
for fname, fhash in file_names.items():
64+
pooch.retrieve(
65+
url=f"https://github.com/modflowpy/flopy/raw/develop/examples/data/{sim_name}/{fname}",
66+
fname=fname,
67+
path=data_path / sim_name,
68+
known_hash=fhash,
69+
)
70+
3371
nam_file = "freyberg.nam"
34-
model_ws = os.path.join("..", "..", "examples", "data", "freyberg_multilayer_transient")
72+
model_ws = data_path / sim_name
3573
ml = flopy.modflow.Modflow.load(nam_file, model_ws=model_ws, check=False)
3674

3775
# We can see the ``Modelgrid`` instance has generic entries, as does ``start_datetime``

.docs/Notebooks/export_vtk_tutorial.py

+33-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
from pprint import pformat
3434
from tempfile import TemporaryDirectory
3535

36+
import git
3637
import numpy as np
38+
import pooch
3739

3840
import flopy
3941
from flopy.export import vtk
@@ -42,11 +44,39 @@
4244
print(f"flopy version: {flopy.__version__}")
4345
# -
4446

47+
try:
48+
root = Path(git.Repo(".", search_parent_directories=True).working_dir)
49+
except:
50+
root = None
51+
52+
data_path = root / "examples" / "data" if root else Path.cwd()
53+
sim_name = "freyberg_multilayer_transient"
54+
file_names = {
55+
"freyberg.bas": None,
56+
"freyberg.cbc": None,
57+
"freyberg.ddn": None,
58+
"freyberg.dis": None,
59+
"freyberg.drn": None,
60+
"freyberg.hds": None,
61+
"freyberg.list": None,
62+
"freyberg.nam": None,
63+
"freyberg.nwt": None,
64+
"freyberg.oc": None,
65+
"freyberg.rch": None,
66+
"freyberg.upw": None,
67+
"freyberg.wel": None,
68+
}
69+
for fname, fhash in file_names.items():
70+
pooch.retrieve(
71+
url=f"https://github.com/modflowpy/flopy/raw/develop/examples/data/{sim_name}/{fname}",
72+
fname=fname,
73+
path=data_path / sim_name,
74+
known_hash=fhash,
75+
)
76+
4577
# load model for examples
4678
nam_file = "freyberg.nam"
47-
model_ws = Path(
48-
os.path.join("..", "..", "examples", "data", "freyberg_multilayer_transient")
49-
)
79+
model_ws = data_path / sim_name
5080
ml = flopy.modflow.Modflow.load(nam_file, model_ws=model_ws, check=False)
5181

5282
# Create a temporary workspace.

.docs/Notebooks/feat_working_stack_examples.py

+46-16
Original file line numberDiff line numberDiff line change
@@ -23,33 +23,68 @@
2323
from pprint import pformat
2424
from tempfile import TemporaryDirectory
2525

26+
import git
2627
import matplotlib as mpl
2728
import matplotlib.pyplot as plt
2829
import numpy as np
2930
import pandas as pd
30-
31-
# +
31+
import pooch
3232
from IPython.display import clear_output, display
3333

34-
proj_root = Path.cwd().parent.parent
35-
36-
# run installed version of flopy or add local path
3734
import flopy
3835

3936
print(sys.version)
4037
print(f"numpy version: {np.__version__}")
4138
print(f"matplotlib version: {mpl.__version__}")
4239
print(f"pandas version: {pd.__version__}")
4340
print(f"flopy version: {flopy.__version__}")
44-
# -
4541

42+
# First create a temporary workspace.
43+
44+
sim_name = "freyberg_multilayer_transient"
45+
temp_dir = TemporaryDirectory()
46+
workspace = Path(temp_dir.name)
47+
48+
# Check if we are in the repository and define the data path.
49+
50+
try:
51+
root = Path(git.Repo(".", search_parent_directories=True).working_dir)
52+
except:
53+
root = None
54+
55+
data_path = root / "examples" / "data" if root else Path.cwd()
56+
57+
# Download files if needed.
58+
59+
file_names = {
60+
"freyberg.bas": "781585c140d40a27bce9369baee262c621bcf969de82361ad8d6b4d8c253ee02",
61+
"freyberg.cbc": "d4e18e968cabde8470fcb7cb8a1c4cc57fcd643bd63b23e7751460bfdb651ea4",
62+
"freyberg.ddn": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
63+
"freyberg.dis": "1ef61a467a219c036e58902ce11297e06b4eeb5f2f9d2ea40245b421a248a471",
64+
"freyberg.drn": "93c22ab27d599938a8c2fc5b420ec03e5251b11b050d6ae1cb23ce2aa1b77997",
65+
"freyberg.hds": "0b3e911ef35f625d2d046e05a20bc1300341b41028220c5b25ace6f5a267ceef",
66+
"freyberg.list": "14ec36c22b48d253d6b82c44f36c5bad4f0785b3a3384b386f6b69c4ee2e31bf",
67+
"freyberg.nam": "9e3747ce6d6229caec55a9357285a96cb4608dae11d90dd165a23e0bb394a2bd",
68+
"freyberg.nwt": "d66c5cc255d050a0f871639af4af0cef8d48fa59c1c64217de65fc6e7fd78cb1",
69+
"freyberg.oc": "faefd462d11b9a21c4579420b2156fb616ca642bc1e66fc5eb5e1b9046449e43",
70+
"freyberg.rch": "93a12742a2d37961d53df0405e39cbecf0e6f14d45b5ca8cbba84a2d90828258",
71+
"freyberg.upw": "80838be7af2f97c92965bad1d121c252b69d9c66e4885c5f3f49a6e99582deac",
72+
"freyberg.wel": "dd322655eadff3f618f0835c9277af30720197bd48328aae2d6772f26eef2686",
73+
}
74+
for fname, fhash in file_names.items():
75+
pooch.retrieve(
76+
url=f"https://github.com/modflowpy/flopy/raw/develop/examples/data/{sim_name}/{fname}",
77+
fname=fname,
78+
path=data_path / sim_name,
79+
known_hash=fhash,
80+
)
81+
82+
# -
4683
# ### Model Inputs
4784

48-
# first lets load an existing model
49-
model_ws = proj_root / "examples" / "data" / "freyberg_multilayer_transient"
5085
ml = flopy.modflow.Modflow.load(
5186
"freyberg.nam",
52-
model_ws=model_ws,
87+
model_ws=data_path / sim_name,
5388
verbose=False,
5489
check=False,
5590
exe_name="mfnwt",
@@ -66,11 +101,6 @@
66101
ml.drn.plot(key="cond")
67102
ml.drn.plot(key="elev")
68103

69-
# First create a temporary workspace.
70-
71-
# create a temporary workspace
72-
temp_dir = TemporaryDirectory()
73-
workspace = Path(temp_dir.name)
74104

75105
# Write a shapefile of the DIS package.
76106

@@ -96,7 +126,7 @@
96126
#
97127
# First, let's look at the list file. The list file summarizes the model's results.
98128

99-
mfl = flopy.utils.MfListBudget(model_ws / "freyberg.list")
129+
mfl = flopy.utils.MfListBudget(workspace / "freyberg.list")
100130
df_flux, df_vol = mfl.get_dataframes(start_datetime="10-21-2015")
101131
df_flux
102132

@@ -116,7 +146,7 @@
116146
# Now let's look at the simulated head.
117147

118148
# if you pass the model instance, then the plots will be offset and rotated
119-
h = flopy.utils.HeadFile(model_ws / "freyberg.hds", model=ml)
149+
h = flopy.utils.HeadFile(workspace / "freyberg.hds", model=ml)
120150
h.times
121151

122152
h.plot(totim=900, contour=True, grid=True, colorbar=True, figsize=(10, 10))

.docs/Notebooks/groundwater2023_watershed_example.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@
2525
import pathlib as pl
2626
import sys
2727

28+
import git
2829
import matplotlib as mpl
2930
import matplotlib.gridspec as gridspec
3031
import matplotlib.pyplot as plt
3132
import numpy as np
33+
import pooch
3234
import shapely
3335
import yaml
3436
from shapely.geometry import LineString, Polygon
@@ -106,10 +108,25 @@ def set_idomain(grid, boundary):
106108
grid.idomain = idomain
107109

108110

109-
geometries = yaml.safe_load(
110-
open(pl.Path("../../examples/data/groundwater2023/geometries.yml"))
111+
# Check if we are in the repository and define the data path.
112+
113+
try:
114+
root = pl.Path(git.Repo(".", search_parent_directories=True).working_dir)
115+
except:
116+
root = None
117+
118+
data_path = root / "examples" / "data" if root else pl.Path.cwd()
119+
folder_name = "groundwater2023"
120+
fname = "geometries.yml"
121+
pooch.retrieve(
122+
url=f"https://github.com/modflowpy/flopy/raw/develop/examples/data/{folder_name}/{fname}",
123+
fname=fname,
124+
path=data_path / folder_name,
125+
known_hash=None,
111126
)
112127

128+
geometries = yaml.safe_load(open(data_path / folder_name / fname))
129+
113130
# basic figure size
114131
figwidth = 180 # 90 # mm
115132
figwidth = figwidth / 10 / 2.54 # inches
@@ -161,7 +178,13 @@ def set_idomain(grid, boundary):
161178
os.mkdir(temp_path)
162179

163180
# Load the fine topography that will be sampled
164-
ascii_file = pl.Path("../../examples/data/geospatial/fine_topo.asc")
181+
fname = "fine_topo.asc"
182+
ascii_file = pooch.retrieve(
183+
url=f"https://github.com/modflowpy/flopy/raw/develop/examples/data/geospatial/{fname}",
184+
fname=fname,
185+
path=data_path / "geospatial",
186+
known_hash=None,
187+
)
165188
fine_topo = flopy.utils.Raster.load(ascii_file)
166189

167190
# Define the problem size and extents

.docs/Notebooks/groundwater_paper_uspb_example.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@
2222
# +
2323
import os
2424
import sys
25+
from pathlib import Path
2526
from pprint import pformat
2627

28+
import git
2729
import matplotlib as mpl
2830
import matplotlib.pyplot as plt
2931
import numpy as np
32+
import pooch
3033
import scipy.ndimage
3134

3235
import flopy
@@ -41,9 +44,23 @@
4144
if not os.path.exists(ws):
4245
os.makedirs(ws)
4346

44-
fn = os.path.join(
45-
"..", "groundwater_paper", "uspb", "results", "USPB_capture_fraction_04_01.dat"
47+
# Check if we are in the repository and define the data path.
48+
49+
try:
50+
root = Path(git.Repo(".", search_parent_directories=True).working_dir)
51+
except:
52+
root = None
53+
54+
data_path = root / ".docs" / "groundwater_paper" if root else Path.cwd()
55+
56+
fname = "USPB_capture_fraction_04_01.dat"
57+
pooch.retrieve(
58+
url=f"https://github.com/modflowpy/flopy/raw/develop/.docs/groundwater_paper/uspb/results/{fname}",
59+
fname=fname,
60+
path=data_path / "uspb" / "results",
61+
known_hash=None,
4662
)
63+
fn = data_path / "uspb" / "results" / fname
4764
cf = np.loadtxt(fn)
4865
print(cf.shape)
4966

@@ -53,7 +70,7 @@
5370
c = plt.imshow(cf2, cmap="jet")
5471
plt.colorbar(c)
5572

56-
wsl = os.path.join("..", "groundwater_paper", "uspb", "flopy")
73+
wsl = data_path / "uspb" / "flopy"
5774
ml = flopy.modflow.Modflow.load("DG.nam", model_ws=wsl, verbose=False)
5875

5976
nlay, nrow, ncol = ml.nlay, ml.dis.nrow, ml.dis.ncol
@@ -191,9 +208,14 @@
191208
plt.savefig(os.path.join(ws, "uspb_heads.png"), dpi=300)
192209
# -
193210

194-
fn = os.path.join(
195-
"..", "groundwater_paper", "uspb", "results", "USPB_capture_fraction_04_10.dat"
211+
fname = "USPB_capture_fraction_04_10.dat"
212+
pooch.retrieve(
213+
url=f"https://github.com/modflowpy/flopy/raw/develop/.docs/groundwater_paper/uspb/results/{fname}",
214+
fname=fname,
215+
path=data_path / "uspb" / "results",
216+
known_hash=None,
196217
)
218+
fn = data_path / "uspb" / "results" / fname
197219
cf = np.loadtxt(fn)
198220
cf2 = scipy.ndimage.zoom(cf, 4, order=0)
199221

0 commit comments

Comments
 (0)