Skip to content

Commit 4d9f5ed

Browse files
committed
update docs
1 parent 36574b0 commit 4d9f5ed

File tree

4 files changed

+272
-0
lines changed

4 files changed

+272
-0
lines changed
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "a48322c9",
6+
"metadata": {},
7+
"source": [
8+
"# Example Copernicus data download \n",
9+
"\n",
10+
"This Notebook provides a rough example of how to download Copernicus Marine data using the Copernicus Marine API.\n",
11+
"\n",
12+
"This will download:\n",
13+
"- Global bathymetry data (static)\n",
14+
"- Global biogeochemical monthly data (0.25 degree hindcast)\n",
15+
"- Global physical daily data (0.25 degree reanalysis)\n",
16+
"\n",
17+
"The download covers a single year (2023) and two months (June and July).\n",
18+
"\n",
19+
"This notebook is intended as a basic example only. Modifications will be needed to adapt this to your own use case."
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": null,
25+
"id": "7f5a7cc7",
26+
"metadata": {},
27+
"outputs": [],
28+
"source": [
29+
"import copernicusmarine\n",
30+
"import os\n",
31+
"from datetime import datetime"
32+
]
33+
},
34+
{
35+
"cell_type": "code",
36+
"execution_count": null,
37+
"id": "e7279d5a",
38+
"metadata": {},
39+
"outputs": [],
40+
"source": [
41+
"YEAR = \"2023\"\n",
42+
"MONTHS = [\"06\", \"07\"]\n",
43+
"DAYS = [\n",
44+
" \"01\",\n",
45+
" \"02\",\n",
46+
" \"03\",\n",
47+
" \"04\",\n",
48+
" \"05\",\n",
49+
" \"06\",\n",
50+
" \"07\",\n",
51+
" \"08\",\n",
52+
" \"09\",\n",
53+
" \"10\",\n",
54+
" \"11\",\n",
55+
" \"12\",\n",
56+
" \"13\",\n",
57+
" \"14\",\n",
58+
" \"15\",\n",
59+
" \"16\",\n",
60+
" \"17\",\n",
61+
" \"18\",\n",
62+
" \"19\",\n",
63+
" \"20\",\n",
64+
" \"21\",\n",
65+
" \"22\",\n",
66+
" \"23\",\n",
67+
" \"24\",\n",
68+
" \"25\",\n",
69+
" \"26\",\n",
70+
" \"27\",\n",
71+
" \"28\",\n",
72+
" \"29\",\n",
73+
" \"30\",\n",
74+
" \"31\",\n",
75+
"]"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"id": "1a583dba",
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"### PHYSICAL DAILY FILES\n",
86+
"\n",
87+
"os.chdir(os.path.expanduser(\"~/data/phys/\"))\n",
88+
"DATASET_ID = \"cmems_mod_glo_phy-all_my_0.25deg_P1D-m\"\n",
89+
"\n",
90+
"for month in MONTHS:\n",
91+
" for day in DAYS:\n",
92+
" # check is valid date\n",
93+
" try:\n",
94+
" datetime(year=int(YEAR), month=int(month), day=int(day), hour=0)\n",
95+
" except ValueError:\n",
96+
" continue\n",
97+
"\n",
98+
" filename = f\"{DATASET_ID}_global_fulldepth_{YEAR}_{month}_{day}.nc\"\n",
99+
"\n",
100+
" if os.path.exists(filename):\n",
101+
" print(f\"File {filename} already exists, skipping...\")\n",
102+
" continue\n",
103+
"\n",
104+
" copernicusmarine.subset(\n",
105+
" dataset_id=DATASET_ID,\n",
106+
" variables=[\"uo_glor\", \"vo_glor\", \"thetao_glor\", \"so_glor\"],\n",
107+
" minimum_longitude=-180,\n",
108+
" maximum_longitude=179.75,\n",
109+
" minimum_latitude=-80,\n",
110+
" maximum_latitude=90,\n",
111+
" start_datetime=f\"{YEAR}-{month}-{day}T00:00:00\",\n",
112+
" end_datetime=f\"{YEAR}-{month}-{day}T00:00:00\",\n",
113+
" minimum_depth=0.5057600140571594,\n",
114+
" maximum_depth=5902.0576171875,\n",
115+
" output_filename=filename,\n",
116+
" )"
117+
]
118+
},
119+
{
120+
"cell_type": "code",
121+
"execution_count": null,
122+
"id": "89921772",
123+
"metadata": {},
124+
"outputs": [],
125+
"source": [
126+
"### BIOGEOCHEMICAL MONTHLY FILES\n",
127+
"\n",
128+
"os.chdir(os.path.expanduser(\"~/data/bgc/\"))\n",
129+
"DATASET_ID = \"cmems_mod_glo_bgc_my_0.25deg_P1M-m\"\n",
130+
"DAY = \"01\"\n",
131+
"\n",
132+
"for month in MONTHS:\n",
133+
" try:\n",
134+
" datetime(year=int(YEAR), month=int(month), day=int(DAY), hour=0)\n",
135+
" except ValueError:\n",
136+
" continue\n",
137+
"\n",
138+
" filename = f\"{DATASET_ID}_global_fulldepth_{YEAR}_{month}_{DAY}.nc\"\n",
139+
"\n",
140+
" if os.path.exists(filename):\n",
141+
" print(f\"File {filename} already exists, skipping...\")\n",
142+
" continue\n",
143+
"\n",
144+
" copernicusmarine.subset(\n",
145+
" dataset_id=DATASET_ID,\n",
146+
" variables=[\"chl\", \"no3\", \"nppv\", \"o2\", \"ph\", \"phyc\", \"po4\"],\n",
147+
" minimum_longitude=-180,\n",
148+
" maximum_longitude=179.75,\n",
149+
" minimum_latitude=-80,\n",
150+
" maximum_latitude=90,\n",
151+
" start_datetime=f\"{YEAR}-{month}-{DAY}T00:00:00\",\n",
152+
" end_datetime=f\"{YEAR}-{month}-{DAY}T00:00:00\",\n",
153+
" minimum_depth=0.5057600140571594,\n",
154+
" maximum_depth=5902.05810546875,\n",
155+
" output_filename=filename,\n",
156+
" )"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"id": "8b5495c6",
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
166+
"### BATHYMETRY FILE\n",
167+
"os.chdir(os.path.expanduser(\"~/data/bathymetry/\"))\n",
168+
"DATASET_ID = \"cmems_mod_glo_phy_anfc_0.083deg_static\"\n",
169+
"filename = \"cmems_mod_glo_phy_anfc_0.083deg_static_bathymetry.nc\"\n",
170+
"\n",
171+
"copernicusmarine.subset(\n",
172+
" dataset_id=DATASET_ID,\n",
173+
" dataset_part=\"bathy\",\n",
174+
" variables=[\"deptho\"],\n",
175+
" minimum_longitude=-180,\n",
176+
" maximum_longitude=179.91668701171875,\n",
177+
" minimum_latitude=-80,\n",
178+
" maximum_latitude=90,\n",
179+
" minimum_depth=0.49402499198913574,\n",
180+
" maximum_depth=0.49402499198913574,\n",
181+
" output_filename=filename,\n",
182+
")"
183+
]
184+
}
185+
],
186+
"metadata": {
187+
"language_info": {
188+
"name": "python"
189+
}
190+
},
191+
"nbformat": 4,
192+
"nbformat_minor": 5
193+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Pre-downloading data
2+
3+
By default, VirtualShip will automatically 'stream' data from the Copernicus Marine Service via their [copernicusmarine toolbox](https://github.com/mercator-ocean/copernicus-marine-toolbox?tab=readme-ov-file). However, for users with limited or unreliable internet connectivity, or those wishing to manage data locally, it is possible to pre-download the required datasets.
4+
5+
As outlined in the [Quickstart Guide](../quickstart.md), the `virtualship run` command supports an optional `--from-data` argument, which allows users to specify a local directory containing the necessary data files.
6+
7+
Example Python code for automating data download from Copernicus Marine can be found in the [Example Copernicus Download Notebook](example_copernicus_download.ipynb).
8+
9+
### Data requirements
10+
11+
When using pre-downloaded data with VirtualShip, the software supports only: daily and monthly resolution physical and biogeochemical data, along with a static bathymetry file.
12+
13+
In addition, all pre-downloaded data must be split into separate files per timestep (i.e. one .nc file per day or month).
14+
15+
Further, VirtualShip expects pre-downloaded data to be organised in a specific directory & filename structure within the specified local data directory. The expected structure is as outlined in the subsequent sections.
16+
17+
#### Directory structure
18+
19+
Assuming the local data directory (as supplied in the `--from-data` argument) is named `data/`, the expected subdirectory structure is:
20+
21+
```bash
22+
.
23+
└── data
24+
├── bathymetry # containing the singular bathymetry .nc file
25+
├── bgc # containing biogeochemical .nc files
26+
└── phys # containing physical .nc files
27+
```
28+
29+
#### Filename conventions
30+
31+
Within these subdirectories, the expected filename conventions are:
32+
33+
- Physical data files (in `data/phys/`) should be named as follows:
34+
- `<COPERNICUS_DATASET_NOMENCLATURE>_<YYYY_MM_DD>.nc`
35+
- e.g. `cmems_mod_glo_phy-all_my_0.25deg_P1D-m_1998_05_01.nc`
36+
- Biogeochemical data files (in `data/bgc/`) should be named as follows:
37+
- `<COPERNICUS_DATASET_NOMENCLATURE>_<YYYY_MM_DD>.nc`
38+
- e.g. `cmems_mod_glo_bgc_my_0.25deg_P1M-m_1998_05_01.nc`
39+
- Bathymetry data file (in `data/bathymetry/`) should be named as follows:
40+
- `cmems_mod_glo_phy_anfc_0.083deg_static_bathymetry.nc`
41+
42+
```{tip}
43+
Be careful to use an underscore (`_`) as the separator between date components in the filenames (i.e. `YYYY_MM_DD`).
44+
```
45+
46+
```{note}
47+
Using the `<COPERNICUS_DATASET_NOMENCLATURE>` in the filenames is vital in order to correctly identify the temporal resolution of the data (daily or monthly). The `P1D` in the example above indicates daily data, whereas `P1M` would indicate monthly data.
48+
49+
See [here](https://help.marine.copernicus.eu/en/articles/6820094-how-is-the-nomenclature-of-copernicus-marine-data-defined#h_34a5a6f21d) for more information on Copernicus dataset nomenclature.
50+
51+
See also our own [documentation](copernicus_products.md) on the Copernicus products used natively by VirtualShip when streaming data.
52+
```
53+
54+
#### Further assumptions
55+
56+
The following assumptions are also made about the data:
57+
58+
1. All pre-downloaded data files must be in NetCDF format (`.nc`).
59+
2. Physical data files must contain the following variables: `uo`, `vo`, `so`, `thetao`
60+
- Or these strings must appear as substrings within the variable names (e.g. `uo_glor` is acceptable for `uo`).
61+
3. If using BGC instruments (e.g. `CTD_BGC`), the relevant biogeochemical data files must contain the following variables: `o2`, `chl`, `no3`, `po4`, `nppv`, `ph`, `phyc`.
62+
- Or these strings must appear as substrings within the variable names (e.g. `o2_glor` is acceptable for `o2`).
63+
4. Bathymetry data files must contain a variable named `deptho`.
64+
65+
#### Also of note
66+
67+
1. Whilst it is not mandatory to use only data downloaded from Copernicus Marine (any existing data you may hold can be re-organised accordingly), the assumptions made by VirtualShip regarding directory structure and filename conventions are motivated by alignment with Copernicus Marine's practices.
68+
- If you want to use pre-existing data with VirtualShip, which you may have accessed from a different source, it is possible to do so by restructuring and/or renaming your data files as necessary.
69+
2. The whole VirtualShip pre-downloaded data workflow should support global data or subsets thereof, provided the data files contain the necessary variables and are structured as outlined above.

docs/user-guide/index.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ assignments/index
1515
:maxdepth: 1
1616
1717
documentation/copernicus_products.md
18+
documentation/pre_download_data.md
19+
documentation/example_copernicus_download.ipynb
1820
```

docs/user-guide/quickstart.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# VirtualShip Quickstart Guide 🚢
22

3+
```{warning}
4+
This quickstart guide is currently out of date with the latest version of VirtualShip (v1.0.0).
5+
6+
It will be updated soon.
7+
8+
In particular, the `virtualship fetch` command is no longer supported. Instead, data fetching is now integrated into the `virtualship run` command. See [#226](https://github.com/Parcels-code/virtualship/pull/226) for more details in the meantime.
9+
```
10+
311
Welcome to this Quickstart to using VirtualShip. In this guide we will conduct a virtual expedition in the North Sea. Note, however, that you can plan your own expedition anywhere in the global ocean and conduct whatever set of measurements you wish!
412

513
This Quickstart is available as an instructional video below, or you can continue with the step-by-step guide.

0 commit comments

Comments
 (0)