diff --git a/.bumpversion.cfg b/.bumpversion.cfg index bf3a149c9..8e0b116f8 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.38.0 +current_version = 1.40.0 commit = True tag = False tag_name = {new_version} @@ -30,11 +30,11 @@ search = {current_version} replace = {new_version} [bumpversion:file:RELEASE.txt] -search = {current_version} 2023-11-21T16:50:24Z +search = {current_version} 2023-11-30T18:27:41Z replace = {new_version} {utcnow:%Y-%m-%dT%H:%M:%SZ} [bumpversion:part:releaseTime] -values = 2023-11-21T16:50:24Z +values = 2023-11-30T18:27:41Z [bumpversion:file(version):birdhouse/config/canarie-api/docker_configuration.py.template] search = 'version': '{current_version}' diff --git a/.github/labeler.yml b/.github/labeler.yml index 6c96b1c1a..aecc60cfc 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -61,6 +61,9 @@ component/geoserver: component/jupyterhub: - birdhouse/**/jupyterhub/**/* +component/STAC: + - birdhouse/**/*stac*/**/* + feature/WPS: - birdhouse/**/finch/**/* - birdhouse/**/flyingpigeon/**/* diff --git a/CHANGES.md b/CHANGES.md index 2d5b41d76..61487ff28 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -26,6 +26,85 @@ ## Fixes - Updates incorrect WPS outputs resource name in the cowbird config. +[1.40.0](https://github.com/bird-house/birdhouse-deploy/tree/1.40.0) (2023-11-30) +------------------------------------------------------------------------------------------------------------------ + +- `optional-components/stac-data-proxy`: add a new feature to allow hosting of local STAC assets. + + The new component defines variables `STAC_DATA_PROXY_DIR_PATH` (default `${DATA_PERSIST_ROOT}/stac-data`) and + `STAC_DATA_PROXY_URL_PATH` (default `/data/stac`) that are aliased (mapped) under `nginx` to provide a URL + where locally hosted STAC assets can be downloaded from. 
This allows a server node to be a proper data provider, + where its STAC-API can return Catalog, Collection and Item definitions that points at these local assets available + through the `STAC_DATA_PROXY_URL_PATH` endpoint. + + When enabled, this component can be combined with `optional-components/secure-data-proxy` to allow per-resource + access control of the contents under `STAC_DATA_PROXY_DIR_PATH` by setting relevant Magpie permissions under service + `secure-data-proxy` for children resources that correspond to `STAC_DATA_PROXY_URL_PATH`. Otherwise, the path and + all of its contents are publicly available, in the same fashion that WPS outputs are managed without + `optional-components/secure-data-proxy`. More details are provided under the component's + [README](./birdhouse/optional-components/README.rst#provide-a-proxy-for-local-stac-asset-hosting). + +- `optional-components/stac-public-access`: add public write permission for `POST /stac/search` request. + + Since [`pystac_client`](https://github.com/stac-utils/pystac-client), a common interface to interact with STAC API, + employs `POST` method by default to perform search, the missing permission caused an unexpected error for users that + are not aware of the specific permission control of Magpie. Since nothing is created by that endpoint, but rather, + the POST'ed body employs the convenient JSON format to provide search criteria, it is safe to set this permission + when the STAC service was configured to be publicly searchable. 
+ +[1.39.2](https://github.com/bird-house/birdhouse-deploy/tree/1.39.2) (2023-11-30) +------------------------------------------------------------------------------------------------------------------ + +## Changes + +- Jupyterhub: periodically check whether the logged-in user still has permission to access + + By setting the `JUPYTERHUB_CRYPT_KEY` environment variable in the `env.local` file, jupyterhub will store user's + authentication information (session cookie) in the database. This allows jupyterhub to periodically check whether the + user still has permission to access jupyterhub (the session cookie is not expired and the permissions have not + changed). + + The minimum duration between checks can be set with the `JUPYTERHUB_AUTHENTICATOR_REFRESH_AGE` variable which is an + integer (in seconds). + + Note that users who are already logged in to jupyterhub will need to log out and log in for these changes to take + effect. + + To forcibly log out all users currently logged in to jupyterhub you can run the following command to force the + recreation of the cookie secret: + + ```shell + docker exec jupyterhub rm /persist/jupyterhub_cookie_secret && docker restart jupyterhub + ``` + +[1.39.1](https://github.com/bird-house/birdhouse-deploy/tree/1.39.1) (2023-11-29) +------------------------------------------------------------------------------------------------------------------ + +## Changes + +- Limit usernames in Magpie to match restrictions by Jupyterhub's Dockerspawner + + When Jupyterhub spawns a new jupyterlab container, it escapes any non-ascii, non-digit character in the username. + This results in a username that may not match the expected username (as defined by Magpie). This mismatch results in + the container failing to spawn since expected volumes cannot be mounted to the jupyterlab container. + + This fixes the issue by ensuring that jupyterhub does not convert the username that it receives from Magpie.
+ + Note that this updates the Magpie version. + +[1.39.0](https://github.com/bird-house/birdhouse-deploy/tree/1.39.0) (2023-11-27) +------------------------------------------------------------------------------------------------------------------ + +## Changes + +- Add a Magpie Webhook to create the Magpie resources corresponding to the STAC-API path elements when a `STAC-API` + `POST /collections/{collection_id}` or `POST /collections/{collection_id}/items/{item_id}` request is accomplished. + - When creating the STAC `Item`, the `source` entry in `links` corresponding to a `THREDDS` file on the same instance + is used to define the Magpie `resource_display_name` corresponding to a file to be mapped later on + (eg: a NetCDF `birdhouse/test-data/tc_Anon[...].nc`). + - Checking same instance `source` path is necessary because `STAC` could refer to external assets, and we do not want + to inject Magpie resource that are not part of the active instance where the hook is running. + [1.38.0](https://github.com/bird-house/birdhouse-deploy/tree/1.38.0) (2023-11-21) ------------------------------------------------------------------------------------------------------------------ diff --git a/Makefile b/Makefile index 63c4f86a4..c99dd1088 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Generic variables override SHELL := bash override APP_NAME := birdhouse-deploy -override APP_VERSION := 1.38.0 +override APP_VERSION := 1.40.0 # utility to remove comments after value of an option variable override clean_opt = $(shell echo "$(1)" | $(_SED) -r -e "s/[ '$'\t'']+$$//g") diff --git a/README.rst b/README.rst index 3c67559cd..b62e91c92 100644 --- a/README.rst +++ b/README.rst @@ -14,13 +14,13 @@ for a full-fledged production platform. * - releases - | |latest-version| |commits-since| -.. |commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/1.38.0.svg +.. 
|commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/1.40.0.svg :alt: Commits since latest release - :target: https://github.com/bird-house/birdhouse-deploy/compare/1.38.0...master + :target: https://github.com/bird-house/birdhouse-deploy/compare/1.40.0...master -.. |latest-version| image:: https://img.shields.io/badge/tag-1.38.0-blue.svg?style=flat +.. |latest-version| image:: https://img.shields.io/badge/tag-1.40.0-blue.svg?style=flat :alt: Latest Tag - :target: https://github.com/bird-house/birdhouse-deploy/tree/1.38.0 + :target: https://github.com/bird-house/birdhouse-deploy/tree/1.40.0 .. |readthedocs| image:: https://readthedocs.org/projects/birdhouse-deploy/badge/?version=latest :alt: ReadTheDocs Build Status (latest version) diff --git a/RELEASE.txt b/RELEASE.txt index e00b91d6c..30a3ae815 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -1 +1 @@ -1.38.0 2023-11-21T16:50:24Z +1.40.0 2023-11-30T18:27:41Z diff --git a/birdhouse/components/stac/config/magpie/config.yml.template b/birdhouse/components/stac/config/magpie/config.yml.template index f8157469f..8351be5c3 100644 --- a/birdhouse/components/stac/config/magpie/config.yml.template +++ b/birdhouse/components/stac/config/magpie/config.yml.template @@ -7,6 +7,15 @@ providers: c4i: false type: api sync_type: api + hooks: + - type: response + path: "/stac/collections/?" + method: POST + target: /opt/birdhouse/src/magpie/hooks/stac_hooks.py:create_collection_resource + - type: response + path: "/stac/collections/[\\w-]+/items/?" 
+ method: POST + target: /opt/birdhouse/src/magpie/hooks/stac_hooks.py:create_item_resource permissions: # create a default 'stac' resource under 'stac' service diff --git a/birdhouse/components/stac/config/magpie/stac_hooks.py b/birdhouse/components/stac/config/magpie/stac_hooks.py new file mode 100644 index 000000000..c6b150a06 --- /dev/null +++ b/birdhouse/components/stac/config/magpie/stac_hooks.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +These hooks will be running within Twitcher, using MagpieAdapter context, applied for STAC requests. + +The code below can make use of any package that is installed by Magpie/Twitcher. + +.. seealso:: + Documentation about Magpie/Twitcher request/response hooks is available here: + https://pavics-magpie.readthedocs.io/en/latest/configuration.html#service-hooks +""" + +import re +from typing import TYPE_CHECKING, List, Dict + +from magpie.api.management.resource import resource_utils as ru +from magpie.api.requests import get_service_matchdict_checked +from magpie.models import Route +from magpie.utils import get_logger +from magpie.db import get_session_from_other +from ziggurat_foundations.models.services.resource import ResourceService + +if TYPE_CHECKING: + from pyramid.response import Response + from sqlalchemy.orm.session import Session + +LOGGER = get_logger("magpie.stac") + +def create_collection_resource(response): + # type: (Response) -> Response + """ + Create the stac collection resource + """ + request = response.request + body = request.json + collection_id = body["id"] + try: + display_name = extract_display_name(body["links"]) + except Exception as exc: + LOGGER.error("Error when extracting display_name from links %s %s", body["links"], str(exc), exc_info=exc) + return response + + # note: matchdict reference of Twitcher owsproxy view is used, just so happens to be same name as Magpie + service = get_service_matchdict_checked(request) + # Getting a new session from the request, 
since the current session found in the request is already handled with his own transaction manager. + session = get_session_from_other(request.db) + try: + # Create the resource tree + create_resource_tree(f"stac/collections/{collection_id}", 0, service.resource_id , session, display_name) + session.commit() + + except Exception as exc: + LOGGER.error("Unexpected error while creating the collection %s %s", display_name, str(exc), exc_info=exc) + session.rollback() + + return response + +def create_item_resource(response): + # type: (Response) -> Response + """ + Create the stac item resource + """ + request = response.request + body = request.json + item_id = body["id"] + try: + display_name = extract_display_name(body["links"]) + except Exception as exc: + LOGGER.error("Error when extracting display_name from links %s %s", body["links"], str(exc), exc_info=exc) + return response + + # Get the from url -> /collections/{collection_id}/items + collection_id = re.search(r'(?<=collections/)[0-9a-zA-Z_.-]+?(?=/items)', request.url).group() + + # note: matchdict reference of Twitcher owsproxy view is used, just so happens to be same name as Magpie + service = get_service_matchdict_checked(request) + # Getting a new session from the request, since the current session found in the request is already handled with his own transaction manager. 
+ session = get_session_from_other(request.db) + try: + # Create the resource tree + create_resource_tree(f"stac/collections/{collection_id}/items/{item_id}", 0, service.resource_id, session, display_name) + session.commit() + + except Exception as exc: + LOGGER.error("Unexpected error while creating the item %s %s", display_name, str(exc), exc_info=exc) + session.rollback() + + return response + +def extract_display_name(links): + # type: (List[Dict[str, str]]) -> str + """ + Extract THREDD path from a STAC links + """ + display_name = None + for link in links: + if link["rel"] == "source": + # Example of title `thredds:birdhouse/CMIP6` + display_name = link["title"] + break + if not display_name: + raise ValueError("The display name was not extracted properly") + + return display_name + +def create_resource_tree(resource_tree, current_depth, parent_id, session, display_name): + # type: (str, int, int, session, str) -> None + """ + Create the resource tree on Magpie + """ + tree = resource_tree.split("/") + # We are at the max depth of the tree. + if current_depth > len(tree) - 1: + return + + resource_name = tree[current_depth] + query = session.query(ResourceService.model).filter(ResourceService.model.resource_name == resource_name, ResourceService.model.parent_id == parent_id) + resource = query.first() + + if resource is not None: + # Since the resource exists, we can use its id to create the next resource. + parent_id = resource.resource_id + next_depth = current_depth + 1 + create_resource_tree(resource_tree, next_depth, parent_id, session, display_name) + + # The resource wasn't found in the current depth, we need to create it. + else: + # Creating the last resource in the tree, we need to use the display_name. + if current_depth == len(tree) - 1: + ru.create_resource(resource_name, display_name, Route.resource_type_name, parent_id, db_session=session) + else: + # Creating the resource somewhere in the middle of the tree before using its id. 
+ node = ru.create_resource(resource_name, None, Route.resource_type_name, parent_id, db_session=session) + parent_id = node.json["resource"]["resource_id"] + next_depth = current_depth + 1 + create_resource_tree(resource_tree, next_depth, parent_id, session, display_name) diff --git a/birdhouse/components/stac/config/twitcher/docker-compose-extra.yml b/birdhouse/components/stac/config/twitcher/docker-compose-extra.yml new file mode 100644 index 000000000..b84f09b74 --- /dev/null +++ b/birdhouse/components/stac/config/twitcher/docker-compose-extra.yml @@ -0,0 +1,10 @@ +version: "3.4" + +services: + # extend twitcher with MagpieAdapter hooks employed for STAC proxied requests + twitcher: + volumes: + # NOTE: MagpieAdapter hooks are defined within Magpie config, but it is actually Twitcher proxy that runs them + # target mount location depends on 'MAGPIE_PROVIDERS_CONFIG_PATH' environment variable that is found under `birdhouse/config/twitcher/docker-compose-extra.yml` + - ./components/stac/config/magpie/config.yml:/opt/birdhouse/src/magpie/config/stac-config.yml:ro + - ./components/stac/config/magpie/stac_hooks.py:/opt/birdhouse/src/magpie/hooks/stac_hooks.py:ro diff --git a/birdhouse/components/weaver/config/twitcher/docker-compose-extra.yml b/birdhouse/components/weaver/config/twitcher/docker-compose-extra.yml index e1a5fe6d8..fca74d5dc 100644 --- a/birdhouse/components/weaver/config/twitcher/docker-compose-extra.yml +++ b/birdhouse/components/weaver/config/twitcher/docker-compose-extra.yml @@ -5,6 +5,6 @@ services: twitcher: volumes: # NOTE: MagpieAdapter hooks are defined within Magpie config, but it is actually Twitcher proxy that runs them - # target mount location depends on main docker-compose 'MAGPIE_PROVIDERS_CONFIG_PATH' environment variable + # target mount location depends on 'MAGPIE_PROVIDERS_CONFIG_PATH' environment variable that is found under `birdhouse/config/twitcher/docker-compose-extra.yml` - 
./components/weaver/config/magpie/config.yml:/opt/birdhouse/src/magpie/config/weaver-config.yml:ro - ./components/weaver/config/magpie/weaver_hooks.py:/opt/birdhouse/src/magpie/hooks/weaver_hooks.py:ro diff --git a/birdhouse/config/canarie-api/docker_configuration.py.template b/birdhouse/config/canarie-api/docker_configuration.py.template index c8de1a026..1c6582767 100644 --- a/birdhouse/config/canarie-api/docker_configuration.py.template +++ b/birdhouse/config/canarie-api/docker_configuration.py.template @@ -109,8 +109,8 @@ SERVICES = { # NOTE: # Below version and release time auto-managed by 'make VERSION=x.y.z bump'. # Do NOT modify it manually. See 'Tagging policy' in 'birdhouse/README.rst'. - 'version': '1.38.0', - 'releaseTime': '2023-11-21T16:50:24Z', + 'version': '1.40.0', + 'releaseTime': '2023-11-30T18:27:41Z', 'institution': 'Ouranos', 'researchSubject': 'Climatology', 'supportEmail': '${SUPPORT_EMAIL}', @@ -142,8 +142,8 @@ PLATFORMS = { # NOTE: # Below version and release time auto-managed by 'make VERSION=x.y.z bump'. # Do NOT modify it manually. See 'Tagging policy' in 'birdhouse/README.rst'. - 'version': '1.38.0', - 'releaseTime': '2023-11-21T16:50:24Z', + 'version': '1.40.0', + 'releaseTime': '2023-11-30T18:27:41Z', 'institution': 'Ouranos', 'researchSubject': 'Climatology', 'supportEmail': '${SUPPORT_EMAIL}', diff --git a/birdhouse/config/jupyterhub/default.env b/birdhouse/config/jupyterhub/default.env index d8ffba773..e6d7ee139 100644 --- a/birdhouse/config/jupyterhub/default.env +++ b/birdhouse/config/jupyterhub/default.env @@ -5,7 +5,7 @@ # are applied and must be added to the list of DELAYED_EVAL. 
export JUPYTERHUB_DOCKER=pavics/jupyterhub -export JUPYTERHUB_VERSION=4.0.2-20231002 +export JUPYTERHUB_VERSION=4.0.2-20231127 # Jupyter single-user server images, can be overriden in env.local to have a space separated list of multiple images export DOCKER_NOTEBOOK_IMAGES="pavics/workflow-tests:230601" @@ -64,6 +64,15 @@ export JUPYTERHUB_CONFIG_OVERRIDE="" # recommended as it may permit unauthorized users from accessing jupyterhub. export JUPYTERHUB_AUTHENTICATOR_AUTHORIZATION_URL='http://twitcher:8000/ows/verify/jupyterhub' +# 32 byte hex-encoded key used to encrypt a user's authentication state in the jupyterhub database. +# If set, jupyterhub will periodically check if the user still has permission to access jupyterhub (according to Magpie) +export JUPYTERHUB_CRYPT_KEY= + +# Jupyterhub will check if the current logged in user still has permission to access jupyterhub (according to Magpie) +# if their authentication information is older than this value (in seconds). This value is only applied if +# JUPYTERHUB_CRYPT_KEY is set.
+export JUPYTERHUB_AUTHENTICATOR_REFRESH_AGE=60 + export DELAYED_EVAL=" $DELAYED_EVAL JUPYTERHUB_USER_DATA_DIR @@ -86,6 +95,7 @@ OPTIONAL_VARS=" \$JUPYTERHUB_DOCKER \$JUPYTERHUB_VERSION \$JUPYTERHUB_AUTHENTICATOR_AUTHORIZATION_URL + \$JUPYTERHUB_AUTHENTICATOR_REFRESH_AGE \$JUPYTER_IDLE_SERVER_CULL_TIMEOUT \$JUPYTER_IDLE_KERNEL_CULL_TIMEOUT \$JUPYTER_IDLE_KERNEL_CULL_INTERVAL diff --git a/birdhouse/config/jupyterhub/docker-compose-extra.yml b/birdhouse/config/jupyterhub/docker-compose-extra.yml index e2a489590..32dd395ed 100644 --- a/birdhouse/config/jupyterhub/docker-compose-extra.yml +++ b/birdhouse/config/jupyterhub/docker-compose-extra.yml @@ -27,6 +27,7 @@ services: MOUNT_IMAGE_SPECIFIC_NOTEBOOKS: ${MOUNT_IMAGE_SPECIFIC_NOTEBOOKS} USER_WORKSPACE_UID: ${USER_WORKSPACE_UID} USER_WORKSPACE_GID: ${USER_WORKSPACE_GID} + JUPYTERHUB_CRYPT_KEY: ${JUPYTERHUB_CRYPT_KEY} volumes: - ./config/jupyterhub/jupyterhub_config.py:/srv/jupyterhub/jupyterhub_config.py:ro - ./config/jupyterhub/custom_templates:/custom_templates:ro diff --git a/birdhouse/config/jupyterhub/jupyterhub_config.py.template b/birdhouse/config/jupyterhub/jupyterhub_config.py.template index d7bda1ec6..dab08addb 100644 --- a/birdhouse/config/jupyterhub/jupyterhub_config.py.template +++ b/birdhouse/config/jupyterhub/jupyterhub_config.py.template @@ -19,12 +19,26 @@ c.MagpieAuthenticator.magpie_url = "http://magpie:2001" c.MagpieAuthenticator.public_fqdn = "${PAVICS_FQDN_PUBLIC}" c.MagpieAuthenticator.authorization_url = "${JUPYTERHUB_AUTHENTICATOR_AUTHORIZATION_URL}" +if os.getenv("JUPYTERHUB_CRYPT_KEY"): + c.MagpieAuthenticator.enable_auth_state = True + c.MagpieAuthenticator.refresh_pre_spawn = True + c.MagpieAuthenticator.auth_refresh_age = int("${JUPYTERHUB_AUTHENTICATOR_REFRESH_AGE}") + c.JupyterHub.cookie_secret_file = '/persist/jupyterhub_cookie_secret' c.JupyterHub.db_url = '/persist/jupyterhub.sqlite' c.JupyterHub.template_paths = ['/custom_templates'] class CustomDockerSpawner(DockerSpawner): + 
@property + def escaped_name(self): + """ + Return the username without escaping. This ensures that mounted directories on the + host machine are discovered properly since we expect the username to match the username + set by Magpie. + """ + return self.user.name + async def start(self): if(os.environ['MOUNT_IMAGE_SPECIFIC_NOTEBOOKS'] == 'true'): host_dir = join(os.environ['JUPYTERHUB_USER_DATA_DIR'], 'tutorial-notebooks-specific-images') diff --git a/birdhouse/config/magpie/default.env b/birdhouse/config/magpie/default.env index 6ad36301b..6b97051af 100644 --- a/birdhouse/config/magpie/default.env +++ b/birdhouse/config/magpie/default.env @@ -5,7 +5,7 @@ # are applied and must be added to the list of DELAYED_EVAL. # Tag version that will be used to update Magpie API, Magpie CLI, and matching Twitcher with Magpie Adapter -export MAGPIE_VERSION=3.36.0 +export MAGPIE_VERSION=3.38.0 export MAGPIE_DB_NAME="magpiedb" diff --git a/birdhouse/config/magpie/magpie.ini.template b/birdhouse/config/magpie/magpie.ini.template index fb6e235f8..8f2eb920b 100644 --- a/birdhouse/config/magpie/magpie.ini.template +++ b/birdhouse/config/magpie/magpie.ini.template @@ -81,6 +81,11 @@ magpie.smtp_port = ${MAGPIE_SMTP_PORT} magpie.smtp_ssl = ${MAGPIE_SMTP_SSL} magpie.smtp_password = ${MAGPIE_SMTP_PASSWORD} +# See https://pavics-magpie.readthedocs.io/en/latest/configuration.html#envvar-MAGPIE_USER_NAME_EXTRA_REGEX for details +# User names must match the regular expression defined here as well (whether this variable is set or not): +# https://github.com/Ouranosinc/Magpie/blob/595602/magpie/api/management/user/user_utils.py#L66 +#magpie.user_name_extra_regex = + # Caching settings for specific sections/functions - improves performance response times of recurring requests # See Twitcher INI configuration for caching that takes effect when resolving access enforcement to actual services. # Following cache settings apply only during Magpie API requests. 
diff --git a/birdhouse/env.local.example b/birdhouse/env.local.example index 1976bcfbc..402e4dddf 100644 --- a/birdhouse/env.local.example +++ b/birdhouse/env.local.example @@ -351,6 +351,23 @@ export GEOSERVER_ADMIN_PASSWORD=geoserverpass # #export JUPYTERHUB_CONFIG_OVERRIDE=" # +# +# The following variables can be used to configure additional authentication settings for jupyterhub +# +# 32 byte hex-encoded key used to encrypt a user's authentication state in the jupyterhub database. +# If set, jupyterhub will periodically check if the user still has permission to access jupyterhub (according to Magpie) +# This may be a semicolon-separated list of encryption keys. If there are multiple keys present, the first key is always +# used to persist any new auth_state. +# To generate a key the following command can be used: `openssl rand -hex 32` +# For more details, see: https://jupyterhub.readthedocs.io/en/stable/reference/authenticators.html#authentication-state +#export JUPYTERHUB_CRYPT_KEY= +# +# Jupyterhub will check if the current logged in user still has permission to access jupyterhub (according to Magpie) +# if their authentication information is older than this value (in seconds). This value is only applied if +# JUPYTERHUB_CRYPT_KEY is set. +#export JUPYTERHUB_AUTHENTICATOR_REFRESH_AGE=60 +# +# # Sample below will allow for sharing notebooks between Jupyter users. # Note all shares are public. # diff --git a/birdhouse/optional-components/README.rst b/birdhouse/optional-components/README.rst index 47d3d6c8c..4c829abcf 100644 --- a/birdhouse/optional-components/README.rst +++ b/birdhouse/optional-components/README.rst @@ -346,6 +346,55 @@ To enable this optional-component: - Add ``./optional-components/stac-public-access`` to ``EXTRA_CONF_DIRS``.
+Provide a proxy for local STAC asset hosting +-------------------------------------------------------- + +STAC data proxy allows to host the URL location defined by ``PAVICS_FQDN_PUBLIC`` and ``STAC_DATA_PROXY_URL_PATH`` +to provide access to files contained within ``STAC_DATA_PROXY_DIR_PATH``. + +The ``STAC_DATA_PROXY_DIR_PATH`` location can be used to hold STAC Assets defined by the current server node +(in contrast to STAC definitions that would refer to remote locations), such that the node can be the original +location of new data, or to make a new local replication of remote data. + +To enable this optional-component: + +- Edit ``env.local`` (a copy of `env.local.example`_) +- Add ``./optional-components/stac-data-proxy`` to ``EXTRA_CONF_DIRS``. +- Optionally, add any other relevant components to control access as desired (see below). + +When using this component, access to the endpoint defined by ``STAC_DATA_PROXY_URL_PATH``, and therefore all +corresponding files contained under mapped ``STAC_DATA_PROXY_DIR_PATH`` will depend on how this +feature is combined with ``./optional-components/stac-public-access`` and ``./optional-components/secure-data-proxy``. +Following are the possible combinations and obtained behaviors: + +.. list-table:: + :header-rows: 1 + + * - Enabled Components + - Obtained Behaviors + + * - Only ``./optional-components/stac-data-proxy`` is enabled. + - All data under ``STAC_DATA_PROXY_URL_PATH`` is publicly accessible without authorization control + and specific resource access cannot be managed per content. However, since STAC-API itself is not made public, + the STAC Catalog, Collections and Items cannot be accessed publicly + (*note*: this is most probably never desired). + + * - Both ``./optional-components/stac-data-proxy`` and ``./optional-components/stac-public-access`` are enabled. + - All data under ``STAC_DATA_PROXY_URL_PATH`` is publicly accessible without possibility to manage per-resource + access. 
However, this public access is aligned with publicly accessible STAC-API endpoints and contents. + + * - Both ``./optional-components/stac-data-proxy`` and ``./optional-components/secure-data-proxy`` are enabled. + - All data under ``STAC_DATA_PROXY_URL_PATH`` is protected (by default, admin-only), but can be granted access + on a per-user, per-group and per-resource basis according to permissions applied by the administrator. + Since STAC-API is not made public by default, the administrator can decide whether they grant access only to + STAC metadata (Catalog, Collection, Items) with permission applied on the ``stac`` Magpie service, only to + assets data with permission under the ``stac-data-proxy``, or both. + + * - All of ``./optional-components/stac-data-proxy``, ``./optional-components/stac-public-access`` and + ``./optional-components/secure-data-proxy`` are enabled. + - Similar to the previous case, allowing full authorization management control by the administrator, but contents + are publicly accessible by default. To revoke access, a Magpie administrator has to apply a ``deny`` permission. 
+ X-Robots-Tag Header --------------------------- diff --git a/birdhouse/optional-components/stac-data-proxy/.gitignore b/birdhouse/optional-components/stac-data-proxy/.gitignore new file mode 100644 index 000000000..6e245d7cc --- /dev/null +++ b/birdhouse/optional-components/stac-data-proxy/.gitignore @@ -0,0 +1,2 @@ +config/proxy/conf.extra-service.d/stac-proxy-data.conf +config/secure-data-proxy/permissions.cfg diff --git a/birdhouse/optional-components/stac-data-proxy/config/proxy/conf.extra-service.d/stac-proxy-data.conf.template b/birdhouse/optional-components/stac-data-proxy/config/proxy/conf.extra-service.d/stac-proxy-data.conf.template new file mode 100644 index 000000000..a9086216e --- /dev/null +++ b/birdhouse/optional-components/stac-data-proxy/config/proxy/conf.extra-service.d/stac-proxy-data.conf.template @@ -0,0 +1,5 @@ + location ${STAC_DATA_PROXY_URL_PATH}/ { + ${SECURE_DATA_PROXY_AUTH_INCLUDE} + + alias /stac-data-proxy/; + } diff --git a/birdhouse/optional-components/stac-data-proxy/config/proxy/docker-compose-extra.yml b/birdhouse/optional-components/stac-data-proxy/config/proxy/docker-compose-extra.yml new file mode 100644 index 000000000..bfb26e640 --- /dev/null +++ b/birdhouse/optional-components/stac-data-proxy/config/proxy/docker-compose-extra.yml @@ -0,0 +1,7 @@ +version: "3.4" +services: + proxy: + volumes: + - ./optional-components/stac-data-proxy/config/proxy/conf.extra-service.d:/etc/nginx/conf.extra-service.d/stac-data-proxy:ro + # NOTE: data for hosted STAC assets, not to be confused with 'stac-db' for internal STAC catalog definitions + - ${STAC_DATA_PROXY_DIR_PATH}:/stac-data-proxy diff --git a/birdhouse/optional-components/stac-data-proxy/config/secure-data-proxy/docker-compose-extra.yml b/birdhouse/optional-components/stac-data-proxy/config/secure-data-proxy/docker-compose-extra.yml new file mode 100644 index 000000000..76f13bec2 --- /dev/null +++ 
b/birdhouse/optional-components/stac-data-proxy/config/secure-data-proxy/docker-compose-extra.yml @@ -0,0 +1,5 @@ +version: "3.4" +services: + magpie: + volumes: + - ./optional-components/stac-data-proxy/config/secure-data-proxy/permissions.cfg:${MAGPIE_PERMISSIONS_CONFIG_PATH}/stac-data-proxy.cfg:ro diff --git a/birdhouse/optional-components/stac-data-proxy/config/secure-data-proxy/permissions.cfg.template b/birdhouse/optional-components/stac-data-proxy/config/secure-data-proxy/permissions.cfg.template new file mode 100644 index 000000000..791130a1b --- /dev/null +++ b/birdhouse/optional-components/stac-data-proxy/config/secure-data-proxy/permissions.cfg.template @@ -0,0 +1,12 @@ +# NOTE: +# Assume 'secure-data-proxy' would exist if needed (other component dependency). +# Since sorted load order of 'secure-data-proxy' < 'stac-data-proxy' in Magpie, 'secure-data-proxy' should exist. +permissions: + # following permission does not change anything technically (full access for admins) + # it is employed only to set up the relevant resource path and make permission customization easier by Magpie API/UI + - service: secure-data-proxy + resource: ${STAC_DATA_PROXY_URL_PATH} + type: route + permission: read + group: administrators + action: create diff --git a/birdhouse/optional-components/stac-data-proxy/default.env b/birdhouse/optional-components/stac-data-proxy/default.env new file mode 100644 index 000000000..8182fcc72 --- /dev/null +++ b/birdhouse/optional-components/stac-data-proxy/default.env @@ -0,0 +1,42 @@ +#!/bin/sh + +# All env in this default.env can be overridden by env.local. + +# All env in this default.env must NOT depend on any other env. If they do, they +# must use single quotes to avoid early expansion before overrides in env.local +# are applied and must be added to the list of DELAYED_EVAL. 
+ +# add any new variables not already in 'VARS' or 'OPTIONAL_VARS' that must be replaced in templates here +# single quotes are important in below list to keep variable names intact until 'pavics-compose' parses them +EXTRA_VARS=' + $STAC_DATA_PROXY_DIR_PATH + $STAC_DATA_PROXY_URL_PATH +' + +# extend the original 'VARS' from 'birdhouse/pavics-compose.sh' to employ them for template substitution +# adding them to 'VARS', they will also be validated in case of override of 'default.env' using 'env.local' +VARS="$VARS $EXTRA_VARS" + +# Directory path that will be used as volume mount for storing hosted STAC assets data +# NOTE: +# Hosting is not performed by the API itself. Data is expected to already reside in that +# location when referenced by STAC Collections and Items to make them accessible externally. +export STAC_DATA_PROXY_DIR_PATH='${DATA_PERSIST_ROOT}/stac-data' + +# URL path (after PAVICS_FQDN_PUBLIC) that will be used to proxy local STAC assets data +export STAC_DATA_PROXY_URL_PATH="/data/stac" + +DELAYED_EVAL=" + $DELAYED_EVAL + STAC_DATA_PROXY_DIR_PATH +" + +# add any component that this component requires to run +# NOTE: +# './optional-component/secure-data-proxy' is purposely omitted from dependencies +# if 'EXTRA_CONF_DIRS' enabled it as well, the proxy path/alias will have relevant auth request enabled +# otherwise, it will use by default the public access with no prior nginx auth validation +COMPONENT_DEPENDENCIES=" + ./components/stac + ./config/proxy +" diff --git a/birdhouse/optional-components/stac-public-access/.gitignore b/birdhouse/optional-components/stac-public-access/.gitignore index e69de29bb..4261430d6 100644 --- a/birdhouse/optional-components/stac-public-access/.gitignore +++ b/birdhouse/optional-components/stac-public-access/.gitignore @@ -0,0 +1 @@ +config/stac-data-proxy/permissions.cfg diff --git a/birdhouse/optional-components/stac-public-access/config/magpie/permissions.cfg 
b/birdhouse/optional-components/stac-public-access/config/magpie/permissions.cfg index cc6f39427..c1bd68d8a 100644 --- a/birdhouse/optional-components/stac-public-access/config/magpie/permissions.cfg +++ b/birdhouse/optional-components/stac-public-access/config/magpie/permissions.cfg @@ -4,6 +4,16 @@ permissions: permission: read group: anonymous action: create + + # search can be performed by GET query-string or POST JSON search body + # in both cases, nothing is 'written' or 'created' unlike most common POST requests + # it is sensible to enable it, since 'pystac_client.Client.open' uses POST by default + - service: stac + resource: /stac/search + permission: write-match + group: anonymous + action: create + - service: stac resource: /stac permission: write diff --git a/birdhouse/optional-components/stac-public-access/config/stac-data-proxy/docker-compose-extra.yml b/birdhouse/optional-components/stac-public-access/config/stac-data-proxy/docker-compose-extra.yml new file mode 100644 index 000000000..67e04bab5 --- /dev/null +++ b/birdhouse/optional-components/stac-public-access/config/stac-data-proxy/docker-compose-extra.yml @@ -0,0 +1,5 @@ +version: "3.4" +services: + magpie: + volumes: + - ./optional-components/stac-public-access/config/stac-data-proxy/permissions.cfg:${MAGPIE_PERMISSIONS_CONFIG_PATH}/stac-data-proxy-public.cfg:ro diff --git a/birdhouse/optional-components/stac-public-access/config/stac-data-proxy/permissions.cfg.template b/birdhouse/optional-components/stac-public-access/config/stac-data-proxy/permissions.cfg.template new file mode 100644 index 000000000..71da90c17 --- /dev/null +++ b/birdhouse/optional-components/stac-public-access/config/stac-data-proxy/permissions.cfg.template @@ -0,0 +1,10 @@ +# NOTE: +# Assume 'secure-data-proxy' would exist if needed. +# Since 'secure-data-proxy' < 'stac-data-proxy-public', it should be loaded first. 
+permissions: + - service: secure-data-proxy + resource: ${STAC_DATA_PROXY_URL_PATH} + type: route + permission: read + group: anonymous + action: create diff --git a/docs/source/conf.py b/docs/source/conf.py index dbdf8be04..c1bd76dee 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -69,9 +69,9 @@ # built documents. # # The short X.Y version. -version = '1.38.0' +version = '1.40.0' # The full version, including alpha/beta/rc tags. -release = '1.38.0' +release = '1.40.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages.