Skip to content

Commit

Permalink
Merge pull request #42 from Vizzuality/data/feature/Vtiles_pipe
Browse files Browse the repository at this point in the history
finish the skeleton of pipes
  • Loading branch information
aagm authored Nov 2, 2023
2 parents 040db96 + c926fad commit f3ce043
Show file tree
Hide file tree
Showing 15 changed files with 729 additions and 334 deletions.
200 changes: 200 additions & 0 deletions data/notebooks/preprocessing_pipes.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "79966c59",
"metadata": {},
"source": [
"This notebook relies on the following cmd line tools, ensure they are installed and in the system\n",
"- tippecanoe\n",
"- mapshaper\n",
"- aws cli"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "def1ef65-1fd6-4367-a620-2dbe415f7104",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import sys\n",
"import logging\n",
"\n",
"from IPython.lib import backgroundjobs as bg\n",
"\n",
"scripts_dir = Path('..').joinpath('src')\n",
"if scripts_dir not in sys.path:\n",
" sys.path.insert(0, scripts_dir.resolve().as_posix())\n",
"\n",
"from pipelines.pipes import get_pipes, execution_order, filter_pipes\n",
"\n",
"logging.basicConfig(level=logging.DEBUG)\n",
"logging.getLogger(\"requests\").setLevel(logging.WARNING)\n",
"logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n",
"logging.getLogger(\"fiona\").setLevel(logging.WARNING)\n",
"jobs = bg.BackgroundJobManager()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "38e82156",
"metadata": {},
"outputs": [],
"source": [
"# def execute_pipes(pipes):\n",
"# ordered_pipes = execution_order(pipes)\n",
"# for pipe in ordered_pipes:\n",
"# jobs.new(pipe().execute)\n",
"\n",
"# # this code will execute all pipelines selected in background.\n",
"\n",
"# mypipes_subset = filter_pipes(get_pipes().items(), ['eez_tiles'])\n",
"# execute_pipes(mypipes_subset)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "47a451d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'eez_intermediate': pipelines.intermediate_pipes.EEZIntermediatePipe.EEZIntermediatePipe,\n",
" 'eez_tiles': pipelines.tiles_pipes.EEZTilesPipe.EEZTilesPipe,\n",
" 'eez_precalc': pipelines.precalc_pipes.EEZPrecalcPipe.EEZPrecalcPipe}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_pipes()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d85fee01",
"metadata": {},
"outputs": [],
"source": [
"mypipes_subset = filter_pipes(get_pipes(), ['eez_intermediate'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4798cd04",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:pipelines.settings:/home/mambauser/data\n",
"INFO:pipelines.base_pipe:Pipeline eez_intermediate running at 2023-11-02 10:12:39.302930: starting extract...\n",
"INFO:utils:File /home/mambauser/data/eez_intermediate/World_EEZ_v11_20191118.zip already exists.\n",
"INFO:utils:File /home/mambauser/data/eez_intermediate/World_High_Seas_v1_20200826.zip already exists.\n",
"INFO:pipelines.base_pipe:Pipeline eez_intermediate finish at 2023-11-02 10:12:39.304361: Success executing extract\n",
"INFO:pipelines.base_pipe:Pipeline eez_intermediate running at 2023-11-02 10:12:39.304695: starting transform...\n",
"INFO:pipelines.base_pipe:Pipeline eez_intermediate finish at 2023-11-02 10:12:39.305303: Success executing transform\n",
"INFO:pipelines.base_pipe:Pipeline eez_intermediate running at 2023-11-02 10:12:39.305575: starting load...\n",
"DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n",
"ERROR:pipelines.base_pipe:Pipeline eez_intermediate dead at 2023-11-02 10:12:39.608183: Traceback (most recent call last):\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 2602, in _prep_and_do_upload\n",
" created_json = self._do_upload(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 2425, in _do_upload\n",
" response = self._do_resumable_upload(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 2243, in _do_resumable_upload\n",
" upload, transport = self._initiate_resumable_upload(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 2117, in _initiate_resumable_upload\n",
" upload.initiate(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/resumable_media/requests/upload.py\", line 420, in initiate\n",
" return _request_helpers.wait_and_retry(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/resumable_media/requests/_request_helpers.py\", line 155, in wait_and_retry\n",
" response = func()\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/resumable_media/requests/upload.py\", line 416, in retriable_request\n",
" self._process_initiate_response(result)\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/resumable_media/_upload.py\", line 518, in _process_initiate_response\n",
" _helpers.require_status_code(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/resumable_media/_helpers.py\", line 108, in require_status_code\n",
" raise common.InvalidResponse(\n",
"google.resumable_media.common.InvalidResponse: ('Request failed with status code', 403, 'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.CREATED: 201>)\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/home/mambauser/src/pipelines/utils.py\", line 9, in check\n",
" func(self, *args, **kwargs)\n",
" File \"/home/mambauser/src/pipelines/base_pipe.py\", line 139, in load\n",
" writeReadGCP(\n",
" File \"/home/mambauser/src/utils.py\", line 104, in writeReadGCP\n",
" blob.upload_from_file(f)\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 2761, in upload_from_file\n",
" self._prep_and_do_upload(\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 2620, in _prep_and_do_upload\n",
" _raise_from_invalid_response(exc)\n",
" File \"/opt/conda/lib/python3.10/site-packages/google/cloud/storage/blob.py\", line 4774, in _raise_from_invalid_response\n",
" raise exceptions.from_http_status(response.status_code, message, response=response)\n",
"google.api_core.exceptions.Forbidden: 403 POST https://storage.googleapis.com/upload/storage/v1/b/vector-data-raw/o?uploadType=resumable: {\n",
" \"error\": {\n",
" \"code\": 403,\n",
" \"message\": \"[email protected] does not have storage.objects.create access to the Google Cloud Storage object. Permission 'storage.objects.create' denied on resource (or it may not exist).\",\n",
" \"errors\": [\n",
" {\n",
" \"message\": \"[email protected] does not have storage.objects.create access to the Google Cloud Storage object. Permission 'storage.objects.create' denied on resource (or it may not exist).\",\n",
" \"domain\": \"global\",\n",
" \"reason\": \"forbidden\"\n",
" }\n",
" ]\n",
" }\n",
"}\n",
": ('Request failed with status code', 403, 'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.CREATED: 201>)\n",
"\n"
]
}
],
"source": [
"for n, pipe in mypipes_subset.items():\n",
" new_pipe = pipe()\n",
" new_pipe.extract().transform().load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9dacf2db",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
134 changes: 0 additions & 134 deletions data/notebooks/tiles_generation/EEZ_tiles.ipynb

This file was deleted.

Loading

0 comments on commit f3ce043

Please sign in to comment.