diff --git a/pyproject.toml b/pyproject.toml
index b86538d..3b4e7cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "s3worker"
-version = "0.1.0"
+version = "0.3.1"
 description = "Sync local storage with S3"
 authors = ["Eugen Ciur "]
 readme = "README.md"
@@ -34,7 +34,7 @@ s3w = 's3worker.cli.s3w:app'
 taskipy = "^1.12.2"
 
 [tool.taskipy.tasks]
-worker = "celery -A s3worker worker -E -c 4 --loglevel debug -Q s3,s3preview"
+worker = "celery -A s3worker worker -E -c 4 --loglevel debug -Q dev-coco_s3,dev-coco_s3preview"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/s3worker/celery_app.py b/s3worker/celery_app.py
index 253705d..74c3755 100644
--- a/s3worker/celery_app.py
+++ b/s3worker/celery_app.py
@@ -1,9 +1,6 @@
-import yaml
 from celery import Celery
 from s3worker import config, utils
 from celery.signals import setup_logging
-from logging.config import dictConfig
-
 
 settings = config.get_settings()
diff --git a/s3worker/client.py b/s3worker/client.py
index 17abd90..c5e81f8 100644
--- a/s3worker/client.py
+++ b/s3worker/client.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 from s3worker import config, utils
 from s3worker import plib
+from s3worker.exceptions import S3DocumentNotFound
 
 settings = config.get_settings()
 logger = logging.getLogger(__name__)
@@ -215,6 +216,20 @@ def sync():
     )
 
+def download_docver(docver_id: UUID, file_name: str):
+    """Downloads document version from S3"""
+    doc_ver_path = plib.abs_docver_path(docver_id, file_name)
+    keyname = Path(get_prefix()) / plib.docver_path(docver_id, file_name)
+    if not doc_ver_path.exists():
+        if not s3_obj_exists(get_bucket_name(), str(keyname)):
+            # no local version + no s3 version
+            raise S3DocumentNotFound(f"S3 key {keyname} not found")
+
+        client = get_client()
+        doc_ver_path.parent.mkdir(parents=True, exist_ok=True)
+        client.download_file(get_bucket_name(), str(keyname), str(doc_ver_path))
+
+
 def s3_obj_exists(
     bucket_name: str,
     keyname: str
 ) -> bool:
diff --git a/s3worker/exceptions.py b/s3worker/exceptions.py
new file mode 100644
index 0000000..044c9ec
--- /dev/null
+++ b/s3worker/exceptions.py
@@ -0,0 +1,4 @@
+class S3DocumentNotFound(Exception):
+    """Raised when document is not found on S3"""
+
+    ...
diff --git a/s3worker/tasks.py b/s3worker/tasks.py
index 15f43e5..c736c5e 100644
--- a/s3worker/tasks.py
+++ b/s3worker/tasks.py
@@ -4,6 +4,7 @@
 
 from s3worker import generate, client, db
 from s3worker import constants as const
+from s3worker import exceptions
 
 logger = logging.getLogger(__name__)
 
@@ -43,13 +44,24 @@ def remove_page_thumbnail_task(page_ids: list[str]):
         logger.exception(ex)
 
 
-@shared_task(name=const.S3_WORKER_GENERATE_PREVIEW)
+@shared_task(
+    name=const.S3_WORKER_GENERATE_PREVIEW,
+    autoretry_for = (exceptions.S3DocumentNotFound,),
+    # Wait for 10 seconds before starting each new try. At most retry 6 times.
+    retry_kwargs = {"max_retries": 6, "countdown": 10},
+)
 def generate_preview_task(doc_id: str):
     logger.debug('Task started')
+    Session = db.get_db()
+
     try:
-        Session = db.get_db()
         with Session() as db_session:
             thumb_path = generate.doc_thumbnail(db_session, UUID(doc_id))
+            doc_ver = db.get_last_version(db_session, doc_id=UUID(doc_id))
+
+            logger.debug(f"doc_ver.id = {doc_ver.id}")
+            client.download_docver(docver_id=doc_ver.id,
+                                   file_name=doc_ver.file_name)
             client.upload_file(thumb_path)