Skip to content

Commit

Permalink
Merge pull request #8 from papermerge/download-document-from-s3-before-gen-preview
Browse files Browse the repository at this point in the history

download doc from s3 before gen preview
  • Loading branch information
ciur authored Aug 17, 2024
2 parents f5b2771 + e43010d commit 7a3e84e
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 7 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "s3worker"
version = "0.1.0"
version = "0.3.1"
description = "Sync local storage with S3"
authors = ["Eugen Ciur <[email protected]>"]
readme = "README.md"
Expand Down Expand Up @@ -34,7 +34,7 @@ s3w = 's3worker.cli.s3w:app'
taskipy = "^1.12.2"

[tool.taskipy.tasks]
worker = "celery -A s3worker worker -E -c 4 --loglevel debug -Q s3,s3preview"
worker = "celery -A s3worker worker -E -c 4 --loglevel debug -Q dev-coco_s3,dev-coco_s3preview"

[build-system]
requires = ["poetry-core"]
Expand Down
3 changes: 0 additions & 3 deletions s3worker/celery_app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import yaml
from celery import Celery
from s3worker import config, utils
from celery.signals import setup_logging
from logging.config import dictConfig


settings = config.get_settings()

Expand Down
15 changes: 15 additions & 0 deletions s3worker/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pathlib import Path
from s3worker import config, utils
from s3worker import plib
from s3worker.exceptions import S3DocumentNotFound

settings = config.get_settings()
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -215,6 +216,20 @@ def sync():
)


def download_docver(docver_id: UUID, file_name: str):
    """Download a document version from S3 into its local storage path.

    Resolves the local absolute path and the corresponding S3 key for the
    given document version. If the file is already present locally the
    function returns immediately; otherwise the object is fetched from S3
    (creating parent directories as needed).

    Args:
        docver_id: UUID of the document version to download.
        file_name: file name of the document version.

    Raises:
        S3DocumentNotFound: if the document version exists neither locally
            nor on S3 under the expected key.
    """
    doc_ver_path = plib.abs_docver_path(docver_id, file_name)
    keyname = Path(get_prefix()) / plib.docver_path(docver_id, file_name)

    if doc_ver_path.exists():
        # Local copy already present: skip the redundant S3 round-trip.
        # (The original code re-downloaded unconditionally, which is
        # wasteful and fails if the S3 object was removed meanwhile.)
        return

    if not s3_obj_exists(get_bucket_name(), str(keyname)):
        # no local version + no s3 version
        raise S3DocumentNotFound(f"S3 key {keyname} not found")

    client = get_client()
    doc_ver_path.parent.mkdir(parents=True, exist_ok=True)
    client.download_file(get_bucket_name(), str(keyname), str(doc_ver_path))


def s3_obj_exists(
bucket_name: str, keyname: str
) -> bool:
Expand Down
4 changes: 4 additions & 0 deletions s3worker/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class S3DocumentNotFound(Exception):
    """Raised when the requested document cannot be located on S3."""

    pass
16 changes: 14 additions & 2 deletions s3worker/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from s3worker import generate, client, db
from s3worker import constants as const
from s3worker import exceptions


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -43,13 +44,24 @@ def remove_page_thumbnail_task(page_ids: list[str]):
logger.exception(ex)


@shared_task(name=const.S3_WORKER_GENERATE_PREVIEW)
@shared_task(
name=const.S3_WORKER_GENERATE_PREVIEW,
autoretry_for = (exceptions.S3DocumentNotFound,),
# Wait for 10 seconds before starting each new try. At most retry 6 times.
retry_kwargs = {"max_retries": 6, "countdown": 10},
)
def generate_preview_task(doc_id: str):
logger.debug('Task started')
Session = db.get_db()

try:
Session = db.get_db()
with Session() as db_session:
thumb_path = generate.doc_thumbnail(db_session, UUID(doc_id))
doc_ver = db.get_last_version(db_session, doc_id=UUID(doc_id))

logger.debug(f"doc_ver.id = {doc_ver.id}")
client.download_docver(docver_id=doc_ver.id,
file_name=doc_ver.file_name)

client.upload_file(thumb_path)

Expand Down

0 comments on commit 7a3e84e

Please sign in to comment.