From af87b4f1f05b1f60970fe5c11158a535bd4b5590 Mon Sep 17 00:00:00 2001 From: Guillermo Date: Thu, 1 Aug 2024 15:58:44 -0600 Subject: [PATCH] Refactor google integrations classes --- .../integrations/google/__init__.py | 1 - .../subsequences/integrations/google/base.py | 85 +++++++++++++++++++ .../integrations/google/google_transcribe.py | 84 +----------------- .../integrations/google/google_translate.py | 4 +- .../subsequences/integrations/translate.py | 2 +- 5 files changed, 90 insertions(+), 86 deletions(-) create mode 100644 kobo/apps/subsequences/integrations/google/base.py diff --git a/kobo/apps/subsequences/integrations/google/__init__.py b/kobo/apps/subsequences/integrations/google/__init__.py index cc3b34fbc8..e69de29bb2 100644 --- a/kobo/apps/subsequences/integrations/google/__init__.py +++ b/kobo/apps/subsequences/integrations/google/__init__.py @@ -1 +0,0 @@ -from .google_translate import GoogleTranslationEngine diff --git a/kobo/apps/subsequences/integrations/google/base.py b/kobo/apps/subsequences/integrations/google/base.py new file mode 100644 index 0000000000..cc37f49bdc --- /dev/null +++ b/kobo/apps/subsequences/integrations/google/base.py @@ -0,0 +1,85 @@ +import constance +from abc import ABC, abstractmethod +from google.cloud import storage +from googleapiclient import discovery +from django.conf import settings +from django.core.cache import cache + +from kobo.apps.trackers.utils import update_nlp_counter +from .utils import google_credentials_from_constance_config +from ...constants import GOOGLE_CACHE_TIMEOUT, make_async_cache_key +from ...exceptions import SubsequenceTimeoutError + + +class GoogleTask(ABC): + """ + Base class for Google transcription/translation task + Contains common functions for returning async responses using the Operations API + """ + + def __init__(self): + super().__init__() + self.asset = None + self.destination_path = None + self.credentials = google_credentials_from_constance_config() + self.storage_client = storage.Client(credentials=self.credentials) + self.bucket = self.storage_client.bucket(bucket_name=settings.GS_BUCKET_NAME) + + @abstractmethod + def begin_async_google_operation(self, *args: str) -> (object, int): + return ({}, 0) + + @property + @abstractmethod + def counter_name(self): + """ + Gets used by `update_nlp_counters()` - should begin with `google_` + """ + return 'google_' + + def update_counters(self, amount) -> None: + update_nlp_counter( + self.counter_name, + amount, + self.asset.owner_id, + self.asset.id, + ) + + @abstractmethod + def append_operations_response(self, results, *args) -> [object]: + pass + + @abstractmethod + def append_api_response(self, results, *args) -> [object]: + pass + + def handle_google_task_asynchronously(self, api_name, api_version, resource, *args): + cache_key = make_async_cache_key(*args) + # Stop Me If You Think You've Heard This One Before + if operation_name := cache.get(cache_key): + google_service = discovery.build(api_name, api_version, credentials=self.credentials) + resource_path = resource.split('.') + for subresource in resource_path: + google_service = getattr(google_service, subresource)() + operation = google_service.get(name=operation_name) + operation = operation.execute() + if not (operation.get('done') or operation.get('state') == 'SUCCEEDED'): + raise SubsequenceTimeoutError + + transcript = self.append_operations_response(operation, *args) + else: + print(f'--couldn\'t find key {cache_key}') + (results, amount) = self.begin_async_google_operation(*args) + print(results.operation) + cache.set(cache_key, results.operation.name, GOOGLE_CACHE_TIMEOUT) + print(cache.get(cache_key)) + self.update_counters(amount) + + try: + result = results.result(timeout=REQUEST_TIMEOUT) + except TimeoutError as err: + raise SubsequenceTimeoutError from err + transcript = self.append_api_response(result, *args) + + cache.delete(cache_key) + return transcript diff --git a/kobo/apps/subsequences/integrations/google/google_transcribe.py b/kobo/apps/subsequences/integrations/google/google_transcribe.py index 23106e7a88..34a986ee9f 100644 --- a/kobo/apps/subsequences/integrations/google/google_transcribe.py +++ b/kobo/apps/subsequences/integrations/google/google_transcribe.py @@ -1,20 +1,15 @@ # coding: utf-8 import uuid import posixpath -from abc import ABC, abstractmethod from concurrent.futures import TimeoutError from datetime import timedelta import constance from django.conf import settings from django.contrib.auth.models import User -from django.core.cache import cache from google.cloud import speech, storage -from googleapiclient import discovery -from kobo.apps.trackers.utils import update_nlp_counter -from .utils import google_credentials_from_constance_config -from ...constants import GOOGLE_CACHE_TIMEOUT, make_async_cache_key +from .base import GoogleTask from ...exceptions import ( AudioTooLongError, SubsequenceTimeoutError, @@ -28,81 +23,6 @@ SYNC_MAX_BYTES = 10000000 # 10MB -class GoogleTransXEngine(ABC): - """ - Base class for Google transcription/translation - Contains common functions for returning async responses using the Operations API - """ - - def __init__(self): - super().__init__() - self.asset = None - self.destination_path = None - self.credentials = google_credentials_from_constance_config() - self.storage_client = storage.Client(credentials=self.credentials) - self.bucket = self.storage_client.bucket(bucket_name=settings.GS_BUCKET_NAME) - - @abstractmethod - def begin_async_google_operation(self, *args: str) -> (object, int): - return ({}, 0) - - @property - @abstractmethod - def counter_name(self): - """ - Gets used by `update_nlp_counters()` - should begin with `google_` - """ - return 'google_' - - def update_counters(self, amount) -> None: - update_nlp_counter( - self.counter_name, - amount, - self.asset.owner_id, - self.asset.id, - ) - - @abstractmethod - def append_operations_response(self, results, *args) -> [object]: - pass - - @abstractmethod - def append_api_response(self, results, *args) -> [object]: - pass - - def handle_google_task_asynchronously(self, api_name, api_version, resource, *args): - cache_key = make_async_cache_key(*args) - # Stop Me If You Think You've Heard This One Before - if operation_name := cache.get(cache_key): - google_service = discovery.build(api_name, api_version, credentials=self.credentials) - resource_path = resource.split('.') - for subresource in resource_path: - google_service = getattr(google_service, subresource)() - operation = google_service.get(name=operation_name) - operation = operation.execute() - print(operation) - if not (operation.get('done') or operation.get('state') == 'SUCCEEDED'): - raise SubsequenceTimeoutError - - transcript = self.append_operations_response(operation, *args) - else: - print(f'--couldn\'t find key {cache_key}') - (results, amount) = self.begin_async_google_operation(*args) - print(results.operation) - cache.set(cache_key, results.operation.name, GOOGLE_CACHE_TIMEOUT) - print(cache.get(cache_key)) - self.update_counters(amount) - - try: - result = results.result(timeout=REQUEST_TIMEOUT) - except TimeoutError as err: - raise SubsequenceTimeoutError from err - transcript = self.append_api_response(result, *args) - - cache.delete(cache_key) - return transcript - - class AutoTranscription: """ The engine for transcribing audio files @@ -111,7 +31,7 @@ def store_transcript(self, transcript, asset, submission_id): pass -class GoogleTranscribeEngine(AutoTranscription, GoogleTransXEngine): +class GoogleTranscribeEngine(AutoTranscription, GoogleTask): def __init__(self): super().__init__() self.destination_path = None diff --git a/kobo/apps/subsequences/integrations/google/google_translate.py b/kobo/apps/subsequences/integrations/google/google_translate.py index 79c20f4bec..7d1787020d 100644 --- a/kobo/apps/subsequences/integrations/google/google_translate.py +++ b/kobo/apps/subsequences/integrations/google/google_translate.py @@ -8,7 +8,7 @@ from google.api_core.exceptions import InvalidArgument from google.cloud import translate_v3 as translate, storage -from .google_transcribe import GoogleTransXEngine +from .base import GoogleTask from .utils import google_credentials_from_constance_config from ..misc import ( TranslationException, @@ -22,7 +22,7 @@ def _hashed_strings(self, *strings): return md5(''.join(strings).encode()).hexdigest()[0:10] -class GoogleTranslationEngine(GoogleTransXEngine): +class GoogleTranslationEngine(GoogleTask): def __init__(self): self.translate_client = translate.TranslationServiceClient( credentials=google_credentials_from_constance_config() diff --git a/kobo/apps/subsequences/integrations/translate.py b/kobo/apps/subsequences/integrations/translate.py index ff9e36af89..81426b0ac6 100644 --- a/kobo/apps/subsequences/integrations/translate.py +++ b/kobo/apps/subsequences/integrations/translate.py @@ -7,7 +7,7 @@ Union, ) -from .google import GoogleTranslationEngine +from .google.google_translate import GoogleTranslationEngine from .misc import TranslationException