Skip to content

Commit

Permalink
Refactor and clean obsolete code for translation and transcription fe…
Browse files Browse the repository at this point in the history
…atures on submission extras django kobo app
  • Loading branch information
Guitlle committed Aug 12, 2024
1 parent af87b4f commit 840be4d
Show file tree
Hide file tree
Showing 14 changed files with 506 additions and 603 deletions.
1 change: 1 addition & 0 deletions kobo/apps/subsequences/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
GOOGLETX = 'googletx'
GOOGLETS = 'googlets'
GOOGLE_CODE = 'goog'

ASYNC_TRANSLATION_DELAY_INTERVAL = 5

Expand Down
8 changes: 8 additions & 0 deletions kobo/apps/subsequences/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,11 @@ class TranscriptionResultsNotFound(Exception):
"""
No results returned by specified transcription service
"""


class TranslationResultsNotFound(Exception):
    """
    Translation counterpart of TranscriptionResultsNotFound.

    NOTE(review): presumably raised when the translation service returns no
    results; the raising sites are not visible here, so confirm.
    """


class TranslationAsyncResultAvailable(Exception):
    """
    Exception type for asynchronous translation handling.

    NOTE(review): the name suggests it signals that an async translation
    result is available; the raising sites are not visible here, so confirm.
    """
129 changes: 85 additions & 44 deletions kobo/apps/subsequences/integrations/google/base.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,58 @@
import constance
from abc import ABC, abstractmethod
from concurrent.futures import TimeoutError
from google.cloud import storage
from google.api_core.operation import Operation
from googleapiclient import discovery
from django.conf import settings
from django.contrib.auth.models import User
from django.core.cache import cache

from kobo.apps.trackers.utils import update_nlp_counter
from kpi.utils.log import logging
from .utils import google_credentials_from_constance_config
from ...models import SubmissionExtras
from ...constants import GOOGLE_CACHE_TIMEOUT, make_async_cache_key
from ...exceptions import SubsequenceTimeoutError

REQUEST_TIMEOUT = 10 # seconds

class GoogleTask(ABC):

class GoogleService(ABC):
"""
Base class for Google transcription/translation task
Contains common functions for returning async responses using the Operations API
"""

def __init__(self):
# These constants must be set by the inherited service class
API_NAME = None
API_VERSION = None
API_RESOURCE = None

def __init__(self, submission: SubmissionExtras):
super().__init__()
self.asset = None
self.destination_path = None
self.submission = submission
self.asset = submission.asset
self.user = submission.asset.owner
self.credentials = google_credentials_from_constance_config()
self.storage_client = storage.Client(credentials=self.credentials)
self.bucket = self.storage_client.bucket(bucket_name=settings.GS_BUCKET_NAME)
self.bucket = self.storage_client.bucket(
bucket_name=settings.GS_BUCKET_NAME
)

@abstractmethod
def adapt_response(self, results, *args) -> [object]:
    """
    Convert a raw Google result into the value returned to the caller.

    Abstract hook: `handle_google_operation` passes it either the finished
    operation fetched through the Operations API or the result of a freshly
    started operation, and returns whatever this method produces. The base
    implementation does nothing and returns None.
    """

@abstractmethod
def begin_async_google_operation(self, *args: str) -> (object, int):
return ({}, 0)
def begin_google_operation(
self,
xpath: str,
source_lang: str,
target_lang: str,
content: str,
) -> (object, int):
pass

@property
@abstractmethod
Expand All @@ -37,49 +62,65 @@ def counter_name(self):
"""
return 'google_'

def update_counters(self, amount) -> None:
update_nlp_counter(
self.counter_name,
amount,
self.asset.owner_id,
self.asset.id,
def handle_google_operation(
self, xpath: str, source_lang: str, target_lang: str, content=None
) -> str:
submission_id = self.submission.submission_uuid
cache_key = make_async_cache_key(
self.user.pk, submission_id, xpath, source_lang, target_lang
)

@abstractmethod
def append_operations_response(self, results, *args) -> [object]:
pass

@abstractmethod
def append_api_response(self, results, *args) -> [object]:
pass

def handle_google_task_asynchronously(self, api_name, api_version, resource, *args):
cache_key = make_async_cache_key(*args)
# Stop Me If You Think You've Heard This One Before
if operation_name := cache.get(cache_key):
google_service = discovery.build(api_name, api_version, credentials=self.credentials)
resource_path = resource.split('.')
google_service = discovery.build(
self.API_NAME, self.API_VERSION, credentials=self.credentials
)
resource_path = self.API_RESOURCE.split('.')
for subresource in resource_path:
google_service = getattr(google_service, subresource)()
operation = google_service.get(name=operation_name)
operation = operation.execute()
if not (operation.get('done') or operation.get('state') == 'SUCCEEDED'):
operation = google_service.get(name=operation_name).execute()
if not (
operation.get('done') or operation.get('state') == 'SUCCEEDED'
):
raise SubsequenceTimeoutError

transcript = self.append_operations_response(operation, *args)
cache.delete(cache_key)
return self.adapt_response(operation)
else:
print(f'--couldn\'t find key {cache_key}')
(results, amount) = self.begin_async_google_operation(*args)
print(results.operation)
cache.set(cache_key, results.operation.name, GOOGLE_CACHE_TIMEOUT)
print(cache.get(cache_key))
self.update_counters(amount)
(response, amount) = self.begin_google_operation(
xpath, source_lang, target_lang, content
)
if isinstance(response, Operation):
cache.set(
cache_key, response.operation.name, GOOGLE_CACHE_TIMEOUT
)
self.update_counters(amount)
try:
result = response.result(timeout=REQUEST_TIMEOUT)
except TimeoutError as err:
raise SubsequenceTimeoutError from err

cache.delete(cache_key)
return self.adapt_response(result)
if isinstance(response, str):
return response

@abstractmethod
def process_data(self, qpath: str, options: dict) -> dict:
    """
    Abstract entry point that every concrete service must implement.

    Takes a question `qpath` and an `options` dict and is annotated to return
    a dict. The meaning of both is defined by the subclasses, which are not
    visible here. The base implementation does nothing and returns None.
    """

try:
result = results.result(timeout=REQUEST_TIMEOUT)
except TimeoutError as err:
raise SubsequenceTimeoutError from err
transcript = self.append_api_response(result, *args)
def qpath_to_xpath(self, qpath: str) -> str:
    """
    Look up the `$xpath` of the survey row whose `$qpath` equals `qpath`.

    Scans `self.asset.content['survey']` in order and uses the first row that
    carries both a `$qpath` and a `$xpath` key and whose `$qpath` matches.

    Raises:
        KeyError: if no such row exists, or the matching row's `$xpath`
            is None.
    """
    survey_rows = self.asset.content['survey']
    # Lazily pick the first matching row; None doubles as the "not found"
    # marker, so a matching row whose `$xpath` is None is also an error.
    match = next(
        (
            row['$xpath']
            for row in survey_rows
            if '$qpath' in row and '$xpath' in row and row['$qpath'] == qpath
        ),
        None,
    )
    if match is None:
        raise KeyError(f'xpath for {qpath=} not found')
    return match

cache.delete(cache_key)
return transcript
def update_counters(self, amount) -> None:
    """
    Report `amount` of usage to `update_nlp_counter`.

    The amount is recorded under this service's `counter_name` and attributed
    to the asset's owner (`owner_id`) and to the asset itself (`id`). The unit
    of `amount` is decided by the caller and is not visible here.
    """
    counter = self.counter_name
    asset = self.asset
    owner_id = asset.owner_id
    asset_id = asset.id
    update_nlp_counter(counter, amount, owner_id, asset_id)
Loading

0 comments on commit 840be4d

Please sign in to comment.