Skip to content

Commit

Permalink
Refactor google integrations classes
Browse files Browse the repository at this point in the history
  • Loading branch information
Guitlle committed Aug 1, 2024
1 parent 487e396 commit af87b4f
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 86 deletions.
1 change: 0 additions & 1 deletion kobo/apps/subsequences/integrations/google/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
from .google_translate import GoogleTranslationEngine
85 changes: 85 additions & 0 deletions kobo/apps/subsequences/integrations/google/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import constance
from abc import ABC, abstractmethod
from google.cloud import storage
from googleapiclient import discovery
from django.conf import settings
from django.core.cache import cache

from kobo.apps.trackers.utils import update_nlp_counter
from .utils import google_credentials_from_constance_config
from ...constants import GOOGLE_CACHE_TIMEOUT, make_async_cache_key
from ...exceptions import SubsequenceTimeoutError


class GoogleTask(ABC):
"""
Base class for Google transcription/translation task
Contains common functions for returning async responses using the Operations API
"""

def __init__(self):
super().__init__()
self.asset = None
self.destination_path = None
self.credentials = google_credentials_from_constance_config()
self.storage_client = storage.Client(credentials=self.credentials)
self.bucket = self.storage_client.bucket(bucket_name=settings.GS_BUCKET_NAME)

@abstractmethod
def begin_async_google_operation(self, *args: str) -> (object, int):
return ({}, 0)

@property
@abstractmethod
def counter_name(self):
"""
Gets used by `update_nlp_counters()` - should begin with `google_`
"""
return 'google_'

def update_counters(self, amount) -> None:
update_nlp_counter(
self.counter_name,
amount,
self.asset.owner_id,
self.asset.id,
)

@abstractmethod
def append_operations_response(self, results, *args) -> [object]:
pass

@abstractmethod
def append_api_response(self, results, *args) -> [object]:
pass

def handle_google_task_asynchronously(self, api_name, api_version, resource, *args):
cache_key = make_async_cache_key(*args)
# Stop Me If You Think You've Heard This One Before
if operation_name := cache.get(cache_key):
google_service = discovery.build(api_name, api_version, credentials=self.credentials)
resource_path = resource.split('.')
for subresource in resource_path:
google_service = getattr(google_service, subresource)()
operation = google_service.get(name=operation_name)
operation = operation.execute()
if not (operation.get('done') or operation.get('state') == 'SUCCEEDED'):
raise SubsequenceTimeoutError

transcript = self.append_operations_response(operation, *args)
else:
print(f'--couldn\'t find key {cache_key}')
(results, amount) = self.begin_async_google_operation(*args)
print(results.operation)
cache.set(cache_key, results.operation.name, GOOGLE_CACHE_TIMEOUT)
print(cache.get(cache_key))
self.update_counters(amount)

try:
result = results.result(timeout=REQUEST_TIMEOUT)
except TimeoutError as err:
raise SubsequenceTimeoutError from err
transcript = self.append_api_response(result, *args)

cache.delete(cache_key)
return transcript
84 changes: 2 additions & 82 deletions kobo/apps/subsequences/integrations/google/google_transcribe.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
# coding: utf-8
import uuid
import posixpath
from abc import ABC, abstractmethod
from concurrent.futures import TimeoutError
from datetime import timedelta

import constance
from django.conf import settings
from django.contrib.auth.models import User
from django.core.cache import cache
from google.cloud import speech, storage
from googleapiclient import discovery

from kobo.apps.trackers.utils import update_nlp_counter
from .utils import google_credentials_from_constance_config
from ...constants import GOOGLE_CACHE_TIMEOUT, make_async_cache_key
from .base import GoogleTask
from ...exceptions import (
AudioTooLongError,
SubsequenceTimeoutError,
Expand All @@ -28,81 +23,6 @@
SYNC_MAX_BYTES = 10000000 # 10MB


class GoogleTransXEngine(ABC):
"""
Base class for Google transcription/translation
Contains common functions for returning async responses using the Operations API
"""

def __init__(self):
super().__init__()
self.asset = None
self.destination_path = None
self.credentials = google_credentials_from_constance_config()
self.storage_client = storage.Client(credentials=self.credentials)
self.bucket = self.storage_client.bucket(bucket_name=settings.GS_BUCKET_NAME)

@abstractmethod
def begin_async_google_operation(self, *args: str) -> (object, int):
return ({}, 0)

@property
@abstractmethod
def counter_name(self):
"""
Gets used by `update_nlp_counters()` - should begin with `google_`
"""
return 'google_'

def update_counters(self, amount) -> None:
update_nlp_counter(
self.counter_name,
amount,
self.asset.owner_id,
self.asset.id,
)

@abstractmethod
def append_operations_response(self, results, *args) -> [object]:
pass

@abstractmethod
def append_api_response(self, results, *args) -> [object]:
pass

def handle_google_task_asynchronously(self, api_name, api_version, resource, *args):
cache_key = make_async_cache_key(*args)
# Stop Me If You Think You've Heard This One Before
if operation_name := cache.get(cache_key):
google_service = discovery.build(api_name, api_version, credentials=self.credentials)
resource_path = resource.split('.')
for subresource in resource_path:
google_service = getattr(google_service, subresource)()
operation = google_service.get(name=operation_name)
operation = operation.execute()
print(operation)
if not (operation.get('done') or operation.get('state') == 'SUCCEEDED'):
raise SubsequenceTimeoutError

transcript = self.append_operations_response(operation, *args)
else:
print(f'--couldn\'t find key {cache_key}')
(results, amount) = self.begin_async_google_operation(*args)
print(results.operation)
cache.set(cache_key, results.operation.name, GOOGLE_CACHE_TIMEOUT)
print(cache.get(cache_key))
self.update_counters(amount)

try:
result = results.result(timeout=REQUEST_TIMEOUT)
except TimeoutError as err:
raise SubsequenceTimeoutError from err
transcript = self.append_api_response(result, *args)

cache.delete(cache_key)
return transcript


class AutoTranscription:
"""
The engine for transcribing audio files
Expand All @@ -111,7 +31,7 @@ def store_transcript(self, transcript, asset, submission_id):
pass


class GoogleTranscribeEngine(AutoTranscription, GoogleTransXEngine):
class GoogleTranscribeEngine(AutoTranscription, GoogleTask):
def __init__(self):
super().__init__()
self.destination_path = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from google.api_core.exceptions import InvalidArgument
from google.cloud import translate_v3 as translate, storage

from .google_transcribe import GoogleTransXEngine
from .base import GoogleTask
from .utils import google_credentials_from_constance_config
from ..misc import (
TranslationException,
Expand All @@ -22,7 +22,7 @@ def _hashed_strings(self, *strings):
return md5(''.join(strings).encode()).hexdigest()[0:10]


class GoogleTranslationEngine(GoogleTransXEngine):
class GoogleTranslationEngine(GoogleTask):
def __init__(self):
self.translate_client = translate.TranslationServiceClient(
credentials=google_credentials_from_constance_config()
Expand Down
2 changes: 1 addition & 1 deletion kobo/apps/subsequences/integrations/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Union,
)

from .google import GoogleTranslationEngine
from .google.google_translate import GoogleTranslationEngine
from .misc import TranslationException


Expand Down

0 comments on commit af87b4f

Please sign in to comment.