Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import Google Drive functions #74

Merged
merged 4 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ For deployment, the following environment variables need to be set:
- `PYTHONPATH=src/api` to properly import Python modules
- `SP_KEY`, the private key for SAML authentication
- `SENDGRID_API_KEY`, the API key needed to use the SendGrid API
- `RESUMES_FOLDER_ID`, the ID of the Google Drive folder to upload to
- Either `SERVICE_ACCOUNT_FILE` or `GOOGLE_SERVICE_ACCOUNT_CREDENTIALS`: We use a Google service account in tandem with aiogoogle to automatically upload resumes when submitting a form. The keys are JSON that can either be stored in a file, in which case the path of the file should be stored in `SERVICE_ACCOUNT_FILE`, or be stored directly in `GOOGLE_SERVICE_ACCOUNT_CREDENTIALS`. For local development, it is recommended to take the `SERVICE_ACCOUNT_FILE` approach.

For staging, the following environment variables should also be set:

Expand Down
1 change: 1 addition & 0 deletions apps/api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ motor==3.3.2
pydantic[email]==2.5.2
aiosendgrid==0.1.0
sendgrid==6.11.0
aiogoogle==5.6.0
57 changes: 57 additions & 0 deletions apps/api/src/services/gdrive_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
import os

from aiogoogle import Aiogoogle
samderanova marked this conversation as resolved.
Show resolved Hide resolved
from aiogoogle.auth.creds import ServiceAccountCreds
samderanova marked this conversation as resolved.
Show resolved Hide resolved

# Prefix that an uploaded file's ID is appended to when building its URL.
GOOGLE_DRIVE_URL = "https://drive.google.com/file/d/"
# Scopes passed to the service account credentials.
SCOPES = ["https://www.googleapis.com/auth/drive.file"]


def _get_credentials() -> ServiceAccountCreds:
    """Build the service account credentials used to upload files.

    The key is taken from the JSON file named by `SERVICE_ACCOUNT_FILE` when
    that variable is set; otherwise from the JSON text stored directly in
    `GOOGLE_SERVICE_ACCOUNT_CREDENTIALS`. A RuntimeError is raised when
    neither variable holds a value.
    """
    key_path = os.getenv("SERVICE_ACCOUNT_FILE")
    inline_json = os.getenv("GOOGLE_SERVICE_ACCOUNT_CREDENTIALS")

    # The key file takes precedence over the inline credentials.
    if key_path:
        with open(key_path) as key_file:
            key_fields = json.load(key_file)
        return ServiceAccountCreds(scopes=SCOPES, **key_fields)

    if inline_json:
        # Turn every raw newline back into the two-character escape sequence
        # before parsing the value as JSON.
        key_fields = json.loads(inline_json.replace("\n", "\\n"))
        return ServiceAccountCreds(scopes=SCOPES, **key_fields)

    raise RuntimeError("Service account credentials not found")


async def upload_file(
    folder_id: str, file_name: str, file_bytes: bytes, file_type: str
) -> str:
    """Upload `file_bytes` to Google Drive through aiogoogle.

    The file is created under the name `file_name` with the folder identified
    by `folder_id` as its parent, and `file_type` is sent as its content type.
    Returns `GOOGLE_DRIVE_URL` followed by the ID of the created file.
    """
    async with Aiogoogle(service_account_creds=_get_credentials()) as client:
        drive = await client.discover("drive", "v3")

        # The name and the destination folder (given as a parent) travel in
        # the JSON metadata; only the new file's ID is requested back.
        request = drive.files.create(
            upload_file=file_bytes,
            fields="id",
            json={"name": file_name, "parents": [folder_id]},
            supportsAllDrives=True,
        )

        # Set the content type explicitly to the one the caller supplied so
        # that the Google Drive API doesn't need to work it out.
        request.upload_file_content_type = file_type

        response: dict[str, str] = await client.as_service_account(request)
        new_file_id: str = response["id"]

        return GOOGLE_DRIVE_URL + new_file_id
52 changes: 52 additions & 0 deletions apps/api/src/utils/resume_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import hashlib
import os
from logging import getLogger
from typing import Protocol

from aiogoogle import HTTPError
samderanova marked this conversation as resolved.
Show resolved Hide resolved
from fastapi import UploadFile

from services import gdrive_handler

# Module-level logger named after this module.
log = getLogger(__name__)

# ID of the Drive folder that resumes are uploaded into. Read from the
# environment once at import time; None when the variable is unset.
RESUMES_FOLDER_ID = os.getenv("RESUMES_FOLDER_ID")
samderanova marked this conversation as resolved.
Show resolved Hide resolved
# Maximum accepted resume size, compared against the byte length of the upload.
SIZE_LIMIT = 500_000
# Content types a resume upload may have.
ACCEPTED_TYPES = ("application/pdf",)


class Person(Protocol):
    """Structural type for objects exposing `first_name` and `last_name`
    string attributes."""

    first_name: str
    last_name: str


async def upload_resume(person: Person, resume_upload: UploadFile) -> str:
    """Upload a resume file to Google Drive and return the URL of the upload.

    The file is stored as `<first_name>-<last_name>-<digest>.pdf`, where
    `<digest>` is the first eight hex characters of the MD5 digest of the
    file content.

    Args:
        person: Provides the first and last name used in the stored file name.
        resume_upload: The uploaded resume; its content type must be one of
            `ACCEPTED_TYPES`.

    Returns:
        The URL returned by `gdrive_handler.upload_file`.

    Raises:
        RuntimeError: If `RESUMES_FOLDER_ID` is not set, or if the upload
            fails with an aiogoogle `HTTPError` (kept as the cause).
        TypeError: If the content type is not in `ACCEPTED_TYPES`.
        ValueError: If the file is larger than `SIZE_LIMIT` bytes.
    """
    if not RESUMES_FOLDER_ID:
        raise RuntimeError("RESUMES_FOLDER_ID is not defined")

    if resume_upload.content_type not in ACCEPTED_TYPES:
        raise TypeError("Invalid resume file type")

    # The whole upload is read so that its size can be checked before
    # anything is sent to Google Drive.
    raw_resume_file: bytes = await resume_upload.read()
    if len(raw_resume_file) > SIZE_LIMIT:
        raise ValueError("Resume file is larger than allowed")

    # Rename with applicant's name and file digest
    digest = hashlib.md5(raw_resume_file).hexdigest()
    filename = f"{person.first_name}-{person.last_name}-{digest[:8]}.pdf"

    try:
        resume_url = await gdrive_handler.upload_file(
            RESUMES_FOLDER_ID,
            filename,
            raw_resume_file,
            resume_upload.content_type,
        )
    except HTTPError as err:
        log.error("During resume upload: %s", err)
        # Chain explicitly so the original HTTP failure is recorded as the
        # cause of the RuntimeError that callers see.
        raise RuntimeError("Could not upload resume to Google Drive") from err

    return resume_url
2 changes: 2 additions & 0 deletions apps/api/stubs/aiogoogle/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .client import Aiogoogle as Aiogoogle
from .excs import HTTPError as HTTPError
2 changes: 2 additions & 0 deletions apps/api/stubs/aiogoogle/auth/creds.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Stub for aiogoogle's service account credentials: typed as a str-to-str
# dict constructed from optional scopes plus arbitrary keyword fields.
class ServiceAccountCreds(dict[str, str]):
    def __init__(self, scopes: list[str] | None = ..., **kwargs: object) -> None: ...
28 changes: 28 additions & 0 deletions apps/api/stubs/aiogoogle/client.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Any, Literal, Optional

from .auth.creds import ServiceAccountCreds
from .resource import GoogleDriveAPI
from .models import Request

# Type stub for aiogoogle's client class; it declares only the members
# listed here.
class Aiogoogle:
    def __init__(
        self,
        service_account_creds: ServiceAccountCreds | None = ...,
    ) -> None: ...
    # This stub accepts only the "drive" API name and types the result as
    # GoogleDriveAPI.
    async def discover(
        self,
        api_name: Literal["drive"],
        api_version: str | None = ...,
        validate: bool = ...,
        *,
        disco_doc_ver: Optional[int] = ...
    ) -> GoogleDriveAPI: ...
    # The result is typed as Any, so callers annotate the response themselves.
    async def as_service_account(
        self,
        *requests: Request,
        timeout: int | None = ...,
        service_account_creds: ServiceAccountCreds | None = ...,
        raise_for_status: bool = ...
    ) -> Any: ...
    # Async context manager support (`async with Aiogoogle(...) as client`).
    async def __aenter__(self) -> Aiogoogle: ...
    async def __aexit__(self, *args: Any) -> None: ...
2 changes: 2 additions & 0 deletions apps/api/stubs/aiogoogle/excs.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Stubbed exception hierarchy: HTTPError derives from AiogoogleError, which
# derives from Exception.
class AiogoogleError(Exception): ...
class HTTPError(AiogoogleError): ...
2 changes: 2 additions & 0 deletions apps/api/stubs/aiogoogle/models.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Stub for the request object; only the content-type attribute is declared.
class Request:
    upload_file_content_type: str
25 changes: 25 additions & 0 deletions apps/api/stubs/aiogoogle/resource.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from .models import Request
from typing import Mapping, TypeVar

T = TypeVar("T")

# Stub for a callable API method that builds a Request from its arguments.
class CreateMethod:
    def __call__(
        self,
        validate: bool | None = ...,
        data: object | None = ...,
        json: Mapping[str, object] | None = ...,
        upload_file: bytes | None = ...,
        download_file: str | None = ...,
        timeout: int | None = ...,
        path_params_safe_chars: Mapping[str, object] = ...,
        fields: str = ...,
        supportsAllDrives: bool = ...,
        # The annotation on **kwargs describes each individual value, so it
        # is `object` here rather than a mapping type.
        **uri_params: object
    ) -> Request: ...

# Stub for the `files` resource; only its `create` method is declared.
class FileResource:
    create: CreateMethod

# Stub for the API object returned by `Aiogoogle.discover`; only the `files`
# resource is declared.
class GoogleDriveAPI:
    files: FileResource
42 changes: 42 additions & 0 deletions apps/api/tests/test_gdrive_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from unittest.mock import AsyncMock, MagicMock, patch

from services import gdrive_handler

# Placeholder inputs passed to `gdrive_handler.upload_file` by the test.
SAMPLE_NAME = "my-file-name"
SAMPLE_FOLDER_ID = "my-folder-id"
SAMPLE_BYTES = b"my-bytes"
SAMPLE_FILE_TYPE = "my-file-type"
# File ID that the mocked upload call returns.
SAMPLE_OUTPUT_ID = "12345"
# Upload path, including query string, that the generated request is
# expected to carry.
UPLOAD_PATH = (
    "https://www.googleapis.com/upload/drive/v3/files?fields=id&supportsAllDrives=True"
)


@patch("services.gdrive_handler._get_credentials")
@patch("aiogoogle.Aiogoogle.as_service_account")
async def test_upload_single_file(
    mock_asServiceAccount: AsyncMock, mock_getCredentials: MagicMock
) -> None:
    """Check that `upload_file` builds the expected Request object for the
    Google Drive API and returns the URL of the uploaded file."""
    # Both the credential lookup and the actual upload call are mocked.
    mock_getCredentials.return_value = None
    mock_asServiceAccount.return_value = {"id": SAMPLE_OUTPUT_ID}

    expected_metadata = {
        "name": SAMPLE_NAME,
        "parents": [SAMPLE_FOLDER_ID],
    }
    expected_url = gdrive_handler.GOOGLE_DRIVE_URL + SAMPLE_OUTPUT_ID

    output = await gdrive_handler.upload_file(
        SAMPLE_FOLDER_ID, SAMPLE_NAME, SAMPLE_BYTES, SAMPLE_FILE_TYPE
    )

    mock_asServiceAccount.assert_called_once()

    # The first positional argument of the mocked call is the request that
    # would have been sent.
    request = mock_asServiceAccount.call_args.args[0]
    assert request.method == "POST"
    assert request.media_upload.upload_path == UPLOAD_PATH
    assert request.json == expected_metadata
    assert request.upload_file_content_type == SAMPLE_FILE_TYPE
    assert request.media_upload.multipart

    assert output == expected_url
Loading