diff --git a/apps/api/README.md b/apps/api/README.md
index 0f1572ff..30130cfb 100644
--- a/apps/api/README.md
+++ b/apps/api/README.md
@@ -19,6 +19,8 @@ For deployment, the following environment variables need to be set:
 - `PYTHONPATH=src/api` to properly import Python modules
 - `SP_KEY`, the private key for SAML authentication
 - `SENDGRID_API_KEY`, the API key needed to use the SendGrid API
+- `RESUMES_FOLDER_ID`, the ID of the Google Drive folder to upload to
+- Either `SERVICE_ACCOUNT_FILE` or `GOOGLE_SERVICE_ACCOUNT_CREDENTIALS`: We use a Google service account in tandem with aiogoogle to automatically upload resumes when submitting a form. The keys are JSON that can either be stored in a file, in which case the path of the file should be stored in `SERVICE_ACCOUNT_FILE`, or be stored directly in `GOOGLE_SERVICE_ACCOUNT_CREDENTIALS`. For local development, it is recommended to take the `SERVICE_ACCOUNT_FILE` approach.
 
 For staging, the following environment variables should also bet set:
 
diff --git a/apps/api/requirements.txt b/apps/api/requirements.txt
index 0513e46e..d4c013d0 100644
--- a/apps/api/requirements.txt
+++ b/apps/api/requirements.txt
@@ -6,3 +6,4 @@ motor==3.3.2
 pydantic[email]==2.5.2
 aiosendgrid==0.1.0
 sendgrid==6.11.0
+aiogoogle==5.6.0
diff --git a/apps/api/src/services/gdrive_handler.py b/apps/api/src/services/gdrive_handler.py
new file mode 100644
index 00000000..fe3ef34d
--- /dev/null
+++ b/apps/api/src/services/gdrive_handler.py
@@ -0,0 +1,72 @@
+import json
+import os
+
+from aiogoogle import Aiogoogle
+from aiogoogle.auth.creds import ServiceAccountCreds
+
+GOOGLE_DRIVE_URL = "https://drive.google.com/file/d/"
+SCOPES = ["https://www.googleapis.com/auth/drive.file"]
+
+
+def _get_credentials() -> ServiceAccountCreds:
+    """Get the credentials for the service account used to upload files.
+
+    The key is read from the file named by `SERVICE_ACCOUNT_FILE` if that
+    variable is set, and otherwise parsed from the JSON stored directly in
+    `GOOGLE_SERVICE_ACCOUNT_CREDENTIALS`.
+
+    Raises:
+        RuntimeError: If neither environment variable is set.
+    """
+    service_account_file = os.getenv("SERVICE_ACCOUNT_FILE")
+    service_account_credentials = os.getenv("GOOGLE_SERVICE_ACCOUNT_CREDENTIALS")
+
+    if service_account_file:
+        with open(service_account_file, encoding="utf-8") as f:
+            service_account_key = json.load(f)
+    elif service_account_credentials:
+        # The private key may contain raw newlines once it has passed through
+        # an environment variable. `strict=False` accepts control characters
+        # inside JSON strings, and unlike blindly escaping every newline it
+        # also keeps pretty-printed (multi-line) JSON parseable.
+        service_account_key = json.loads(service_account_credentials, strict=False)
+    else:
+        raise RuntimeError("Service account credentials not found")
+
+    return ServiceAccountCreds(scopes=SCOPES, **service_account_key)
+
+
+async def upload_file(
+    folder_id: str, file_name: str, file_bytes: bytes, file_type: str
+) -> str:
+    """Upload a file to Google Drive and return a URL to the uploaded file.
+
+    The file is created through aiogoogle, authenticated as the service
+    account, inside the folder with the given `folder_id`. `file_type` is the
+    content type sent along with the upload.
+    """
+    creds = _get_credentials()
+    async with Aiogoogle(service_account_creds=creds) as aiogoogle:
+        drive_v3 = await aiogoogle.discover("drive", "v3")
+
+        # Provide the given file name and set the upload destination by
+        # specifying the given folder ID as a parent
+        metadata = {"name": file_name, "parents": [folder_id]}
+
+        # Create request object:
+        req = drive_v3.files.create(
+            upload_file=file_bytes,
+            fields="id",
+            json=metadata,
+            supportsAllDrives=True,
+        )
+
+        # Manually set the content type to the type FastAPI provides, so that the
+        # Google Drive API doesn't need to.
+        req.upload_file_content_type = file_type
+
+        # Upload file
+        uploaded_file: dict[str, str] = await aiogoogle.as_service_account(req)
+        file_id: str = uploaded_file["id"]
+
+    return GOOGLE_DRIVE_URL + file_id
diff --git a/apps/api/src/utils/resume_handler.py b/apps/api/src/utils/resume_handler.py
new file mode 100644
index 00000000..6ef33940
--- /dev/null
+++ b/apps/api/src/utils/resume_handler.py
@@ -0,0 +1,62 @@
+import hashlib
+import os
+from logging import getLogger
+from typing import Protocol
+
+from aiogoogle import HTTPError
+from fastapi import UploadFile
+
+from services import gdrive_handler
+
+log = getLogger(__name__)
+
+RESUMES_FOLDER_ID = os.getenv("RESUMES_FOLDER_ID")
+SIZE_LIMIT = 500_000
+ACCEPTED_TYPES = ("application/pdf",)
+
+
+class Person(Protocol):
+    first_name: str
+    last_name: str
+
+
+async def upload_resume(person: Person, resume_upload: UploadFile) -> str:
+    """Upload resume file to Google Drive and provide url to uploaded file.
+
+    The file is stored under a name built from the applicant's name and a
+    short digest of the file contents.
+
+    Raises:
+        RuntimeError: If `RESUMES_FOLDER_ID` is not defined or the upload to
+            Google Drive fails.
+        TypeError: If the content type of the upload is not accepted.
+        ValueError: If the file is larger than `SIZE_LIMIT` bytes.
+    """
+    if not RESUMES_FOLDER_ID:
+        raise RuntimeError("RESUMES_FOLDER_ID is not defined")
+
+    if resume_upload.content_type not in ACCEPTED_TYPES:
+        raise TypeError("Invalid resume file type")
+
+    # Check file size
+    raw_resume_file: bytes = await resume_upload.read()
+    if len(raw_resume_file) > SIZE_LIMIT:
+        raise ValueError("Resume file is larger than allowed")
+
+    # Rename with applicant's name and file digest
+    digest = hashlib.md5(raw_resume_file).hexdigest()
+    filename = f"{person.first_name}-{person.last_name}-{digest[:8]}.pdf"
+
+    try:
+        resume_url = await gdrive_handler.upload_file(
+            RESUMES_FOLDER_ID,
+            filename,
+            raw_resume_file,
+            resume_upload.content_type,
+        )
+    except HTTPError as err:
+        log.error("During resume upload: %s", err)
+        # Chain the original error so the traceback keeps the root cause.
+        raise RuntimeError("Could not upload resume to Google Drive") from err
+
+    return resume_url
diff --git a/apps/api/stubs/aiogoogle/__init__.pyi b/apps/api/stubs/aiogoogle/__init__.pyi
new file mode 100644
index 00000000..e9cdd05e
--- /dev/null
+++ b/apps/api/stubs/aiogoogle/__init__.pyi
@@ -0,0 +1,2 @@
+from .client import Aiogoogle as Aiogoogle
+from .excs import HTTPError as HTTPError
diff --git a/apps/api/stubs/aiogoogle/auth/creds.pyi b/apps/api/stubs/aiogoogle/auth/creds.pyi
new file mode 100644
index 00000000..338d8652
--- /dev/null
+++ b/apps/api/stubs/aiogoogle/auth/creds.pyi
@@ -0,0 +1,2 @@
+class ServiceAccountCreds(dict[str, str]):
+    def __init__(self, scopes: list[str] | None = ..., **kwargs: object) -> None: ...
diff --git a/apps/api/stubs/aiogoogle/client.pyi b/apps/api/stubs/aiogoogle/client.pyi
new file mode 100644
index 00000000..d48fecde
--- /dev/null
+++ b/apps/api/stubs/aiogoogle/client.pyi
@@ -0,0 +1,28 @@
+from typing import Any, Literal, Optional
+
+from .auth.creds import ServiceAccountCreds
+from .resource import GoogleDriveAPI
+from .models import Request
+
+class Aiogoogle:
+    def __init__(
+        self,
+        service_account_creds: ServiceAccountCreds | None = ...,
+    ) -> None: ...
+    async def discover(
+        self,
+        api_name: Literal["drive"],
+        api_version: str | None = ...,
+        validate: bool = ...,
+        *,
+        disco_doc_ver: Optional[int] = ...
+    ) -> GoogleDriveAPI: ...
+    async def as_service_account(
+        self,
+        *requests: Request,
+        timeout: int | None = ...,
+        service_account_creds: ServiceAccountCreds | None = ...,
+        raise_for_status: bool = ...
+    ) -> Any: ...
+    async def __aenter__(self) -> Aiogoogle: ...
+    async def __aexit__(self, *args: Any) -> None: ...
diff --git a/apps/api/stubs/aiogoogle/excs.pyi b/apps/api/stubs/aiogoogle/excs.pyi
new file mode 100644
index 00000000..90be9515
--- /dev/null
+++ b/apps/api/stubs/aiogoogle/excs.pyi
@@ -0,0 +1,2 @@
+class AiogoogleError(Exception): ...
+class HTTPError(AiogoogleError): ...
diff --git a/apps/api/stubs/aiogoogle/models.pyi b/apps/api/stubs/aiogoogle/models.pyi
new file mode 100644
index 00000000..0d4c8219
--- /dev/null
+++ b/apps/api/stubs/aiogoogle/models.pyi
@@ -0,0 +1,2 @@
+class Request:
+    upload_file_content_type: str
diff --git a/apps/api/stubs/aiogoogle/resource.pyi b/apps/api/stubs/aiogoogle/resource.pyi
new file mode 100644
index 00000000..45815aa1
--- /dev/null
+++ b/apps/api/stubs/aiogoogle/resource.pyi
@@ -0,0 +1,25 @@
+from .models import Request
+from typing import Mapping, TypeVar
+
+T = TypeVar("T")
+
+class CreateMethod:
+    def __call__(
+        self,
+        validate: bool | None = ...,
+        data: object | None = ...,
+        json: Mapping[str, object] | None = ...,
+        upload_file: bytes | None = ...,
+        download_file: str | None = ...,
+        timeout: int | None = ...,
+        path_params_safe_chars: Mapping[str, object] = ...,
+        fields: str = ...,
+        supportsAllDrives: bool = ...,
+        **uri_params: object
+    ) -> Request: ...
+
+class FileResource:
+    create: CreateMethod
+
+class GoogleDriveAPI:
+    files: FileResource
diff --git a/apps/api/tests/test_gdrive_handler.py b/apps/api/tests/test_gdrive_handler.py
new file mode 100644
index 00000000..35245824
--- /dev/null
+++ b/apps/api/tests/test_gdrive_handler.py
@@ -0,0 +1,42 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+from services import gdrive_handler
+
+SAMPLE_NAME = "my-file-name"
+SAMPLE_FOLDER_ID = "my-folder-id"
+SAMPLE_BYTES = b"my-bytes"
+SAMPLE_FILE_TYPE = "my-file-type"
+SAMPLE_OUTPUT_ID = "12345"
+UPLOAD_PATH = (
+    "https://www.googleapis.com/upload/drive/v3/files?fields=id&supportsAllDrives=True"
+)
+
+
+@patch("services.gdrive_handler._get_credentials")
+@patch("aiogoogle.Aiogoogle.as_service_account")
+async def test_upload_single_file(
+    mock_as_service_account: AsyncMock, mock_get_credentials: MagicMock
+) -> None:
+    """Test whether the Request object sent to the Google Drive API
+    is generated properly."""
+    mock_get_credentials.return_value = None
+
+    mock_as_service_account.return_value = {"id": SAMPLE_OUTPUT_ID}
+
+    output = await gdrive_handler.upload_file(
+        SAMPLE_FOLDER_ID, SAMPLE_NAME, SAMPLE_BYTES, SAMPLE_FILE_TYPE
+    )
+
+    mock_as_service_account.assert_awaited_once()
+
+    request = mock_as_service_account.call_args.args[0]
+    assert request.method == "POST"
+    assert request.media_upload.upload_path == UPLOAD_PATH
+    assert request.json == {
+        "name": SAMPLE_NAME,
+        "parents": [SAMPLE_FOLDER_ID],
+    }
+    assert request.upload_file_content_type == SAMPLE_FILE_TYPE
+    assert request.media_upload.multipart
+
+    assert output == gdrive_handler.GOOGLE_DRIVE_URL + SAMPLE_OUTPUT_ID