Skip to content

Commit

Permalink
Merge branch 'feat/unit-tests' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
clemlesne committed Aug 21, 2024
2 parents 5e140af + a52d0fa commit f0748d4
Show file tree
Hide file tree
Showing 18 changed files with 1,223 additions and 768 deletions.
19 changes: 17 additions & 2 deletions .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ jobs:
runs-on: ubuntu-22.04
needs:
- init
strategy:
fail-fast: false
matrix:
step:
- test-static
- test-unit
steps:
- name: Checkout
uses: actions/[email protected]
Expand All @@ -56,11 +62,20 @@ jobs:
- name: Install dependencies
run: make install-deps

# Materialise the test configuration: the DOTENV_UNIT_TESTS repository secret
# is written verbatim into a .env file in the working directory, before the
# test step runs.
- name: Configure environment variables
run: echo "${{ secrets.DOTENV_UNIT_TESTS }}" > .env

- name: Run tests
run: make test
run: make ${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

# Publish everything under test-reports/ as a workflow artifact.
# This step runs once per matrix leg (test-static and test-unit), each time
# with the same artifact name "test-results".
# NOTE(review): upload-artifact v4 rejects a second upload under an existing
# artifact name within one run - confirm the pinned action version, and
# consider suffixing the name with matrix.step if it is v4 or later.
- name: Upload artifacts
uses: actions/[email protected]
with:
name: test-results
path: test-reports/*

build-app:
name: Build & publish app
name: Build & archive app
permissions:
contents: write
packages: write
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -735,3 +735,7 @@ FodyWeavers.xsd

# Scraping cache
scraping-results/

# Tests
scraping-test/
test-reports/
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ upgrade:
curl -sSfL https://blocklistproject.github.io/Lists/alt-version/ads-nl.txt > resources/ads-nl.txt

# Aggregate target: runs the static checks (test-static) and then the unit
# tests (test-unit), each through a recursive make invocation.
test:
$(MAKE) test-static
$(MAKE) test-unit

test-static:
@echo "➡️ Test generic formatter (Black)..."
python3 -m black --check .

Expand All @@ -78,6 +82,13 @@ test:
@echo "➡️ Test types (Pyright)..."
python3 -m pyright .

# Run the unit tests in tests/*.py with pytest and write a JUnit XML report to
# test-reports/<version_full>.xml. The version_full variable is not set in this
# target; the CI workflow passes it on the make command line.
# "-n auto" is presumably the pytest-xdist parallel worker option - confirm the
# plugin is part of the installed dependencies.
test-unit:
@echo "➡️ Unit tests (Pytest)..."
pytest \
--junit-xml=test-reports/$(version_full).xml \
-n auto \
tests/*.py

dev:
python3 -m pip install --editable .
@echo "Now you can run 'scrape-it-now' CLI!"
Expand Down
2 changes: 1 addition & 1 deletion app/helpers/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def hash_url(url: str) -> str:
Hash a URL to a unique identifier.
"""
return hashlib.sha256(
url.encode(),
url.encode("utf-8"),
usedforsecurity=False,
).hexdigest()

Expand Down
2 changes: 1 addition & 1 deletion app/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ async def _process_one(
models = [
IndexedIngestModel(
content=content,
id=doc_id,
indexed_id=doc_id,
url=result.url,
vectors=embedding.embedding,
)
Expand Down
6 changes: 4 additions & 2 deletions app/models/indexed.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from pydantic import BaseModel
from pydantic import BaseModel, Field


class IndexedSearchModel(BaseModel):
content: str | None = None
id: str
indexed_id: str = Field(
validation_alias="id", # Compatibility with v1, don't override a built-in function
)
url: str


Expand Down
65 changes: 55 additions & 10 deletions app/persistence/azure_blob_storage.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator
from uuid import uuid4

from azure.core.exceptions import (
HttpResponseError,
ResourceExistsError,
ResourceNotFoundError,
ServiceRequestError,
Expand All @@ -22,6 +22,7 @@
BlobNotFoundError,
IBlob,
LeaseAlreadyExistsError,
LeaseNotFoundError,
)


Expand All @@ -42,23 +43,16 @@ def __init__(
logger.info('Azure Blob Storage "%s" is configured', config.name)
self._config = config

@retry(
reraise=True,
retry=retry_if_exception_type(ServiceRequestError), # Catch for network errors
stop=stop_after_attempt(8),
wait=wait_random_exponential(multiplier=0.8, max=60),
)
@asynccontextmanager
async def lease(
async def lease_blob(
self,
blob: str,
lease_duration: int,
) -> AsyncGenerator[str, None]:
try:
# Create the lease
async with await self._client.get_blob_client(blob).acquire_lease(
lease_duration=lease_duration,
lease_id=str(uuid4()),
lease_duration
) as lease:
# Return the lease ID
yield lease.id
Expand All @@ -68,6 +62,15 @@ async def lease(
) from e
except ResourceNotFoundError as e:
raise BlobNotFoundError(f'Blob "{blob}" not found') from e
except HttpResponseError as e:
if (
"There is currently a lease on the blob and no lease ID was specified in the request."
in e.message
):
raise LeaseAlreadyExistsError(
"Lease ID is required to overwrite a blob with an existing lease"
) from e
raise e

@retry(
reraise=True,
Expand All @@ -86,13 +89,37 @@ async def upload_blob(
try:
await self._client.upload_blob(
data=data,
encoding=self.encoding,
lease=lease_id,
length=length,
name=blob,
overwrite=overwrite,
)
except ResourceExistsError as e:
raise BlobAlreadyExistsError(f'Blob "{blob}" already exists') from e
except HttpResponseError as e:
if (
"There is currently a lease on the blob and no lease ID was specified in the request."
in e.message
):
raise LeaseAlreadyExistsError(
"Lease ID is required to overwrite a blob with an existing lease"
) from e
if "There is currently no lease on the blob." in e.message:
raise LeaseNotFoundError(f'Lease for blob "{blob}" not found') from e
if (
"The lease ID specified did not match the lease ID for the blob."
in e.message
):
raise LeaseAlreadyExistsError(
"Provided lease ID does not match the existing"
) from e
if (
"A lease ID was specified, but the lease for the blob has expired."
in e.message
):
raise LeaseNotFoundError(f'Lease for blob "{blob}" not found') from e
raise e

@retry(
reraise=True,
Expand All @@ -112,6 +139,24 @@ async def download_blob(
return await f.readall()
except ResourceNotFoundError as e:
raise BlobNotFoundError(f'Blob "{blob}" not found') from e
except HttpResponseError as e:
if (
"The requested URI does not represent any resource on the server."
in e.message
):
raise BlobNotFoundError(f'Blob "{blob}" not found') from e
raise e

# Delete the container by awaiting self._client.delete_container().
# The call is retried only on ServiceRequestError (network errors), for at most
# 8 attempts, waiting wait_random_exponential(multiplier=0.8, max=60) between
# attempts; with reraise=True the last original exception is raised once the
# attempts are exhausted. Any other exception propagates immediately.
# Returns None.
@retry(
reraise=True,
retry=retry_if_exception_type(ServiceRequestError), # Catch for network errors
stop=stop_after_attempt(8),
wait=wait_random_exponential(multiplier=0.8, max=60),
)
async def delete_container(
self,
) -> None:
await self._client.delete_container()

async def __aenter__(self) -> "AzureBlobStorage":
self._service = BlobServiceClient.from_connection_string(
Expand Down
18 changes: 17 additions & 1 deletion app/persistence/azure_queue_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ResourceNotFoundError,
ServiceRequestError,
)
from azure.storage.queue import TextBase64DecodePolicy, TextBase64EncodePolicy
from azure.storage.queue.aio import QueueClient, QueueServiceClient
from pydantic import BaseModel
from tenacity import (
Expand Down Expand Up @@ -86,11 +87,26 @@ async def delete_message(
f'Message "{message.message_id}" not found'
) from e

# Delete the queue by awaiting self._client.delete_queue().
# The call is retried only on ServiceRequestError (network errors), for at most
# 8 attempts, waiting wait_random_exponential(multiplier=0.8, max=60) between
# attempts; with reraise=True the last original exception is raised once the
# attempts are exhausted. Any other exception propagates immediately.
# Returns None.
@retry(
reraise=True,
retry=retry_if_exception_type(ServiceRequestError), # Catch for network errors
stop=stop_after_attempt(8),
wait=wait_random_exponential(multiplier=0.8, max=60),
)
async def delete_queue(
self,
) -> None:
await self._client.delete_queue()

async def __aenter__(self) -> "AzureQueueStorage":
self._service = QueueServiceClient.from_connection_string(
self._config.connection_string
)
self._client = self._service.get_queue_client(self._config.name)
self._client = self._service.get_queue_client(
message_decode_policy=TextBase64DecodePolicy(),
message_encode_policy=TextBase64EncodePolicy(),
queue=self._config.name,
)
# Create if it does not exist
try:
await self._client.create_queue()
Expand Down
12 changes: 11 additions & 1 deletion app/persistence/iblob.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ class BlobAlreadyExistsError(Exception):
pass


# Raised when an operation supplies a lease that the blob does not hold: the
# Azure implementation maps the "no lease on the blob" and "lease has expired"
# service responses to this error.
class LeaseNotFoundError(Exception):
pass


class LeaseAlreadyExistsError(Exception):
pass

Expand All @@ -25,7 +29,7 @@ class IBlob:
encoding = "utf-8"

@abstractmethod
def lease(
def lease_blob(
self,
blob: str,
lease_duration: int,
Expand All @@ -50,6 +54,12 @@ async def download_blob(
) -> str:
pass

# Abstract hook with no behaviour of its own; concrete blob backends must
# override it. Takes no arguments besides self and returns None.
# NOTE(review): presumably removes the whole backing container and its blobs -
# confirm against each implementation before calling outside of tests.
@abstractmethod
async def delete_container(
self,
) -> None:
pass

@abstractmethod
async def __aenter__(self) -> "IBlob":
pass
Expand Down
6 changes: 6 additions & 0 deletions app/persistence/iqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ async def delete_message(
) -> None:
pass

# Abstract hook with no behaviour of its own; concrete queue backends must
# override it. Takes no arguments besides self and returns None.
# NOTE(review): presumably removes the whole backing queue and its messages -
# confirm against each implementation before calling outside of tests.
@abstractmethod
async def delete_queue(
self,
) -> None:
pass

@abstractmethod
async def __aenter__(self) -> "IQueue":
pass
Expand Down
Loading

0 comments on commit f0748d4

Please sign in to comment.