Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/setup-root-poetry-environment/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ runs:
- if: steps.cached-poetry.outputs.cache-hit != 'true'
run: |
poetry lock --no-update
poetry install --no-interaction
poetry install --no-interaction --with pdf
shell: bash

- name: Clear lint cache
Expand Down
23 changes: 23 additions & 0 deletions encord_agents/core/pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from pathlib import Path

import pymupdf


def extract_page(
    pdf_path: Path,
    page_number: int,
) -> Path:
    """Render a single page of a PDF to a PNG file next to the PDF.

    Args:
        pdf_path: Location of the PDF document on disk.
        page_number: 0-based index of the page to render.

    Returns:
        Path of the written PNG. The file is placed in the same directory as
        ``pdf_path`` and named ``<pdf stem>_<page_number>.png``.
    """
    # Build the complete file name in one step. Appending "_<page>" to the
    # full name and then calling ``with_suffix(".png")`` treats ".pdf_<page>"
    # as the suffix and replaces it, which silently drops the page number and
    # makes every page of a document resolve to the same "<stem>.png".
    target_file_path = pdf_path.with_name(f"{pdf_path.stem}_{page_number}.png")

    # Open the PDF
    doc = pymupdf.open(pdf_path)
    try:
        # Get the specified page
        page = doc[page_number]  # 0-based index
        # Render page to an image (pixmap)
        pix = page.get_pixmap()
        # NOTE(review): the pixmap reportedly becomes unusable once the
        # document is closed, hence it is written to disk before closing.
        # TODO(review): a reviewer suggested converting the pixmap to an
        # in-memory PIL image instead (with Pillow as an additional optional
        # dependency) so nothing has to be stored on disk - confirm and follow up.
        pix.save(target_file_path)
    finally:
        # Close the document even when indexing, rendering or saving fails.
        doc.close()
    return target_file_path
12 changes: 12 additions & 0 deletions encord_agents/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from encord_agents import __version__
from encord_agents.core.data_model import FrameData, LabelRowInitialiseLabelsArgs, LabelRowMetadataIncludeArgs
from encord_agents.core.settings import Settings
from encord_agents.exceptions import PrintableError

from .video import get_frame

Expand Down Expand Up @@ -204,6 +205,17 @@ def download_asset(storage_item: StorageItem, frame: int | None = None) -> Gener
cv2.imwrite(frame_file.as_posix(), frame_content)
file_path = frame_file

if storage_item.item_type == StorageItemType.PDF and frame is not None:
try:
from encord_agents.core.pdf import extract_page

page_file_path = extract_page(file_path, frame)
except ImportError as e:
raise PrintableError(
"Trying to access a crop from a pdf. Please install encord-agents[pdf] to access pdf support"
) from e
file_path = page_file_path

yield file_path


Expand Down
4 changes: 2 additions & 2 deletions encord_agents/gcp/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def (
return get_user_client()


def dep_single_frame(storage_item: StorageItem) -> NDArray[np.uint8]:
def dep_single_frame(storage_item: StorageItem, frame_data: FrameData) -> NDArray[np.uint8]:
"""
Dependency to inject the first frame of the underlying asset.

Expand Down Expand Up @@ -103,7 +103,7 @@ def my_agent(
Numpy array of shape [h, w, 3] RGB colors.

"""
with download_asset(storage_item, frame=0) as asset:
with download_asset(storage_item, frame=frame_data.frame) as asset:
img = cv2.cvtColor(cv2.imread(asset.as_posix()), cv2.COLOR_BGR2RGB)

return np.asarray(img, dtype=np.uint8)
Expand Down
19 changes: 18 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ notebook = "^7.3.2"
mkdocs-macros-plugin = "^1.3.7"
fastapi = "^0.115.0"

[tool.poetry.group.pdf]
optional = true

[tool.poetry.group.pdf.dependencies]
PyMuPdf = ">1.25.0"

[tool.poetry.scripts]
encord-agents = "encord_agents.cli.main:app"
Expand Down
107 changes: 101 additions & 6 deletions tests/integration_tests/fastapi/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,18 @@
from encord.storage import StorageItem
from encord.user_client import EncordUserClient

from encord_agents.core.data_model import FrameData, LabelRowInitialiseLabelsArgs, LabelRowMetadataIncludeArgs
from encord_agents.core.data_model import (
FrameData,
InstanceCrop,
LabelRowInitialiseLabelsArgs,
LabelRowMetadataIncludeArgs,
)
from encord_agents.fastapi.cors import EncordCORSMiddleware, authorization_error_exception_handler, get_encord_app
from encord_agents.fastapi.dependencies import (
dep_client,
dep_label_row,
dep_label_row_with_args,
dep_object_crops,
dep_objects,
dep_project,
dep_storage_item,
Expand All @@ -32,16 +38,21 @@
exit()


VIDEO_BOX_1_SIZE = 0.5
VIDEO_BOX_2_SIZE = 0.6


class SharedResolutionContext(NamedTuple):
    """Resources prepared once by the ``context`` fixture and shared by the app and the tests."""

    # Project that owns the label rows below.
    project: Project
    # Label row of a video data unit; the fixture adds one bounding box on frame 0 and one on frame 1.
    video_label_row: LabelRowV2
    # Label row of a PDF data unit; the fixture adds one bounding box on frame 0.
    pdf_label_row: LabelRowV2
    # Object hash of the bounding-box instance placed on frame 0 of the video label row.
    object_hash: str


def build_app(context: SharedResolutionContext) -> FastAPI:
project = context.project
video_label_row = context.video_label_row
object_hash = context.object_hash
video_object_hash = context.object_hash
app = FastAPI()
app.add_middleware(EncordCORSMiddleware)
app.exception_handlers[AuthorisationError] = authorization_error_exception_handler
Expand Down Expand Up @@ -102,11 +113,46 @@ def post_frame_data_with_object_hash(
frame_data: FrameData, object_instances: Annotated[list[ObjectInstance], Depends(dep_objects)]
) -> None:
assert frame_data
assert frame_data.object_hashes == [object_hash]
assert frame_data.object_hashes == [video_object_hash]
assert len(object_instances) == 1
assert object_instances[0].object_hash == object_hash
assert object_instances[0].object_hash == video_object_hash
assert isinstance(object_instances[0], ObjectInstance)

@app.post("/object-instance-crops-video")
def post_object_instance_crops_video(
    frame_data: FrameData,
    crops: Annotated[list[InstanceCrop], Depends(dep_object_crops())],
) -> None:
    # Endpoint used by the video crop test: exactly one crop must be injected
    # for the requested frame, and it must belong to the instance that was
    # placed on that frame (frame 0 -> first box, any other frame -> second box).
    # Documented with comments on purpose: a docstring would be published as
    # the endpoint description in the generated OpenAPI schema.
    assert crops
    assert len(crops) == 1
    crop = crops[0]
    on_first_frame = frame_data.frame == 0

    if on_first_frame:
        assert crop.frame == 0
        assert crop.instance.object_hash == video_object_hash
    else:
        assert crop.frame == 1
        assert crop.instance.object_hash != video_object_hash

    # The boxes start at the top-left corner, so the crop shape is the frame
    # size scaled by the relative box size of the instance on that frame.
    box_size = VIDEO_BOX_1_SIZE if on_first_frame else VIDEO_BOX_2_SIZE
    assert video_label_row.height is not None and video_label_row.width is not None
    expected_shape = (video_label_row.height * box_size, video_label_row.width * box_size, 3)
    assert crop.content.shape == expected_shape

@app.post("/object-instance-crops-pdf")
def post_object_instance_crops_pdf(
    frame_data: FrameData,
    crops: Annotated[
        list[InstanceCrop],
        Depends(dep_object_crops()),
    ],
) -> None:
    # Endpoint used by the PDF crop test: exactly one crop, taken from page 0,
    # must be injected. Documented with comments on purpose: a docstring would
    # be published as the endpoint description in the generated OpenAPI schema.
    assert crops
    assert len(crops) == 1
    crop = crops[0]
    assert crop.frame == 0
    # Hard-coded shape: Depends on object crop size and PDF file
    assert crop.content.shape == (554, 428, 3)

return app


Expand All @@ -118,14 +164,32 @@ def context(user_client: EncordUserClient, class_level_ephemeral_project_hash: s
video_label_row = next(
row for row in label_rows if row.data_type == DataType.VIDEO
) # Pick a video such that frame obviously makes sense
pdf_label_row = next(row for row in label_rows if row.data_type == DataType.PDF)
video_label_row.initialise_labels()
pdf_label_row.initialise_labels()
bbox_object = project.ontology_structure.get_child_by_hash(BBOX_ONTOLOGY_HASH, type_=Object)
pdf_obj_instance = bbox_object.create_instance()
pdf_obj_instance.set_for_frames(
BoundingBoxCoordinates(height=0.7, width=0.7, top_left_x=0, top_left_y=0), frames=[0]
)
pdf_label_row.add_object_instance(pdf_obj_instance)
obj_instance = bbox_object.create_instance()
obj_instance.set_for_frames(BoundingBoxCoordinates(height=0.5, width=0.5, top_left_x=0, top_left_y=0))
obj_instance.set_for_frames(
BoundingBoxCoordinates(height=VIDEO_BOX_1_SIZE, width=VIDEO_BOX_1_SIZE, top_left_x=0, top_left_y=0), frames=[0]
)
obj_instance_frame_2 = bbox_object.create_instance()
obj_instance_frame_2.set_for_frames(
BoundingBoxCoordinates(height=VIDEO_BOX_2_SIZE, width=VIDEO_BOX_2_SIZE, top_left_x=0, top_left_y=0), frames=[1]
)
video_label_row.add_object_instance(obj_instance)
video_label_row.add_object_instance(obj_instance_frame_2)
video_label_row.save()
pdf_label_row.save()
return SharedResolutionContext(
project=project, video_label_row=video_label_row, object_hash=obj_instance.object_hash
project=project,
video_label_row=video_label_row,
pdf_label_row=pdf_label_row,
object_hash=obj_instance.object_hash,
)


Expand Down Expand Up @@ -210,6 +274,37 @@ def test_httpError_raised_appropriately(self, router_path: str) -> None:
assert json_resp
assert json_resp["message"]

def test_object_instance_crops_video(self) -> None:
    """Request crops for frame 0 and then frame 1 of the video; both calls must return 200."""
    for frame_index in (0, 1):
        response = self.client.post(
            "/object-instance-crops-video",
            json={
                "projectHash": self.context.project.project_hash,
                "dataHash": self.context.video_label_row.data_hash,
                "frame": frame_index,
            },
        )
        # The endpoint performs the detailed checks itself; a failing assert
        # there surfaces here as a non-200 response.
        assert response.status_code == 200, response.content

def test_object_instance_crops_pdf(self) -> None:
    """Request crops for page 0 of the PDF; the call must return 200."""
    payload = {
        "projectHash": self.context.project.project_hash,
        "dataHash": self.context.pdf_label_row.data_hash,
        "frame": 0,
    }
    response = self.client.post("/object-instance-crops-pdf", json=payload)
    assert response.status_code == 200, response.content


class TestCustomCorsRegex:
def test_custom_cors_regex(self) -> None:
Expand Down