# speciesnet/display.py

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Visualization utilities."""

# Names exported by `from <this module> import *`; only the drawing entry point is
# public.
__all__ = [
    "draw_bboxes",
]

from matplotlib import font_manager
import PIL
import PIL.Image
import PIL.ImageColor
import PIL.ImageDraw
import PIL.ImageFile
import PIL.ImageFont

from speciesnet.constants import Detection
from speciesnet.utils import BBox

# Drawing color for each detection type, keyed by the detection label (the value of
# the corresponding `Detection` enum member). The color names are resolved with
# `PIL.ImageColor.getrgb` when the boxes are drawn.
DETECTIONS_COLOR_MAP = {
    detection_type.value: color_name
    for detection_type, color_name in (
        (Detection.ANIMAL, "red"),
        (Detection.HUMAN, "blue"),
        (Detection.VEHICLE, "magenta"),
    )
}


def draw_bboxes(img: PIL.Image.Image, detections: list[dict]) -> PIL.Image.Image:
    """Draws bounding boxes on a given image.

    The detections are assumed to be in decreasing order of their confidence score. They
    are drawn from the least confident to the most confident, to make sure that the
    latter is always on top. The opacity of each bounding box is proportional to its
    confidence score, so the least confident boxes may sometimes be barely visible,
    indicating that low confidence.

    Every box is tagged with a "<label>: <score>" caption. The caption normally sits
    right above the top-left corner of the box. When there is not enough room above
    the box (the caption would extend past the top edge of the image), it is drawn just
    inside the top-left corner of the box instead, so that it remains visible.

    Args:
        img:
            PIL image to draw bounding boxes on.
        detections:
            List of detections sorted in decreasing order of their confidence score.
            Each detection is a dict providing:
              * "label": detection type, used as a key into `DETECTIONS_COLOR_MAP`.
              * "conf": confidence score, scaled by 255 to obtain the opacity.
              * "bbox": values unpacked into a `BBox`, whose `xmin`, `ymin`, `width`
                and `height` are multiplied by the image size (i.e. they are treated
                as coordinates relative to the image width / height).

    Returns:
        A new PIL image (in RGBA mode) generated by overlaying detection bounding boxes
        on top of the original PIL image.

    Raises:
        KeyError:
            If a detection misses one of the keys above, or if its label has no entry
            in `DETECTIONS_COLOR_MAP`.
    """

    font_prop = font_manager.FontProperties(family="sans serif", weight="bold")
    font = PIL.ImageFont.truetype(font_manager.findfont(font_prop), size=12)
    border_size = 3

    # All drawing happens on a fully transparent overlay, which is composited over the
    # image at the very end. This is what makes per-detection opacity possible.
    img = img.convert("RGBA")
    overlay = PIL.Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw = PIL.ImageDraw.Draw(overlay)

    # Draw bounding boxes in reversed order, from the least confident to the most
    # confident, to make sure that the latter is always on top.
    for detection in reversed(detections):
        label = detection["label"]
        score = detection["conf"]
        bbox = BBox(*detection["bbox"])

        # Convert the relative bbox into absolute pixel coordinates.
        x0 = bbox.xmin * img.width
        y0 = bbox.ymin * img.height
        x1 = (bbox.xmin + bbox.width) * img.width
        y1 = (bbox.ymin + bbox.height) * img.height

        rgb = PIL.ImageColor.getrgb(DETECTIONS_COLOR_MAP[label])
        # Clamp so that an out-of-range score still yields a valid 8-bit alpha value.
        alpha = max(0, min(255, int(score * 255)))
        color = (*rgb[:3], alpha)
        text_color = (255, 255, 255, alpha)

        # Extent of the caption relative to its left-bottom anchor, as
        # (left, top, right, bottom).
        text = f"{label}: {score:.2f}"
        text_rel_xy = font.getbbox(text, anchor="lb")

        # Vertical anchor of the caption. By default the caption sits on top of the
        # box. If its background would then start above the image, push it down so
        # that the background starts at the top edge of the box (or of the image, if
        # the box itself starts above it).
        caption_y = y0
        if text_rel_xy[1] + caption_y - 2 * border_size < 0:
            caption_y = max(y0, 0) - text_rel_xy[1] + 2 * border_size

        # Caption background: the text extent padded by `border_size` on every side.
        text_bg_xy = (
            text_rel_xy[0] + x0,
            text_rel_xy[1] + caption_y - 2 * border_size,
            text_rel_xy[2] + x0 + 2 * border_size,
            text_rel_xy[3] + caption_y,
        )

        draw.rectangle((x0, y0, x1, y1), outline=color, width=border_size)
        draw.rectangle(text_bg_xy, fill=color)
        draw.text(
            (x0 + border_size, caption_y - border_size),
            text,
            fill=text_color,
            font=font,
            anchor="lb",
        )

    return PIL.Image.alpha_composite(img, overlay)