Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion glmocr/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,16 @@ def process(
user_msg["content"].append(
{"type": "image_url", "image_url": {"url": data_url}}
)
per_request = self.page_loader.build_request(per_request)
# Set default parameters without calling build_request(),
# which would re-process the already-encoded image through
# load_image_to_base64 a second time.
per_request.setdefault("max_tokens", self.page_loader.max_tokens)
per_request.setdefault("temperature", self.page_loader.temperature)
per_request.setdefault("top_p", self.page_loader.top_p)
per_request.setdefault("top_k", self.page_loader.top_k)
per_request.setdefault(
"repetition_penalty", self.page_loader.repetition_penalty
)
response, status_code = self.ocr_client.process(per_request)
if status_code != 200:
raise Exception(
Expand Down
16 changes: 11 additions & 5 deletions glmocr/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,23 @@
configure_logging,
set_log_level,
)
from .visualization_utils import (
draw_layout_boxes,
save_layout_visualization,
get_colormap,
)
from .result_postprocess_utils import (
find_consecutive_repeat,
clean_repeated_content,
clean_formula_number,
)


def __getattr__(name):
    """Resolve the visualization helpers lazily on first attribute access.

    This is the module-level ``__getattr__`` hook: it is only consulted when
    normal lookup of ``name`` on this package fails. The visualization
    symbols need opencv-python, so their submodule is imported on demand
    instead of at package import time.

    Args:
        name: Attribute name being looked up on this package.

    Returns:
        The attribute of the same name from ``visualization_utils``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported
            visualization symbols.
    """
    lazy_viz_symbols = frozenset(
        ("draw_layout_boxes", "save_layout_visualization", "get_colormap")
    )

    # Guard clause: anything outside the lazy set is a genuine missing attribute.
    if name not in lazy_viz_symbols:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred import keeps opencv-python optional for callers that never
    # touch the visualization helpers.
    from . import visualization_utils as viz_module

    return getattr(viz_module, name)


__all__ = [
"smart_resize",
"load_image_to_base64",
Expand Down
2 changes: 1 addition & 1 deletion glmocr/utils/image_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Image processing utilities."""

import io
import cv2
import base64
import math
from io import BytesIO
Expand Down Expand Up @@ -191,6 +190,7 @@ def crop_image_region(image, bbox_2d, polygon=None, fill_color=255):
Returns:
PIL.Image.Image: Cropped region with optional polygon mask applied
"""
import cv2
image_width, image_height = image.size

# De-normalize bbox to pixel coordinates
Expand Down
28 changes: 5 additions & 23 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ authors = [
{name = "ZHIPUAI", email = "info@zhipuai.cn"}
]
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"
license = {text = "Apache-2.0"}
keywords = ["ocr", "glm", "ai", "vision"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]

dependencies = [
Expand All @@ -34,19 +34,8 @@ dependencies = [
"portalocker>=2.8.2",
"python-dotenv>=0.21.0",

# Layout detection
"torch>=2.0.0",
"torchvision>=0.15.0",
"transformers>=5.1.0",
"sentencepiece>=0.1.99",
"accelerate>=0.20.0",
"opencv-python>=4.8.0",

# PDF support
"pypdfium2>=5.3.0",

# Flask server
"flask>=2.3.0",
]

[project.optional-dependencies]
Expand All @@ -68,14 +57,7 @@ server = [
]

all = [
"torch>=2.0.0",
"torchvision>=0.15.0",
"transformers>=4.30.0",
"sentencepiece>=0.1.99",
"accelerate>=0.20.0",
"opencv-python>=4.8.0",
"pdf2image>=1.16.0",
"flask>=2.3.0",
"glmocr[layout,server]",
]
dev = [
"pytest>=7.0.0",
Expand All @@ -100,7 +82,7 @@ include = ["glmocr*"]

[tool.black]
line-length = 100
target-version = ["py38", "py39", "py310", "py311"]
target-version = ["py310", "py311", "py312", "py313"]

[tool.pytest.ini_options]
testpaths = ["glmocr/tests"]
Expand Down