diff --git a/README.md b/README.md
index 7635611..2ea7807 100644
--- a/README.md
+++ b/README.md
@@ -222,6 +222,11 @@ with GlmOcr() as parser:
     print(result.json_result)
     result.save()
 
+# Extract printed page numbers from PP-DocLayoutV3 `number` regions
+with GlmOcr(detect_printed_page_numbers=True) as parser:
+    result = parser.parse("document.pdf")
+    print(result.to_dict().get("page_metadata", []))
+
 # Place layout model on CPU (useful when GPU is reserved for OCR)
 with GlmOcr(layout_device="cpu") as parser:
     result = parser.parse("image.png")
@@ -302,6 +307,7 @@ pipeline:
   # Result formatting
   result_formatter:
     output_format: both # json, markdown, or both
+    detect_printed_page_numbers: false
 
   # Layout model device placement
   layout:
@@ -310,6 +316,23 @@ pipeline:
 
 See [config.yaml](glmocr/config.yaml) for all options.
 
+Printed page number detection can be enabled in three ways:
+
+```python
+with GlmOcr(detect_printed_page_numbers=True) as parser:
+    result = parser.parse("document.pdf")
+```
+
+```powershell
+$env:GLMOCR_DETECT_PRINTED_PAGE_NUMBERS = 'true'
+```
+
+```yaml
+pipeline:
+  result_formatter:
+    detect_printed_page_numbers: true
+```
+
 ### Output Formats
 
 Here are two examples of output formats:
@@ -320,6 +343,43 @@ Here are two examples of output formats:
 [[{ "index": 0, "label": "text", "content": "...", "bbox_2d": null }]]
 ```
 
+When printed page number detection is enabled and printed-page data is actually
+found, the saved `paper.json` is written as a top-level object containing:
+
+```json
+{
+  "json_result": [[{ "index": 0, "label": "text", "content": "...", "bbox_2d": null }]],
+  "page_number_candidates": [
+    {
+      "page_index": 1,
+      "label": "number",
+      "content": "22",
+      "layout_index": 0,
+      "bbox_2d": [92, 26, 120, 41],
+      "layout_score": 0.77,
+      "numeric_like": true,
+      "roman_like": false
+    }
+  ],
+  "document_page_numbering": {
+    "strategy": "visual_sequence",
+    "confidence": 1.0,
+    "sequence_type": "arabic",
+    "page_offset": 21,
+    "candidate_pages": 4
+  },
+  "page_metadata": [
+    {
+      "page_index": 1,
+      "printed_page_label": "22",
+      "printed_page_block_index": 0,
+      "printed_page_bbox_2d": [92, 26, 120, 41],
+      "printed_page_confidence": 0.77
+    }
+  ]
+}
+```
+
 - Markdown
 
 ```markdown
diff --git a/README_zh.md b/README_zh.md
index 8a2f3fe..71504f0 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -214,6 +214,11 @@ with GlmOcr() as parser:
     result = parser.parse("image.png")
     print(result.json_result)
     result.save()
+
+# 从 PP-DocLayoutV3 的 `number` 区域提取印刷页码
+with GlmOcr(detect_printed_page_numbers=True) as parser:
+    result = parser.parse("document.pdf")
+    print(result.to_dict().get("page_metadata", []))
 ```
 
 #### Flask 服务
@@ -287,10 +292,28 @@ pipeline:
   # Result formatting
   result_formatter:
     output_format: both # json, markdown, or both
+    detect_printed_page_numbers: false
 ```
 
 更多选项请参考 [config.yaml](glmocr/config.yaml)。
 
+印刷页码检测支持以下三种启用方式：
+
+```python
+with GlmOcr(detect_printed_page_numbers=True) as parser:
+    result = parser.parse("document.pdf")
+```
+
+```powershell
+$env:GLMOCR_DETECT_PRINTED_PAGE_NUMBERS = 'true'
+```
+
+```yaml
+pipeline:
+  result_formatter:
+    detect_printed_page_numbers: true
+```
+
 ### 输出格式
 
 这里给出两种输出格式示例：
@@ -301,6 +324,42 @@ pipeline:
 [[{ "index": 0, "label": "text", "content": "...", "bbox_2d": null }]]
 ```
 
+启用印刷页码检测且实际检测到印刷页码数据时，保存的 `paper.json` 会变成顶层对象，并包含：
+
+```json
+{
+  "json_result": [[{ "index": 0, "label": "text", "content": "...", "bbox_2d": null }]],
+  "page_number_candidates": [
+    {
+      "page_index": 1,
+      "label": "number",
+      "content": "22",
+      "layout_index": 0,
+      "bbox_2d": [92, 26, 120, 41],
+      "layout_score": 0.77,
+      "numeric_like": true,
+      "roman_like": false
+    }
+  ],
+  "document_page_numbering": {
+    "strategy": "visual_sequence",
+    "confidence": 1.0,
+    "sequence_type": "arabic",
+    "page_offset": 21,
+    "candidate_pages": 4
+  },
+  "page_metadata": [
+    {
+      "page_index": 1,
+      "printed_page_label": "22",
+      "printed_page_block_index": 0,
+      "printed_page_bbox_2d": [92, 26, 120, 41],
+      "printed_page_confidence": 0.77
+    }
+  ]
+}
+```
+
 - Markdown
 
 ```markdown
diff --git a/glmocr/api.py b/glmocr/api.py
index dcf5570..ffb218a 100644
--- a/glmocr/api.py
+++ b/glmocr/api.py
@@ -84,6 +84,7 @@ def __init__(
         ocr_api_port: Optional[int] = None,
         cuda_visible_devices: Optional[str] = None,
         layout_device: Optional[str] = None,
+        detect_printed_page_numbers: Optional[bool] = None,
         **kwargs: Any,
     ):
         """Initialize GlmOcr.
@@ -130,6 +131,7 @@ def __init__(
             ocr_api_port=ocr_api_port,
             cuda_visible_devices=cuda_visible_devices,
             layout_device=layout_device,
+            detect_printed_page_numbers=detect_printed_page_numbers,
             **kwargs,
         )
         # Apply logging config for API/SDK usage.
@@ -441,8 +443,11 @@ def _maas_response_to_pipeline_result(
                     {
                         "index": region.get("index", 0),
                         "label": region.get("label", "text"),
+                        "native_label": region.get("label", "text"),
                         "content": region.get("content", ""),
                         "bbox_2d": bbox,
+                        "layout_index": region.get("index", 0),
+                        "layout_score": float(region.get("score") or 0.0),
                     }
                 )
             json_result.append(page_result)
@@ -460,12 +465,32 @@ def _maas_response_to_pipeline_result(
             source,
         )
 
+        page_metadata = None
+        page_number_candidates = None
+        document_page_numbering = None
+        if self.config_model.pipeline.result_formatter.detect_printed_page_numbers:
+            from glmocr.postprocess import ResultFormatter
+
+            formatter = ResultFormatter(self.config_model.pipeline.result_formatter)
+            (
+                page_number_candidates,
+                document_page_numbering,
+                page_metadata,
+            ) = formatter.extract_printed_page_data(json_result)
+
+        from glmocr.postprocess import ResultFormatter
+
+        ResultFormatter._strip_layout_metadata(json_result)
+
         # Create PipelineResult
         result = PipelineResult(
             json_result=json_result,
             markdown_result=markdown_result,
             original_images=[source],
             image_files=image_files or None,
+            page_metadata=page_metadata,
+            page_number_candidates=page_number_candidates,
+            document_page_numbering=document_page_numbering,
         )
 
         # Store additional MaaS response data
diff --git a/glmocr/config.py b/glmocr/config.py
index 789e206..e348db5 100644
--- a/glmocr/config.py
+++ b/glmocr/config.py
@@ -48,6 +48,8 @@ def _find_dotenv(start: Optional[Path] = None) -> Optional[Path]:
     "LAYOUT_CUDA_VISIBLE_DEVICES": "pipeline.layout.cuda_visible_devices",
     # Explicit device for layout model: "cpu", "cuda", "cuda:0", etc.
     "LAYOUT_DEVICE": "pipeline.layout.device",
+    # Result formatter
+    "DETECT_PRINTED_PAGE_NUMBERS": "pipeline.result_formatter.detect_printed_page_numbers",
     # Logging
     "LOG_LEVEL": "logging.level",
 }
@@ -175,6 +177,7 @@ class ResultFormatterConfig(_BaseConfig):
     enable_merge_formula_numbers: bool = True
     enable_merge_text_blocks: bool = True
     enable_format_bullet_points: bool = True
+    detect_printed_page_numbers: bool = False
     label_visualization_mapping: Dict[str, Any] = Field(default_factory=dict)
 
 
@@ -260,6 +263,8 @@ def _coerce_env_value(dotted_path: str, raw: str) -> Any:
     # Boolean fields
     if dotted_path == "pipeline.maas.enabled":
         return raw.strip().lower() in ("maas", "true", "1", "yes")
+    if dotted_path == "pipeline.result_formatter.detect_printed_page_numbers":
+        return raw.strip().lower() in ("true", "1", "yes", "on")
     # Integer fields
     if dotted_path.endswith((".api_port", ".request_timeout", ".connect_timeout")):
         return int(raw)
@@ -429,6 +434,7 @@ def from_env(
             "mode": "pipeline.maas.enabled",
             "timeout": "pipeline.maas.request_timeout",
             "log_level": "logging.level",
+            "detect_printed_page_numbers": "pipeline.result_formatter.detect_printed_page_numbers",
             # Self-hosted OCR API
             "ocr_api_host": "pipeline.ocr_api.api_host",
             "ocr_api_port": "pipeline.ocr_api.api_port",
diff --git a/glmocr/config.yaml b/glmocr/config.yaml
index 8c287fe..5679570 100644
--- a/glmocr/config.yaml
+++ b/glmocr/config.yaml
@@ -164,6 +164,7 @@ pipeline:
         - content
         - doc_title
         - figure_title
+        - number
         - paragraph_title
         - reference_content
         - text
@@ -256,6 +257,7 @@ pipeline:
         - content
         - doc_title
         - figure_title
+        - number
         - paragraph_title
         - reference_content
         - text
@@ -274,7 +276,6 @@ pipeline:
       abandon:
         - header
         - footer
-        - number
         - footnote
         - aside_text
         - reference
diff --git a/glmocr/parser_result/base.py b/glmocr/parser_result/base.py
index 0b996c2..ae8963c 100644
--- a/glmocr/parser_result/base.py
+++ b/glmocr/parser_result/base.py
@@ -30,6 +30,9 @@ def __init__(
         original_images: Optional[List[str]] = None,
         image_files: Optional[Dict[str, Any]] = None,
         raw_json_result: Optional[list] = None,
+        page_metadata: Optional[List[Dict[str, Any]]] = None,
+        page_number_candidates: Optional[List[Dict[str, Any]]] = None,
+        document_page_numbering: Optional[Dict[str, Any]] = None,
     ):
         """Initialize.
 
@@ -41,6 +44,9 @@ def __init__(
                 regions, to be saved under ``imgs/`` during :meth:`save`.
             raw_json_result: Raw model output before post-processing;
                 saved as ``{name}_model.json`` alongside the final result.
+            page_metadata: Derived per-page printed page metadata.
+            page_number_candidates: Raw printed page-number candidates.
+            document_page_numbering: Document-level numbering inference.
         """
         if isinstance(json_result, str):
             try:
@@ -56,6 +62,9 @@ def __init__(
         ]
         self.image_files = image_files
         self.raw_json_result = raw_json_result
+        self.page_metadata = page_metadata
+        self.page_number_candidates = page_number_candidates
+        self.document_page_numbering = document_page_numbering
 
     @abstractmethod
     def save(
@@ -88,6 +97,27 @@ def _save_json_and_markdown(self, output_dir: Union[str, Path]) -> None:
                     json_data = json.loads(json_data)
                 except json.JSONDecodeError:
                     pass
+
+            has_printed_page_data = (
+                bool(self.page_metadata)
+                or bool(self.page_number_candidates)
+                or self.document_page_numbering is not None
+            )
+
+            if has_printed_page_data:
+                json_data = {
+                    "json_result": json_data,
+                    "page_metadata": (
+                        self.page_metadata if self.page_metadata is not None else []
+                    ),
+                    "page_number_candidates": (
+                        self.page_number_candidates
+                        if self.page_number_candidates is not None
+                        else []
+                    ),
+                    "document_page_numbering": self.document_page_numbering,
+                }
+
             with open(json_file, "w", encoding="utf-8") as f:
                 if isinstance(json_data, (dict, list)):
                     json.dump(json_data, f, ensure_ascii=False, indent=2)
@@ -134,6 +164,12 @@ def to_dict(self) -> dict:
             "markdown_result": self.markdown_result or "",
             "original_images": self.original_images,
         }
+        if self.page_metadata is not None:
+            d["page_metadata"] = self.page_metadata
+        if self.page_number_candidates is not None:
+            d["page_number_candidates"] = self.page_number_candidates
+        if self.document_page_numbering is not None:
+            d["document_page_numbering"] = self.document_page_numbering
         # Include optional metadata set by MaaS mode.
         for attr in ("_usage", "_data_info", "_error"):
             val = getattr(self, attr, None)
diff --git a/glmocr/parser_result/pipeline_result.py b/glmocr/parser_result/pipeline_result.py
index 800084c..7a9e9d9 100644
--- a/glmocr/parser_result/pipeline_result.py
+++ b/glmocr/parser_result/pipeline_result.py
@@ -26,6 +26,9 @@ def __init__(
         image_files: Optional[dict] = None,
         raw_json_result: Optional[list] = None,
         layout_vis_images: Optional[Dict[int, Any]] = None,
+        page_metadata: Optional[List[Dict[str, Any]]] = None,
+        page_number_candidates: Optional[List[Dict[str, Any]]] = None,
+        document_page_numbering: Optional[Dict[str, Any]] = None,
     ):
         """Initialize.
 
@@ -38,6 +41,9 @@ def __init__(
             raw_json_result: Raw model output before post-processing (optional).
             layout_vis_images: Mapping of ``page_idx`` → PIL Image for layout
                 visualization; saved to ``layout_vis/`` during :meth:`save`.
+            page_metadata: Derived per-page printed page metadata.
+            page_number_candidates: Raw printed page-number candidates.
+            document_page_numbering: Document-level numbering inference.
         """
         super().__init__(
             json_result=json_result,
@@ -45,6 +51,9 @@ def __init__(
             original_images=original_images,
             image_files=image_files,
             raw_json_result=raw_json_result,
+            page_metadata=page_metadata,
+            page_number_candidates=page_number_candidates,
+            document_page_numbering=document_page_numbering,
         )
         self.layout_vis_images = layout_vis_images
 
diff --git a/glmocr/pipeline/pipeline.py b/glmocr/pipeline/pipeline.py
index 699f48d..b1c48ac 100644
--- a/glmocr/pipeline/pipeline.py
+++ b/glmocr/pipeline/pipeline.py
@@ -362,6 +362,9 @@ def _emit_results(
                 grouped,
                 cropped_images=cropped_images or None,
             )
+            page_metadata = self.result_formatter.page_metadata
+            page_number_candidates = self.result_formatter.page_number_candidates
+            document_page_numbering = self.result_formatter.document_page_numbering
 
             vis_images = {}
             for pi in page_indices:
@@ -378,6 +381,9 @@ def _emit_results(
                 image_files=image_files or None,
                 raw_json_result=raw_json,
                 layout_vis_images=vis_images or None,
+                page_metadata=page_metadata,
+                page_number_candidates=page_number_candidates,
+                document_page_numbering=document_page_numbering,
             )
             built.add(u)
             if preserve_order:
diff --git a/glmocr/postprocess/result_formatter.py b/glmocr/postprocess/result_formatter.py
index 8b31d5d..764ab27 100644
--- a/glmocr/postprocess/result_formatter.py
+++ b/glmocr/postprocess/result_formatter.py
@@ -12,10 +12,11 @@
 
 from __future__ import annotations
 
+import collections
 import re
 import json
 from copy import deepcopy
-from typing import TYPE_CHECKING, List, Dict, Tuple, Any
+from typing import TYPE_CHECKING, List, Dict, Tuple, Any, Optional
 
 try:  # Optional dependency for better English word validation quality.
     from wordfreq import zipf_frequency
@@ -70,6 +71,10 @@ def __init__(self, config: "ResultFormatterConfig"):
         self.enable_merge_formula_numbers = config.enable_merge_formula_numbers
         self.enable_merge_text_blocks = config.enable_merge_text_blocks
         self.enable_format_bullet_points = config.enable_format_bullet_points
+        self.detect_printed_page_numbers = config.detect_printed_page_numbers
+        self.page_metadata: Optional[List[Dict[str, Any]]] = None
+        self.page_number_candidates: Optional[List[Dict[str, Any]]] = None
+        self.document_page_numbering: Optional[Dict[str, Any]] = None
 
     # =========================================================================
     # OCR-only mode
@@ -160,6 +165,10 @@ def process(
             (json_str, markdown_str, image_files) where *image_files* maps
             ``filename`` → PIL Image for the caller to persist.
         """
+        self.page_metadata = None
+        self.page_number_candidates = None
+        self.document_page_numbering = None
+
         json_final_results = []
 
         with profiler.measure("format_regions"):
@@ -173,6 +182,12 @@ def process(
 
                 for item in sorted_results:
                     result = deepcopy(item)
+                    result["layout_index"] = result.get(
+                        "layout_index", result.get("index", 0)
+                    )
+                    result["layout_score"] = float(
+                        result.get("layout_score", result.get("score") or 0.0)
+                    )
                     result["native_label"] = result.get("label", "text")
 
                     # Map labels
@@ -215,6 +230,15 @@ def process(
 
                 json_final_results.append(json_page_results)
 
+        if self.detect_printed_page_numbers:
+            (
+                self.page_number_candidates,
+                self.document_page_numbering,
+                self.page_metadata,
+            ) = self.extract_printed_page_data(json_final_results)
+
+        self._strip_layout_metadata(json_final_results)
+
         # Generate markdown results and resolve image regions
         image_files: Dict[str, Any] = {}
         image_counter = 0
@@ -251,6 +275,190 @@ def process(
 
         return json_str, markdown_str, image_files
 
+    def extract_printed_page_data(
+        self, pages: List[List[Dict[str, Any]]]
+    ) -> Tuple[List[Dict[str, Any]], Optional[Dict[str, Any]], List[Dict[str, Any]]]:
+        """Collect page-number candidates and everything derived from them.
+
+        Returns ``(candidates, document_page_numbering, page_metadata)``.
+        """
+        found = self._extract_page_number_candidates(pages)
+        return (
+            found,
+            self._infer_document_page_numbering(found),
+            self._build_printed_page_metadata(found),
+        )
+
+    def _extract_page_number_candidates(
+        self, pages: List[List[Dict[str, Any]]]
+    ) -> List[Dict[str, Any]]:
+        """Collect accepted ``number`` candidates in page, then block, order."""
+        # ``page_index`` is the zero-based position of the page within *pages*;
+        # blocks for which the builder returns None are left out.
+        built = (
+            self._build_page_number_candidate(page_no, blk)
+            for page_no, page_blocks in enumerate(pages)
+            for blk in page_blocks
+        )
+        return [cand for cand in built if cand is not None]
+
+    def _build_page_number_candidate(
+        self, page_index: int, block: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Turn one layout block into a page-number candidate, or reject it.
+
+        A block qualifies only if its ``native_label`` is ``number``, its
+        ``bbox_2d`` is a four-item list, its text normalizes to a folio label,
+        its box is non-empty and at most 140 wide by 120 tall, and it sits in
+        a page margin. Any other block gives ``None``.
+        """
+        box = block.get("bbox_2d")
+        has_box = isinstance(box, list) and len(box) == 4
+        if block.get("native_label") != "number" or not has_box:
+            return None
+
+        folio = self._normalize_printed_page_label(block.get("content"))
+        if folio is None:
+            return None
+        left, top, right, bottom = box
+        box_w, box_h = right - left, bottom - top
+        if box_w <= 0 or box_h <= 0 or box_w > 140 or box_h > 120:
+            return None
+        if not self._is_margin_candidate(left, top, right, bottom):
+            return None
+
+        return {
+            "page_index": page_index,
+            "label": "number",
+            "content": folio,
+            "layout_index": block.get("layout_index", block.get("index", 0)),
+            "bbox_2d": box,
+            "layout_score": float(block.get("layout_score") or 0.0),
+            "numeric_like": folio.isdigit(),
+            "roman_like": self._is_roman_like(folio),
+        }
+
+    @staticmethod
+    def _is_margin_candidate(x1: int, y1: int, x2: int, y2: int) -> bool:
+        """Tell whether a box reaches a top/bottom band and a left/right band."""
+        if not (y1 <= 120 or y2 >= 880):  # not near the top or bottom edge
+            return False
+        return x1 <= 180 or x2 >= 820  # near the left or right edge
+
+    @staticmethod
+    def _is_roman_like(content: str) -> bool:
+        """Return True when *content* consists solely of Roman-numeral letters."""
+        return re.fullmatch(r"(?i)[ivxlcdm]+", content) is not None
+
+    def _infer_document_page_numbering(
+        self, candidates: List[Dict[str, Any]]
+    ) -> Optional[Dict[str, Any]]:
+        """Summarize the document-wide numbering scheme from the candidates.
+
+        Works on the best candidate per page. All-digit labels win: the result
+        is ``arabic`` with the most common ``label - page_index`` offset.
+        Otherwise Roman-like labels yield ``roman`` with no offset. Returns
+        ``None`` when neither kind is present.
+        """
+        if not candidates:
+            return None
+
+        per_page = self._best_candidates_by_page(candidates)
+        total_pages = len(per_page)
+        arabic = [cand for cand in per_page if cand["numeric_like"]]
+        roman = [cand for cand in per_page if cand["roman_like"]]
+        if not arabic and not roman:
+            return None
+
+        if arabic:
+            tally = collections.Counter(
+                int(cand["content"]) - int(cand["page_index"]) for cand in arabic
+            )
+            offset, votes = tally.most_common(1)[0]
+            kind, counted = "arabic", total_pages
+        else:
+            offset, votes = None, len(roman)
+            kind, counted = "roman", len(roman)
+        return {
+            "strategy": "visual_sequence",
+            "confidence": round(votes / max(1, total_pages), 3),
+            "sequence_type": kind,
+            "page_offset": offset,
+            "candidate_pages": counted,
+        }
+
+    def _build_printed_page_metadata(
+        self, candidates: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """Describe the chosen printed page label for each page that has one.
+
+        One entry is produced per page, from that page's best candidate, in
+        ascending ``page_index`` order. Pages without candidates are omitted.
+        """
+        if not candidates:
+            return []
+        return [
+            {
+                "page_index": best["page_index"],
+                "printed_page_label": best["content"],
+                "printed_page_block_index": best["layout_index"],
+                "printed_page_bbox_2d": best["bbox_2d"],
+                "printed_page_confidence": best["layout_score"],
+            }
+            for best in self._best_candidates_by_page(candidates)
+        ]
+
+    def _best_candidates_by_page(
+        self, candidates: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """Pick one candidate per page: the lowest ``_candidate_sort_key``.
+
+        Winners are returned in ascending ``page_index`` order.
+        """
+        grouped: Dict[int, List[Dict[str, Any]]] = {}
+        for cand in candidates:
+            grouped.setdefault(int(cand["page_index"]), []).append(cand)
+        ranked = self._candidate_sort_key
+        return [min(group, key=ranked) for _, group in sorted(grouped.items())]
+
+    @staticmethod
+    def _candidate_sort_key(block: Dict[str, Any]) -> Tuple[int, int, int, int]:
+        """Rank a candidate for ``min()``: a smaller key is a better folio.
+
+        Orders by distance to the nearer top/bottom edge, then to the nearer
+        side edge, then by higher score, then by lower layout index.
+        """
+        # Missing/None box defaults to [0, 0, 1000, 1000]; None score counts as 0.
+        x1, y1, x2, y2 = block.get("bbox_2d") or [0, 0, 1000, 1000]
+        return (
+            min(y1, 1000 - y2),
+            min(x1, 1000 - x2),
+            -int((block.get("layout_score") or 0) * 1000),
+            int(block.get("layout_index", block.get("index", 0))),
+        )
+
+    @staticmethod
+    def _normalize_printed_page_label(content: Any) -> Optional[str]:
+        """Return the stripped label if it looks like a folio, else ``None``.
+
+        A folio is 1-12 chars from ``[A-Za-z0-9-./]``, starting alphanumeric,
+        that contains a digit or is made only of Roman-numeral letters.
+        """
+        text = content.strip() if isinstance(content, str) else ""
+        if len(text) > 12 or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9\-./]*", text):
+            return None
+        if re.search(r"\d", text) or ResultFormatter._is_roman_like(text):
+            return text
+        return None
+
+    @staticmethod
+    def _strip_layout_metadata(pages: List[List[Dict[str, Any]]]) -> None:
+        """Drop ``layout_index``/``layout_score`` from every block, in place."""
+        blocks = (blk for page_blocks in pages for blk in page_blocks)
+        for blk in blocks:
+            for key in ("layout_index", "layout_score"):
+                blk.pop(key, None)
+
     # =========================================================================
     # Content handling
     # =========================================================================
diff --git a/glmocr/tests/test_unit.py b/glmocr/tests/test_unit.py
index 62ba50d..7d55353 100644
--- a/glmocr/tests/test_unit.py
+++ b/glmocr/tests/test_unit.py
@@ -1,6 +1,7 @@
 """Unit tests for glmocr (no external services required)."""
 
 import json
+import tempfile
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
@@ -24,6 +25,14 @@ def test_config_to_dict(self):
         cfg = load_config().to_dict()
         assert isinstance(cfg, dict)
 
+    def test_default_config_routes_number_to_text_ocr(self):
+        """The default config lists ``number`` among the text-task layout labels."""
+        from glmocr.config import load_config
+
+        layout_cfg = load_config().pipeline.layout
+
+        assert "number" in layout_cfg.label_task_mapping["text"]
+
 
 class TestLayoutDeviceUnit:
     """Unit tests for layout device selection and config plumbing (mocked)."""
@@ -494,6 +503,164 @@ def test_result_formatter_clean_content(self):
         cleaned = formatter._clean_content("Hello....World")
         assert "....." not in cleaned
 
+    def test_result_formatter_feature_off_keeps_json_result_lean(self):
+        """With detection off, emitted blocks carry no layout-only keys."""
+        from glmocr.postprocess import ResultFormatter
+        from glmocr.config import ResultFormatterConfig
+
+        number_block = {
+            "index": 7,
+            "label": "number",
+            "content": "12",
+            "bbox_2d": [944, 12, 972, 42],
+            "score": 0.88,
+        }
+        config = ResultFormatterConfig()
+        formatter = ResultFormatter(config)
+
+        json_str = formatter.process([[number_block]])[0]
+        emitted = json.loads(json_str)[0][0]
+
+        # ``native_label`` is kept; only the layout bookkeeping is stripped.
+        assert emitted["native_label"] == "number"
+        assert "layout_index" not in emitted
+        assert "layout_score" not in emitted
+
+    def test_result_formatter_extracts_page_number_data(self):
+        """A margin ``number`` block yields candidates, metadata and numbering."""
+        from glmocr.postprocess import ResultFormatter
+        from glmocr.config import ResultFormatterConfig
+
+        bbox = [944, 12, 972, 42]
+        config = ResultFormatterConfig(detect_printed_page_numbers=True)
+        formatter = ResultFormatter(config)
+
+        formatter.process(
+            [
+                [
+                    {
+                        "index": 7,
+                        "label": "number",
+                        "content": "12",
+                        "bbox_2d": bbox,
+                        "score": 0.88,
+                    }
+                ]
+            ]
+        )
+
+        expected_candidate = {
+            "page_index": 0,
+            "label": "number",
+            "content": "12",
+            "layout_index": 7,
+            "bbox_2d": bbox,
+            "layout_score": 0.88,
+            "numeric_like": True,
+            "roman_like": False,
+        }
+        expected_metadata = {
+            "page_index": 0,
+            "printed_page_label": "12",
+            "printed_page_block_index": 7,
+            "printed_page_bbox_2d": bbox,
+            "printed_page_confidence": 0.88,
+        }
+        assert formatter.page_number_candidates[0]["layout_index"] == 7
+        assert formatter.page_number_candidates[0]["layout_score"] == 0.88
+        assert formatter.page_metadata == [expected_metadata]
+        assert formatter.page_number_candidates == [expected_candidate]
+        # Page 0 printed as "12" gives an offset of 12 - 0.
+        assert formatter.document_page_numbering == {
+            "strategy": "visual_sequence",
+            "confidence": 1.0,
+            "sequence_type": "arabic",
+            "page_offset": 12,
+            "candidate_pages": 1,
+        }
+
+    def test_result_formatter_ignores_non_margin_number_blocks(self):
+        """A ``number`` block far from every page edge yields no page data."""
+        from glmocr.postprocess import ResultFormatter
+        from glmocr.config import ResultFormatterConfig
+
+        formatter = ResultFormatter(
+            ResultFormatterConfig(detect_printed_page_numbers=True)
+        )
+        grouped_results = [
+            [
+                {
+                    "index": 7,
+                    "label": "number",
+                    "content": "12",
+                    "bbox_2d": [400, 400, 428, 430],
+                    "score": 0.88,
+                }
+            ]
+        ]
+
+        # Run the formatter once: the JSON and the formatter attributes below
+        # must both describe this single invocation.
+        parsed = json.loads(formatter.process(grouped_results)[0])
+        assert "layout_index" not in parsed[0][0]
+        assert "layout_score" not in parsed[0][0]
+        assert formatter.page_metadata == []
+        assert formatter.page_number_candidates == []
+        assert formatter.document_page_numbering is None
+
+    def test_result_formatter_accepts_roman_number_candidates(self):
+        """A margin ``number`` block reading ``iv`` is kept as a Roman folio."""
+        from glmocr.postprocess import ResultFormatter
+        from glmocr.config import ResultFormatterConfig
+
+        bbox = [944, 12, 972, 42]
+        config = ResultFormatterConfig(detect_printed_page_numbers=True)
+        formatter = ResultFormatter(config)
+
+        formatter.process(
+            [
+                [
+                    {
+                        "index": 7,
+                        "label": "number",
+                        "content": "iv",
+                        "bbox_2d": bbox,
+                        "score": 0.75,
+                    }
+                ]
+            ]
+        )
+
+        expected_candidate = {
+            "page_index": 0,
+            "label": "number",
+            "content": "iv",
+            "layout_index": 7,
+            "bbox_2d": bbox,
+            "layout_score": 0.75,
+            "numeric_like": False,
+            "roman_like": True,
+        }
+        expected_metadata = {
+            "page_index": 0,
+            "printed_page_label": "iv",
+            "printed_page_block_index": 7,
+            "printed_page_bbox_2d": bbox,
+            "printed_page_confidence": 0.75,
+        }
+        assert formatter.page_number_candidates[0]["layout_index"] == 7
+        assert formatter.page_number_candidates[0]["layout_score"] == 0.75
+        assert formatter.page_metadata == [expected_metadata]
+        assert formatter.page_number_candidates == [expected_candidate]
+        # Roman sequences are reported without a numeric page offset.
+        assert formatter.document_page_numbering == {
+            "strategy": "visual_sequence",
+            "confidence": 1.0,
+            "sequence_type": "roman",
+            "page_offset": None,
+            "candidate_pages": 1,
+        }
+
 
 class TestMaaSClient:
     """Tests for MaaSClient."""
@@ -845,6 +1012,17 @@ def test_no_env_returns_empty(self, monkeypatch):
 
         assert _collect_env_overrides() == {}
 
+    def test_detect_printed_page_numbers_env_var(self, monkeypatch):
+        """GLMOCR_DETECT_PRINTED_PAGE_NUMBERS=true becomes a boolean override."""
+        from glmocr.config import _collect_env_overrides
+
+        monkeypatch.setenv("GLMOCR_DETECT_PRINTED_PAGE_NUMBERS", "true")
+
+        env_overrides = _collect_env_overrides()
+        formatter_section = env_overrides["pipeline"]["result_formatter"]
+
+        assert formatter_section["detect_printed_page_numbers"] is True
+
 
 class TestFromEnv:
     """Tests for GlmOcrConfig.from_env() – full priority chain."""
@@ -981,6 +1159,107 @@ def test_to_json_unicode_preserved(self):
         # ensure_ascii=False by default → raw CJK characters
         assert "中文测试" in s
 
+    def test_to_dict_includes_printed_page_fields(self):
+        r = self._make_result(  # seed all three printed-page fields
+            page_metadata=[
+                {
+                    "page_index": 0,
+                    "printed_page_label": "12",
+                    "printed_page_block_index": 7,
+                    "printed_page_bbox_2d": [944, 12, 972, 42],
+                    "printed_page_confidence": 0.88,
+                }
+            ],
+            page_number_candidates=[
+                {
+                    "page_index": 0,
+                    "label": "number",
+                    "content": "12",
+                    "layout_index": 7,
+                    "bbox_2d": [944, 12, 972, 42],
+                    "layout_score": 0.88,
+                    "numeric_like": True,
+                    "roman_like": False,
+                }
+            ],
+            document_page_numbering={
+                "strategy": "visual_sequence",
+                "confidence": 1.0,
+                "sequence_type": "arabic",
+                "page_offset": 12,
+                "candidate_pages": 1,
+            },
+        )
+        d = r.to_dict()  # each seeded field should appear in the dict form
+        assert d["page_metadata"][0]["printed_page_label"] == "12"
+        assert d["page_number_candidates"][0]["label"] == "number"
+        assert d["document_page_numbering"]["page_offset"] == 12
+
+    def test_save_wraps_json_with_printed_page_fields(self):
+        r = self._make_result(  # seed all three printed-page fields
+            original_images=["paper.pdf"],  # presumably names the "paper" output
+            page_metadata=[
+                {
+                    "page_index": 0,
+                    "printed_page_label": "12",
+                    "printed_page_block_index": 7,
+                    "printed_page_bbox_2d": [944, 12, 972, 42],
+                    "printed_page_confidence": 0.88,
+                }
+            ],
+            page_number_candidates=[
+                {
+                    "page_index": 0,
+                    "label": "number",
+                    "content": "12",
+                    "layout_index": 7,
+                    "bbox_2d": [944, 12, 972, 42],
+                    "layout_score": 0.88,
+                    "numeric_like": True,
+                    "roman_like": False,
+                }
+            ],
+            document_page_numbering={
+                "strategy": "visual_sequence",
+                "confidence": 1.0,
+                "sequence_type": "arabic",
+                "page_offset": 12,
+                "candidate_pages": 1,
+            },
+        )
+
+        with tempfile.TemporaryDirectory() as tmp_dir:  # scratch dir, removed on exit
+            r.save(output_dir=tmp_dir, save_layout_visualization=False)
+            saved = json.loads(Path(tmp_dir, "paper", "paper.json").read_text("utf-8"))
+
+        assert "json_result" in saved  # wrapped top-level object, not a bare list
+        assert saved["page_metadata"][0]["printed_page_label"] == "12"
+        assert saved["page_number_candidates"][0]["label"] == "number"
+        assert saved["document_page_numbering"]["page_offset"] == 12
+
+    def test_save_keeps_legacy_json_shape_without_printed_page_data(self):
+        """Result built from original_images alone saves a top-level JSON list."""
+        result = self._make_result(original_images=["paper.pdf"])
+        with tempfile.TemporaryDirectory() as out_dir:
+            result.save(output_dir=out_dir, save_layout_visualization=False)
+            json_path = Path(out_dir) / "paper" / "paper.json"
+            payload = json.loads(json_path.read_text(encoding="utf-8"))
+        assert isinstance(payload, list)
+
+    def test_save_keeps_legacy_json_shape_when_detection_has_no_hits(self):
+        """Empty printed-page fields leave saved paper.json as a top-level list."""
+        result = self._make_result(
+            original_images=["paper.pdf"],
+            page_metadata=[],
+            page_number_candidates=[],
+            document_page_numbering=None,
+        )
+        with tempfile.TemporaryDirectory() as out_dir:
+            result.save(output_dir=out_dir, save_layout_visualization=False)
+            json_path = Path(out_dir) / "paper" / "paper.json"
+            payload = json.loads(json_path.read_text(encoding="utf-8"))
+        assert isinstance(payload, list)
+
     def test_repr(self):
         r = self._make_result()
         assert "PipelineResult" in repr(r)
@@ -1251,6 +1530,93 @@ def test_parse_stream_selfhosted_delegates(self):
             preserve_order=True,
         )
 
+    def test_maas_response_includes_printed_page_metadata_when_enabled(self):
+        """MaaS conversion derives printed page data from number blocks."""
+        from glmocr.api import GlmOcr
+        from glmocr.config import GlmOcrConfig, ResultFormatterConfig
+
+        parser = object.__new__(GlmOcr)  # skip __init__; attributes are set by hand
+        parser._use_maas = True
+        parser._pipeline = None
+        parser._maas_client = MagicMock()
+        parser.config_model = GlmOcrConfig()
+        parser.config_model.pipeline.result_formatter = ResultFormatterConfig(
+            detect_printed_page_numbers=True
+        )
+
+        response = {  # one page with a single "number" block
+            "md_results": "",
+            "layout_details": [
+                [
+                    {
+                        "index": 7,
+                        "label": "number",
+                        "content": "12",
+                        "bbox_2d": [1926, 32, 1982, 111],
+                        "score": 0.88,
+                    }
+                ]
+            ],
+            "data_info": {"pages": [{"width": 2040, "height": 2640}]},
+        }
+
+        result = parser._maas_response_to_pipeline_result(response, "paper.pdf")
+
+        assert result.page_number_candidates == [
+            {
+                "page_index": 0,
+                "label": "number",
+                "content": "12",
+                "layout_index": 7,
+                "bbox_2d": [944, 12, 972, 42],  # presumably bbox * 1000 / page size
+                "layout_score": 0.88,
+                "numeric_like": True,
+                "roman_like": False,
+            }
+        ]
+        assert result.document_page_numbering == {
+            "strategy": "visual_sequence",
+            "confidence": 1.0,
+            "sequence_type": "arabic",
+            "page_offset": 12,
+            "candidate_pages": 1,
+        }
+        assert result.page_metadata[0]["printed_page_label"] == "12"
+
+    def test_maas_response_feature_off_keeps_json_result_lean(self):
+        """MaaS conversion does not leak broad layout metadata when feature is off."""
+        from glmocr.api import GlmOcr
+        from glmocr.config import GlmOcrConfig
+
+        parser = object.__new__(GlmOcr)  # skip __init__; attributes are set by hand
+        parser._use_maas = True
+        parser._pipeline = None
+        parser._maas_client = MagicMock()
+        parser.config_model = GlmOcrConfig()  # stock config, flag not set here
+
+        response = {  # one page with a single "number" block
+            "md_results": "",
+            "layout_details": [
+                [
+                    {
+                        "index": 7,
+                        "label": "number",
+                        "content": "12",
+                        "bbox_2d": [1926, 32, 1982, 111],
+                        "score": 0.88,
+                    }
+                ]
+            ],
+            "data_info": {"pages": [{"width": 2040, "height": 2640}]},
+        }
+
+        result = parser._maas_response_to_pipeline_result(response, "paper.pdf")
+
+        block = result.json_result[0][0]  # first entry of the first inner list
+        assert block["native_label"] == "number"
+        assert "layout_index" not in block
+        assert "layout_score" not in block
+
 
 class TestGlmOcrConstructor:
     """Tests for GlmOcr.__init__ kwarg handling (config assembly only)."""
@@ -1308,6 +1674,25 @@ def test_selfhosted_model_kwarg_is_forwarded_to_ocr_api(self, monkeypatch):
             assert parser.config_model.pipeline.ocr_api.model == "glm-ocr"
             parser.close()
 
+    def test_detect_printed_page_numbers_kwarg_is_forwarded(self, monkeypatch):
+        """GlmOcr(detect_printed_page_numbers=True) sets the formatter config flag."""
+        from glmocr.config import _ENV_MAP, ENV_PREFIX
+
+        # Drop every mapped env override and stub out .env discovery.
+        for env_suffix in _ENV_MAP:
+            monkeypatch.delenv(f"{ENV_PREFIX}{env_suffix}", raising=False)
+        monkeypatch.setattr("glmocr.config._find_dotenv", lambda: None)
+
+        with patch("glmocr.maas_client.MaaSClient") as maas_cls:
+            maas_cls.return_value.start = MagicMock()
+            from glmocr.api import GlmOcr
+
+            parser = GlmOcr(api_key="sk-test", detect_printed_page_numbers=True)
+            formatter_cfg = parser.config_model.pipeline.result_formatter
+
+            assert formatter_cfg.detect_printed_page_numbers is True
+            parser.close()
+
 
 class TestOCRClientOllamaConfig:
     """Tests for OCRClient initialization with Ollama api_mode."""