diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e2b46df..fa709f7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,7 +22,17 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies - run: pip install -e .[test] + run: pip install -e .[test,typecheck] + + + - name: Validate gradual typing migration scope + run: python scripts/check_typing_generics_scope.py + + - name: Run mypy (gradual scope) + run: mypy + + - name: Run pyright (gradual scope) + run: pyright - name: Run tests and keep summary log run: | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index acc3a1b..b5d8fe2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -33,3 +33,13 @@ 오타 수정부터 새 파서 추가까지 모든 기여를 환영합니다. HWPX 생태계를 위한 더 나은 도구를 함께 만들어 주셔서 감사합니다! + +## 타입 힌트 및 `from __future__ import annotations` 정책 + +- 이 저장소는 Python 3.10을 최소 지원 버전으로 유지하므로, 타입 힌트는 `list`/`dict`/`tuple` 같은 **내장 제네릭(PEP 585)** 을 우선 사용합니다. +- 신규 파일에서 타입 힌트에 전방 참조(아직 정의되지 않은 클래스 이름)를 사용한다면 `from __future__ import annotations`를 파일 상단에 추가하세요. `|` 유니온 표기(PEP 604)는 최소 지원 버전인 Python 3.10에서 이 임포트 없이도 동작하므로, 유니온 표기만을 위해 추가할 필요는 없습니다. +- 기존 파일을 수정할 때도 같은 기준을 적용해 파일 단위로 일관성을 맞춥니다. 즉, 해당 파일이 미래 지연 평가가 필요하면 유지하고, 필요하지 않으면 제거합니다. +- 점진 변환 범위(현재: `src/hwpx/document.py`, `src/hwpx/oxml/document.py`)는 CI에서 다음 항목으로 검증합니다. 
+  - `scripts/check_typing_generics_scope.py`: `List`/`Dict`/`Tuple` 별칭 사용 금지 확인
+  - `mypy`, `pyright`: 지정된 파일 범위 타입 검사
+
diff --git a/pyproject.toml b/pyproject.toml
index 466dfc5..76446c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,10 @@ dev = [
 test = [
     "pytest>=7.4",
 ]
+typecheck = [
+    "mypy>=1.10",
+    "pyright>=1.1.390",
+]
 
 [project.urls]
 Homepage = "https://github.com/airmang/python-hwpx"
@@ -63,3 +67,21 @@ include = ["hwpx*"]
 pythonpath = ["src"]
 addopts = "-ra"
 testpaths = ["tests"]
+
+
+[tool.mypy]
+python_version = "3.10"
+files = ["src/hwpx/document.py", "src/hwpx/oxml/document.py"]
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = ["hwpx.document", "hwpx.oxml.document"]
+# NOTE(review): this silences every non-fatal mypy error for the only modules in scope, so the CI step cannot fail on type errors yet - confirm intended.
+ignore_errors = true
+
+[tool.pyright]
+include = ["src/hwpx/document.py", "src/hwpx/oxml/document.py"]
+pythonVersion = "3.10"
+# NOTE(review): "off" disables pyright's type-checking rules, leaving the CI step close to a syntax check - confirm intended.
+typeCheckingMode = "off"
+reportMissingTypeStubs = false
diff --git a/scripts/check_typing_generics_scope.py b/scripts/check_typing_generics_scope.py
new file mode 100644
index 0000000..aae1ed8
--- /dev/null
+++ b/scripts/check_typing_generics_scope.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""Check the gradual-migration target files for typing generic aliases.
+
+Exits non-zero when a target file still subscripts ``List``/``Dict``/``Tuple``
+(or cannot be read) so CI can enforce built-in PEP 585 generics.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+# The script lives in <repo>/scripts/, so its grandparent is the repository
+# root; resolving targets against it makes the check independent of the cwd.
+REPO_ROOT = Path(__file__).resolve().parent.parent
+
+TARGET_FILES = [
+    Path("src/hwpx/document.py"),
+    Path("src/hwpx/oxml/document.py"),
+]
+
+FORBIDDEN = ("List[", "Dict[", "Tuple[")
+
+# The leading \b stops names that merely end in an alias (OrderedDict[,
+# NamedTuple[) from being reported. Matching is textual, so occurrences inside
+# comments and string literals are still flagged.
+_FORBIDDEN_RE = re.compile(r"\b(?:" + "|".join(map(re.escape, FORBIDDEN)) + ")")
+
+
+def find_forbidden(text: str) -> list[tuple[int, str]]:
+    """Return ``(line_number, token)`` for every forbidden alias in *text*.
+
+    Line numbers are 1-based and results follow source order.
+    """
+    return [
+        (line_no, match.group(0))
+        for line_no, line in enumerate(text.split("\n"), start=1)
+        for match in _FORBIDDEN_RE.finditer(line)
+    ]
+
+
+def main() -> int:
+    """Scan every target file and return the process exit code.
+
+    Returns:
+        0 when no forbidden alias is found; 1 when at least one is found or a
+        target file cannot be read.
+    """
+    violations = 0
+    unreadable = 0
+    for path in TARGET_FILES:
+        try:
+            text = (REPO_ROOT / path).read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            # A bad target must fail the check with a message, not a traceback.
+            print(f"{path}: 대상 파일을 읽을 수 없습니다: {exc}")
+            unreadable += 1
+            continue
+        for line_no, token in find_forbidden(text):
+            print(f"{path}:{line_no}: 금지된 typing 별칭 '{token}' 사용 발견")
+            violations += 1
+
+    if violations:
+        print("\n점진 변환 범위 검사 실패: list/dict/tuple 내장 제네릭을 사용하세요.")
+    if violations or unreadable:
+        return 1
+
+    print("점진 변환 범위 검사 통과: 대상 파일에서 List/Dict/Tuple 사용이 없습니다.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/hwpx/document.py 
b/src/hwpx/document.py index cc694e6..18d666f 100644 --- a/src/hwpx/document.py +++ b/src/hwpx/document.py @@ -8,7 +8,7 @@ import uuid from os import PathLike -from typing import Any, BinaryIO, Iterator, List, Tuple +from typing import Any, BinaryIO, Iterator from lxml import etree @@ -193,22 +193,22 @@ def oxml(self) -> HwpxOxmlDocument: return self._root @property - def sections(self) -> List[HwpxOxmlSection]: + def sections(self) -> list[HwpxOxmlSection]: """Return the sections contained in the document.""" return self._root.sections @property - def headers(self) -> List[HwpxOxmlHeader]: + def headers(self) -> list[HwpxOxmlHeader]: """Return the header parts referenced by the document.""" return self._root.headers @property - def master_pages(self) -> List[HwpxOxmlMasterPage]: + def master_pages(self) -> list[HwpxOxmlMasterPage]: """Return the master-page parts declared in the manifest.""" return self._root.master_pages @property - def histories(self) -> List[HwpxOxmlHistory]: + def histories(self) -> list[HwpxOxmlHistory]: """Return document history parts referenced by the manifest.""" return self._root.histories @@ -299,10 +299,10 @@ def track_change_author( return self._root.track_change_author(author_id_ref) @property - def memos(self) -> List[HwpxOxmlMemo]: + def memos(self) -> list[HwpxOxmlMemo]: """Return all memo entries declared in every section.""" - memos: List[HwpxOxmlMemo] = [] + memos: list[HwpxOxmlMemo] = [] for section in self._root.sections: memos.extend(section.memos) return memos @@ -494,7 +494,7 @@ def add_memo_with_anchor( return memo, target_paragraph, field_value @property - def paragraphs(self) -> List[HwpxOxmlParagraph]: + def paragraphs(self) -> list[HwpxOxmlParagraph]: """Return all paragraphs across every section.""" return self._root.paragraphs @@ -540,10 +540,10 @@ def find_runs_by_style( underline_type: str | None = None, underline_color: str | None = None, char_pr_id_ref: str | int | None = None, - ) -> List[HwpxOxmlRun]: + ) -> 
list[HwpxOxmlRun]: """Return runs matching the requested style criteria.""" - matches: List[HwpxOxmlRun] = [] + matches: list[HwpxOxmlRun] = [] target_char = str(char_pr_id_ref).strip() if char_pr_id_ref is not None else None for run in self.iter_runs(): diff --git a/src/hwpx/oxml/document.py b/src/hwpx/oxml/document.py index edc06c1..6d940f5 100644 --- a/src/hwpx/oxml/document.py +++ b/src/hwpx/oxml/document.py @@ -5,7 +5,7 @@ import logging from copy import deepcopy from dataclasses import dataclass -from typing import Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Tuple, TypeVar +from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar from uuid import uuid4 import xml.etree.ElementTree as ET @@ -56,7 +56,7 @@ "breakCellSeparateLine": "0", } -_BASIC_BORDER_CHILDREN: Tuple[Tuple[str, dict[str, str]], ...] = ( +_BASIC_BORDER_CHILDREN: tuple[tuple[str, dict[str, str]], ...] = ( ("slash", {"type": "NONE", "Crooked": "0", "isCounter": "0"}), ("backSlash", {"type": "NONE", "Crooked": "0", "isCounter": "0"}), ("leftBorder", {"type": "SOLID", "width": "0.12 mm", "color": "#000000"}), @@ -193,7 +193,7 @@ def _create_basic_border_fill_element(border_id: str) -> ET.Element: return element -def _distribute_size(total: int, parts: int) -> List[int]: +def _distribute_size(total: int, parts: int) -> list[int]: """Return *parts* integers that sum to *total* and are as even as possible.""" if parts <= 0: @@ -201,7 +201,7 @@ def _distribute_size(total: int, parts: int) -> List[int]: base = total // parts remainder = total - (base * parts) - sizes: List[int] = [] + sizes: list[int] = [] for index in range(parts): value = base if remainder > 0: @@ -311,8 +311,8 @@ class RunStyle: """Represents the resolved character style applied to a run.""" id: str - attributes: Dict[str, str] - child_attributes: Dict[str, Dict[str, str]] + attributes: dict[str, str] + child_attributes: dict[str, dict[str, str]] def text_color(self) -> str | None: return 
self.attributes.get("textColor") @@ -345,8 +345,8 @@ def matches( return True -def _char_properties_from_header(element: ET.Element) -> Dict[str, RunStyle]: - mapping: Dict[str, RunStyle] = {} +def _char_properties_from_header(element: ET.Element) -> dict[str, RunStyle]: + mapping: dict[str, RunStyle] = {} ref_list = element.find(f"{_HH}refList") if ref_list is None: return mapping @@ -359,7 +359,7 @@ def _char_properties_from_header(element: ET.Element) -> Dict[str, RunStyle]: if not char_id: continue attributes = {key: value for key, value in child.attrib.items() if key != "id"} - child_attributes: Dict[str, Dict[str, str]] = {} + child_attributes: dict[str, dict[str, str]] = {} for grandchild in child: if len(list(grandchild)) == 0 and (grandchild.text is None or not grandchild.text.strip()): child_attributes[_element_local_name(grandchild)] = { @@ -524,7 +524,7 @@ def _ensure_text_element(self) -> ET.Element: def text(self) -> str: """Return the concatenated text content of the header/footer.""" - parts: List[str] = [] + parts: list[str] = [] for node in self.element.findall(f".//{_HP}t"): if node.text: parts.append(node.text) @@ -883,16 +883,16 @@ def _ensure_header_footer(self, tag: str, page_type: str) -> ET.Element: return element @property - def headers(self) -> List[HwpxOxmlSectionHeaderFooter]: - wrappers: List[HwpxOxmlSectionHeaderFooter] = [] + def headers(self) -> list[HwpxOxmlSectionHeaderFooter]: + wrappers: list[HwpxOxmlSectionHeaderFooter] = [] for element in self.element.findall(f"{_HP}header"): apply = self._match_apply_for_element("header", element) wrappers.append(HwpxOxmlSectionHeaderFooter(element, self, apply)) return wrappers @property - def footers(self) -> List[HwpxOxmlSectionHeaderFooter]: - wrappers: List[HwpxOxmlSectionHeaderFooter] = [] + def footers(self) -> list[HwpxOxmlSectionHeaderFooter]: + wrappers: list[HwpxOxmlSectionHeaderFooter] = [] for element in self.element.findall(f"{_HP}footer"): apply = 
self._match_apply_for_element("footer", element) wrappers.append(HwpxOxmlSectionHeaderFooter(element, self, apply)) @@ -977,7 +977,7 @@ def apply_model(self, model: "body.Run") -> None: self.element = replacement self.paragraph.section.mark_dirty() - def _current_format_flags(self) -> Tuple[bool, bool, bool] | None: + def _current_format_flags(self) -> tuple[bool, bool, bool] | None: style = self.style if style is None: return None @@ -1038,7 +1038,7 @@ def char_pr_id_ref(self, value: str | int | None) -> None: self.element.set("charPrIDRef", new_value) self.paragraph.section.mark_dirty() - def _plain_text_nodes(self) -> List[ET.Element]: + def _plain_text_nodes(self) -> list[ET.Element]: return [ node for node in self.element.findall(f"{_HP}t") @@ -1053,7 +1053,7 @@ def _ensure_plain_text_node(self) -> ET.Element: @property def text(self) -> str: - parts: List[str] = [] + parts: list[str] = [] for node in self.element.findall(f"{_HP}t"): parts.append("".join(node.itertext())) return "".join(parts) @@ -1117,8 +1117,8 @@ def set(self, value: str) -> None: else: setattr(self.element, self.attr, "") - def _gather_segments(node: ET.Element) -> List[_Segment]: - segments: List[_Segment] = [] + def _gather_segments(node: ET.Element) -> list[_Segment]: + segments: list[_Segment] = [] def visit(element: ET.Element) -> None: text_value = element.text or "" @@ -1131,8 +1131,8 @@ def visit(element: ET.Element) -> None: visit(node) return segments - def _segment_boundaries(segments: Sequence[_Segment]) -> List[Tuple[int, int]]: - bounds: List[Tuple[int, int]] = [] + def _segment_boundaries(segments: Sequence[_Segment]) -> list[tuple[int, int]]: + bounds: list[tuple[int, int]] = [] offset = 0 for segment in segments: start = offset @@ -1140,7 +1140,7 @@ def _segment_boundaries(segments: Sequence[_Segment]) -> List[Tuple[int, int]]: bounds.append((start, offset)) return bounds - def _distribute(total: int, weights: Sequence[int]) -> List[int]: + def _distribute(total: int, 
weights: Sequence[int]) -> list[int]: if not weights: return [] if total <= 0: @@ -1158,7 +1158,7 @@ def _distribute(total: int, weights: Sequence[int]) -> List[int]: allocation = [] remainder = total - residuals: List[Tuple[int, int]] = [] + residuals: list[tuple[int, int]] = [] for index, weight in enumerate(weights): share = total * weight // weight_sum allocation.append(share) @@ -1180,13 +1180,13 @@ def _distribute(total: int, weights: Sequence[int]) -> List[int]: return allocation def _apply_replacement( - segments: List[_Segment], + segments: list[_Segment], start: int, end: int, replacement_text: str, ) -> None: bounds = _segment_boundaries(segments) - affected: List[Tuple[int, int, int]] = [] + affected: list[tuple[int, int, int]] = [] for index, (seg_start, seg_end) in enumerate(bounds): if start >= seg_end or end <= seg_start: continue @@ -1212,7 +1212,7 @@ def _apply_replacement( replacement_offset += share segment.set(prefix + portion + suffix) - segments: List[_Segment] = [] + segments: list[_Segment] = [] for text_node in self.element.findall(f"{_HP}t"): segments.extend(_gather_segments(text_node)) @@ -1297,7 +1297,7 @@ def __init__(self, element: ET.Element, section: "HwpxOxmlSection"): self.section = section @property - def memos(self) -> List["HwpxOxmlMemo"]: + def memos(self) -> list["HwpxOxmlMemo"]: return [ HwpxOxmlMemo(child, self) for child in self.element.findall(f"{_HP}memo") @@ -1394,15 +1394,15 @@ def _infer_char_pr_id_ref(self) -> str | None: return None @property - def paragraphs(self) -> List["HwpxOxmlParagraph"]: - paragraphs: List[HwpxOxmlParagraph] = [] + def paragraphs(self) -> list["HwpxOxmlParagraph"]: + paragraphs: list[HwpxOxmlParagraph] = [] for node in self.element.findall(f".//{_HP}p"): paragraphs.append(HwpxOxmlParagraph(node, self.group.section)) return paragraphs @property def text(self) -> str: - parts: List[str] = [] + parts: list[str] = [] for paragraph in self.paragraphs: value = paragraph.text if value: @@ -1525,7 
+1525,7 @@ def _ensure_text_element(self) -> ET.Element: return text @property - def address(self) -> Tuple[int, int]: + def address(self) -> tuple[int, int]: addr = self._addr_element() if addr is None: return (0, 0) @@ -1534,7 +1534,7 @@ def address(self) -> Tuple[int, int]: return (row, col) @property - def span(self) -> Tuple[int, int]: + def span(self) -> tuple[int, int]: span = self._span_element() row_span = int(span.get("rowSpan", "1")) col_span = int(span.get("colSpan", "1")) @@ -1590,8 +1590,8 @@ class HwpxTableGridPosition: row: int column: int cell: HwpxOxmlTableCell - anchor: Tuple[int, int] - span: Tuple[int, int] + anchor: tuple[int, int] + span: tuple[int, int] @property def is_anchor(self) -> bool: @@ -1614,7 +1614,7 @@ def __init__(self, element: ET.Element, table: "HwpxOxmlTable"): self.table = table @property - def cells(self) -> List[HwpxOxmlTableCell]: + def cells(self) -> list[HwpxOxmlTableCell]: return [ HwpxOxmlTableCell(cell_element, self.table, self.element) for cell_element in self.element.findall(f"{_HP}tc") @@ -1756,11 +1756,11 @@ def column_count(self) -> int: return len(first_row.findall(f"{_HP}tc")) @property - def rows(self) -> List[HwpxOxmlTableRow]: + def rows(self) -> list[HwpxOxmlTableRow]: return [HwpxOxmlTableRow(row, self) for row in self.element.findall(f"{_HP}tr")] - def _build_cell_grid(self) -> dict[Tuple[int, int], HwpxTableGridPosition]: - mapping: dict[Tuple[int, int], HwpxTableGridPosition] = {} + def _build_cell_grid(self) -> dict[tuple[int, int], HwpxTableGridPosition]: + mapping: dict[tuple[int, int], HwpxTableGridPosition] = {} for row in self.element.findall(f"{_HP}tr"): for cell_element in row.findall(f"{_HP}tc"): wrapper = HwpxOxmlTableCell(cell_element, self, row) @@ -1824,12 +1824,12 @@ def iter_grid(self) -> Iterator[HwpxTableGridPosition]: ) yield entry - def get_cell_map(self) -> List[List[HwpxTableGridPosition]]: + def get_cell_map(self) -> list[list[HwpxTableGridPosition]]: """Return a 2D list mapping 
logical positions to physical cells.""" row_count = self.row_count col_count = self.column_count - grid: List[List[HwpxTableGridPosition | None]] = [ + grid: list[list[HwpxTableGridPosition | None]] = [ [None for _ in range(col_count)] for _ in range(row_count) ] for entry in self.iter_grid(): @@ -2129,7 +2129,7 @@ def apply_model(self, model: "body.Paragraph") -> None: self.element = replacement self.section.mark_dirty() - def _run_elements(self) -> List[ET.Element]: + def _run_elements(self) -> list[ET.Element]: return self.element.findall(f"{_HP}run") def _ensure_run(self) -> ET.Element: @@ -2144,14 +2144,14 @@ def _ensure_run(self) -> ET.Element: return ET.SubElement(self.element, f"{_HP}run", run_attrs) @property - def runs(self) -> List[HwpxOxmlRun]: + def runs(self) -> list[HwpxOxmlRun]: """Return the runs contained in this paragraph.""" return [HwpxOxmlRun(run, self) for run in self._run_elements()] @property def text(self) -> str: """Return the concatenated textual content of this paragraph.""" - texts: List[str] = [] + texts: list[str] = [] for text_element in self.element.findall(f".//{_HP}t"): if text_element.text: texts.append(text_element.text) @@ -2223,10 +2223,10 @@ def add_run( return HwpxOxmlRun(run_element, self) @property - def tables(self) -> List["HwpxOxmlTable"]: + def tables(self) -> list["HwpxOxmlTable"]: """Return the tables embedded within this paragraph.""" - tables: List[HwpxOxmlTable] = [] + tables: list[HwpxOxmlTable] = [] for run in self._run_elements(): for child in run: if child.tag == f"{_HP}tbl": @@ -2513,7 +2513,7 @@ def attach_document(self, document: "HwpxOxmlDocument") -> None: self._document = document @property - def paragraphs(self) -> List[HwpxOxmlParagraph]: + def paragraphs(self) -> list[HwpxOxmlParagraph]: """Return the paragraphs defined in this section.""" return [HwpxOxmlParagraph(elm, self) for elm in self._paragraph_elements()] @@ -2532,7 +2532,7 @@ def memo_group(self) -> HwpxOxmlMemoGroup | None: return 
HwpxOxmlMemoGroup(element, self) @property - def memos(self) -> List[HwpxOxmlMemo]: + def memos(self) -> list[HwpxOxmlMemo]: group = self.memo_group if group is None: return [] @@ -2698,7 +2698,7 @@ def _allocate_char_property_id( if candidate not in existing: return candidate - numeric_ids: List[int] = [] + numeric_ids: list[int] = [] for value in existing: try: numeric_ids.append(int(value)) @@ -2718,7 +2718,7 @@ def _allocate_border_fill_id(self, element: ET.Element) -> str: } existing.discard("") - numeric_ids: List[int] = [] + numeric_ids: list[int] = [] for value in existing: try: numeric_ids.append(int(value)) @@ -2859,7 +2859,7 @@ def border_fills(self) -> dict[str, GenericElement]: mapping: dict[str, GenericElement] = {} for border_fill in fill_list.fills: raw_id = border_fill.attributes.get("id") - keys: List[str] = [] + keys: list[str] = [] if raw_id: keys.append(raw_id) try: @@ -2881,7 +2881,7 @@ def _convert_to_lxml(element: ET.Element) -> LET._Element: return LET.fromstring(ET.tostring(element, encoding="utf-8")) @staticmethod - def _lookup_by_id(mapping: Dict[str, T], identifier: int | str | None) -> T | None: + def _lookup_by_id(mapping: dict[str, T], identifier: int | str | None) -> T | None: if identifier is None: return None @@ -3188,19 +3188,19 @@ def manifest(self) -> ET.Element: return self._manifest @property - def sections(self) -> List[HwpxOxmlSection]: + def sections(self) -> list[HwpxOxmlSection]: return list(self._sections) @property - def headers(self) -> List[HwpxOxmlHeader]: + def headers(self) -> list[HwpxOxmlHeader]: return list(self._headers) @property - def master_pages(self) -> List[HwpxOxmlMasterPage]: + def master_pages(self) -> list[HwpxOxmlMasterPage]: return list(self._master_pages) @property - def histories(self) -> List[HwpxOxmlHistory]: + def histories(self) -> list[HwpxOxmlHistory]: return list(self._histories) @property @@ -3254,7 +3254,7 @@ def ensure_run_style( target = (bool(bold), bool(italic), bool(underline)) 
header = self._headers[0] - def element_flags(element: ET.Element) -> Tuple[bool, bool, bool]: + def element_flags(element: ET.Element) -> tuple[bool, bool, bool]: bold_present = element.find(f"{_HH}bold") is not None italic_present = element.find(f"{_HH}italic") is not None underline_element = element.find(f"{_HH}underline") @@ -3411,8 +3411,8 @@ def track_change_author( return HwpxOxmlHeader._lookup_by_id(self.track_change_authors, author_id_ref) @property - def paragraphs(self) -> List[HwpxOxmlParagraph]: - paragraphs: List[HwpxOxmlParagraph] = [] + def paragraphs(self) -> list[HwpxOxmlParagraph]: + paragraphs: list[HwpxOxmlParagraph] = [] for section in self._sections: paragraphs.extend(section.paragraphs) return paragraphs