diff --git a/src/hwpx/document.py b/src/hwpx/document.py index 4755f9b..d20b438 100644 --- a/src/hwpx/document.py +++ b/src/hwpx/document.py @@ -3,6 +3,7 @@ from __future__ import annotations from datetime import datetime +import logging import uuid from os import PathLike @@ -40,6 +41,8 @@ _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head" _HH = f"{{{_HH_NS}}}" +logger = logging.getLogger(__name__) + def _append_element( parent: Any, diff --git a/src/hwpx/opc/package.py b/src/hwpx/opc/package.py index e17cda0..05d3e55 100644 --- a/src/hwpx/opc/package.py +++ b/src/hwpx/opc/package.py @@ -2,6 +2,7 @@ from __future__ import annotations +import logging import io from dataclasses import dataclass from pathlib import Path @@ -19,6 +20,8 @@ __all__ = ["HwpxPackage", "HwpxPackageError", "HwpxStructureError", "RootFile", "VersionInfo"] +logger = logging.getLogger(__name__) + _OPF_NS = "http://www.idpf.org/2007/opf/" @@ -144,6 +147,7 @@ def open(cls, pkg_file: str | Path | bytes | bytearray | BinaryIO) -> HwpxPackag with ZipFile(stream, "r") as zf: files = {info.filename: zf.read(info.filename) for info in zf.infolist()} + logger.debug("HWPX 패키지 파일 목록 %d개를 로드했습니다.", len(files)) if cls.MIMETYPE_PATH not in files: raise HwpxStructureError("HWPX package is missing the mandatory 'mimetype' file.") mimetype = files[cls.MIMETYPE_PATH].decode("utf-8") @@ -158,21 +162,29 @@ def _parse_container(data: bytes | None) -> list[RootFile]: raise HwpxStructureError( "HWPX package is missing 'META-INF/container.xml'." ) - root = parse_xml(data) + try: + root = parse_xml(data) + except Exception: + logger.exception("container.xml 파싱에 실패했습니다.") + raise rootfiles = [] for elem in root.findall(".//{*}rootfile"): - full_path = ( - elem.get("full-path") - or elem.get("fullPath") - or elem.get("full_path") - ) + full_path_attr = elem.get("full-path") + full_path = full_path_attr or elem.get("fullPath") or elem.get("full_path") + if full_path and not full_path_attr: + logger.warning( + "container.xml rootfile이 비표준 경로 속성명을 사용했습니다: %s", + elem.attrib, + ) if not full_path: raise HwpxStructureError("container.xml contains a rootfile without 'full-path'.") - media_type = ( - elem.get("media-type") - or elem.get("mediaType") - or elem.get("media_type") - ) + media_type_attr = elem.get("media-type") + media_type = media_type_attr or elem.get("mediaType") or elem.get("media_type") + if media_type and not media_type_attr: + logger.warning( + "container.xml rootfile이 비표준 media-type 속성명을 사용했습니다: %s", + elem.attrib, + ) rootfiles.append(RootFile(full_path, media_type)) if not rootfiles: raise HwpxStructureError("container.xml does not declare any rootfiles.") @@ -208,7 +220,12 @@ def main_content(self) -> RootFile: for rootfile in self._rootfiles: if rootfile.media_type == "application/hwpml-package+xml": return rootfile - return self._rootfiles[0] + selected = self._rootfiles[0] + logger.warning( + "표준 media_type 메인 rootfile이 없어 첫 항목으로 대체합니다: path=%s", + selected.full_path, + ) + return selected @property def version_info(self) -> VersionInfo: @@ -219,6 +236,7 @@ def read(self, path: str) -> bytes: try: return self._files[norm_path] except KeyError as exc: + logger.warning("파트 누락: path=%s", norm_path) raise HwpxPackageError(f"File '{norm_path}' is not present in the package.") from exc def write(self, path: str, data: bytes | str) -> None: @@ -348,6 +366,7 @@ def section_paths(self) -> list[str]: if path and PurePosixPath(path).name.startswith("section") ] if not paths: + logger.warning("manifest spine에서 section 경로를 찾지 못해 파일명 기반 fallback을 사용합니다.") paths = [ name for name in self._files.keys() @@ -366,6 +385,10 @@ def header_paths(self) -> list[str]: if path and PurePosixPath(path).name.startswith("header") ] if not paths and self.has_part(self.HEADER_PATH): + logger.warning( + "manifest spine에서 header 경로를 찾지 못해 기본 header 경로 fallback을 사용합니다: %s", + self.HEADER_PATH, + ) paths = [self.HEADER_PATH] self._header_paths_cache = paths return list(self._header_paths_cache) @@ -381,6 +404,7 @@ def master_page_paths(self) -> list[str]: and item.attrib.get("href") ] if not paths: + logger.warning("manifest에서 masterPage를 찾지 못해 파일명 탐색 fallback을 사용합니다.") paths = [ name for name in self._files.keys() @@ -400,6 +424,7 @@ def history_paths(self) -> list[str]: if self._manifest_matches(item, "history") and item.attrib.get("href") ] if not paths: + logger.warning("manifest에서 history를 찾지 못해 파일명 탐색 fallback을 사용합니다.") paths = [ name for name in self._files.keys() @@ -418,6 +443,10 @@ def version_path(self) -> str | None: path = href break if path is None and self.has_part(self.VERSION_PATH): + logger.warning( + "manifest에서 version 파트를 찾지 못해 기본 경로 fallback을 사용합니다: %s", + self.VERSION_PATH, + ) path = self.VERSION_PATH self._version_path_cache = path self._version_path_cache_resolved = True diff --git a/src/hwpx/oxml/__init__.py b/src/hwpx/oxml/__init__.py index f67c07b..eadcf2f 100644 --- a/src/hwpx/oxml/__init__.py +++ b/src/hwpx/oxml/__init__.py @@ -3,6 +3,7 @@ from __future__ import annotations +import logging from .body import ( Paragraph, Run, @@ -215,3 +216,5 @@ "parse_text_span", ] +logger = logging.getLogger(__name__) + diff --git a/src/hwpx/oxml/body.py b/src/hwpx/oxml/body.py index 5f00af7..8c7e1a3 100644 --- a/src/hwpx/oxml/body.py +++ b/src/hwpx/oxml/body.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from dataclasses import dataclass, field from typing import Dict, List, Optional, Union @@ -430,3 +431,5 @@ def serialize_paragraph(paragraph: Paragraph) -> etree._Element: "serialize_paragraph", "serialize_run", ] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/common.py b/src/hwpx/oxml/common.py index d541982..cb65c16 100644 --- a/src/hwpx/oxml/common.py +++ b/src/hwpx/oxml/common.py @@ -1,8 +1,11 @@ from __future__ import annotations +import logging from dataclasses import dataclass, field from typing import Dict, List, Optional +logger = logging.getLogger(__name__) + from lxml import etree from .utils import local_name diff --git a/src/hwpx/oxml/document.py b/src/hwpx/oxml/document.py index 08097a0..18f99c8 100644 --- a/src/hwpx/oxml/document.py +++ b/src/hwpx/oxml/document.py @@ -2,6 +2,7 @@ from __future__ import annotations +import logging from copy import deepcopy from dataclasses import dataclass from typing import Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Tuple, TypeVar @@ -30,6 +31,8 @@ ) from .utils import parse_int +logger = logging.getLogger(__name__) + _HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph" _HP = f"{{{_HP_NS}}}" _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head" @@ -2602,13 +2605,15 @@ def attach_document(self, document: "HwpxOxmlDocument") -> None: def _begin_num_element(self, create: bool = False) -> ET.Element | None: element = self._element.find(f"{_HH}beginNum") if element is None and create: - element = ET.SubElement(self._element, f"{_HH}beginNum") + element = self._element.makeelement(f"{_HH}beginNum", {}) + self._element.append(element) return element def _ref_list_element(self, create: bool = False) -> ET.Element | None: element = self._element.find(f"{_HH}refList") if element is None and create: - element = ET.SubElement(self._element, f"{_HH}refList") + element = self._element.makeelement(f"{_HH}refList", {}) + self._element.append(element) self.mark_dirty() return element @@ -2618,7 +2623,8 @@ def _border_fills_element(self, create: bool = False) -> ET.Element | None: return None element = ref_list.find(f"{_HH}borderFills") if element is None and create: - element = ET.SubElement(ref_list, f"{_HH}borderFills", {"itemCnt": "0"}) + element = ref_list.makeelement(f"{_HH}borderFills", {"itemCnt": "0"}) + ref_list.append(element) self.mark_dirty() return element @@ -2628,7 +2634,8 @@ def _char_properties_element(self, create: bool = False) -> ET.Element | None: return None element = ref_list.find(f"{_HH}charProperties") if element is None and create: - element = ET.SubElement(ref_list, f"{_HH}charProperties", {"itemCnt": "0"}) + element = ref_list.makeelement(f"{_HH}charProperties", {"itemCnt": "0"}) + ref_list.append(element) self.mark_dirty() return element @@ -2800,7 +2807,10 @@ def ensure_basic_border_fill(self) -> str: return existing new_id = self._allocate_border_fill_id(element) - element.append(_create_basic_border_fill_element(new_id)) + new_border_fill = _create_basic_border_fill_element(new_id) + if isinstance(element, LET._Element): + new_border_fill = LET.fromstring(ET.tostring(new_border_fill, encoding="utf-8")) + element.append(new_border_fill) self._update_border_fills_item_count(element) self.mark_dirty() return new_id @@ -3079,23 +3089,57 @@ def from_package(cls, package: "HwpxPackage") -> "HwpxOxmlDocument": history_paths = package.history_paths() version_path = package.version_path() - sections = [ - HwpxOxmlSection(path, package.get_xml(path)) for path in section_paths - ] - headers = [HwpxOxmlHeader(path, package.get_xml(path)) for path in header_paths] - master_pages = [ - HwpxOxmlMasterPage(path, package.get_xml(path)) - for path in master_page_paths - if package.has_part(path) - ] - histories = [ - HwpxOxmlHistory(path, package.get_xml(path)) - for path in history_paths - if package.has_part(path) - ] + sections: list[HwpxOxmlSection] = [] + for section_index, path in enumerate(section_paths): + try: + sections.append(HwpxOxmlSection(path, package.get_xml(path))) + except Exception: + logger.exception( + "section 파싱 실패: section_index=%d, part_path=%s", + section_index, + path, + ) + raise + + headers: list[HwpxOxmlHeader] = [] + for path in header_paths: + try: + headers.append(HwpxOxmlHeader(path, package.get_xml(path))) + except Exception: + logger.exception("header 파싱 실패: part_path=%s", path) + raise + + master_pages: list[HwpxOxmlMasterPage] = [] + for path in master_page_paths: + if not package.has_part(path): + logger.warning("masterPage 파트 누락: part_path=%s", path) + continue + try: + master_pages.append(HwpxOxmlMasterPage(path, package.get_xml(path))) + except Exception: + logger.exception("masterPage 파싱 실패: part_path=%s", path) + raise + + histories: list[HwpxOxmlHistory] = [] + for path in history_paths: + if not package.has_part(path): + logger.warning("history 파트 누락: part_path=%s", path) + continue + try: + histories.append(HwpxOxmlHistory(path, package.get_xml(path))) + except Exception: + logger.exception("history 파싱 실패: part_path=%s", path) + raise + version = None if version_path and package.has_part(version_path): - version = HwpxOxmlVersion(version_path, package.get_xml(version_path)) + try: + version = HwpxOxmlVersion(version_path, package.get_xml(version_path)) + except Exception: + logger.exception("version 파싱 실패: part_path=%s", version_path) + raise + elif version_path: + logger.warning("manifest가 가리키는 version 파트가 누락되었습니다: part_path=%s", version_path) return cls( manifest, sections, diff --git a/src/hwpx/oxml/header.py b/src/hwpx/oxml/header.py index f4a26a2..f0adbc8 100644 --- a/src/hwpx/oxml/header.py +++ b/src/hwpx/oxml/header.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import base64 import binascii from dataclasses import dataclass, field @@ -1364,3 +1365,5 @@ def parse_header_element(node: etree._Element) -> Header: "parse_track_change_authors", "parse_track_changes", ] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/header_part.py b/src/hwpx/oxml/header_part.py index b05949a..bbce725 100644 --- a/src/hwpx/oxml/header_part.py +++ b/src/hwpx/oxml/header_part.py @@ -2,6 +2,9 @@ from __future__ import annotations +import logging from .document import HwpxOxmlHeader __all__ = ["HwpxOxmlHeader"] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/memo.py b/src/hwpx/oxml/memo.py index 81706c4..556e2d3 100644 --- a/src/hwpx/oxml/memo.py +++ b/src/hwpx/oxml/memo.py @@ -2,6 +2,9 @@ from __future__ import annotations +import logging from .document import HwpxOxmlMemo __all__ = ["HwpxOxmlMemo"] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/paragraph.py b/src/hwpx/oxml/paragraph.py index 6387c26..c714859 100644 --- a/src/hwpx/oxml/paragraph.py +++ b/src/hwpx/oxml/paragraph.py @@ -2,6 +2,9 @@ from __future__ import annotations +import logging from .document import HwpxOxmlParagraph __all__ = ["HwpxOxmlParagraph"] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/parser.py b/src/hwpx/oxml/parser.py index 27e6cf6..4c51e62 100644 --- a/src/hwpx/oxml/parser.py +++ b/src/hwpx/oxml/parser.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import Callable, Dict, Optional from lxml import etree @@ -67,3 +68,5 @@ def parse_section_xml(source: XmlSource, *, schema_path: Optional[SchemaPath] = "parse_header_xml", "parse_section_xml", ] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/schema.py b/src/hwpx/oxml/schema.py index 3bfc0ab..5d1f162 100644 --- a/src/hwpx/oxml/schema.py +++ b/src/hwpx/oxml/schema.py @@ -1,9 +1,12 @@ from __future__ import annotations +import logging from pathlib import Path from typing import Union from urllib.parse import unquote, urlparse +logger = logging.getLogger(__name__) + from lxml import etree diff --git a/src/hwpx/oxml/section.py b/src/hwpx/oxml/section.py index a67e092..12dad5f 100644 --- a/src/hwpx/oxml/section.py +++ b/src/hwpx/oxml/section.py @@ -2,6 +2,9 @@ from __future__ import annotations +import logging from .document import HwpxOxmlSection __all__ = ["HwpxOxmlSection"] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/table.py b/src/hwpx/oxml/table.py index b7a0fee..34a862f 100644 --- a/src/hwpx/oxml/table.py +++ b/src/hwpx/oxml/table.py @@ -2,6 +2,9 @@ from __future__ import annotations +import logging from .document import HwpxOxmlTable __all__ = ["HwpxOxmlTable"] + +logger = logging.getLogger(__name__) diff --git a/src/hwpx/oxml/utils.py b/src/hwpx/oxml/utils.py index 73056b2..0ec8952 100644 --- a/src/hwpx/oxml/utils.py +++ b/src/hwpx/oxml/utils.py @@ -1,8 +1,11 @@ from __future__ import annotations +import logging from pathlib import Path from typing import Optional, Tuple, Union +logger = logging.getLogger(__name__) + from lxml import etree _TRUE_VALUES = {"1", "true", "True", "TRUE"} diff --git a/src/hwpx/package.py b/src/hwpx/package.py index 310002a..fe38bad 100644 --- a/src/hwpx/package.py +++ b/src/hwpx/package.py @@ -5,14 +5,20 @@ from __future__ import annotations +import logging from warnings import warn from .opc.package import HwpxPackage, HwpxPackageError, HwpxStructureError, RootFile, VersionInfo __all__ = ["HwpxPackage", "HwpxPackageError", "HwpxStructureError", "RootFile", "VersionInfo"] +logger = logging.getLogger(__name__) + warn( "'hwpx.package' 모듈은 더 이상 권장되지 않습니다. 'hwpx.opc.package'를 사용하세요.", DeprecationWarning, stacklevel=2, ) +logger.warning( + "'hwpx.package' 모듈은 더 이상 권장되지 않습니다. 'hwpx.opc.package'를 사용하세요." +)