Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/hwpx/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

from datetime import datetime
import logging
import uuid

from os import PathLike
Expand Down Expand Up @@ -40,6 +41,8 @@
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
_HH = f"{{{_HH_NS}}}"

logger = logging.getLogger(__name__)


def _append_element(
parent: Any,
Expand Down
53 changes: 41 additions & 12 deletions src/hwpx/opc/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import logging
import io
from dataclasses import dataclass
from pathlib import Path
Expand All @@ -19,6 +20,8 @@

__all__ = ["HwpxPackage", "HwpxPackageError", "HwpxStructureError", "RootFile", "VersionInfo"]

logger = logging.getLogger(__name__)

_OPF_NS = "http://www.idpf.org/2007/opf/"


Expand Down Expand Up @@ -144,6 +147,7 @@ def open(cls, pkg_file: str | Path | bytes | bytearray | BinaryIO) -> HwpxPackag

with ZipFile(stream, "r") as zf:
files = {info.filename: zf.read(info.filename) for info in zf.infolist()}
logger.debug("HWPX 패키지 파일 목록 %d개를 로드했습니다.", len(files))
if cls.MIMETYPE_PATH not in files:
raise HwpxStructureError("HWPX package is missing the mandatory 'mimetype' file.")
mimetype = files[cls.MIMETYPE_PATH].decode("utf-8")
Expand All @@ -158,21 +162,29 @@ def _parse_container(data: bytes | None) -> list[RootFile]:
raise HwpxStructureError(
"HWPX package is missing 'META-INF/container.xml'."
)
root = parse_xml(data)
try:
root = parse_xml(data)
except Exception:
logger.exception("container.xml 파싱에 실패했습니다.")
raise
rootfiles = []
for elem in root.findall(".//{*}rootfile"):
full_path = (
elem.get("full-path")
or elem.get("fullPath")
or elem.get("full_path")
)
full_path_attr = elem.get("full-path")
full_path = full_path_attr or elem.get("fullPath") or elem.get("full_path")
if full_path and not full_path_attr:
logger.warning(
"container.xml rootfile이 비표준 경로 속성명을 사용했습니다: %s",
elem.attrib,
)
if not full_path:
raise HwpxStructureError("container.xml contains a rootfile without 'full-path'.")
media_type = (
elem.get("media-type")
or elem.get("mediaType")
or elem.get("media_type")
)
media_type_attr = elem.get("media-type")
media_type = media_type_attr or elem.get("mediaType") or elem.get("media_type")
if media_type and not media_type_attr:
logger.warning(
"container.xml rootfile이 비표준 media-type 속성명을 사용했습니다: %s",
elem.attrib,
)
rootfiles.append(RootFile(full_path, media_type))
if not rootfiles:
raise HwpxStructureError("container.xml does not declare any rootfiles.")
Expand Down Expand Up @@ -208,7 +220,12 @@ def main_content(self) -> RootFile:
for rootfile in self._rootfiles:
if rootfile.media_type == "application/hwpml-package+xml":
return rootfile
return self._rootfiles[0]
selected = self._rootfiles[0]
logger.warning(
"표준 media_type 메인 rootfile이 없어 첫 항목으로 대체합니다: path=%s",
selected.full_path,
)
return selected

@property
def version_info(self) -> VersionInfo:
Expand All @@ -219,6 +236,7 @@ def read(self, path: str) -> bytes:
try:
return self._files[norm_path]
except KeyError as exc:
logger.warning("파트 누락: path=%s", norm_path)
raise HwpxPackageError(f"File '{norm_path}' is not present in the package.") from exc

def write(self, path: str, data: bytes | str) -> None:
Expand Down Expand Up @@ -348,6 +366,7 @@ def section_paths(self) -> list[str]:
if path and PurePosixPath(path).name.startswith("section")
]
if not paths:
logger.warning("manifest spine에서 section 경로를 찾지 못해 파일명 기반 fallback을 사용합니다.")
paths = [
name
for name in self._files.keys()
Expand All @@ -366,6 +385,10 @@ def header_paths(self) -> list[str]:
if path and PurePosixPath(path).name.startswith("header")
]
if not paths and self.has_part(self.HEADER_PATH):
logger.warning(
"manifest spine에서 header 경로를 찾지 못해 기본 header 경로 fallback을 사용합니다: %s",
self.HEADER_PATH,
)
paths = [self.HEADER_PATH]
self._header_paths_cache = paths
return list(self._header_paths_cache)
Expand All @@ -381,6 +404,7 @@ def master_page_paths(self) -> list[str]:
and item.attrib.get("href")
]
if not paths:
logger.warning("manifest에서 masterPage를 찾지 못해 파일명 탐색 fallback을 사용합니다.")
paths = [
name
for name in self._files.keys()
Expand All @@ -400,6 +424,7 @@ def history_paths(self) -> list[str]:
if self._manifest_matches(item, "history") and item.attrib.get("href")
]
if not paths:
logger.warning("manifest에서 history를 찾지 못해 파일명 탐색 fallback을 사용합니다.")
paths = [
name
for name in self._files.keys()
Expand All @@ -418,6 +443,10 @@ def version_path(self) -> str | None:
path = href
break
if path is None and self.has_part(self.VERSION_PATH):
logger.warning(
"manifest에서 version 파트를 찾지 못해 기본 경로 fallback을 사용합니다: %s",
self.VERSION_PATH,
)
path = self.VERSION_PATH
self._version_path_cache = path
self._version_path_cache_resolved = True
Expand Down
3 changes: 3 additions & 0 deletions src/hwpx/oxml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import logging
from .body import (
Paragraph,
Run,
Expand Down Expand Up @@ -215,3 +216,5 @@
"parse_text_span",
]

logger = logging.getLogger(__name__)

3 changes: 3 additions & 0 deletions src/hwpx/oxml/body.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union

Expand Down Expand Up @@ -430,3 +431,5 @@ def serialize_paragraph(paragraph: Paragraph) -> etree._Element:
"serialize_paragraph",
"serialize_run",
]

logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions src/hwpx/oxml/common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)

from lxml import etree

from .utils import local_name
Expand Down
84 changes: 64 additions & 20 deletions src/hwpx/oxml/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import logging
from copy import deepcopy
from dataclasses import dataclass
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Tuple, TypeVar
Expand Down Expand Up @@ -30,6 +31,8 @@
)
from .utils import parse_int

logger = logging.getLogger(__name__)

_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
_HP = f"{{{_HP_NS}}}"
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
Expand Down Expand Up @@ -2602,13 +2605,15 @@ def attach_document(self, document: "HwpxOxmlDocument") -> None:
def _begin_num_element(self, create: bool = False) -> ET.Element | None:
element = self._element.find(f"{_HH}beginNum")
if element is None and create:
element = ET.SubElement(self._element, f"{_HH}beginNum")
element = self._element.makeelement(f"{_HH}beginNum", {})
self._element.append(element)
return element

def _ref_list_element(self, create: bool = False) -> ET.Element | None:
element = self._element.find(f"{_HH}refList")
if element is None and create:
element = ET.SubElement(self._element, f"{_HH}refList")
element = self._element.makeelement(f"{_HH}refList", {})
self._element.append(element)
self.mark_dirty()
return element

Expand All @@ -2618,7 +2623,8 @@ def _border_fills_element(self, create: bool = False) -> ET.Element | None:
return None
element = ref_list.find(f"{_HH}borderFills")
if element is None and create:
element = ET.SubElement(ref_list, f"{_HH}borderFills", {"itemCnt": "0"})
element = ref_list.makeelement(f"{_HH}borderFills", {"itemCnt": "0"})
ref_list.append(element)
self.mark_dirty()
return element

Expand All @@ -2628,7 +2634,8 @@ def _char_properties_element(self, create: bool = False) -> ET.Element | None:
return None
element = ref_list.find(f"{_HH}charProperties")
if element is None and create:
element = ET.SubElement(ref_list, f"{_HH}charProperties", {"itemCnt": "0"})
element = ref_list.makeelement(f"{_HH}charProperties", {"itemCnt": "0"})
ref_list.append(element)
self.mark_dirty()
return element

Expand Down Expand Up @@ -2800,7 +2807,10 @@ def ensure_basic_border_fill(self) -> str:
return existing

new_id = self._allocate_border_fill_id(element)
element.append(_create_basic_border_fill_element(new_id))
new_border_fill = _create_basic_border_fill_element(new_id)
if isinstance(element, LET._Element):
new_border_fill = LET.fromstring(ET.tostring(new_border_fill, encoding="utf-8"))
element.append(new_border_fill)
self._update_border_fills_item_count(element)
self.mark_dirty()
return new_id
Expand Down Expand Up @@ -3079,23 +3089,57 @@ def from_package(cls, package: "HwpxPackage") -> "HwpxOxmlDocument":
history_paths = package.history_paths()
version_path = package.version_path()

sections = [
HwpxOxmlSection(path, package.get_xml(path)) for path in section_paths
]
headers = [HwpxOxmlHeader(path, package.get_xml(path)) for path in header_paths]
master_pages = [
HwpxOxmlMasterPage(path, package.get_xml(path))
for path in master_page_paths
if package.has_part(path)
]
histories = [
HwpxOxmlHistory(path, package.get_xml(path))
for path in history_paths
if package.has_part(path)
]
sections: list[HwpxOxmlSection] = []
for section_index, path in enumerate(section_paths):
try:
sections.append(HwpxOxmlSection(path, package.get_xml(path)))
except Exception:
logger.exception(
"section 파싱 실패: section_index=%d, part_path=%s",
section_index,
path,
)
raise

headers: list[HwpxOxmlHeader] = []
for path in header_paths:
try:
headers.append(HwpxOxmlHeader(path, package.get_xml(path)))
except Exception:
logger.exception("header 파싱 실패: part_path=%s", path)
raise

master_pages: list[HwpxOxmlMasterPage] = []
for path in master_page_paths:
if not package.has_part(path):
logger.warning("masterPage 파트 누락: part_path=%s", path)
continue
try:
master_pages.append(HwpxOxmlMasterPage(path, package.get_xml(path)))
except Exception:
logger.exception("masterPage 파싱 실패: part_path=%s", path)
raise

histories: list[HwpxOxmlHistory] = []
for path in history_paths:
if not package.has_part(path):
logger.warning("history 파트 누락: part_path=%s", path)
continue
try:
histories.append(HwpxOxmlHistory(path, package.get_xml(path)))
except Exception:
logger.exception("history 파싱 실패: part_path=%s", path)
raise

version = None
if version_path and package.has_part(version_path):
version = HwpxOxmlVersion(version_path, package.get_xml(version_path))
try:
version = HwpxOxmlVersion(version_path, package.get_xml(version_path))
except Exception:
logger.exception("version 파싱 실패: part_path=%s", version_path)
raise
elif version_path:
logger.warning("manifest가 가리키는 version 파트가 누락되었습니다: part_path=%s", version_path)
return cls(
manifest,
sections,
Expand Down
3 changes: 3 additions & 0 deletions src/hwpx/oxml/header.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import logging
import base64
import binascii
from dataclasses import dataclass, field
Expand Down Expand Up @@ -1364,3 +1365,5 @@ def parse_header_element(node: etree._Element) -> Header:
"parse_track_change_authors",
"parse_track_changes",
]

logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions src/hwpx/oxml/header_part.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from __future__ import annotations

import logging
from .document import HwpxOxmlHeader

__all__ = ["HwpxOxmlHeader"]

logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions src/hwpx/oxml/memo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from __future__ import annotations

import logging
from .document import HwpxOxmlMemo

__all__ = ["HwpxOxmlMemo"]

logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions src/hwpx/oxml/paragraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from __future__ import annotations

import logging
from .document import HwpxOxmlParagraph

__all__ = ["HwpxOxmlParagraph"]

logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions src/hwpx/oxml/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import logging
from typing import Callable, Dict, Optional

from lxml import etree
Expand Down Expand Up @@ -67,3 +68,5 @@ def parse_section_xml(source: XmlSource, *, schema_path: Optional[SchemaPath] =
"parse_header_xml",
"parse_section_xml",
]

logger = logging.getLogger(__name__)
3 changes: 3 additions & 0 deletions src/hwpx/oxml/schema.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import annotations

import logging
from pathlib import Path
from typing import Union
from urllib.parse import unquote, urlparse

logger = logging.getLogger(__name__)

from lxml import etree


Expand Down
Loading