Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix add keyword #229

Merged
merged 2 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 48 additions & 13 deletions scripts/python/src/fodt/add_keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,20 @@ def __init__(self, keyword: str, status: KeywordStatus, title: str) -> None:
self.in_table_row = False
self.keyword_table_number = self.get_keyword_table_number()
self.current_table_number = 0
self.start_tag_open = False # Flag for empty tags, close with />

def characters(self, content: str):
if self.in_styles:
self.maybe_close_start_tag(self.content)
self.content.write(XMLHelper.escape(content))
elif self.in_appendix_table:
if self.in_table_row:
self.maybe_close_start_tag(self.current_row)
self.current_row.write(XMLHelper.escape(content))
else:
if self.start_tag_open:
self.between_rows += ">"
self.start_tag_open = False
self.between_rows += content
# Capture stuff between the rows, such that we
# can add it back. There can be tags like
Expand All @@ -64,6 +70,7 @@ def characters(self, content: str):
self.current_table_number += 1
if self.current_table_number == self.keyword_table_number:
self.found_appendix_table = True
self.maybe_close_start_tag(self.content)
self.content.write(XMLHelper.escape(content))

def endElement(self, name: str):
Expand All @@ -73,7 +80,7 @@ def endElement(self, name: str):
self.in_table_row = False
if self.in_appendix_table:
if name == "table:table-row":
self.current_row.write(XMLHelper.endtag(name))
self.write_end_tag(self.current_row, name)
current_row = self.between_rows + self.current_row.getvalue()
self.between_rows = ''
self.rows.append(current_row)
Expand All @@ -82,21 +89,25 @@ def endElement(self, name: str):
self.in_appendix_table = False
self.write_appendix_table()
self.content.write(self.between_rows)
self.content.write(XMLHelper.endtag(name))
self.write_end_tag(self.content, name)
elif self.in_table_row:
self.current_row.write(XMLHelper.endtag(name))
self.write_end_tag(self.current_row, name)
else:
self.between_rows += XMLHelper.endtag(name)
if self.start_tag_open:
self.between_rows += "/>"
self.start_tag_open = False
else:
self.between_rows += XMLHelper.endtag(name)
else:
if self.in_styles:
if name == "office:automatic-styles":
self.in_styles = False
self.write_missing_styles()
self.content.write(XMLHelper.endtag(name))
self.write_end_tag(self.content, name)

def extract_keyword_name(self, href: str) -> str:
# Assume href starts with "#xxx.yyy.zzz.KEYWORD_NAME<space>"
if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)\s+", href):
if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)(?:\s+|$)", href):
return m.group(1)
else:
return '<NOT FOUND>'
Expand Down Expand Up @@ -124,6 +135,14 @@ def get_new_appendix_row(self) -> str:
new_row = re.sub(r'###COLOR###', color, new_row)
return new_row

def maybe_close_start_tag(self, buffer: io.StringIO) -> None:
    """Terminate a pending start tag in *buffer* with ``>``.

    Does nothing unless ``self.start_tag_open`` is set. When it is set,
    a ``>`` is written to *buffer* and the flag is cleared.

    Args:
        buffer: The text buffer that received the unterminated start tag.
    """
    if not self.start_tag_open:
        return
    # NOTE: characters() is only called if there is content between the start
    # tag and the end tag. If there is no content, characters() is not called.
    buffer.write(">")
    self.start_tag_open = False


def startDocument(self):
self.content.write(XMLHelper.header)

Expand All @@ -138,7 +157,7 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
elif name == "office:automatic-styles":
self.in_styles = True
if self.in_styles:
self.content.write(XMLHelper.starttag(name, attrs))
self.write_start_tag(self.content, name, attrs)
else:
if name == "table:table-row":
self.in_table_row = True
Expand All @@ -157,11 +176,12 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
)
if self.in_appendix_table:
if self.in_table_row:
self.current_row.write(XMLHelper.starttag(name, attrs))
self.write_start_tag(self.current_row, name, attrs)
else:
self.between_rows += XMLHelper.starttag(name, attrs)
self.start_tag_open = True
self.between_rows += XMLHelper.starttag(name, attrs, close_tag=False)
else:
self.content.write(XMLHelper.starttag(name, attrs))
self.write_start_tag(self.content, name, attrs)

def write_appendix_table(self) -> None:
idx_found = False
Expand All @@ -175,11 +195,26 @@ def write_appendix_table(self) -> None:
if not idx_found: # last item in the list
self.content.write(new_row)

def write_end_tag(self, buffer: io.StringIO, name: str) -> None:
    """Write the closing markup for element *name* to *buffer*.

    If the start tag is still unterminated (``self.start_tag_open``), the
    element is finished as a self-closing tag by writing ``/>`` and the
    flag is cleared. Otherwise the end tag produced by
    ``XMLHelper.endtag(name)`` is written.

    Args:
        buffer: The text buffer the element is being written to.
        name: The qualified element name.
    """
    if not self.start_tag_open:
        buffer.write(XMLHelper.endtag(name))
        return
    # The start tag was never terminated, so collapse it into "<name ... />".
    buffer.write("/>")
    self.start_tag_open = False

def write_missing_styles(self):
    """Write the template of each style in ``self.style_names``.

    Each template is looked up in ``self.style_templates`` and written to
    ``self.content``, followed by a newline.
    """
    emit = self.content.write
    for key in self.style_names:
        emit(self.style_templates[key])
        emit("\n")

def write_start_tag(
    self, buffer: io.StringIO, name: str, attrs: xml.sax.xmlreader.AttributesImpl
) -> None:
    """Write an unterminated start tag for element *name* to *buffer*.

    If a previous start tag is still unterminated, it is first finished
    with ``>``. The new tag is written with ``close_tag=False`` and
    ``self.start_tag_open`` is set.

    Args:
        buffer: The text buffer the element is being written to.
        name: The qualified element name.
        attrs: The attributes of the element.
    """
    previous_tag_pending = self.start_tag_open
    if previous_tag_pending:
        # Close the start tag
        buffer.write(">")
    self.start_tag_open = True
    opening = XMLHelper.starttag(name, attrs, close_tag=False)
    buffer.write(opening)


class AddKeyword():
def __init__(
Expand All @@ -192,8 +227,8 @@ def __init__(
title: str,
status: KeywordStatus
) -> None:
self.maindir = maindir
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir)
self.maindir = Helpers.get_maindir(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
self.keyword = keyword
self.chapter = chapter
self.section = section
Expand Down Expand Up @@ -221,7 +256,7 @@ def add_keyword(self) -> None:

def update_appendixA(self) -> None:
logging.info(f"Updating appendix A.")
self.filename = Path(self.maindir) / Directories.appendices / f"A.{FileExtensions.fodt}"
self.filename = self.maindir / Directories.appendices / f"A.{FileExtensions.fodt}"
if not self.filename.is_file():
raise FileNotFoundError(f"File {self.filename} not found.")
# parse the xml file
Expand Down
1 change: 1 addition & 0 deletions scripts/python/src/fodt/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Directories():
backup = "backup"
info = "info"
keywords = "keywords"
keyword_names = "keyword-names"
meta = "meta"
meta_sections = "sections"
parts = "parts"
Expand Down
4 changes: 2 additions & 2 deletions scripts/python/src/fodt/create_subdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,14 @@ def get_parts(self) -> list[str]:
class CreateSubDocument3(CreateSubDocument):
def __init__(
self,
maindir: str,
maindir: Path,
keyword_dir: str,
chapter: str,
section: str,
keyword: str,
title: str,
) -> None:
self.maindir = Path(maindir)
self.maindir = maindir
self.keyword_dir = keyword_dir
self.chapter = chapter
self.section = section
Expand Down
35 changes: 29 additions & 6 deletions scripts/python/src/fodt/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import importlib.resources # access non-code resources
import shutil
import xml.sax.saxutils

from pathlib import Path
from fodt.constants import Directories, FileExtensions, FileNames
Expand Down Expand Up @@ -55,12 +54,14 @@ def derive_maindir_from_filename(filename: str) -> Path:
# This should never be reached

@staticmethod
def get_keyword_dir(keyword_dir: str) -> str:
def get_keyword_dir(keyword_dir: str, maindir: Path) -> str:
if keyword_dir is None:
try_path = Path('../keyword-names')
if try_path.exists():
keyword_dir = try_path
else:
# Default value for keyword_dir is a relative path like "../../keyword-names"
keyword_dir = Path(f'../../{Directories.keyword_names}')
if not keyword_dir.exists():
main_dir = Helpers.locate_maindir_from_current_dir()
keyword_dir = main_dir.parent / Directories.keyword_names
if not keyword_dir.exists():
raise FileNotFoundError(f"Keyword names directory not found.")
return keyword_dir

Expand Down Expand Up @@ -160,6 +161,28 @@ def locate_maindir_and_filename(
f"called '{maindir.name}'.")


@staticmethod
def locate_maindir_from_current_dir() -> Path:
    """Locate maindir by searching cwd and each of its parent directories.

    Starting at the current working directory and moving upwards, each
    directory is checked for a sub-directory named ``Directories.parts``
    that contains ``FileNames.main_document``. The first such
    sub-directory is returned. The filesystem root itself is also checked.

    Returns:
        The path of the matching ``Directories.parts`` directory.

    Raises:
        FileNotFoundError: If no directory from cwd up to and including the
            root contains such a sub-directory.
    """
    # We cannot use derive_maindir_from_filename() here because cwd does not
    # have to be inside maindir in this case
    cwd = Path.cwd()
    # Path.cwd() is absolute, so cwd.parents ends at the filesystem root.
    for base in (cwd, *cwd.parents):
        # Check if this directory has a child directory called "parts"
        # that contains the main document.
        candidate = base / Directories.parts
        if candidate.is_dir() and (candidate / FileNames.main_document).exists():
            return candidate
    raise FileNotFoundError(f"Could not derive maindir from cwd: "
        f"Could not find '{FileNames.main_document}' in a directory "
        f"called '{Directories.parts}' by searching the parent "
        f"directories of cwd."
    )

@staticmethod
def locate_maindir_from_current_dir() -> Path:
cwd = Path.cwd()
Expand Down
18 changes: 16 additions & 2 deletions scripts/python/src/fodt/remove_subsections.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ def __init__(
self.done = False
self.remove_section = False
self.in_main_section = False
self.start_tag_open = False # Handle empty tags

def characters(self, content: str):
if self.start_tag_open:
# NOTE: characters() is only called if there is content between the start
# tag and the end tag. If there is no content, characters() is not called.
self.content.write(">")
self.start_tag_open = False
# if (not self.in_subsection) and (not self.remove_section):
if not self.in_main_section:
self.content.write(XMLHelper.escape(content))
Expand All @@ -60,7 +66,11 @@ def endElement(self, name: str):
self.done = True
self.in_main_section = False
if (not self.in_subsection) and (not self.remove_section):
self.content.write(XMLHelper.endtag(name))
if self.start_tag_open:
self.content.write("/>")
self.start_tag_open = False
else:
self.content.write(XMLHelper.endtag(name))
if name == "text:section":
if self.remove_section:
self.remove_section = False
Expand Down Expand Up @@ -104,14 +114,18 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
if self.check_included_section(name, attrs):
self.remove_section = True
self.in_main_section = True
if self.start_tag_open:
self.content.write(">") # Close the start tag
self.start_tag_open = False
if write_include:
self.in_main_section = True
part = f"{self.chapter}.{self.section}.{self.current_subsection}"
keyword = self.keywords[self.current_subsection - 1]
callback = self.replace_callback
self.content.write(callback(part, keyword))
if (not self.in_subsection) and (not self.remove_section):
self.content.write(XMLHelper.starttag(name, attrs))
self.start_tag_open = True
self.content.write(XMLHelper.starttag(name, attrs, close_tag=False))

def write_file(self):
filename = Path(self.outputfn)
Expand Down
4 changes: 2 additions & 2 deletions scripts/python/src/fodt/split_subdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class Splitter():
def __init__(self, maindir: str, keyword_dir: str, chapter: int, section: int) -> None:
self.chapter = chapter
self.section = section
self.maindir = Path(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir)
self.maindir = Helpers.get_maindir(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
self.metadata_dir = self.maindir / Directories.meta
assert self.maindir.is_dir()

Expand Down
10 changes: 10 additions & 0 deletions scripts/python/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def test_locate_with_absolute_path_exists(self, tmp_path: Path) -> None:
assert result_filename == filename

def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None:
"""Test locating maindir and filename when the maindir is given as an absolute path
and the main file does not exist. This should raise an error."""
maindir = tmp_path / Directories.parts
maindir.mkdir()
mainfile = maindir / FileNames.main_document
Expand All @@ -39,6 +41,8 @@ def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None:
f"called '{Directories.parts}'" in str(excinfo.value))

def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> None:
"""Test locating maindir and filename when the maindir is absolute and the
filename is a relative path."""
maindir = tmp_path / Directories.parts
maindir.mkdir()
mainfile = maindir / FileNames.main_document
Expand All @@ -60,6 +64,9 @@ def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> No
def test_locate_with_relative_path_not_in_maindir_but_in_cwd(
self, tmp_path: Path
):
"""Test locating maindir and filename when the maindir is absolute and the
filename is a relative path. The filename is not found in the maindir but
is found in the current working directory."""
cwd = tmp_path / "cwd"
cwd.mkdir()
os.chdir(str(cwd))
Expand All @@ -78,6 +85,9 @@ def test_locate_with_relative_path_not_in_maindir_but_in_cwd(
)

def test_locate_with_absolute_path_not_exists(self, tmp_path: Path):
"""Test locating maindir and filename when the maindir is absolute and the
filename is a relative path. The filename is not found in the maindir and
is not found in the current working directory. This should raise an error."""
maindir = tmp_path / Directories.parts
maindir.mkdir()
filename = tmp_path / "nonexistent.fodt"
Expand Down
Loading