-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Description
I took a pass at `.git-hooks/pretty_xml.py` to see if I could improve it; here is what I came up with. There may be mistakes and things that could still be improved. Note that I removed the `--cached` flag from the `git diff` command, and I'm not sure whether that is better or not.
#!/usr/bin/env python
"""Formats XML into a more human readable form."""
import argparse
import os
import pathlib
import subprocess
from xml.dom import minidom
from xml.parsers import expat
def remove_whitespace_nodes(node: minidom.Node) -> None:
    """Strip whitespace-only text nodes from ``node`` and its descendants.

    The tree is modified in place. Text nodes that contain any
    non-whitespace character are left untouched.
    """
    blanks = []
    for child in node.childNodes:
        is_blank_text = (
            child.nodeType == minidom.Node.TEXT_NODE and not child.data.strip()
        )
        if is_blank_text:
            # Collect first; removing while iterating would skip siblings.
            blanks.append(child)
            continue
        if child.hasChildNodes():
            remove_whitespace_nodes(child)
    for blank in blanks:
        node.removeChild(blank)
def format_xml(file_path: pathlib.Path, dry_run: bool) -> None:
    """Format an XML file in place with xmllint, falling back to minidom.

    Args:
        file_path: XML file to format.
        dry_run: When true, only check that the file can be processed and
            leave it untouched on disk.

    Raises:
        expat.ExpatError: If the minidom fallback cannot parse the file.
    """
    # First try with xmllint.
    str_path = str(file_path)
    command = ["xmllint", "--format", str_path]
    if dry_run:
        command += ["--noout"]
    else:
        command += ["-o", str_path]
    try:
        subprocess.run(command, check=True)
        return
    except (FileNotFoundError, subprocess.CalledProcessError):
        # FileNotFoundError: xmllint is not installed, so use the Python
        # formatter. CalledProcessError: xmllint rejected the file, so fall
        # through to minidom and let that show the error.
        pass

    # Fallback to using minidom.
    content = file_path.read_text(encoding="utf-8")
    try:
        dom = minidom.parseString(content)
    except expat.ExpatError as e:
        print(f"[error] Failed to parse {file_path}: {e}")
        raise
    if dry_run:
        # A dry run validates the file but must never rewrite it.
        return
    remove_whitespace_nodes(dom)
    pretty_xml = dom.toprettyxml(indent=" ")
    # Avoid extra blank lines, and end the file with a single newline.
    lines = [line for line in pretty_xml.splitlines() if line.strip()]
    file_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
def main() -> None:
    """Format every changed ``.xml`` file reported by ``git diff`` and stage it.

    With ``--dry-run`` the files are passed to ``format_xml`` in dry-run mode
    and ``git add`` is skipped.
    """
    parser = argparse.ArgumentParser()
    # default=False keeps args.dry_run a real bool; argparse would otherwise
    # leave it as None when the flag is absent.
    parser.add_argument(
        "--dry-run", action=argparse.BooleanOptionalAction, default=False
    )
    args = parser.parse_args()

    # Get list of changed files (added, copied, or modified).
    # NOTE(review): without --cached, git diff compares the working tree
    # against the index, so only unstaged changes are listed. Confirm that is
    # what this hook should look at.
    command = ["git", "diff", "--name-only", "--diff-filter=ACM"]
    result = subprocess.run(command, capture_output=True, encoding="utf-8").stdout
    files = [pathlib.Path(f) for f in result.splitlines() if f.endswith(".xml")]
    if not files:
        print("Failed to find any xml files with changes.")
        return

    for file in files:
        print(f"Formatting {file}")
        format_xml(file, dry_run=args.dry_run)
        if args.dry_run:
            print(f"Skipping git add for {file}.")
        else:
            subprocess.run(["git", "add", file])
# Run the formatter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Metadata
Assignees
Labels
No labels