Skip to content

Commit

Permalink
Programs to "sort" and compare IFEX content
Browse files Browse the repository at this point in the history
stable_sort_ifex defines a known order of keys so that IFEX files can be
reliably compared.  It is a module but can also be run as a standalone
script.

The diff_ifex.py script will print the diff between two files, after
normalizing the order of the content using the stable_sort_ifex module.
It can also be used with an external diff program.

Signed-off-by: Gunnar Andersson <gunnar_dev@[email protected]>
  • Loading branch information
Gunnar Andersson authored and gunnar-mb committed Jun 18, 2024
1 parent 08d92c5 commit 474e9f2
Show file tree
Hide file tree
Showing 2 changed files with 210 additions and 0 deletions.
109 changes: 109 additions & 0 deletions ifex/model/stable_sort_ifex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/python
# SPDX-FileCopyrightText: Copyright (c) 2023 Novaspring AB
# SPDX-License-Identifier: MPL-2.0

# This file is part of the IFEX project

from collections import OrderedDict
import argparse
import sys
import yaml

# This file formats a YAML input in a fixed order ("sorted", basically).
# This facilitates reliable comparisons / diffing.

# The implementation is separated from diff/comparison-scripts to factor
# it out of that code. The first implementation is simple but it might be
# refined over time.


def ifex_stable_order(data):
    """Return a copy of *data* with all mapping keys in a stable order.

    Mappings are rebuilt as ``OrderedDict`` objects: the ``name`` key comes
    first (only when its value is not ``None``), followed by all keys in
    sorted order.  Lists keep their element order, but every element is
    processed recursively.  Any other value is returned unchanged.

    Args:
        data: Parsed YAML content (nested dicts, lists and scalars).

    Returns:
        The reordered structure, with every dict replaced by an
        ``OrderedDict``.
    """
    if isinstance(data, dict):
        ordered_data = OrderedDict()

        # Claim the first slot for "name".  The loop below assigns the key
        # again (with its recursively ordered value); re-assigning an
        # existing key does not change its position.
        name = data.get("name")
        if name is not None:
            ordered_data["name"] = name

        for key in _sorted_keys(data):
            ordered_data[key] = ifex_stable_order(data[key])

        return ordered_data

    if isinstance(data, list):
        return [ifex_stable_order(item) for item in data]

    return data


def _sorted_keys(mapping):
    """Return the keys of *mapping* in sorted order, tolerating mixed types."""
    try:
        return sorted(mapping)
    except TypeError:
        # YAML allows non-string keys (ints, booleans, null, ...) and Python
        # refuses to compare those with strings.  Fall back to an order that
        # is still deterministic: group by type name, then by text form.
        return sorted(mapping, key=lambda k: (type(k).__name__, str(k)))


# If an ordered dict is printed as a normal dict we get a lot of unrelated
# metadata output. Therefore, we need to specify how PyYAML shall represent an
# ordered dict (PyYAML does not seem to have built-in OrderedDict support).
#
# Solution from:
# https://stackoverflow.com/questions/16782112/can-pyyaml-dump-dict-items-in-non-alphabetical-order
def represent_ordereddict(dumper, data):
    """PyYAML representer that emits an OrderedDict as a YAML mapping node.

    Every key and value is converted to a node with
    ``dumper.represent_data``, following the iteration order of *data*, and
    the resulting pairs are wrapped in a ``MappingNode`` carrying the
    ``tag:yaml.org,2002:map`` tag.
    """
    pairs = [
        (dumper.represent_data(k), dumper.represent_data(v))
        for k, v in data.items()
    ]
    return yaml.nodes.MappingNode("tag:yaml.org,2002:map", pairs)


# ---------------------------------------------------------------------
# MAIN, used if this file is run standalone
# ---------------------------------------------------------------------
def usage():
    """Print a description of the stable ordering rules to standard output."""
    description = """
This script reorders IFEX (YAML) input into a stable ("sorted") order and prints the result back out.
The stable order is basically:
0. Comments have no semantic meaning so they will be filtered out completely
1. For dicts with key-value mappings, put the item 'name' first (if there is a key for 'name')
2. Then, all other keys in alphabetical order
3. Lists are not re-arranged (TODO: consider if lists should be sorted "by name" somehow?)
4. Anything else remains in the input order.
"""
    print(description)


def main():
    """Command-line entry point: print an IFEX (YAML) input in stable order.

    Reads the file named by the single optional positional argument, or
    STDIN when that argument is ``-``, reorders the parsed content with
    ``ifex_stable_order`` and prints the resulting YAML dump.  When no
    argument is given, the argparse help and the usage text are printed and
    the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Reorder IFEX (YAML) input into a stable ("sorted") order and print the result back out.'
    )
    parser.add_argument("file1", help="Input file. (- to use STDIN)", nargs="?")
    args = parser.parse_args()

    if args.file1 is None:
        parser.print_help()
        usage()
        sys.exit(1)

    if args.file1 == "-":
        # Use STDIN if file is '-'
        data = yaml.safe_load(sys.stdin)
    else:
        with open(args.file1, "r") as file:
            data = yaml.safe_load(file)

    out = ifex_stable_order(data)

    # Register the representer so the OrderedDict objects are dumped as
    # plain mappings; sort_keys=False keeps the order established above.
    yaml.add_representer(OrderedDict, represent_ordereddict)
    print(yaml.dump(out, sort_keys=False))


if __name__ == "__main__":
    main()
101 changes: 101 additions & 0 deletions ifex/scripts/diff_ifex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/python

# SPDX-FileCopyrightText: Copyright (c) 2023 Novaspring AB
# SPDX-License-Identifier: MPL-2.0

# This file is part of the IFEX project

import argparse
import subprocess
import tempfile
import yaml
from collections import OrderedDict
from ifex.model.stable_sort_ifex import ifex_stable_order, represent_ordereddict

# The program compares two IFEX (YAML) files after normalizing ("sorting",
# basically) the order of elements so that the comparison becomes more relevant.

# The normal unix diff command seems to give the most useful output:
def diff_files_with_external_program(path1, path2):
    """Return the text printed by the standard unix diff for the two paths."""
    # diff exits with a non-zero status when the inputs differ.  That must
    # not be treated as a failure, hence run() with check=False rather than
    # check_output().
    command = ["diff", path1, path2]
    completed = subprocess.run(
        command, stdout=subprocess.PIPE, check=False, text=True
    )
    return completed.stdout


# Alternative, using difflib
def diff_files(path1, path2):
    """Use difflib to print the difference between the given files.

    Both files are read completely and compared line by line.  The result is
    written to standard output in difflib's context-diff format.

    Args:
        path1: Path of the first (original) file.
        path2: Path of the second (possibly changed) file.
    """
    # Imported locally: only this alternative diff routine needs difflib.
    import difflib

    with open(path1, "r") as f1:
        l1 = f1.readlines()
    with open(path2, "r") as f2:
        l2 = f2.readlines()

    # The diff lines already carry their own line endings.
    for line in difflib.context_diff(l1, l2):
        print(line, end="")


def stable_order_file(file1):
    """Write the stably ordered YAML content of *file1* to a temporary file.

    The input is parsed with ``yaml.safe_load``, reordered with
    ``ifex_stable_order`` and dumped with ``sort_keys=False`` so the
    established order is kept.

    Args:
        file1: Path of the YAML file to read.

    Returns:
        The name of the newly written temporary file.  It is created with
        ``delete=False``, so it is not removed automatically.

    Any exception from opening, parsing or dumping propagates to the caller.
    """
    with open(file1, "r") as f1:
        ordered = ifex_stable_order(yaml.safe_load(f1))

    yaml.add_representer(OrderedDict, represent_ordereddict)
    text = yaml.dump(ordered, sort_keys=False)

    # Create the temporary file only after parsing and dumping succeeded, so
    # a failure in either step does not leave an orphan file behind.
    with tempfile.NamedTemporaryFile("w", delete=False) as f2:
        f2.write(text)
    return f2.name


def compare_yaml_files(file1, file2):
    """Diff two YAML files after rewriting each with its keys in stable order.

    Each input is written to a new temporary file by ``stable_order_file``.
    The mapping from input names to temporary names is printed, and the
    output of ``diff_files_with_external_program`` on the two temporary
    files is returned.
    """
    ordered1 = stable_order_file(file1)
    ordered2 = stable_order_file(file2)

    print(
        "Stable sorting...",
        f"temporary files are {file1} -> {ordered1}, {file2} -> {ordered2}",
        "Comparing files:",
        sep="\n",
    )
    return diff_files_with_external_program(ordered1, ordered2)


# ---------------------------------------------------------------------
# MAIN, used if this file is run standalone
# ---------------------------------------------------------------------


def main():
    """Command-line entry point: compare two IFEX (YAML) files.

    Takes two positional file arguments.  With ``-p`` only the paths of the
    stably ordered temporary files are printed, one per line; otherwise the
    result of ``compare_yaml_files`` is printed.
    """
    parser = argparse.ArgumentParser(
        description="Compare IFEX (YAML) file contents, after normalizing order of elements."
    )
    parser.add_argument("file1", help="First, original file")
    parser.add_argument("file2", help="Second, possibly changed file")
    parser.add_argument(
        "-p",
        action="store_true",
        default=False,
        help="Only print the created temporary file paths, for use with an external diff program",
    )
    args = parser.parse_args()

    if not args.p:
        # Default mode: order both files and print the diff
        print(compare_yaml_files(args.file1, args.file2))
        return

    # -p given: print the temporary file names only
    for path in (args.file1, args.file2):
        print(stable_order_file(path))


if __name__ == "__main__":
    main()

0 comments on commit 474e9f2

Please sign in to comment.