From ffce3a8c2324a3dabb4c4fcc5f44f2d187b44342 Mon Sep 17 00:00:00 2001 From: dchiller Date: Thu, 16 Jun 2022 11:54:40 -0500 Subject: [PATCH 1/9] Adds mei_cleaning script The script can be run at the command line to remove unreferenced zones and multiple identical zones that refer to identical objects. --- utilities/mei_cleaning.py | 114 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 utilities/mei_cleaning.py diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py new file mode 100644 index 0000000..14798ae --- /dev/null +++ b/utilities/mei_cleaning.py @@ -0,0 +1,114 @@ +import xml.etree.ElementTree as ET +from itertools import combinations +import argparse +import re + +parser = argparse.ArgumentParser(description="Utilities for cleaning mei files") +parser.add_argument('mei_file', type = str, nargs = '?', help = "Path to mei file for cleaning",action = 'store') +parser.add_argument('--remove_unreferenced_zones', action = 'store_true', help = "If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.") +parser.add_argument('--remove_identical_duplicates', action = 'store_true', help = "If flagged, removes duplicate zones/bounding boxes and duplicate objects that reference those bounding boxes.") +parser.add_argument('--destination_file', action = 'store', default = None, type = str, nargs = '?', help = "If provided, the cleaned file is save here. If omitted, file is save to mei_file location.") +args = parser.parse_args() + +MEINS = "{http://www.music-encoding.org/ns/mei}" +XMLNS = "{http://www.w3.org/XML/1998/namespace}" + +def clean_mei_file(filepath, + remove_unreferenced = True, + remove_identical_duplicates = True): + print(f"CLEANING MEI FILE: {filepath}") + xml_tree, xml_declarations = read_mei_file(filepath) + mei = xml_tree.getroot() + if remove_unreferenced: + mei = remove_unreferenced_zones(mei) + if remove_identical_duplicates: + mei = remove_identical_elements(mei) + return mei, xml_declarations + +def parse_zones(mei): + """Get the zones (bounding boxes) from an MEI root element.""" + zones = {} + for zone in mei.iter(f"{MEINS}zone"): + zone_id = zone.get(f"{XMLNS}id") + coordinate_names = ["ulx", "uly", "lrx", "lry"] + coordinates = [int(zone.get(c, -1)) for c in coordinate_names] + rotate = float(zone.get("rotate", 0.0)) + zones[f"#{zone_id}"] = { + "coordinates": tuple(coordinates), + "rotate": rotate, + } + return zones + +def find_duplicate_zones(mei): + zones = parse_zones(mei) + dupe_zone_list = [] + for z1, z2 in combinations(zones.keys(), 2): + if zones[z1] == zones[z2]: + dupe_zone_list.append((z1,z2)) + return dupe_zone_list + +def remove_unreferenced_zones(mei): + """Removes any zones defined in the facsimile section of mei (ie. 
+ zone elements for which coordinates are defined) but that are not + associated with any mei element in the score.""" + music = mei.find(f'{MEINS}music') + surface = music.find(f'{MEINS}facsimile/{MEINS}surface') + defined_zones = surface.findall(f'{MEINS}zone') + body_str = ET.tostring(music.find(f'{MEINS}body'), encoding = 'unicode') + for def_z in defined_zones: + zone_id = def_z.get(f'{XMLNS}id') + if zone_id not in body_str: + surface.remove(def_z) + print(f"Unreferenced zone removed: {zone_id}") + return mei + +def remove_identical_elements(mei): + """Removes elements that are identical and associated with + two bounding boxes with the same coordinates.""" + duplicate_zones = find_duplicate_zones(mei) + surface = mei.find(f'{MEINS}music/{MEINS}facsimile/{MEINS}surface') + layer = mei.find(f'./{MEINS}music/{MEINS}body/{MEINS}mdiv/{MEINS}score/{MEINS}section/{MEINS}staff/{MEINS}layer') + for dup_zone_pair in duplicate_zones: + parent_elems = [layer.find(f".//*[@facs='{dup}']/..") for dup in dup_zone_pair] + elems = [layer.find(f".//*[@facs='{dup}']") for dup in dup_zone_pair] + attribs = [elem.attrib for elem in elems] + attribs_copy = [a.copy() for a in attribs] + for a in attribs_copy: + a.pop(f'{XMLNS}id') + a.pop('facs') + if attribs_copy[0] == attribs_copy[1]: + parent_elems[1].remove(elems[1]) + zone_id_to_del = dup_zone_pair[1] + zone_id_to_del = zone_id_to_del.replace('#','') + zone_to_del = surface.find(f"*[@{XMLNS}id='{zone_id_to_del}']") + surface.remove(zone_to_del) + print(f"Identical zones/elements removed: {zone_id_to_del}") + return mei + +def read_mei_file(filepath): + xml_tree = ET.parse(filepath) + declarations = [] + with open(filepath, 'r') as in_file: + for f_line in in_file: + if re.fullmatch("^<\?.*\?>\n$", f_line): + declarations.append(f_line) + else: + break + xml_declarations = ''.join(declarations) + return xml_tree, xml_declarations + +def save_mei_file(xml_tree, xml_declarations, filepath): + xml_str = ET.tostring(xml_tree, encoding = 'unicode') + formatted_xml_str = re.sub(" \/>", "/>", xml_str) + formatted_xml_str = ''.join([xml_declarations, formatted_xml_str]) + with open(filepath, 'w') as out_file: + out_file.write(formatted_xml_str) + +if __name__ == "__main__": + ET.register_namespace("","http://www.music-encoding.org/ns/mei") + cleaned_mei, xml_declarations = clean_mei_file(args.mei_file, remove_unreferenced=args.remove_unreferenced_zones, + remove_identical_duplicates=args.remove_identical_duplicates) + if args.destination_file: + save_mei_file(cleaned_mei, xml_declarations, args.destination_file) + else: + save_mei_file(cleaned_mei, xml_declarations, args.mei_file) From 517b3d176d49de591a589261f19aec6a26325d01 Mon Sep 17 00:00:00 2001 From: dchiller Date: Thu, 16 Jun 2022 12:35:29 -0500 Subject: [PATCH 2/9] Add multi-file support to utilities/mei_cleaning.py --- utilities/mei_cleaning.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py index 14798ae..9cfd219 100644 --- a/utilities/mei_cleaning.py +++ b/utilities/mei_cleaning.py @@ -2,18 +2,40 @@ from itertools import combinations import argparse import re +import os parser = argparse.ArgumentParser(description="Utilities for cleaning mei files") -parser.add_argument('mei_file', type = str, nargs = '?', help = "Path to mei file for cleaning",action = 'store') +parser.add_argument('mei_path', type = str, nargs = '?', help = "Path to mei file for cleaning. 
If a directory, cleans all mei files in the directory.",action = 'store')
 parser.add_argument('--remove_unreferenced_zones', action = 'store_true', help = "If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.")
 parser.add_argument('--remove_identical_duplicates', action = 'store_true', help = "If flagged, removes duplicate zones/bounding boxes and duplicate objects that reference those bounding boxes.")
-parser.add_argument('--destination_file', action = 'store', default = None, type = str, nargs = '?', help = "If provided, the cleaned file is save here. If omitted, file is save to mei_file location.")
+parser.add_argument('--destination_path', action = 'store', default = None, type = str, nargs = '?', help = "If provided, the cleaned file is save here. If omitted, file is save to mei_path location. If mei_path is a directory, this should also be a directory.")
 args = parser.parse_args()
 
 MEINS = "{http://www.music-encoding.org/ns/mei}"
 XMLNS = "{http://www.w3.org/XML/1998/namespace}"
 
-def clean_mei_file(filepath,
+ET.register_namespace("","http://www.music-encoding.org/ns/mei")
+
+def clean_mei_files(path, destination_path = None,
+                    remove_unreferenced = True,
+                    remove_identical_duplicates = True):
+    if os.path.isfile(path):
+        cleaned_mei, xml_declarations = clean_mei(path, remove_unreferenced=remove_unreferenced,remove_identical_duplicates=remove_identical_duplicates)
+        if destination_path:
+            save_mei_file(cleaned_mei, xml_declarations, destination_path)
+        else:
+            save_mei_file(cleaned_mei, xml_declarations, path)
+    if os.path.isdir(path):
+        mei_files = [file for file in os.listdir(path) if file.endswith('.mei')]
+        for mei_f in mei_files:
+            cleaned_mei, xml_declarations = clean_mei(os.path.join(path, mei_f), remove_unreferenced=remove_unreferenced,remove_identical_duplicates=remove_identical_duplicates)
+            if destination_path:
+                save_mei_file(cleaned_mei, xml_declarations, os.path.join(destination_path, mei_f))
+            else:
+                save_mei_file(cleaned_mei, xml_declarations, os.path.join(path, mei_f))
+
+
+def clean_mei(filepath,
              remove_unreferenced = True,
              remove_identical_duplicates = True):
     print(f"CLEANING MEI FILE: {filepath}")
@@ -105,10 +127,5 @@ def save_mei_file(xml_tree, xml_declarations, filepath):
         out_file.write(formatted_xml_str)
 
 if __name__ == "__main__":
-    ET.register_namespace("","http://www.music-encoding.org/ns/mei")
-    cleaned_mei, xml_declarations = clean_mei_file(args.mei_file, remove_unreferenced=args.remove_unreferenced_zones,
-                                            remove_identical_duplicates=args.remove_identical_duplicates)
-    if args.destination_file:
-        save_mei_file(cleaned_mei, xml_declarations, args.destination_file)
-    else:
-        save_mei_file(cleaned_mei, xml_declarations, args.mei_file)
+    clean_mei_files(path = args.mei_path, destination_path = args.destination_path, remove_unreferenced=args.remove_unreferenced_zones,
+                    remove_identical_duplicates=args.remove_identical_duplicates)
\ No newline at end of file
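
A quick orientation for readers new to MEI facsimile data: each zone element in the facsimile section describes a bounding box through its ulx, uly, lrx, and lry attributes (plus an optional rotate), and elements in the score body point back at a zone through their facs attribute. The following sketch shows what the parse_zones and find_duplicate_zones helpers from the first patch compute, using a reduced, hypothetical MEI string; real files from the repository are far larger.

    import xml.etree.ElementTree as ET
    from itertools import combinations

    MEINS = "{http://www.music-encoding.org/ns/mei}"
    XMLNS = "{http://www.w3.org/XML/1998/namespace}"

    mei_str = """<mei xmlns="http://www.music-encoding.org/ns/mei">
    <music><facsimile><surface>
    <zone xml:id="zone-1" ulx="10" uly="20" lrx="30" lry="40"/>
    <zone xml:id="zone-2" ulx="10" uly="20" lrx="30" lry="40"/>
    </surface></facsimile></music>
    </mei>"""

    mei = ET.fromstring(mei_str)
    zones = {}
    for zone in mei.iter(f"{MEINS}zone"):
        # Missing coordinates default to -1, mirroring the script's parse_zones.
        coords = tuple(int(zone.get(c, -1)) for c in ("ulx", "uly", "lrx", "lry"))
        zones[f"#{zone.get(f'{XMLNS}id')}"] = {
            "coordinates": coords,
            "rotate": float(zone.get("rotate", 0.0)),
        }

    # Pairwise comparison of the parsed zones, as in find_duplicate_zones:
    # zone-1 and zone-2 share coordinates, so the pair is flagged.
    dupes = [(z1, z2) for z1, z2 in combinations(zones, 2) if zones[z1] == zones[z2]]
    print(dupes)  # [('#zone-1', '#zone-2')]
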
From 24d3df9ae5b3f994ad3c2dea49d5abe353934a2c Mon Sep 17 00:00:00 2001
From: dchiller
Date: Fri, 17 Jun 2022 11:44:41 -0400
Subject: [PATCH 5/9] Revise mei_cleaning script

Adds functionality for finding and raising non-identical duplicates.
Restructures file.
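
To spell out the distinction this patch draws: two zones with identical coordinates can be referenced either by elements that are themselves identical once their xml:id and facs attributes are set aside, in which case one copy is safe to delete, or by elements that genuinely differ, in which case the pair is only reported for manual review. A minimal sketch of that comparison, following the check_element_identity method in the diff below (the standalone function form and the pop defaults here are illustrative, not part of the patch):

    import xml.etree.ElementTree as ET

    XMLNS = "{http://www.w3.org/XML/1998/namespace}"

    def elements_identical(elem1: ET.Element, elem2: ET.Element) -> bool:
        # Compare attributes while ignoring xml:id and facs, which are
        # expected to differ even between duplicated elements.
        attribs = [elem1.attrib.copy(), elem2.attrib.copy()]
        for a in attribs:
            a.pop(f"{XMLNS}id", None)
            a.pop("facs", None)
        # Duplicates must also carry the same text content (e.g. syllables).
        return attribs[0] == attribs[1] and elem1.text == elem2.text
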
--- utilities/mei_cleaning.py | 250 ++++++++++++++++++++++++-------------- 1 file changed, 161 insertions(+), 89 deletions(-) diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py index 9cfd219..ea0554d 100644 --- a/utilities/mei_cleaning.py +++ b/utilities/mei_cleaning.py @@ -6,9 +6,11 @@ parser = argparse.ArgumentParser(description="Utilities for cleaning mei files") parser.add_argument('mei_path', type = str, nargs = '?', help = "Path to mei file for cleaning. If a directory, cleans all mei files in the directory.",action = 'store') -parser.add_argument('--remove_unreferenced_zones', action = 'store_true', help = "If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.") +parser.add_argument('--remove_unreferenced_bounding_boxes', action = 'store_true', help = "If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.") parser.add_argument('--remove_identical_duplicates', action = 'store_true', help = "If flagged, removes duplicate zones/bounding boxes and duplicate objects that reference those bounding boxes.") +parser.add_argument('--raise_nonidentical_duplicates', action = 'store_true', help = "Find and record instances where duplicate zones/bounding boxes are referenced by different, non-identical objects.") parser.add_argument('--destination_path', action = 'store', default = None, type = str, nargs = '?', help = "If provided, the cleaned file is save here. If omitted, file is save to mei_path location. If mei_path is a directory, this should also be a directory.") +parser.add_argument('--report_file', action = 'store', default = None, type =str, nargs = '?', help = "File in which to report any raised non-identical duplicates. If not given, results are printed.") args = parser.parse_args() MEINS = "{http://www.music-encoding.org/ns/mei}" @@ -16,96 +18,137 @@ ET.register_namespace("","http://www.music-encoding.org/ns/mei") -def clean_mei_files(path, destination_path = None, - remove_unreferenced = True, - remove_identical_duplicates = True): - if os.path.isfile(path): - cleaned_mei, xml_declarations = clean_mei(path, remove_unreferenced=remove_unreferenced,remove_identical_duplicates=remove_identical_duplicates) - if destination_path: - save_mei_file(cleaned_mei, xml_declarations, destination_path) - else: - save_mei_file(cleaned_mei, xml_declarations, path) - if os.path.isdir(path): - mei_files = [file for file in os.listdir(path) if file.endswith('.mei')] - for mei_f in mei_files: - cleaned_mei, xml_declarations = clean_mei(os.path.join(path, mei_f), remove_unreferenced=remove_unreferenced,remove_identical_duplicates=remove_identical_duplicates) - if destination_path: - save_mei_file(cleaned_mei, xml_declarations, os.path.join(destination_path, mei_f)) - else: - save_mei_file(cleaned_mei, xml_declarations, os.path.join(path, mei_f)) +class MEIFileCleaner: + def __init__(self, remove_unreferenced_bounding_boxes, + remove_identical_duplicates, + raise_nonidentical_duplicates, + report_file = None): + """See argument parser for a description of these arguments.""" + self.remove_unreferenced_bounding_boxes = remove_unreferenced_bounding_boxes + self.remove_identical_duplicates = remove_identical_duplicates + self.raise_nonidentical_duplicates = raise_nonidentical_duplicates + self.report_file = report_file + + def parse_zones(self, mei): + """Get the zones (bounding boxes) from an MEI root element.""" + zones = {} + for zone in mei.iter(f"{MEINS}zone"): + zone_id = zone.get(f"{XMLNS}id") + 
coordinate_names = ["ulx", "uly", "lrx", "lry"] + coordinates = [int(zone.get(c, -1)) for c in coordinate_names] + rotate = float(zone.get("rotate", 0.0)) + zones[f"#{zone_id}"] = { + "coordinates": tuple(coordinates), + "rotate": rotate, + } + return zones + + def find_duplicate_zones(self, mei): + zones = self.parse_zones(mei) + dupe_zone_list = [] + for z1, z2 in combinations(zones.keys(), 2): + if zones[z1] == zones[z2]: + dupe_zone_list.append((z1,z2)) + return dupe_zone_list -def clean_mei(filepath, - remove_unreferenced = True, - remove_identical_duplicates = True): - print(f"CLEANING MEI FILE: {filepath}") - xml_tree, xml_declarations = read_mei_file(filepath) - mei = xml_tree.getroot() - if remove_unreferenced: - mei = remove_unreferenced_zones(mei) - if remove_identical_duplicates: - mei = remove_identical_elements(mei) - return mei, xml_declarations - -def parse_zones(mei): - """Get the zones (bounding boxes) from an MEI root element.""" - zones = {} - for zone in mei.iter(f"{MEINS}zone"): - zone_id = zone.get(f"{XMLNS}id") - coordinate_names = ["ulx", "uly", "lrx", "lry"] - coordinates = [int(zone.get(c, -1)) for c in coordinate_names] - rotate = float(zone.get("rotate", 0.0)) - zones[f"#{zone_id}"] = { - "coordinates": tuple(coordinates), - "rotate": rotate, - } - return zones - -def find_duplicate_zones(mei): - zones = parse_zones(mei) - dupe_zone_list = [] - for z1, z2 in combinations(zones.keys(), 2): - if zones[z1] == zones[z2]: - dupe_zone_list.append((z1,z2)) - return dupe_zone_list - -def remove_unreferenced_zones(mei): - """Removes any zones defined in the facsimile section of mei (ie. - zone elements for which coordinates are defined) but that are not - associated with any mei element in the score.""" - music = mei.find(f'{MEINS}music') - surface = music.find(f'{MEINS}facsimile/{MEINS}surface') - defined_zones = surface.findall(f'{MEINS}zone') - body_str = ET.tostring(music.find(f'{MEINS}body'), encoding = 'unicode') - for def_z in defined_zones: - zone_id = def_z.get(f'{XMLNS}id') - if zone_id not in body_str: - surface.remove(def_z) - print(f"Unreferenced zone removed: {zone_id}") - return mei - -def remove_identical_elements(mei): - """Removes elements that are identical and associated with - two bounding boxes with the same coordinates.""" - duplicate_zones = find_duplicate_zones(mei) - surface = mei.find(f'{MEINS}music/{MEINS}facsimile/{MEINS}surface') - layer = mei.find(f'./{MEINS}music/{MEINS}body/{MEINS}mdiv/{MEINS}score/{MEINS}section/{MEINS}staff/{MEINS}layer') - for dup_zone_pair in duplicate_zones: - parent_elems = [layer.find(f".//*[@facs='{dup}']/..") for dup in dup_zone_pair] - elems = [layer.find(f".//*[@facs='{dup}']") for dup in dup_zone_pair] - attribs = [elem.attrib for elem in elems] - attribs_copy = [a.copy() for a in attribs] - for a in attribs_copy: + def remove_unreferenced_zones(mei): + """Removes any zones defined in the facsimile section of mei (ie. 
+ zone elements for which coordinates are defined) but that are not + associated with any mei element in the score.""" + music = mei.find(f'{MEINS}music') + surface = music.find(f'{MEINS}facsimile/{MEINS}surface') + defined_zones = surface.findall(f'{MEINS}zone') + body_str = ET.tostring(music.find(f'{MEINS}body'), encoding = 'unicode') + for def_z in defined_zones: + zone_id = def_z.get(f'{XMLNS}id') + if zone_id not in body_str: + surface.remove(def_z) + print(f"Unreferenced zone removed: {zone_id}") + return None + + def get_elements_with_duplicate_references(self, mei): + """Finds elements that reference duplicate bounding boxes. + Returns a list of lists, one for each set of duplicate bouding boxes, + each of which contains two dictionaries. These dictionaries have: + - "element": the ElementTree object of one of the elements + - "bb_id": the id of the zone/bounding box of that element + - "parent": the ElementTree object of the parent of that element (necessary to remove an element)""" + duplicate_zones = self.find_duplicate_zones(mei) + layer = mei.find(f'./{MEINS}music/{MEINS}body/{MEINS}mdiv/{MEINS}score/{MEINS}section/{MEINS}staff/{MEINS}layer') + duplicate_references_list = [] + for dup_zone_pair in duplicate_zones: + elems = [layer.find(f".//*[@facs='{dup}']") for dup in dup_zone_pair] + parent_elems = [layer.find(f".//*[@facs='{dup}']/..") for dup in dup_zone_pair] + dup_ref_list = [{'element': elems[0], 'bb_id': dup_zone_pair[0],'parent': parent_elems[0]}, + {'element': elems[1], 'bb_id': dup_zone_pair[1], 'parent': parent_elems[1]}] + duplicate_references_list.append(dup_ref_list) + return duplicate_references_list + + def check_element_identity(self, elem1, elem2): + elem_attribs = [elem1.attrib.copy(), + elem2.attrib.copy()] + for a in elem_attribs: a.pop(f'{XMLNS}id') a.pop('facs') - if attribs_copy[0] == attribs_copy[1]: - parent_elems[1].remove(elems[1]) - zone_id_to_del = dup_zone_pair[1] - zone_id_to_del = zone_id_to_del.replace('#','') - zone_to_del = surface.find(f"*[@{XMLNS}id='{zone_id_to_del}']") - surface.remove(zone_to_del) - print(f"Identical zones/elements removed: {zone_id_to_del}") - return mei + if (elem_attribs[0] == elem_attribs[1]) and elem1.text == elem2.text: + return True + else: + return False + + def delete_element_and_referenced_zone(self, surface, element, parent, zone_id): + elem_id = element.attrib[f'{XMLNS}id'] + parent.remove(element) + zone_to_del = surface.find(f"*[@{XMLNS}id='{zone_id.replace('#','')}']") + surface.remove(zone_to_del) + print(f"Identical zone and referencing element removed: {zone_id} & {elem_id}") + return None + + def register_nonidentical_duplicates(self, dup_dict_1, dup_dict_2): + str_to_print = f""" + ###### NON-IDENTICAL DUPLICATE FOUND ###### \n + {dup_dict_1['bb_id']} > \n + \t {dup_dict_1['element'].attrib} {dup_dict_1['element'].text} \n + {dup_dict_2['bb_id']} > \n + \t {dup_dict_2['element'].attrib} {dup_dict_2['element'].text} \n + """ + if self.report_file: + with open(self.report_file, 'wa') as rf: + rf.write(str_to_print) + else: + print(str_to_print) + return None + + def handle_referenced_duplicates(self, mei): + dup_ref_list = self.get_elements_with_duplicate_references(mei) + for dup_ref in dup_ref_list: + identical = self.check_element_identity(dup_ref[0]['element'], + dup_ref[1]['element']) + if identical: + if self.remove_identical_duplicates: + surface = mei.find(f'{MEINS}music/{MEINS}facsimile/{MEINS}surface') + self.delete_element_and_referenced_zone(surface = surface, + element = 
dup_ref[1]['element'], + parent = dup_ref[1]['parent'], + zone_id = dup_ref[1]['bb_id']) + else: + if self.raise_nonidentical_duplicates: + self.register_nonidentical_duplicates(dup_ref[0], dup_ref[1]) + if self.raise_nonidentical_duplicates & self.report_file: + print(f'Non-identical duplicates checked and raised in {self.report_file}') + return None + + def clean_mei(self, filepath): + print(f"CLEANING MEI FILE: {filepath}") + if self.report_file: + with open(self.report_file, 'wa') as rf: + rf.write(f"CLEANING MEI FILE: {filepath}") + xml_tree, xml_declarations = read_mei_file(filepath) + mei = xml_tree.getroot() + if self.remove_unreferenced_bounding_boxes: + self.remove_unreferenced_zones(mei) + self.handle_referenced_duplicates(mei) + return mei, xml_declarations def read_mei_file(filepath): xml_tree = ET.parse(filepath) @@ -126,6 +169,35 @@ def save_mei_file(xml_tree, xml_declarations, filepath): with open(filepath, 'w') as out_file: out_file.write(formatted_xml_str) +def clean_mei_files(path, destination_path = None, + remove_unreferenced_bounding_boxes = True, + remove_identical_duplicates = True, + raise_nonidentical_duplicates = True, + report_file = None): + mei_cleaner = MEIFileCleaner(remove_unreferenced_bounding_boxes=remove_unreferenced_bounding_boxes, + remove_identical_duplicates=remove_identical_duplicates, + raise_nonidentical_duplicates=raise_nonidentical_duplicates, + report_file=report_file) + if os.path.isfile(path): + cleaned_mei, xml_declarations = mei_cleaner.clean_mei(path) + if destination_path: + save_mei_file(cleaned_mei, xml_declarations, destination_path) + else: + save_mei_file(cleaned_mei, xml_declarations, path) + if os.path.isdir(path): + mei_files = [file for file in os.listdir(path) if file.endswith('.mei')] + for mei_f in mei_files: + cleaned_mei, xml_declarations = mei_cleaner.clean_mei(os.path.join(path, mei_f)) + if destination_path: + save_mei_file(cleaned_mei, xml_declarations, os.path.join(destination_path, mei_f)) + else: + save_mei_file(cleaned_mei, xml_declarations, os.path.join(path, mei_f)) + if __name__ == "__main__": - clean_mei_files(path = args.mei_path, destination_path = args.destination_path, remove_unreferenced=args.remove_unreferenced_zones, - remove_identical_duplicates=args.remove_identical_duplicates) \ No newline at end of file + clean_mei_files(path = args.mei_path, + destination_path = args.destination_path, + remove_unreferenced_bounding_boxes = args.remove_unreferenced_bounding_boxes, + remove_identical_duplicates = args.remove_identical_duplicates, + raise_nonidentical_duplicates = args.raise_nonidentical_duplicates, + report_file = args.report_file + ) \ No newline at end of file From 393dcd2bae48b8062b3ac6eb44c0b2d30795b8a0 Mon Sep 17 00:00:00 2001 From: dchiller Date: Fri, 17 Jun 2022 11:55:44 -0400 Subject: [PATCH 6/9] Fix arguments in remove_unreferenced_zones --- utilities/mei_cleaning.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py index ea0554d..924a2a3 100644 --- a/utilities/mei_cleaning.py +++ b/utilities/mei_cleaning.py @@ -52,7 +52,7 @@ def find_duplicate_zones(self, mei): dupe_zone_list.append((z1,z2)) return dupe_zone_list - def remove_unreferenced_zones(mei): + def remove_unreferenced_zones(self, mei): """Removes any zones defined in the facsimile section of mei (ie. 
zone elements for which coordinates are defined) but that are not associated with any mei element in the score.""" @@ -134,8 +134,9 @@ def handle_referenced_duplicates(self, mei): else: if self.raise_nonidentical_duplicates: self.register_nonidentical_duplicates(dup_ref[0], dup_ref[1]) - if self.raise_nonidentical_duplicates & self.report_file: - print(f'Non-identical duplicates checked and raised in {self.report_file}') + if self.raise_nonidentical_duplicates: + if self.report_file: + print(f'Non-identical duplicates checked and raised in {self.report_file}') return None def clean_mei(self, filepath): From 2925c626fefdbb30c07f24ed870c3f7bc8122b24 Mon Sep 17 00:00:00 2001 From: dchiller Date: Fri, 17 Jun 2022 12:33:17 -0400 Subject: [PATCH 7/9] Fixes file writing for raising nonidentical duplicates --- utilities/mei_cleaning.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py index 924a2a3..e0f0a0b 100644 --- a/utilities/mei_cleaning.py +++ b/utilities/mei_cleaning.py @@ -110,10 +110,9 @@ def register_nonidentical_duplicates(self, dup_dict_1, dup_dict_2): {dup_dict_1['bb_id']} > \n \t {dup_dict_1['element'].attrib} {dup_dict_1['element'].text} \n {dup_dict_2['bb_id']} > \n - \t {dup_dict_2['element'].attrib} {dup_dict_2['element'].text} \n - """ + \t {dup_dict_2['element'].attrib} {dup_dict_2['element'].text} \n \n""" if self.report_file: - with open(self.report_file, 'wa') as rf: + with open(self.report_file, 'a') as rf: rf.write(str_to_print) else: print(str_to_print) @@ -142,8 +141,8 @@ def handle_referenced_duplicates(self, mei): def clean_mei(self, filepath): print(f"CLEANING MEI FILE: {filepath}") if self.report_file: - with open(self.report_file, 'wa') as rf: - rf.write(f"CLEANING MEI FILE: {filepath}") + with open(self.report_file, 'a') as rf: + rf.write(f"CLEANING MEI FILE: {filepath} \n") xml_tree, xml_declarations = read_mei_file(filepath) mei = xml_tree.getroot() if self.remove_unreferenced_bounding_boxes: From 03ccaa8de3f74989de37324f92af58649d4f3851 Mon Sep 17 00:00:00 2001 From: dchiller Date: Fri, 17 Jun 2022 12:46:13 -0400 Subject: [PATCH 8/9] Blackify utilities/mei_cleaning.py --- utilities/mei_cleaning.py | 191 +++++++++++++++++++++++++------------- 1 file changed, 129 insertions(+), 62 deletions(-) diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py index e0f0a0b..ebbc7ac 100644 --- a/utilities/mei_cleaning.py +++ b/utilities/mei_cleaning.py @@ -5,25 +5,60 @@ import os parser = argparse.ArgumentParser(description="Utilities for cleaning mei files") -parser.add_argument('mei_path', type = str, nargs = '?', help = "Path to mei file for cleaning. If a directory, cleans all mei files in the directory.",action = 'store') -parser.add_argument('--remove_unreferenced_bounding_boxes', action = 'store_true', help = "If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.") -parser.add_argument('--remove_identical_duplicates', action = 'store_true', help = "If flagged, removes duplicate zones/bounding boxes and duplicate objects that reference those bounding boxes.") -parser.add_argument('--raise_nonidentical_duplicates', action = 'store_true', help = "Find and record instances where duplicate zones/bounding boxes are referenced by different, non-identical objects.") -parser.add_argument('--destination_path', action = 'store', default = None, type = str, nargs = '?', help = "If provided, the cleaned file is save here. 
If omitted, file is save to mei_path location. If mei_path is a directory, this should also be a directory.") -parser.add_argument('--report_file', action = 'store', default = None, type =str, nargs = '?', help = "File in which to report any raised non-identical duplicates. If not given, results are printed.") +parser.add_argument( + "mei_path", + type=str, + nargs="?", + help="Path to mei file for cleaning. If a directory, cleans all mei files in the directory.", + action="store", +) +parser.add_argument( + "--remove_unreferenced_bounding_boxes", + action="store_true", + help="If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.", +) +parser.add_argument( + "--remove_identical_duplicates", + action="store_true", + help="If flagged, removes duplicate zones/bounding boxes and duplicate objects that reference those bounding boxes.", +) +parser.add_argument( + "--raise_nonidentical_duplicates", + action="store_true", + help="Find and record instances where duplicate zones/bounding boxes are referenced by different, non-identical objects.", +) +parser.add_argument( + "--destination_path", + action="store", + default=None, + type=str, + nargs="?", + help="If provided, the cleaned file is save here. If omitted, file is save to mei_path location. If mei_path is a directory, this should also be a directory.", +) +parser.add_argument( + "--report_file", + action="store", + default=None, + type=str, + nargs="?", + help="File in which to report any raised non-identical duplicates. If not given, results are printed.", +) args = parser.parse_args() MEINS = "{http://www.music-encoding.org/ns/mei}" XMLNS = "{http://www.w3.org/XML/1998/namespace}" -ET.register_namespace("","http://www.music-encoding.org/ns/mei") +ET.register_namespace("", "http://www.music-encoding.org/ns/mei") + class MEIFileCleaner: - - def __init__(self, remove_unreferenced_bounding_boxes, - remove_identical_duplicates, - raise_nonidentical_duplicates, - report_file = None): + def __init__( + self, + remove_unreferenced_bounding_boxes, + remove_identical_duplicates, + raise_nonidentical_duplicates, + report_file=None, + ): """See argument parser for a description of these arguments.""" self.remove_unreferenced_bounding_boxes = remove_unreferenced_bounding_boxes self.remove_identical_duplicates = remove_identical_duplicates @@ -49,19 +84,19 @@ def find_duplicate_zones(self, mei): dupe_zone_list = [] for z1, z2 in combinations(zones.keys(), 2): if zones[z1] == zones[z2]: - dupe_zone_list.append((z1,z2)) + dupe_zone_list.append((z1, z2)) return dupe_zone_list def remove_unreferenced_zones(self, mei): """Removes any zones defined in the facsimile section of mei (ie. 
zone elements for which coordinates are defined) but that are not associated with any mei element in the score.""" - music = mei.find(f'{MEINS}music') - surface = music.find(f'{MEINS}facsimile/{MEINS}surface') - defined_zones = surface.findall(f'{MEINS}zone') - body_str = ET.tostring(music.find(f'{MEINS}body'), encoding = 'unicode') + music = mei.find(f"{MEINS}music") + surface = music.find(f"{MEINS}facsimile/{MEINS}surface") + defined_zones = surface.findall(f"{MEINS}zone") + body_str = ET.tostring(music.find(f"{MEINS}body"), encoding="unicode") for def_z in defined_zones: - zone_id = def_z.get(f'{XMLNS}id') + zone_id = def_z.get(f"{XMLNS}id") if zone_id not in body_str: surface.remove(def_z) print(f"Unreferenced zone removed: {zone_id}") @@ -75,29 +110,42 @@ def get_elements_with_duplicate_references(self, mei): - "bb_id": the id of the zone/bounding box of that element - "parent": the ElementTree object of the parent of that element (necessary to remove an element)""" duplicate_zones = self.find_duplicate_zones(mei) - layer = mei.find(f'./{MEINS}music/{MEINS}body/{MEINS}mdiv/{MEINS}score/{MEINS}section/{MEINS}staff/{MEINS}layer') + layer = mei.find( + f"./{MEINS}music/{MEINS}body/{MEINS}mdiv/{MEINS}score/{MEINS}section/{MEINS}staff/{MEINS}layer" + ) duplicate_references_list = [] for dup_zone_pair in duplicate_zones: elems = [layer.find(f".//*[@facs='{dup}']") for dup in dup_zone_pair] - parent_elems = [layer.find(f".//*[@facs='{dup}']/..") for dup in dup_zone_pair] - dup_ref_list = [{'element': elems[0], 'bb_id': dup_zone_pair[0],'parent': parent_elems[0]}, - {'element': elems[1], 'bb_id': dup_zone_pair[1], 'parent': parent_elems[1]}] + parent_elems = [ + layer.find(f".//*[@facs='{dup}']/..") for dup in dup_zone_pair + ] + dup_ref_list = [ + { + "element": elems[0], + "bb_id": dup_zone_pair[0], + "parent": parent_elems[0], + }, + { + "element": elems[1], + "bb_id": dup_zone_pair[1], + "parent": parent_elems[1], + }, + ] duplicate_references_list.append(dup_ref_list) return duplicate_references_list def check_element_identity(self, elem1, elem2): - elem_attribs = [elem1.attrib.copy(), - elem2.attrib.copy()] + elem_attribs = [elem1.attrib.copy(), elem2.attrib.copy()] for a in elem_attribs: - a.pop(f'{XMLNS}id') - a.pop('facs') + a.pop(f"{XMLNS}id") + a.pop("facs") if (elem_attribs[0] == elem_attribs[1]) and elem1.text == elem2.text: return True else: return False - + def delete_element_and_referenced_zone(self, surface, element, parent, zone_id): - elem_id = element.attrib[f'{XMLNS}id'] + elem_id = element.attrib[f"{XMLNS}id"] parent.remove(element) zone_to_del = surface.find(f"*[@{XMLNS}id='{zone_id.replace('#','')}']") surface.remove(zone_to_del) @@ -112,7 +160,7 @@ def register_nonidentical_duplicates(self, dup_dict_1, dup_dict_2): {dup_dict_2['bb_id']} > \n \t {dup_dict_2['element'].attrib} {dup_dict_2['element'].text} \n \n""" if self.report_file: - with open(self.report_file, 'a') as rf: + with open(self.report_file, "a") as rf: rf.write(str_to_print) else: print(str_to_print) @@ -121,27 +169,32 @@ def register_nonidentical_duplicates(self, dup_dict_1, dup_dict_2): def handle_referenced_duplicates(self, mei): dup_ref_list = self.get_elements_with_duplicate_references(mei) for dup_ref in dup_ref_list: - identical = self.check_element_identity(dup_ref[0]['element'], - dup_ref[1]['element']) + identical = self.check_element_identity( + dup_ref[0]["element"], dup_ref[1]["element"] + ) if identical: if self.remove_identical_duplicates: - surface = 
mei.find(f'{MEINS}music/{MEINS}facsimile/{MEINS}surface') - self.delete_element_and_referenced_zone(surface = surface, - element = dup_ref[1]['element'], - parent = dup_ref[1]['parent'], - zone_id = dup_ref[1]['bb_id']) + surface = mei.find(f"{MEINS}music/{MEINS}facsimile/{MEINS}surface") + self.delete_element_and_referenced_zone( + surface=surface, + element=dup_ref[1]["element"], + parent=dup_ref[1]["parent"], + zone_id=dup_ref[1]["bb_id"], + ) else: if self.raise_nonidentical_duplicates: self.register_nonidentical_duplicates(dup_ref[0], dup_ref[1]) if self.raise_nonidentical_duplicates: if self.report_file: - print(f'Non-identical duplicates checked and raised in {self.report_file}') + print( + f"Non-identical duplicates checked and raised in {self.report_file}" + ) return None def clean_mei(self, filepath): print(f"CLEANING MEI FILE: {filepath}") if self.report_file: - with open(self.report_file, 'a') as rf: + with open(self.report_file, "a") as rf: rf.write(f"CLEANING MEI FILE: {filepath} \n") xml_tree, xml_declarations = read_mei_file(filepath) mei = xml_tree.getroot() @@ -150,34 +203,42 @@ def clean_mei(self, filepath): self.handle_referenced_duplicates(mei) return mei, xml_declarations + def read_mei_file(filepath): xml_tree = ET.parse(filepath) declarations = [] - with open(filepath, 'r') as in_file: + with open(filepath, "r") as in_file: for f_line in in_file: if re.fullmatch("^<\?.*\?>\n$", f_line): declarations.append(f_line) else: break - xml_declarations = ''.join(declarations) + xml_declarations = "".join(declarations) return xml_tree, xml_declarations + def save_mei_file(xml_tree, xml_declarations, filepath): - xml_str = ET.tostring(xml_tree, encoding = 'unicode') + xml_str = ET.tostring(xml_tree, encoding="unicode") formatted_xml_str = re.sub(" \/>", "/>", xml_str) - formatted_xml_str = ''.join([xml_declarations, formatted_xml_str]) - with open(filepath, 'w') as out_file: + formatted_xml_str = "".join([xml_declarations, formatted_xml_str]) + with open(filepath, "w") as out_file: out_file.write(formatted_xml_str) -def clean_mei_files(path, destination_path = None, - remove_unreferenced_bounding_boxes = True, - remove_identical_duplicates = True, - raise_nonidentical_duplicates = True, - report_file = None): - mei_cleaner = MEIFileCleaner(remove_unreferenced_bounding_boxes=remove_unreferenced_bounding_boxes, - remove_identical_duplicates=remove_identical_duplicates, - raise_nonidentical_duplicates=raise_nonidentical_duplicates, - report_file=report_file) + +def clean_mei_files( + path, + destination_path=None, + remove_unreferenced_bounding_boxes=True, + remove_identical_duplicates=True, + raise_nonidentical_duplicates=True, + report_file=None, +): + mei_cleaner = MEIFileCleaner( + remove_unreferenced_bounding_boxes=remove_unreferenced_bounding_boxes, + remove_identical_duplicates=remove_identical_duplicates, + raise_nonidentical_duplicates=raise_nonidentical_duplicates, + report_file=report_file, + ) if os.path.isfile(path): cleaned_mei, xml_declarations = mei_cleaner.clean_mei(path) if destination_path: @@ -185,19 +246,25 @@ def clean_mei_files(path, destination_path = None, else: save_mei_file(cleaned_mei, xml_declarations, path) if os.path.isdir(path): - mei_files = [file for file in os.listdir(path) if file.endswith('.mei')] + mei_files = [file for file in os.listdir(path) if file.endswith(".mei")] for mei_f in mei_files: - cleaned_mei, xml_declarations = mei_cleaner.clean_mei(os.path.join(path, mei_f)) + cleaned_mei, xml_declarations = mei_cleaner.clean_mei( + 
os.path.join(path, mei_f)
            )
            if destination_path:
                save_mei_file(
                    cleaned_mei, xml_declarations, os.path.join(destination_path, mei_f)
                )
            else:
                save_mei_file(cleaned_mei, xml_declarations, os.path.join(path, mei_f))


if __name__ == "__main__":
    clean_mei_files(
        path=args.mei_path,
        destination_path=args.destination_path,
        remove_unreferenced_bounding_boxes=args.remove_unreferenced_bounding_boxes,
        remove_identical_duplicates=args.remove_identical_duplicates,
        raise_nonidentical_duplicates=args.raise_nonidentical_duplicates,
        report_file=args.report_file,
    )

From 17ba25afb812c4e6723a0e441bc2838b8313a5da Mon Sep 17 00:00:00 2001
From: Néstor Nápoles López
Date: Fri, 17 Jun 2022 15:46:43 -0400
Subject: [PATCH 9/9] Update mei_cleaning.py

---
 utilities/mei_cleaning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py
index ebbc7ac..19de96a 100644
--- a/utilities/mei_cleaning.py
+++ b/utilities/mei_cleaning.py
@@ -43,7 +43,6 @@
     nargs="?",
     help="File in which to report any raised non-identical duplicates. If not given, results are printed.",
 )
-args = parser.parse_args()
 
 MEINS = "{http://www.music-encoding.org/ns/mei}"
 XMLNS = "{http://www.w3.org/XML/1998/namespace}"
@@ -260,6 +259,7 @@ def clean_mei_files(
 
 if __name__ == "__main__":
+    args = parser.parse_args()
     clean_mei_files(
         path=args.mei_path,
         destination_path=args.destination_path,
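
With the series applied, the script can be run from the command line, e.g. python utilities/mei_cleaning.py <mei_path> --remove_unreferenced_bounding_boxes --remove_identical_duplicates --raise_nonidentical_duplicates --report_file report.txt. The final patch moves parser.parse_args() inside the __main__ guard, so the file can also be imported without argparse consuming the caller's arguments. A short library-style usage sketch follows; the paths are placeholders, not files from the repository, and it assumes utilities/ is importable from the working directory:

    from utilities.mei_cleaning import clean_mei_files

    clean_mei_files(
        path="sample_pages",                      # one .mei file, or a directory of them
        destination_path="cleaned",               # omit to overwrite the inputs in place
        remove_unreferenced_bounding_boxes=True,  # drop zones never referenced in the body
        remove_identical_duplicates=True,         # delete one of each identical zone/element pair
        raise_nonidentical_duplicates=True,       # report same-coordinate zones whose elements differ
        report_file="duplicates_report.txt",      # findings are appended here instead of printed
    )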