Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix files so that they are validated by the schema #15

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
dchiller marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
Binary file modified Liber Usualis - mei3/__pycache__/meichecker.cpython-310.pyc
Binary file not shown.
27 changes: 27 additions & 0 deletions Liber Usualis - mei3/liberbatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import os
import meichecker
import liberupdatev5

"""
Updates and validates all MEI files in the current directory.

This script updates each MEI file using liberupdatev5 and then validates
the updated file using meichecker.
"""

# One validation report is collected per converted file and the reports are
# joined into a single message once every file has been processed.
reports = []

for entry in os.listdir("."):
    file_name = os.fsdecode(entry)

    # Only files whose name ends in "corr.mei" are converted.
    if not file_name.endswith("corr.mei"):
        continue

    # Update the MEI file using liberupdatev5
    liberupdatev5.main(file_name)
    print(f"{file_name} has been updated")

    # Validate the updated MEI file using meichecker. The converted file name
    # is derived the same way liberupdatev5 derives it: the last four
    # characters are replaced by " - mei5.mei".
    converted_name = file_name[:-4] + ' - mei5.mei'
    error_log = meichecker.main(converted_name)
    reports.append(f"{error_log}\n")
    print(f"{file_name} has been checked")

# Print the accumulated error messages
error_message = "".join(reports)
print(error_message)
239 changes: 239 additions & 0 deletions Liber Usualis - mei3/liberupdatev5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
import lxml.etree as ET

def main(filename: str) -> None:
    """Convert an MEI file to MEI 5.0 and write the result to a new file.

    The input is parsed, rewritten in place (element and attribute renames,
    page/system breaks moved into layers, episemata moved into notes), and
    then copied into a fresh tree that is written to
    ``filename[:-4] + " - mei5.mei"``.

    Args:
        filename: The path to the MEI file to be converted. The last four
            characters are assumed to be the extension (e.g. ``.mei``).

    Raises:
        ValueError: If a ``zone`` coordinate (``ulx``/``lrx``) is not an
            integer literal.
        Whatever ``lxml`` raises when the file cannot be read or parsed.
    """
    ET.register_namespace("mei", "http://www.music-encoding.org/ns/mei")

    tree = ET.parse(filename)
    root = tree.getroot()

    # State carried from one visited element to a later one. ``None`` means
    # "nothing pending".
    pending_pb = None            # serialized page break awaiting a layer
    pending_sb = None            # serialized system break awaiting a layer
    pending_episema = None       # serialized episema awaiting a note
    pending_neume_attrs = None   # leftover neume attributes for the next nc
    pending_nc_attrs = None      # leftover nc attributes for the next note

    # First pass: rewrite the parsed tree in place. Elements that must be
    # moved are serialized, tagged "TODELETE", and re-inserted elsewhere under
    # a placeholder tag ("peeb", "seeb", "apisema") so that this loop does not
    # process them a second time when the iterator reaches the inserted copy.
    for child in root.iter("*"):
        child.tag = ET.QName(child).localname
        tag = child.tag

        # The suffixes below never overlap and every renamed tag matches none
        # of them, so at most one branch applies to any element.
        if tag.endswith("graphic"):
            _rename_href_to_target(child)

        elif tag.endswith("pb"):
            # Page number comes from the referenced element's @n (found in the
            # layout element, which is dropped in the second pass).
            pending_pb = _detach_break(root, child, "peeb", "pageref", "n")

        elif tag.endswith("sb"):
            pending_sb = _detach_break(root, child, "seeb", "systemref", "facs")

        elif tag.endswith("layer"):
            # Move the most recent page/system break inside this layer.
            # NOTE(review): only the last break seen before a layer is kept,
            # and it is appended at the end of the layer; confirm this matches
            # the layout of the source files.
            if pending_pb is not None:
                child.append(ET.fromstring(pending_pb))
                pending_pb = None
            if pending_sb is not None:
                child.append(ET.fromstring(pending_sb))
                pending_sb = None

        elif tag.endswith("zone"):
            _clamp_negative_zone_coords(child)

        elif tag.endswith("dot"):
            # Temporary fix until morae get added to MEI.
            child.tag = "signifLet"
            _pop_first(child.attrib, "form")

        elif tag.endswith("neume"):
            # neume -> syllable (renamed in the second pass). It keeps only a
            # @type built from name + variant and its id; everything else is
            # handed to the next nc.
            ET.SubElement(child, "syl")
            leftovers = dict(child.attrib)
            child.attrib.clear()
            name_key, name = _pop_first(leftovers, "name")
            if name_key is not None:
                child.attrib["type"] = name
            variant_key, variant = _pop_first(leftovers, "variant")
            if variant_key is not None:
                # A variant without a name used to raise KeyError.
                child.attrib["type"] = child.attrib.get("type", "") + variant
            id_key, id_value = _pop_first(leftovers, "id")
            if id_key is not None:
                child.attrib[id_key] = id_value
            pending_neume_attrs = leftovers

        elif tag.endswith("nc"):
            # nc -> neume (renamed in the second pass). It receives the
            # enclosing neume's leftover attributes and keeps its own id; its
            # other attributes are handed to the next note.
            leftovers = dict(child.attrib)
            child.attrib.clear()
            if pending_neume_attrs is not None:
                child.attrib.update(pending_neume_attrs)
                pending_neume_attrs = None
            id_key, id_value = _pop_first(leftovers, "id")
            if id_key is not None:
                child.attrib[id_key] = id_value
            pending_nc_attrs = leftovers

        elif tag.endswith("division"):
            # division -> divLine (renamed in the second pass).
            _map_form(child, _DIVISION_FORMS)

        elif tag.endswith("note"):
            # note -> nc (renamed in the second pass).
            if pending_nc_attrs is not None:
                child.attrib.update(pending_nc_attrs)
                pending_nc_attrs = None
            inclinatum_key, _ = _pop_first(child.attrib, "inclinatum")
            if inclinatum_key is not None:
                child.attrib["tilt"] = "se"
            quilisma_key, _ = _pop_first(child.attrib, "quilisma")
            if quilisma_key is not None:
                ET.SubElement(child, "quilisma")
            if pending_episema is not None:
                child.append(ET.fromstring(pending_episema))
                pending_episema = None

        elif tag.endswith("accid"):
            _pop_first(child.attrib, "oct")
            _pop_first(child.attrib, "pname")

        elif tag.endswith("episema"):
            # NOTE(review): the episema is attached to the next note visited,
            # not to the note named by @startid/@endid; confirm that the
            # source files always place an episema before its note.
            child.tag = "apisema"
            _map_form(child, _EPISEMA_FORMS)
            _pop_first(child.attrib, "startid")
            _pop_first(child.attrib, "endid")
            pending_episema = ET.tostring(child)
            child.tag = "TODELETE"

    # Second pass: copy the rewritten tree into a new root. Only xmlns and
    # meiversion are set on the new root; the old root's attributes are not
    # carried over.
    new_root = ET.Element(root.tag)
    new_root.attrib["xmlns"] = "http://www.music-encoding.org/ns/mei"
    new_root.attrib["meiversion"] = "5.0"
    new_root.text = root.text

    # Create processing instructions
    pi1 = ET.ProcessingInstruction('xml-model', 'href="https://music-encoding.org/schema/5.0/mei-all.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"')
    pi2 = ET.ProcessingInstruction('xml-model', 'href="https://music-encoding.org/schema/5.0/mei-all.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"')

    # Add processing instructions to the tree
    new_root.addprevious(pi1)
    new_root.addprevious(pi2)

    # Build the new tree with an explicit stack; siblings keep their order
    # because all children of a popped element are appended in one go.
    stack = [(new_root, root)]
    while stack:
        new_parent, old_parent = stack.pop()
        for child in old_parent:
            if not isinstance(child.tag, str):
                # Comments and processing instructions have a non-string tag;
                # calling endswith on it used to raise AttributeError.
                clone = _clone_special_node(child)
                if clone is not None:
                    new_parent.append(clone)
                continue

            if child.tag.endswith(("layout", "TODELETE")):
                continue  # dropped together with everything below them

            new_child = ET.Element(_final_tag(child.tag), dict(child.attrib))
            # Carry text content over; previously every element's text and
            # tail were silently discarded.
            new_child.text = child.text
            new_child.tail = child.tail
            new_parent.append(new_child)
            stack.append((new_child, child))

    # Write the new tree to a file
    new_tree = ET.ElementTree(new_root)
    new_tree.write(filename[:-4] + " - mei5.mei", encoding="utf8", xml_declaration=True)


# Value mapping applied to @form on division elements.
_DIVISION_FORMS = {"minor": "maior", "minior": "maior", "major": "maxima", "final": "finalis", "small": "minima", "comma": "virgula"}

# Value mapping applied to @form on episema elements.
_EPISEMA_FORMS = {"horizontal": "h", "vertical": "v"}

# Tag renames applied while copying; the first matching suffix wins.
_TAG_RENAMES = (
    ("neume", "syllable"),
    ("nc", "neume"),
    ("note", "nc"),
    ("apisema", "episema"),
    ("peeb", "pb"),
    ("seeb", "sb"),
    ("division", "divLine"),
)


def _pop_first(attrs, suffix):
    """Remove the first attribute whose name ends with *suffix*.

    Attribute names are matched by suffix because, in the source files, some
    attributes carry the ``{http://www.w3.org/1999/xlink}`` or
    ``{http://www.w3.org/XML/1998/namespace}`` namespace and others none.

    Returns:
        ``(name, value)`` of the removed attribute, or ``(None, None)`` when
        no attribute matches.
    """
    for key in list(attrs):
        if key.endswith(suffix):
            return key, attrs.pop(key)
    return None, None


def _rename_href_to_target(graphic):
    """Rename every attribute ending in ``href`` on *graphic* to ``target``."""
    for key in list(graphic.attrib):
        if key.endswith("href"):
            graphic.attrib["target"] = graphic.attrib.pop(key)


def _referenced_value(root, ref, wanted):
    """Return attribute *wanted* of the element whose id equals *ref*.

    An element matches when any of its attributes ending in ``id`` has the
    value *ref* and it carries *wanted*. Returns ``None`` when nothing
    matches.
    """
    for element in root.iter():
        if wanted not in element.attrib:
            continue
        for key, value in element.attrib.items():
            if key.endswith("id") and value == ref:
                return element.attrib[wanted]
    return None


def _detach_break(root, element, placeholder_tag, ref_suffix, copied_attr):
    """Serialize a page/system break for re-insertion and mark it deleted.

    The reference attribute (ending in *ref_suffix*) is removed and, when the
    referenced element can be found, its *copied_attr* is copied onto the
    break. A break without a reference attribute is serialized unchanged.

    Returns:
        The serialized break, tagged *placeholder_tag*.
    """
    element.tag = placeholder_tag
    ref_key, ref = _pop_first(element.attrib, ref_suffix)
    if ref_key is not None:
        value = _referenced_value(root, ref, copied_attr)
        if value is not None:
            element.attrib[copied_attr] = value
    serialized = ET.tostring(element)
    element.tag = "TODELETE"
    return serialized


def _clamp_negative_zone_coords(zone):
    """Set negative ``ulx``/``lrx`` to 0 and record the originals in @label."""
    notes = []
    for coord in ("ulx", "lrx"):
        for key, value in zone.attrib.items():
            if key.endswith(coord) and int(value) < 0:
                notes.append(coord + " = " + value + " ")
                zone.attrib[key] = str(0)
                break
    if notes:
        # Both notes are kept when both coordinates were negative; previously
        # the lrx note overwrote the ulx note.
        zone.attrib["label"] = "".join(notes)


def _map_form(element, mapping):
    """Translate every attribute ending in ``form`` through *mapping*.

    Values missing from *mapping* are left unchanged instead of raising
    KeyError.
    """
    for key, value in element.attrib.items():
        if key.endswith("form"):
            element.attrib[key] = mapping.get(value, value)


def _final_tag(tag):
    """Return the output tag for *tag*, applying the first matching rename."""
    for suffix, replacement in _TAG_RENAMES:
        if tag.endswith(suffix):
            return replacement
    return tag


def _clone_special_node(node):
    """Copy a comment or processing instruction; return None for other nodes."""
    if node.tag is ET.Comment:
        clone = ET.Comment(node.text)
    elif node.tag is ET.ProcessingInstruction:
        clone = ET.ProcessingInstruction(node.target, node.text)
    else:
        return None
    clone.tail = node.tail
    return clone
Loading