Skip to content

Commit 13e021f

Browse files
committed
Merge branch 'main' into 2024-release-candidate
2 parents 3b9a76d + a513594 commit 13e021f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+972
-75
lines changed

NEW_SYNSETS.md

+5
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ substituted, e.g., "a happy life"/"a felicitous outcome". This does not mean
7272
that they can be substituted in every sense, e.g., "happy to help" but not
7373
*"felicitous to help".
7474

75+
For verbs, two senses are distinct if they differ in the semantic role of the subject, but not if the
76+
direct object or any other argument can be substituted with a quantifier such as
77+
"something" or "someone". For example, "to eat" and "to eat something" are not
78+
distinct senses. However, "to change" and "to change something" are distinct senses
79+
as the subject has a different semantic role in each: the thing that undergoes the change versus the agent that causes it.
7580

7681
## Well-defined
7782

scripts/from_yaml.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,20 @@ def map_sense_key(sk):
2222
"""
2323
if "%" in sk:
2424
e = sk.split("%")
25-
return ("oewn-" + e[0].replace("'","-ap-").replace("/","-sl-").replace("!","-ex-").replace(",","-cm-").replace(":","-cn-").replace("+","-pl-") +
26-
"__" + e[1].replace("_","-sp-").replace(":","."))
25+
if len(e) > 2:
26+
lemma = "%".join(e[:-1])
27+
info = e[-1]
28+
else:
29+
lemma = e[0]
30+
info = e[1]
31+
return ("oewn-" + lemma.replace("'","-ap-").replace("/","-sl-").
32+
replace("!","-ex-").replace(",","-cm-")
33+
.replace(":","-cn-").replace("+","-pl-") +
34+
"__" + info.replace("_","-sp-").replace(":","."))
2735
else:
28-
return "oewn-" + sk.replace("%", "__").replace("'","-ap-").replace("/","-sl-").replace("!","-ex-").replace(",","-cm-").replace(":","-cn-").replace("+","-pl-")
36+
return ("oewn-" + sk.replace("%", "__").replace("'","-ap-").
37+
replace("/","-sl-").replace("!","-ex-").
38+
replace(",","-cm-").replace(":","-cn-").replace("+","-pl-"))
2939

3040
def unmap_sense_key(sk):
3141
"""
@@ -52,15 +62,6 @@ def make_pos(y, pos):
5262
else:
5363
return pos
5464

55-
56-
def make_sense_id(y, lemma, pos):
57-
"""
58-
Create a sense ID from a YAML entry
59-
"""
60-
return "oewn-%s-%s-%s" % (
61-
escape_lemma(lemma), make_pos(y, pos), y["synset"][:-2])
62-
63-
6465
def sense_from_yaml(y, lemma, pos, n):
6566
"""
6667
Create a Sense object from the YAML data

scripts/sense_keys.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ def get_sense_key(wn, e, s):
124124
lemma = (e.lemma.written_form
125125
.replace(" ", "_")
126126
.replace("&apos", "'")
127-
.replace(":", "-cn-")
128127
.lower())
129128
ss_type = ss_types[ss.part_of_speech]
130129
lex_filenum = lex_filenums[ss.lex_name]
@@ -149,12 +148,12 @@ def unmap_sense_key(sk, KEY_PREFIX_LEN=5):
149148
oewn_key = e[0][KEY_PREFIX_LEN:]
150149
r = "__".join(e[1:])
151150
return (oewn_key.replace("-ap-", "'").replace("-sl-", "/").replace("-ex-", "!")
152-
.replace("-cm-",",").replace("-cl-",":").replace("-pl-","+") +
151+
.replace("-cm-",",").replace("-cn-",":").replace("-pl-","+") +
153152
"%" + r.replace(".", ":").replace("-sp-","_"))
154153
else:
155154
return (sk[KEY_PREFIX_LEN:].replace("__", "%").replace("-ap-", "'")
156155
.replace("-sl-", "/").replace("-ex-", "!").replace("-cm-",",")
157-
.replace("-cl-",":").replace("-pl-","+"))
156+
.replace("-cn-",":").replace("-pl-","+"))
158157

159158

160159

scripts/validate.py

+7
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ def main():
277277
errors += 1
278278

279279
instances = set()
280+
ilis = set()
280281

281282
for synset in wn.synsets:
282283
if synset.id[-1:] != synset.part_of_speech.value:
@@ -374,6 +375,12 @@ def main():
374375
(synset.id, item[1], item[0]))
375376
errors += 1
376377

378+
if synset.ili != "in" and synset.ili in ilis:
379+
print(f"ERROR: ILI {synset.ili} is duplicated")
380+
errors += 1
381+
else:
382+
ilis.add(synset.ili)
383+
377384
for synset in wn.synsets:
378385
for sr in synset.synset_relations:
379386
if sr.rel_type == SynsetRelType.HYPERNYM:

scripts/wordnet.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def change_sense_id(self, sense, new_id):
102102
self.sense2synset[new_id] = sense.synset
103103
self.id2sense[new_id] = sense
104104

105-
def to_xml(self, xml_file, part=True):
105+
def to_xml(self, xml_file, part=False):
106106
xml_file.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
107107
if part:
108108
xml_file.write(
@@ -241,8 +241,6 @@ def to_xml(self, xml_file, comments):
241241
n_str = " adjposition=\"%s\"" % self.adjposition
242242
else:
243243
n_str = ""
244-
if self.n >= 0:
245-
n_str = "%s n=\"%d\"" % (n_str, self.n)
246244
if self.sense_key:
247245
sk_str = " dc:identifier=\"%s\"" % escape_xml_lit(self.sense_key)
248246
else:
@@ -820,6 +818,8 @@ def elc(c):
820818
return '-ex-'
821819
elif c == '+':
822820
return '-pl-'
821+
elif c == '%':
822+
return '-pc-'
823823
elif xml_id_char_re.match(c):
824824
return c
825825
raise ValueError(f'Illegal character {c}')

0 commit comments

Comments
 (0)