Skip to content

Commit faf6316

Browse files
committed
Change each __type__ in the catalog to a dictionary with two fields, module and name, rather than the snake-case form of the class name, and use _class_register only for special cases (such as deprecated classes)
Signed-off-by: dafnapension <[email protected]>
1 parent 92827a3 commit faf6316

File tree

4,831 files changed

+123359
-25577
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

4,831 files changed

+123359
-25577
lines changed

.github/workflows/catalog_preparation.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
preparation:
1515

1616
runs-on: ubuntu-latest
17-
timeout-minutes: 30
17+
timeout-minutes: 45
1818
env:
1919
OS: ubuntu-latest
2020
UNITXT_DEFAULT_VERBOSITY: error
@@ -27,7 +27,7 @@ jobs:
2727

2828
strategy:
2929
matrix:
30-
modulo: [0,1,2,3,4,5,6,7]
30+
modulo: [0,1,2,3,4,5,6,7,8,9,10,11]
3131

3232
steps:
3333
- uses: actions/checkout@v5
@@ -53,7 +53,7 @@ jobs:
5353
run: |
5454
modulo="${{ matrix.modulo }}"
5555
echo "modulo=${modulo}" >> $GITHUB_STEP_SUMMARY
56-
echo "sed -i 's/^num_par = 1 /num_par = 8 /' tests/catalog/test_preparation.py" > sedit.sh
56+
echo "sed -i 's/^num_par = 1 /num_par = 12 /' tests/catalog/test_preparation.py" > sedit.sh
5757
echo "sed -i 's/^modulo = 0/modulo = ${modulo}/' tests/catalog/test_preparation.py" >> sedit.sh
5858
sh sedit.sh
5959
python -m unittest tests.catalog.test_preparation

docs/catalog.py

Lines changed: 32 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
from pygments import highlight
1111
from pygments.formatters import HtmlFormatter
1212
from pygments.lexers import PythonLexer
13-
from unitxt.artifact import Artifact
13+
from unitxt.artifact import (
14+
get_class_or_function_from_artifact_type,
15+
get_module_class_names,
16+
)
1417
from unitxt.text_utils import print_dict_as_python
1518
from unitxt.utils import load_json
1619

@@ -51,8 +54,8 @@ def imports_to_syntax_highlighted_html(subtypes: List[str]) -> str:
5154
return ""
5255
module_to_class_names = defaultdict(list)
5356
for subtype in subtypes:
54-
subtype_class = Artifact._class_register.get(subtype)
55-
module_to_class_names[subtype_class.__module__].append(subtype_class.__name__)
57+
(module, class_name) = get_module_class_names(subtype)
58+
module_to_class_names[module].append(class_name)
5659

5760
imports_txt = ""
5861
for modu in sorted(module_to_class_names.keys()):
@@ -103,31 +106,6 @@ def custom_walk(top):
103106
yield entry
104107

105108

106-
def all_subtypes_of_artifact(artifact):
107-
if (
108-
artifact is None
109-
or isinstance(artifact, str)
110-
or isinstance(artifact, bool)
111-
or isinstance(artifact, int)
112-
or isinstance(artifact, float)
113-
):
114-
return []
115-
if isinstance(artifact, list):
116-
to_return = []
117-
for art in artifact:
118-
to_return.extend(all_subtypes_of_artifact(art))
119-
return to_return
120-
# artifact is a dict
121-
to_return = []
122-
for key, value in artifact.items():
123-
if isinstance(value, str):
124-
if key == "__type__":
125-
to_return.append(value)
126-
else:
127-
to_return.extend(all_subtypes_of_artifact(value))
128-
return to_return
129-
130-
131109
def get_all_type_elements(nested_dict):
132110
type_elements = set()
133111

@@ -137,32 +115,33 @@ def recursive_search(d):
137115
d.pop("__tags__", None)
138116
for key, value in d.items():
139117
if key == "__type__":
140-
type_elements.add(value)
118+
type_elements.add(json.dumps(value))
141119
elif isinstance(value, dict):
142120
recursive_search(value)
143121
elif isinstance(value, list):
144122
for item in value:
145123
recursive_search(item)
146124

147125
recursive_search(nested_dict)
148-
return list(type_elements)
126+
return [json.loads(type_element) for type_element in type_elements]
149127

150128

151129
@lru_cache(maxsize=None)
152130
def artifact_type_to_link(artifact_type):
153-
artifact_class = Artifact._class_register.get(artifact_type)
154-
type_class_name = artifact_class.__name__
155-
artifact_class_id = f"{artifact_class.__module__}.{type_class_name}"
156-
return f'<a class="reference internal" href="../{artifact_class.__module__}.html#{artifact_class_id}" title="{artifact_class_id}"><code class="xref py py-class docutils literal notranslate"><span class="pre">{type_class_name}</span></code></a>'
131+
artifact_module, artifact_class_name = get_module_class_names(
132+
json.loads(artifact_type)
133+
)
134+
return f'<a class="reference internal" href="../{artifact_module}.html#{artifact_module}.{artifact_class_name}" title="{artifact_module}.{artifact_class_name}"><code class="xref py py-class docutils literal notranslate"><span class="pre">{artifact_class_name}</span></code></a>'
157135

158136

159137
# flake8: noqa: C901
138+
139+
160140
def make_content(artifact, label, all_labels):
161-
artifact_type = artifact["__type__"]
162-
artifact_class = Artifact._class_register.get(artifact_type)
163-
type_class_name = artifact_class.__name__
164-
catalog_id = label.replace("catalog.", "")
141+
artifact_type = artifact["__type__"] # dict with fields "module" and "name"
142+
artifact_class = get_class_or_function_from_artifact_type(artifact_type)
165143

144+
catalog_id = label.replace("catalog.", "")
166145
result = ""
167146

168147
if "__description__" in artifact and artifact["__description__"] is not None:
@@ -205,25 +184,22 @@ def make_content(artifact, label, all_labels):
205184
)
206185

207186
for type_name in type_elements:
208-
# source = f'<span class="nt">__type__</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">{type_name}</span>'
209-
source = f'<span class="n">__type__{type_name}</span><span class="p">'
210-
target = artifact_type_to_link(type_name)
211-
html_for_dict = html_for_dict.replace(
212-
source,
213-
f'<span class="n" STYLE="font-size:108%">{target}</span><span class="p">',
214-
# '<span class="nt">&quot;type&quot;</span><span class="p">:</span><span class="w"> </span>'
215-
# + target,
187+
artifact_module, artifact_class_name = get_module_class_names(type_name)
188+
pattern = re.compile(
189+
f'<span class="n">__type__(.*?)<span class="n">{artifact_class_name}</span>'
216190
)
191+
repl = (
192+
'<span class="n" STYLE="font-size:108%">'
193+
+ artifact_type_to_link(json.dumps(type_name))
194+
+ "</span>"
195+
)
196+
html_for_dict = pattern.sub(repl, html_for_dict)
217197

218-
pattern = r'(<span class="nt">)&quot;(.*?)&quot;(</span>)'
219-
220-
# Replacement function
221-
html_for_dict = re.sub(pattern, r"\1\2\3", html_for_dict)
222-
223-
subtypes = all_subtypes_of_artifact(artifact)
224-
subtypes = list(set(subtypes))
198+
subtypes = type_elements
225199
subtypes.remove(artifact_type) # this was already documented
226-
html_for_imports = imports_to_syntax_highlighted_html(subtypes)
200+
html_for_imports = imports_to_syntax_highlighted_html(
201+
get_all_type_elements(artifact)
202+
)
227203

228204
source_link = f"""<a class="reference external" href="https://github.com/IBM/unitxt/blob/main/src/unitxt/catalog/{catalog_id.replace(".", "/")}.json"><span class="viewcode-link"><span class="pre">[source]</span></span></a>"""
229205
html_for_element = f"""<div class="admonition note">
@@ -237,13 +213,13 @@ def make_content(artifact, label, all_labels):
237213
result += " " + html_for_element + "\n"
238214

239215
if artifact_class.__doc__:
240-
explanation_str = f"Explanation about `{type_class_name}`"
216+
explanation_str = f"Explanation about `{artifact_class.__name__}`"
241217
result += f"\n{explanation_str}\n"
242218
result += "+" * len(explanation_str) + "\n\n"
243219
result += artifact_class.__doc__ + "\n"
244220

245221
for subtype in subtypes:
246-
subtype_class = Artifact._class_register.get(subtype)
222+
subtype_class = get_class_or_function_from_artifact_type(subtype)
247223
subtype_class_name = subtype_class.__name__
248224
if subtype_class.__doc__:
249225
explanation_str = f"Explanation about `{subtype_class_name}`"

docs/conf.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,7 @@ def autodoc_skip_member(app, what, name, obj, would_skip, options):
113113

114114
if hasattr(obj, "__qualname__"):
115115
class_name = obj.__qualname__.split(".")[0]
116-
if (
117-
class_name
118-
and Artifact.is_registered_class_name(class_name)
119-
and class_name != name
120-
):
116+
if class_name and class_name != name:
121117
return True
122118

123119
return None

prepare/metrics/custom_f1.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,4 +433,7 @@ class NERWithoutClassReporting(NER):
433433
global_target=global_target,
434434
)
435435

436-
add_to_catalog(metric, "metrics.ner", overwrite=True)
436+
if __name__ == "__main__" or __name__ == "custom_f1":
437+
# A class is defined in this module, so merely importing the module to retrieve that class must not trigger add_to_catalog.
438+
# The guard must still pass when this module is run directly from Python (__main__) or loaded, for example, by test_preparation (where the module name is custom_f1).
439+
add_to_catalog(metric, "metrics.ner", overwrite=True)

0 commit comments

Comments
 (0)