Skip to content

Commit cc9e005

Browse files
author
Martin Hirzel
committed
Added one more Lale operator schema and three more dataset schemas to bring the total to 5x10 (5 operator schemas x 10 dataset schemas).
1 parent 165f893 commit cc9e005

File tree

1 file changed

+269
-9
lines changed

1 file changed

+269
-9
lines changed

test/test_ai_subschema.py

+269-9
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
class TestAIExamples(unittest.TestCase):
1010

11-
lr_schema = {
11+
lrOld_schema = {
1212
'$schema': 'http://json-schema.org/draft-04/schema#',
1313
'description': 'Input data schema for training.',
1414
'type': 'object',
@@ -29,6 +29,28 @@ class TestAIExamples(unittest.TestCase):
2929
'type': 'number'}}}
3030
}
3131

32+
lrNew_schema = {
33+
'$schema': 'http://json-schema.org/draft-04/schema#',
34+
'description': 'Input data schema for training.',
35+
'type': 'object',
36+
'required': ['X', 'y'],
37+
'additionalProperties': False,
38+
'properties': {
39+
'X': {
40+
'description': 'Features; the outer array is over samples.',
41+
'type': 'array',
42+
'items': {
43+
'type': 'array',
44+
'items': {
45+
'type': 'number'}}},
46+
'y': {
47+
'description': 'Target class labels; the array is over samples.',
48+
"anyOf": [
49+
{"type": "array", "items": {"type": "number"}},
50+
{"type": "array", "items": {"type": "string"}},
51+
{"type": "array", "items": {"type": "boolean"}}]}}
52+
}
53+
3254
project_schema = {
3355
'$schema': 'http://json-schema.org/draft-04/schema#',
3456
'description': 'Input data schema for training Project.',
@@ -307,22 +329,260 @@ class TestAIExamples(unittest.TestCase):
307329
'maxItems': 161297}}
308330
}
309331

310-
operator_names = ['lr', 'project', 'nmf', 'tfidf']
311-
dataset_names = ['irisArr', 'irisDf', 'digits', 'housing', 'creditG', 'movies', 'drugRev']
332+
adultCat_schema = {
333+
'$schema': 'http://json-schema.org/draft-04/schema#',
334+
'type': 'object',
335+
'additionalProperties': False,
336+
'required': ['X', 'y'],
337+
'properties': {
338+
'X': {
339+
'type': 'array',
340+
'minItems': 32724, 'maxItems': 32724,
341+
'items': {
342+
'type': 'array',
343+
'minItems': 14, 'maxItems': 14,
344+
'items': [
345+
{'description': 'age', 'type': 'number'},
346+
{'description': 'workclass', 'type': 'string'},
347+
{'description': 'fnlwgt', 'type': 'number'},
348+
{'description': 'education', 'type': 'string'},
349+
{'description': 'education-num', 'type': 'number'},
350+
{'description': 'marital-status', 'type': 'string'},
351+
{'description': 'occupation', 'type': 'string'},
352+
{'description': 'relationship', 'type': 'string'},
353+
{'description': 'race', 'type': 'string'},
354+
{'description': 'sex', 'type': 'string'},
355+
{'description': 'capital-gain', 'type': 'number'},
356+
{'description': 'capital-loss', 'type': 'number'},
357+
{'description': 'hours-per-week', 'type': 'number'},
358+
{'description': 'native-country', 'type': 'string'}]}},
359+
'y': {
360+
'type': 'array',
361+
'minItems': 32724, 'maxItems': 32724,
362+
'items': {'description': 'class', 'enum': ['<=50K', '>50K']}}}}
363+
364+
adultNum_schema = {
365+
'$schema': 'http://json-schema.org/draft-04/schema#',
366+
'type': 'object',
367+
'additionalProperties': False,
368+
'required': ['X', 'y'],
369+
'properties': {
370+
'X': {
371+
'type': 'array',
372+
'minItems': 32724, 'maxItems': 32724,
373+
'items': {
374+
'type': 'array',
375+
'minItems': 105, 'maxItems': 105,
376+
'items': [
377+
{'description': 'workclass_Federal-gov', 'enum': [0,1]},
378+
{'description': 'workclass_Local-gov', 'enum': [0,1]},
379+
{'description': 'workclass_Never-worked', 'enum': [0,1]},
380+
{'description': 'workclass_Private', 'enum': [0,1]},
381+
{'description': 'workclass_Self-emp-inc', 'enum': [0,1]},
382+
{'description': 'workclass_Self-emp-not-inc', 'enum': [0,1]},
383+
{'description': 'workclass_State-gov', 'enum': [0,1]},
384+
{'description': 'workclass_Without-pay', 'enum': [0,1]},
385+
{'description': 'education_10th', 'enum': [0,1]},
386+
{'description': 'education_11th', 'enum': [0,1]},
387+
{'description': 'education_12th', 'enum': [0,1]},
388+
{'description': 'education_1st-4th', 'enum': [0,1]},
389+
{'description': 'education_5th-6th', 'enum': [0,1]},
390+
{'description': 'education_7th-8th', 'enum': [0,1]},
391+
{'description': 'education_9th', 'enum': [0,1]},
392+
{'description': 'education_Assoc-acdm', 'enum': [0,1]},
393+
{'description': 'education_Assoc-voc', 'enum': [0,1]},
394+
{'description': 'education_Bachelors', 'enum': [0,1]},
395+
{'description': 'education_Doctorate', 'enum': [0,1]},
396+
{'description': 'education_HS-grad', 'enum': [0,1]},
397+
{'description': 'education_Masters', 'enum': [0,1]},
398+
{'description': 'education_Preschool', 'enum': [0,1]},
399+
{'description': 'education_Prof-school', 'enum': [0,1]},
400+
{'description': 'education_Some-college', 'enum': [0,1]},
401+
{'description': 'marital-status_Divorced', 'enum': [0,1]},
402+
{'description': 'marital-status_Married-AF-spouse', 'enum': [0,1]},
403+
{'description': 'marital-status_Married-civ-spouse', 'enum': [0,1]},
404+
{'description': 'marital-status_Married-spouse-absent', 'enum': [0,1]},
405+
{'description': 'marital-status_Never-married', 'enum': [0,1]},
406+
{'description': 'marital-status_Separated', 'enum': [0,1]},
407+
{'description': 'marital-status_Widowed', 'enum': [0,1]},
408+
{'description': 'occupation_Adm-clerical', 'enum': [0,1]},
409+
{'description': 'occupation_Armed-Forces', 'enum': [0,1]},
410+
{'description': 'occupation_Craft-repair', 'enum': [0,1]},
411+
{'description': 'occupation_Exec-managerial', 'enum': [0,1]},
412+
{'description': 'occupation_Farming-fishing', 'enum': [0,1]},
413+
{'description': 'occupation_Handlers-cleaners', 'enum': [0,1]},
414+
{'description': 'occupation_Machine-op-inspct', 'enum': [0,1]},
415+
{'description': 'occupation_Other-service', 'enum': [0,1]},
416+
{'description': 'occupation_Priv-house-serv', 'enum': [0,1]},
417+
{'description': 'occupation_Prof-specialty', 'enum': [0,1]},
418+
{'description': 'occupation_Protective-serv', 'enum': [0,1]},
419+
{'description': 'occupation_Sales', 'enum': [0,1]},
420+
{'description': 'occupation_Tech-support', 'enum': [0,1]},
421+
{'description': 'occupation_Transport-moving', 'enum': [0,1]},
422+
{'description': 'relationship_Husband', 'enum': [0,1]},
423+
{'description': 'relationship_Not-in-family', 'enum': [0,1]},
424+
{'description': 'relationship_Other-relative', 'enum': [0,1]},
425+
{'description': 'relationship_Own-child', 'enum': [0,1]},
426+
{'description': 'relationship_Unmarried', 'enum': [0,1]},
427+
{'description': 'relationship_Wife', 'enum': [0,1]},
428+
{'description': 'race_Amer-Indian-Eskimo', 'enum': [0,1]},
429+
{'description': 'race_Asian-Pac-Islander', 'enum': [0,1]},
430+
{'description': 'race_Black', 'enum': [0,1]},
431+
{'description': 'race_Other', 'enum': [0,1]},
432+
{'description': 'race_White', 'enum': [0,1]},
433+
{'description': 'sex_Female', 'enum': [0,1]},
434+
{'description': 'sex_Male', 'enum': [0,1]},
435+
{'description': 'native-country_Cambodia', 'enum': [0,1]},
436+
{'description': 'native-country_Canada', 'enum': [0,1]},
437+
{'description': 'native-country_China', 'enum': [0,1]},
438+
{'description': 'native-country_Columbia', 'enum': [0,1]},
439+
{'description': 'native-country_Cuba', 'enum': [0,1]},
440+
{'description': 'native-country_Dominican-Republic', 'enum': [0,1]},
441+
{'description': 'native-country_Ecuador', 'enum': [0,1]},
442+
{'description': 'native-country_El-Salvador', 'enum': [0,1]},
443+
{'description': 'native-country_England', 'enum': [0,1]},
444+
{'description': 'native-country_France', 'enum': [0,1]},
445+
{'description': 'native-country_Germany', 'enum': [0,1]},
446+
{'description': 'native-country_Greece', 'enum': [0,1]},
447+
{'description': 'native-country_Guatemala', 'enum': [0,1]},
448+
{'description': 'native-country_Haiti', 'enum': [0,1]},
449+
{'description': 'native-country_Holand-Netherlands', 'enum': [0,1]},
450+
{'description': 'native-country_Honduras', 'enum': [0,1]},
451+
{'description': 'native-country_Hong', 'enum': [0,1]},
452+
{'description': 'native-country_Hungary', 'enum': [0,1]},
453+
{'description': 'native-country_India', 'enum': [0,1]},
454+
{'description': 'native-country_Iran', 'enum': [0,1]},
455+
{'description': 'native-country_Ireland', 'enum': [0,1]},
456+
{'description': 'native-country_Italy', 'enum': [0,1]},
457+
{'description': 'native-country_Jamaica', 'enum': [0,1]},
458+
{'description': 'native-country_Japan', 'enum': [0,1]},
459+
{'description': 'native-country_Laos', 'enum': [0,1]},
460+
{'description': 'native-country_Mexico', 'enum': [0,1]},
461+
{'description': 'native-country_Nicaragua', 'enum': [0,1]},
462+
{'description': 'native-country_Outlying-US(Guam-USVI-etc)', 'enum': [0,1]},
463+
{'description': 'native-country_Peru', 'enum': [0,1]},
464+
{'description': 'native-country_Philippines', 'enum': [0,1]},
465+
{'description': 'native-country_Poland', 'enum': [0,1]},
466+
{'description': 'native-country_Portugal', 'enum': [0,1]},
467+
{'description': 'native-country_Puerto-Rico', 'enum': [0,1]},
468+
{'description': 'native-country_Scotland', 'enum': [0,1]},
469+
{'description': 'native-country_South', 'enum': [0,1]},
470+
{'description': 'native-country_Taiwan', 'enum': [0,1]},
471+
{'description': 'native-country_Thailand', 'enum': [0,1]},
472+
{'description': 'native-country_Trinadad&Tobago', 'enum': [0,1]},
473+
{'description': 'native-country_United-States', 'enum': [0,1]},
474+
{'description': 'native-country_Vietnam', 'enum': [0,1]},
475+
{'description': 'native-country_Yugoslavia', 'enum': [0,1]},
476+
{'description': 'age', 'type': 'number'},
477+
{'description': 'fnlwgt', 'type': 'number'},
478+
{'description': 'education-num', 'type': 'number'},
479+
{'description': 'capital-gain', 'type': 'number'},
480+
{'description': 'capital-loss', 'type': 'number'},
481+
{'description': 'hours-per-week', 'type': 'number'}]}},
482+
'y': {
483+
'type': 'array',
484+
'minItems': 32724, 'maxItems': 32724,
485+
'items': {'description': 'class', 'enum': [0, 1]}}}}
486+
487+
covtype_schema = {
488+
'$schema': 'http://json-schema.org/draft-04/schema#',
489+
'documentation_url': 'https://scikit-learn.org/0.20/datasets/index.html#forest-covertypes',
490+
'type': 'object',
491+
'additionalProperties': False,
492+
'required': ['X', 'y'],
493+
'properties': {
494+
'X': {
495+
'type': 'array',
496+
'items': {
497+
'type': 'array',
498+
'minItems': 54,
499+
'maxItems': 54,
500+
'items': [
501+
{'description': 'Elevation', 'type': 'integer'},
502+
{'description': 'Aspect', 'type': 'integer'},
503+
{'description': 'Slope', 'type': 'integer'},
504+
{'description': 'Horizontal_Distance_To_Hydrology', 'type': 'integer'},
505+
{'description': 'Vertical_Distance_To_Hydrology', 'type': 'integer'},
506+
{'description': 'Horizontal_Distance_To_Roadways', 'type': 'integer'},
507+
{'description': 'Hillshade_9am', 'type': 'integer'},
508+
{'description': 'Hillshade_Noon', 'type': 'integer'},
509+
{'description': 'Hillshade_3pm', 'type': 'integer'},
510+
{'description': 'Horizontal_Distance_To_Fire_Points', 'type': 'integer'},
511+
{'description': 'Wilderness_Area1', 'enum': [0, 1]},
512+
{'description': 'Wilderness_Area2', 'enum': [0, 1]},
513+
{'description': 'Wilderness_Area3', 'enum': [0, 1]},
514+
{'description': 'Wilderness_Area4', 'enum': [0, 1]},
515+
{'description': 'Soil_Type1', 'enum': [0, 1]},
516+
{'description': 'Soil_Type2', 'enum': [0, 1]},
517+
{'description': 'Soil_Type3', 'enum': [0, 1]},
518+
{'description': 'Soil_Type4', 'enum': [0, 1]},
519+
{'description': 'Soil_Type5', 'enum': [0, 1]},
520+
{'description': 'Soil_Type6', 'enum': [0, 1]},
521+
{'description': 'Soil_Type7', 'enum': [0, 1]},
522+
{'description': 'Soil_Type8', 'enum': [0, 1]},
523+
{'description': 'Soil_Type9', 'enum': [0, 1]},
524+
{'description': 'Soil_Type10', 'enum': [0, 1]},
525+
{'description': 'Soil_Type11', 'enum': [0, 1]},
526+
{'description': 'Soil_Type12', 'enum': [0, 1]},
527+
{'description': 'Soil_Type13', 'enum': [0, 1]},
528+
{'description': 'Soil_Type14', 'enum': [0, 1]},
529+
{'description': 'Soil_Type15', 'enum': [0, 1]},
530+
{'description': 'Soil_Type16', 'enum': [0, 1]},
531+
{'description': 'Soil_Type17', 'enum': [0, 1]},
532+
{'description': 'Soil_Type18', 'enum': [0, 1]},
533+
{'description': 'Soil_Type19', 'enum': [0, 1]},
534+
{'description': 'Soil_Type20', 'enum': [0, 1]},
535+
{'description': 'Soil_Type21', 'enum': [0, 1]},
536+
{'description': 'Soil_Type22', 'enum': [0, 1]},
537+
{'description': 'Soil_Type23', 'enum': [0, 1]},
538+
{'description': 'Soil_Type24', 'enum': [0, 1]},
539+
{'description': 'Soil_Type25', 'enum': [0, 1]},
540+
{'description': 'Soil_Type26', 'enum': [0, 1]},
541+
{'description': 'Soil_Type27', 'enum': [0, 1]},
542+
{'description': 'Soil_Type28', 'enum': [0, 1]},
543+
{'description': 'Soil_Type29', 'enum': [0, 1]},
544+
{'description': 'Soil_Type30', 'enum': [0, 1]},
545+
{'description': 'Soil_Type31', 'enum': [0, 1]},
546+
{'description': 'Soil_Type32', 'enum': [0, 1]},
547+
{'description': 'Soil_Type33', 'enum': [0, 1]},
548+
{'description': 'Soil_Type34', 'enum': [0, 1]},
549+
{'description': 'Soil_Type35', 'enum': [0, 1]},
550+
{'description': 'Soil_Type36', 'enum': [0, 1]},
551+
{'description': 'Soil_Type37', 'enum': [0, 1]},
552+
{'description': 'Soil_Type38', 'enum': [0, 1]},
553+
{'description': 'Soil_Type39', 'enum': [0, 1]},
554+
{'description': 'Soil_Type40', 'enum': [0, 1]}]}},
555+
'y': {
556+
'type': 'array',
557+
'items': {
558+
'description': 'The cover type, i.e., the dominant species of trees.',
559+
'enum': ['spruce_fir', 'lodgepole_pine', 'ponderosa_pine', 'cottonwood_willow', 'aspen', 'douglas_fir', 'krummholz']}}}}
560+
561+
operator_names = ['lrOld', 'lrNew', 'project', 'nmf', 'tfidf']
562+
dataset_names = ['irisArr', 'irisDf', 'digits', 'housing', 'creditG',
563+
'movies', 'drugRev', 'adultCat', 'adultNum', 'covtype']
312564

313565
expected = {
314-
'lr': {
566+
'lrOld': {
567+
'irisArr': True, 'irisDf': True, 'digits': True, 'housing': True,
568+
'creditG': False, 'movies': False, 'drugRev': False,
569+
'adultCat': False, 'adultNum': True, 'covtype': False},
570+
'lrNew': {
315571
'irisArr': True, 'irisDf': True, 'digits': True, 'housing': True,
316-
'creditG': False, 'movies': False, 'drugRev': False},
572+
'creditG': False, 'movies': False, 'drugRev': False,
573+
'adultCat': False, 'adultNum': True, 'covtype': True},
317574
'project': {
318575
'irisArr': True, 'irisDf': True, 'digits': True, 'housing': True,
319-
'creditG': True, 'movies': False, 'drugRev': True},
576+
'creditG': True, 'movies': False, 'drugRev': True,
577+
'adultCat': True, 'adultNum': True, 'covtype': True},
320578
'nmf': {
321579
'irisArr': False, 'irisDf': False, 'digits': True, 'housing': False,
322-
'creditG': False, 'movies': False, 'drugRev': False},
580+
'creditG': False, 'movies': False, 'drugRev': False,
581+
'adultCat': False, 'adultNum': False, 'covtype': False},
323582
'tfidf': {
324583
'irisArr': False, 'irisDf': False, 'digits': False, 'housing': False,
325-
'creditG': False, 'movies': True, 'drugRev': False}}
584+
'creditG': False, 'movies': True, 'drugRev': False,
585+
'adultCat': False, 'adultNum': False, 'covtype': False}}
326586

327587
def test_dataset_op(self):
328588
# for name in TestAIExamples.operator_names + TestAIExamples.dataset_names:
@@ -378,4 +638,4 @@ def test_dataset_op(self):
378638
# print("False positives", fp)
379639
# print("False negatives", fn)
380640
# print("True positives", tp)
381-
# print("True negatives", tn)
641+
# print("True negatives", tn)

0 commit comments

Comments
 (0)