tasks

sileod · sileod · commit 40ad476646c8 · 2023-01-31T10:58:21.000+01:00
diff --git a/src/tasksource/preprocess.py b/src/tasksource/preprocess.py
@@ -27,7 +27,6 @@ def sample_dataset(dataset,n=10000, n_eval=1000):
 
 class Preprocessing(DotWiz):
     default_splits = ('train','validation','test')
-        
     @staticmethod
     def __map_to_target(x,fn=lambda x:None, target=None):
         x[target]=fn(x)
@@ -170,6 +169,7 @@ class SharedFields:
     config_name:str = None
     pre_process: callable = lambda x:x
     post_process: callable = lambda x:x
+    #language:str="en"
 
 @dataclass
 class Classification(SharedFields, ClassificationFields): pass
diff --git a/src/tasksource/tasks.py b/src/tasksource/tasks.py
@@ -1,7 +1,6 @@
 from .preprocess import cat, get, regen, constant, Classification, TokenClassification, MultipleChoice
 from .metadata import bigbench_discriminative_english, blimp_hard, imppres_presupposition, imppres_implicature
-from datasets import get_dataset_config_names, ClassLabel
-
+from datasets import get_dataset_config_names, ClassLabel, Dataset, DatasetDict
 # variable name: dataset___config__task
 
 ###################### NLI/paraphrase ###############################
@@ -649,12 +648,12 @@ def _split_choices(s):
     dataset_name="lucasmccabe/logiqa"
 )
 
-proto_qa = MultipleChoice(
-    "question",
-    choices_list=lambda x:x['answer-clusters']['answers'],
-    labels=lambda x: x['answer-clusters']['count'].index(max(x['answer-clusters']['count'])),
-    config_name='proto_qa'
-)
+#proto_qa = MultipleChoice(
+#    "question",
+#    choices_list=lambda x:x['answer-clusters']['answers'],
+#    labels=lambda x: x['answer-clusters']['count'].index(max(x['answer-clusters']['count'])),
+#    config_name='proto_qa'
+#)
 
 wiki_qa = Classification("question","answer","label")
 
@@ -705,4 +704,15 @@ def _preprocess_chatgpt_detection(ex):
 
 moral_stories = MultipleChoice(cat(["situation","intention"]),
     choices=['moral_action',"immoral_action"],labels=constant(0),
-    dataset_name="demelin/moral_stories", config_name="full")
+    dataset_name="demelin/moral_stories", config_name="full")
+
+prost = MultipleChoice(cat(["context","ex_question"]), choices=['A','B','C','D'],labels="label",
+    dataset_name="corypaik/prost")  # four answer columns A-D; cat() presumably joins "context" and "ex_question" into the input text - confirm in preprocess.py
+
+dyna_hate = Classification("text",labels="label",dataset_name="aps/dynahate",splits=['train',None,None])  # only 'train' is named; the two None slots presumably line up with default_splits' validation/test - confirm how Preprocessing treats None
+
+syntactic_augmentation_nli = Classification('sentence1',"sentence2","gold_label",dataset_name="metaeval/syntactic-augmentation-nli")  # positional args presumably map to (sentence1, sentence2, labels) - confirm against ClassificationFields
+
+
+#autotnli = Classification("premises", "hypothesis", "label", dataset_name="metaeval/autotnli")
+#equate = Classification("sentence1", "sentence2", "gold_label",dataset_name="metaeval/equate")
diff --git a/tasks.md b/tasks.md