Skip to content

Commit 40ad476

Browse files
committed
tasks
1 parent 1febc88 commit 40ad476

File tree

3 files changed

+241
-228
lines changed

3 files changed

+241
-228
lines changed

src/tasksource/preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def sample_dataset(dataset,n=10000, n_eval=1000):
27 27

28 28
class Preprocessing(DotWiz):
29 29
default_splits = ('train','validation','test')
30-
31 30
@staticmethod
32 31
def __map_to_target(x,fn=lambda x:None, target=None):
33 32
x[target]=fn(x)
@@ -170,6 +169,7 @@ class SharedFields:
170 169
config_name:str = None
171 170
pre_process: callable = lambda x:x
172 171
post_process: callable = lambda x:x
172+
#language:str="en"
173 173

174 174
@dataclass
175 175
class Classification(SharedFields, ClassificationFields): pass

src/tasksource/tasks.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1 1
from .preprocess import cat, get, regen, constant, Classification, TokenClassification, MultipleChoice
2 2
from .metadata import bigbench_discriminative_english, blimp_hard, imppres_presupposition, imppres_implicature
3-
from datasets import get_dataset_config_names, ClassLabel
4-
3+
from datasets import get_dataset_config_names, ClassLabel, Dataset, DatasetDict
5 4
# variable name: dataset___config__task
6 5

7 6
###################### NLI/paraphrase ###############################
@@ -649,12 +648,12 @@ def _split_choices(s):
649 648
dataset_name="lucasmccabe/logiqa"
650 649
)
651 650

652-
proto_qa = MultipleChoice(
653-
"question",
654-
choices_list=lambda x:x['answer-clusters']['answers'],
655-
labels=lambda x: x['answer-clusters']['count'].index(max(x['answer-clusters']['count'])),
656-
config_name='proto_qa'
657-
)
651+
#proto_qa = MultipleChoice(
652+
# "question",
653+
# choices_list=lambda x:x['answer-clusters']['answers'],
654+
# labels=lambda x: x['answer-clusters']['count'].index(max(x['answer-clusters']['count'])),
655+
# config_name='proto_qa'
656+
#)
658 657

659 658
wiki_qa = Classification("question","answer","label")
660 659

@@ -705,4 +704,15 @@ def _preprocess_chatgpt_detection(ex):
705 704

706 705
moral_stories = MultipleChoice(cat(["situation","intention"]),
707 706
choices=['moral_action',"immoral_action"],labels=constant(0),
708-
dataset_name="demelin/moral_stories", config_name="full")
707+
dataset_name="demelin/moral_stories", config_name="full")
708+
709+
prost = MultipleChoice(cat(["context","ex_question"]), choices=['A','B','C','D'],labels="label",
710+
dataset_name="corypaik/prost")
711+
712+
dyna_hate = Classification("text",labels="label",dataset_name="aps/dynahate",splits=['train',None,None])
713+
714+
syntactic_augmentation_nli = Classification('sentence1',"sentence2","gold_label",dataset_name="metaeval/syntactic-augmentation-nli")
715+
716+
717+
#autotnli = Classification("premises", "hypothesis", "label", dataset_name="metaeval/autotnli")
718+
#equate = Classification("sentence1", "sentence2", "gold_label",dataset_name="metaeval/equate")

0 commit comments

Comments
 (0)