|
17 | 17 | config_name=set(get_dataset_config_names("metaeval/babi_nli"))-{"agents-motivations"}
|
18 | 18 | ) # agents-motivations task is not as clear-cut as the others
|
19 | 19 |
|
20 |
| -def ling_nli_postprocess(ds): |
21 |
| - return ds.cast_column('labels', ClassLabel( |
22 |
| - names=['entailment','neutral','contradiction'])) |
23 | 20 |
|
24 |
| -ling_nli = Classification("premise_original","hypothesis_original","label", |
25 |
| - dataset_name="metaeval/lingnli", post_process=ling_nli_postprocess |
26 |
| -) |
| 21 | +ling_nli = Classification("premise","hypothesis","label",dataset_name="metaeval/lingnli") |
27 | 22 |
|
28 | 23 |
|
29 | 24 | sick__label = Classification('sentence_A','sentence_B','label')
|
@@ -124,23 +119,23 @@ def ling_nli_postprocess(ds):
|
124 | 119 | add_one_rte = Classification("premise","hypothesis","label",
|
125 | 120 | dataset_name="pietrolesci/add_one_rte",splits=["train","dev","test"])
|
126 | 121 |
|
127 |
| -def imppres_post_process(ds,prefix=''): |
| 122 | +def _imppres_post_process(ds,prefix=''): |
128 | 123 | # imppres entailment definition is either purely semantic or purely pragmatic
|
129 | 124 | # because of that, we differentiate the labels from the anli/mnli notation
|
130 | 125 | return ds.cast_column('labels', ClassLabel(
|
131 | 126 | names=[f'imppres{prefix}_entailment',f'imppres{prefix}_neutral',f'imppres{prefix}_contradiction']))
|
132 | 127 |
|
133 | 128 | imppres__presupposition = imppres__prag = Classification("premise","hypothesis","gold_label",
|
134 | 129 | dataset_name="metaeval/imppres", config_name=imppres_presupposition,
|
135 |
| - post_process=imppres_post_process) |
| 130 | + post_process=_imppres_post_process) |
136 | 131 |
|
137 | 132 | imppres__prag = Classification("premise","hypothesis","gold_label_prag",
|
138 | 133 | dataset_name="metaeval/imppres", config_name=imppres_implicature,
|
139 |
| - post_process=lambda x: imppres_post_process(x,'_prag')) |
| 134 | + post_process=lambda x: _imppres_post_process(x,'_prag')) |
140 | 135 |
|
141 | 136 | imppres__log = Classification("premise","hypothesis","gold_label_log",
|
142 | 137 | dataset_name="metaeval/imppres", config_name=imppres_implicature,
|
143 |
| - post_process=lambda x: imppres_post_process(x,'_log')) |
| 138 | + post_process=lambda x: _imppres_post_process(x,'_log')) |
144 | 139 |
|
145 | 140 |
|
146 | 141 | glue__diagnostics = Classification("premise","hypothesis","label",
|
@@ -312,13 +307,13 @@ def imppres_post_process(ds,prefix=''):
|
312 | 307 |
|
313 | 308 | swag=MultipleChoice(cat(["sent1","sent2"]),regen("ending[0-3]"),"label")
|
314 | 309 |
|
315 |
| -def split_choices(s): |
| 310 | +def _split_choices(s): |
316 | 311 | import re
|
317 | 312 | return [x.rstrip(', ') for x in re.split(r'[a-e] \) (.*?)',s) if x.strip(', ')]
|
318 | 313 |
|
319 | 314 | math_qa = MultipleChoice(
|
320 | 315 | 'Problem',
|
321 |
| - choices_list = lambda x: split_choices(x['options']), |
| 316 | + choices_list = lambda x: _split_choices(x['options']), |
322 | 317 | labels = lambda x:'abcde'.index(x['correct'])
|
323 | 318 | )
|
324 | 319 |
|
@@ -500,15 +495,14 @@ def split_choices(s):
|
500 | 495 |
|
501 | 496 | metaeval_linguisticprobing = Classification("sentence", labels="label", dataset_name="metaeval/linguisticprobing",
|
502 | 497 | config_name=['subj_number',
|
503 |
| - 'word_content', |
504 | 498 | 'obj_number',
|
505 | 499 | 'past_present',
|
506 | 500 | 'sentence_length',
|
507 | 501 | 'top_constituents',
|
508 | 502 | 'tree_depth',
|
509 | 503 | 'coordination_inversion',
|
510 | 504 | 'odd_man_out',
|
511 |
| - 'bigram_shift'] |
| 505 | + 'bigram_shift']#+['word_content'] # 'word_content' is left out because it has too many labels |
512 | 506 | )
|
513 | 507 |
|
514 | 508 | metaeval_crowdflower = Classification("text", labels="label",
|
@@ -664,5 +658,22 @@ def split_choices(s):
|
664 | 658 | config_name='proto_qa'
|
665 | 659 | )
|
666 | 660 |
|
667 |
| -###END |
668 |
| -################### END OF SUPPORT ###################### |
| 661 | +wiki_qa = Classification("question","answer","label") |
| 662 | + |
| 663 | +cycic_classification = Classification("question",labels="correct_answer", |
| 664 | + dataset_name = "metaeval/cycic_classification") |
| 665 | +cycic_mc = MultipleChoice("question", choices=regen('answer\_option[0-4]'), labels="correct_answer", |
| 666 | + dataset_name = "metaeval/cycic_multiplechoice") |
| 667 | + |
| 668 | + |
| 669 | +def _preprocess_chatgpt_detection(ex): |
| 670 | + import random |
| 671 | + label=random.random()<=0.5 |
| 672 | + ex['label']=label |
| 673 | + ex['answer']=[ex['human_answers'],ex['chatgpt_answers']][label] |
| 674 | + return ex |
| 675 | + |
| 676 | +chatgpt_detection = Classification("question","answer","label", |
| 677 | + dataset_name = 'Hello-SimpleAI/HC3', config_name="all", |
| 678 | + pre_process=lambda dataset:dataset.map(_preprocess_chatgpt_detection) |
| 679 | +) |
0 commit comments