Skip to content

Commit 42d41a7

Browse files
author
Damien Sileo
committed
new tasks
1 parent e57e9ae commit 42d41a7

File tree

3 files changed

+54
-29
lines changed

3 files changed

+54
-29
lines changed

src/tasksource/mtasks.py

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .preprocess import cat, get, regen, constant, Classification, TokenClassification, MultipleChoice
1+
from .preprocess import cat, get,name, regen, constant, Classification, TokenClassification, MultipleChoice
22
from .metadata import udep_labels
33
from datasets import get_dataset_config_names, ClassLabel, Dataset, DatasetDict, concatenate_datasets, Sequence
44

@@ -19,47 +19,34 @@ def concatenate_configs(dataset):
1919

2020
americas_nli = Classification("premise","hypothesis","label",config_name="all_languages")
2121

22-
moritz_xnli = Classification("premise","hypothesis","label",
22+
moritz_xnli = Classification("premise","hypothesis",name("label",["entailment", "neutral","contradiction"]),
2323
pre_process=concatenate_configs, dataset_name="MoritzLaurer/multilingual-NLI-26lang-2mil7")
2424

2525
stsb_multi_mt = Classification("sentence1", "sentence2",
2626
lambda x: float(x["similarity_score"]/5),
2727
**all('stsb_multi_mt'))
2828

29-
pawsx = Classification("sentence1","sentence2","label", **all('paws-x'))
29+
pawsx = Classification("sentence1","sentence2",name('label',['not_paraphrase','paraphrase']), **all('paws-x'))
3030

3131
miam = Classification("Utterance",labels="Label", **all('miam'))
3232

3333
xstance = Classification("question", "comment", "label",
3434
**all("strombergnlp/x-stance"))
3535

36-
sentiment = Classification("text",labels="label",
37-
dataset_name="tyqiangz/multilingual-sentiments",config_name="all",
38-
pre_process=lambda ds:ds.filter(lambda x: "amazon_reviews" not in x['source'])
39-
)
4036

41-
emotion = Classification("text",labels="emotion",dataset_name="metaeval/universal-joy")
42-
43-
review_sentiment = Classification("review_body",labels="stars",
44-
dataset_name="amazon_reviews_multi",config_name="all_languages")
45-
46-
tweet_sentiment = Classification("text", labels="label",
47-
**all('cardiffnlp/tweet_sentiment_multilingual'))
48-
49-
offenseval = Classification(lambda x: str(x["text"]), labels="subtask_a",
37+
offenseval = Classification(lambda x: str(x["text"]), labels=name("subtask_a",['not offensive','offensive']),
38+
pre_process=lambda ds:ds.filter(lambda x: x['subtask_a'] in [0,1]),
5039
dataset_name='strombergnlp/offenseval_2020',
5140
config_name=["ar","da","gr","tr"])
5241

5342
offenseval_dravidian = Classification("text",labels="label",config_name=['kannada','malayalam','tamil'])
5443

55-
mlma_hate = Classification("tweet", labels="sentiment",
44+
mlma_hate = Classification("tweet", labels=lambda x:x["sentiment"].split('_'),
5645
dataset_name="nedjmaou/MLMA_hate_speech")
5746

58-
5947
qam = Classification("question","answer","label", dataset_name="xglue",config_name="qam")
6048

61-
x_sum_factuality = Classification("summary","generated_summary","label",
62-
dataset_name="ylacombe/xsum_factuality")
49+
#x_sum_factuality = Classification("summary","generated_summary","label", dataset_name="ylacombe/xsum_factuality")
6350

6451
x_fact = Classification('evidence','claim','label', dataset_name="metaeval/x-fact")
6552

@@ -73,8 +60,6 @@ def concatenate_configs(dataset):
7360
sentence2=cat(["target_word","context_2"], " : "),
7461
labels='label',dataset_name="pasinit/xlwic",config_name=['xlwic_de_de','xlwic_it_it','xlwic_fr_fr','xlwic_en_ko'])
7562

76-
77-
7863
#[ "spam", "fails_task", "lang_mismatch", "pii", "not_appropriate", "hate_speech", "sexual_content", "quality", "toxicity", "humor", "helpfulness", "creativity", "violence" ]
7964

8065
oasst1__quality = Classification("parent_text","text",labels="quality", dataset_name="tasksource/oasst1_dense_flat",
@@ -119,10 +104,30 @@ def udep_post_process(ds):
119104
oasst_rlhf = MultipleChoice("prompt",choices=['chosen','rejected'],labels=constant(0),
120105
dataset_name="tasksource/oasst1_pairwise_rlhf_reward")
121106

122-
#Classification(
107+
sentiment = Classification("text",labels="label", dataset_name="tyqiangz/multilingual-sentiments",config_name="all",
108+
pre_process=lambda ds:ds.filter(lambda x: "amazon_reviews" not in x['source']) )
109+
tweet_sentiment = Classification("text", labels="label", **all('cardiffnlp/tweet_sentiment_multilingual'))
110+
review_sentiment = Classification("review_body",labels="stars", dataset_name="amazon_reviews_multi",config_name="all_languages")
111+
emotion = Classification("text",labels="emotion",dataset_name="metaeval/universal-joy")
112+
# in mms
113+
114+
mms_sentiment = Classification("text",labels="label",dataset_name='Brand24/mms')
115+
116+
mapa_fine = TokenClassification("tokens","coarse_grained",dataset_name='joelito/mapa')
117+
mapa_corase = TokenClassification("tokens","fine_grained",dataset_name='joelito/mapa')
118+
119+
aces_ranking = MultipleChoice("source",choices=['good-translation','incorrect-translation'],labels=constant(0), dataset_name='nikitam/ACES')
120+
aces_phenomena = Classification('source','incorrect-translation','phenomena', dataset_name='nikitam/ACES')
121+
122+
amazon_intent = Classification("utt",labels="intent",**all('AmazonScience/massive'))
123123
# dataset_name='glue',config_name=['ocnli','afqmc'])
124124

125-
#
125+
tidy_as2=Classification("Question","Sentence","Label",dataset_name='tasksource/tydi-as2-balanced')
126+
127+
multiconer = TokenClassification("tokens","ner_tags_index", **all("MultiCoNER/multiconer_v2"))
128+
129+
mtop = Classification("question",labels="intent", dataset_name="tasksource/mtop")
130+
126131
#wino_x
127132
# clue, klue, indic_glue
128133
# SMS_Spam_Multilingual_Collection_Dataset

src/tasksource/recast.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
improper_labels = ['recast/recast_kg_relations','linguisticprobing',"lexglue/scotus","pragmeval/squinky","pragmeval/emobank",'pragmeval/persuasiveness']
77
improper_labels += ['glue/stsb', 'sick/relatedness', 'joci', 'utilitarianism', 'amazon_counterfactual/en', 'toxic_conversations', 'ethos/multilabel', 'lex_glue/eurlex', 'lex_glue/unfair_tos', 'app_reviews', 'humicroedit/subtask-1', 'stackoverflow-questions', 'go_emotions/simplified', 'google_wellformed_query', 'has_part', 'blog_authorship_corpus/age', 'promptCoherence', 'Sarcasm_News_Headline', 'auditor_review/demo-org--auditor_review', 'Dynasent_Disagreement', 'Politeness_Disagreement', 'SBIC_Disagreement', 'SChem_Disagreement', 'Dilemmas_Disagreement', 'sts-companion', 'acceptability-prediction', 'chaos-mnli-ambiguity', 'headline_cause/en_simple', 'oasst1_dense_flat', 'civil_comments']
8-
8+
improper_labels += ['stsb_multi_mt','MLMA_hate_speech']
99

1010
def render_options(options):
1111
options = [f'"{x}"' for x in options]
@@ -48,14 +48,14 @@ def shuffle_choices(x):
4848
x["labels"]=choices_texts.index(correct_choice)
4949
return x
5050

51-
def recast_dataset_classification_to_mc(dataset,N=4):
51+
def recast_dataset_classification_to_mc(dataset,sep="[SEP]",N=4):
5252

5353
def recast_split(d,N=N):
5454
labels = d.features['labels']
5555
df=d.to_pandas()
5656
df['inputs'] = df.sentence1
5757
if "sentence2" in df:
58-
df['inputs'] +="[SEP]" + df.sentence2
58+
df['inputs'] +=sep + df.sentence2
5959

6060
N=min(N, len(labels.names))
6161
df['choices']=df.apply(lambda x:negative_sample_options(labels.int2str(x['labels']), labels.names,N),axis=1)

src/tasksource/tasks.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .preprocess import cat, get, regen, name, constant, Classification, TokenClassification, MultipleChoice
22
from .metadata import bigbench_discriminative_english, blimp_hard, imppres_presupposition, imppres_implicature, udep_en_configs, udep_en_labels
33
from datasets import get_dataset_config_names, Sequence, ClassLabel, Dataset, DatasetDict
4+
45
# variable name: dataset___config__task
56

67
###################### NLI/paraphrase ###############################
@@ -1008,8 +1009,27 @@ def _udep_post_process(ds):
10081009

10091010
monli = Classification("sentence1","sentence2","gold_label", dataset_name="tasksource/monli")
10101011

1011-
causality = Classification('input',labels=name('label',['not_entailment','entailment']),dataset_name='causalnlp/corr2cause')
1012+
causality = Classification('premise','hypothesis','relation', dataset_name='tasksource/corr2cause')
10121013

10131014
lsat = MultipleChoice(cat(['passage','question']), choices_list='references',labels='gold_index',dataset_name='lighteval/lsat_qa',config_name='all')
10141015

1015-
apt = Classification('text_a','text_b',name('labels',['not_paraphrase','paraprhase']),dataset_name='tasksource/apt')
1016+
apt = Classification('text_a','text_b',name('labels',['not_paraphrase','paraphrase']),dataset_name='tasksource/apt')
1017+
1018+
#xsum_factuality = Classification("summary",labels="is_factual")
1019+
1020+
financial_sentiment = Classification("text",labels="label",dataset_name="zeroshot/twitter-financial-news-sentiment")
1021+
1022+
def _icl_rand(x):
1023+
import random
1024+
return random.Random(x['sentence1'][:50]).randint(0,1) #deterministic label for each input
1025+
1026+
icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)],
1027+
labels=lambda x: int(x['symbols'][_icl_rand(x)]==x['targets']),
1028+
dataset_name="tasksource/icl-symbol-tuning-instruct",
1029+
pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char
1030+
post_process=lambda ds:ds.cast_column('labels',ClassLabel(names=['False','True']))
1031+
)
1032+
1033+
space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI")
1034+
1035+
# hate_context

0 commit comments

Comments (0)