ruff lint fix

malteos · Jul 18, 2024 · 2656044 · 2656044
1 parent ee00a32
commit 2656044
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 15 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -54,9 +54,6 @@
     "python.testing.pytestArgs": [
         "tests"
     ],
-    "[python]": {
-        "editor.defaultFormatter": "ms-python.black-formatter"
-    },
     "python.formatting.provider": "none",
     "python.testing.pytestEnabled": true,
 }
diff --git a/src/llm_datasets/datasets/multilingual/legal_mc4.py b/src/llm_datasets/datasets/multilingual/legal_mc4.py
@@ -29,13 +29,10 @@
 
 class LegalMC4BaseDataset(HFDataset):
     SOURCE_ID = "legal_mc4"
-    DESCRIPTION = (
-        "MC4_Legal: A Corpus Covering the Legal Part of MC4 for European Languages"
-    )
+    DESCRIPTION = "MC4_Legal: A Corpus Covering the Legal Part of MC4 for European Languages"
     HOMEPAGE = "https://huggingface.co/datasets/joelito/legal-mc4"
     AVAILIBILITY = Availability.DIRECT_DOWNLOAD
     WEB_CRAWLED = True
-    # DUMMY = True
     LICENSE = License(
         "AllenAI are releasing this dataset under the terms of ODC-BY. By using this, you are also bound by the Common"
         " Crawl terms of use in respect of the content contained in the dataset.",
@@ -72,12 +69,6 @@ def HF_DATASET_CONFIGS(self):
 
 def get_legal_mc4_auto_classes():
     """Auto generate dataset classes with token count"""
-    lang_to_tokens = {
-        row.split("\t")[0]: int(row.split("\t")[1])
-        for row in RAW_LANG_TO_TOKENS.splitlines()
-    }
+    lang_to_tokens = {row.split("\t")[0]: int(row.split("\t")[1]) for row in RAW_LANG_TO_TOKENS.splitlines()}
 
-    return [
-        get_legal_mc4_auto_cls_by_language(lang, tokens)
-        for lang, tokens in lang_to_tokens.items()
-    ]
+    return [get_legal_mc4_auto_cls_by_language(lang, tokens) for lang, tokens in lang_to_tokens.items()]