Merge branch 'dev'

rosettatype · Apr 8, 2024 · 5780c29 · 5780c29
2 parents 8fdd804 + 76b44b8
commit 5780c29
Show file tree

Hide file tree

Showing 15 changed files with 258 additions and 100 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,4 +10,5 @@ tools/udhr/
 *.egg-info
 *.egg
 venv
-.coverage
+.coverage
+.tox
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # A changelog for the lib/hyperglot language database and CLI tool (dd.mm.yyyy)
 
+## 0.6.3 (08.04.2024)
+- FIX: Set correct default values for `Language.status` and `Orthography.preferred_as_group` and provide validation and tests for these.
+- TWEAK: Deprecated the plain constants `SUPPORTLEVELS`, `VALIDITYLEVELS`, `STATUSES` and `ORTHOGRAPHY_STATUSES` and replaced them with the `SupportLevel`, `LanguageValidity`, `LanguageStatus` and `OrthographyStatus` enums throughout the code base. The deprecated constants will be removed in the next minor version.
+- TESTS: Added a simple tox config for running the tests on all supported minor Python versions.
+
 ## 0.6.2 (22.3.2024)
 - FIX: Fixed type hinting issue causing failure on python 3.8.x
 - DATA: Added Banjar (`bjn`) (thanks @mahalisyarifuddin)

diff --git a/README_releasing.md b/README_releasing.md
@@ -4,7 +4,7 @@ This documentation for releasing new versions to PIP is relevant only for reposi
 
 - merge dev to master
 - run hyperglot-validate
-- run pytest tests
+- run pytest tests (with all tox environments)
 - manually sanity-check cli font check works
 - bump version number
 - push dev to github, test install from commit in new environment: pip install git+https://github.com/rosettatype/hyperglot.git@dev

diff --git a/lib/hyperglot/__init__.py b/lib/hyperglot/__init__.py
@@ -2,48 +2,106 @@
 Gather a few package wide constants
 """
 from os import path
-__version__ = "0.6.2"
+from enum import Enum
+from typing import List
+
+__version__ = "0.6.3"
 
 DB = path.abspath(path.join(path.dirname(__file__), "data"))
 DB_EXTRA = path.abspath(path.join(path.dirname(__file__), "extra_data"))
 
-SUPPORTLEVELS = {
-    "base": "base",
-    "aux": "auxiliary"
-}
+# ~~DONE Refactor these levels and status as Enum's~~
+# TODO Eventually remove deprecated "CONSTANTS"
 
-# TODO Refactor these levels and status as Enum's
 
-# Note that order matters, since these may be used like a logging level
-VALIDITYLEVELS = [
-    "todo",
-    "draft",
-    "preliminary",
-    "verified",
-]
+class SupportLevel(Enum):
+    """
+    Valid support levels for querying Hyperglot.
+    """
+
+    BASE = "base"
+    AUX = "auxiliary"
+
+
+# Deprecated: SUPPORTLEVELS will be removed in the future, use SupportLevel!
+SUPPORTLEVELS = {"base": "base", "aux": "auxiliary"}
+
+
+class LanguageValidity(Enum):
+    """
+    Allowed hyperglot.Language["validity"] values.
+
+    Order from least to most valid matters for comparison!
+    """
+
+    TODO = "todo"
+    DRAFT = "draft"
+    PRELIMINARY = "preliminary"
+    VERIFIED = "verified"
+
+    @classmethod
+    def values(self) -> List:
+        return [v.value for v in self]
+
+    @classmethod
+    def index(self, val: str) -> int:
+        """
+        Get the index of a given value, useful for comparing the validities in
+        order.
+        """
+        return self.values().index(val)
+
+
+# Deprecated: VALIDITYLEVELS will be removed in the future, use LanguageValidity!
+VALIDITYLEVELS = LanguageValidity.values()
 
 
 # Note that "secondary" as status is also used, but on orthographies!
-STATUSES = [
-    "historical",
-    "constructed",
-    "ancient",
-    "living",
-    "extinct",
-    "deprecated",
-]
+class LanguageStatus(Enum):
+    """
+    Allowed hyperglot.Language["status"] values, with LIVING being the default.
 
+    Deprecated values for 'status' previously used are: ancient, extinct and
+    deprecated.
+    """
+
+    LIVING = "living"
+    HISTORICAL = "historical"
+    CONSTRUCTED = "constructed"
+
+    @classmethod
+    def values(self) -> List:
+        return [s.value for s in self]
+
+
+# Deprecated: STATUSES will be removed in the future, use LanguageStatus!
+STATUSES = LanguageStatus.values()
+
+
+class OrthographyStatus(Enum):
+    """
+    Possible hyperglot.orthography.Orthography["status"] values.
+
+    Note: Order matters for preference of first found orthography.
+
+    Deprecated: "deprecated" orthography status removed in favour of "historical"
+    """
+
+    PRIMARY = "primary"
+    LOCAL = "local"
+    SECONDARY = "secondary"
+    HISTORICAL = "historical"
+    TRANSLITERATION = "transliteration"
+
+    @classmethod
+    def values(self) -> List:
+        return [s.value for s in self]
+
+
+# Deprecated: ORTHOGRAPHY_STATUSES will be removed in the future, use
+# OrthographyStatus!
+ORTHOGRAPHY_STATUSES = OrthographyStatus.values()
 
-# Possible orthography statuses, in no meaningful order
-# "deprecated" orthography status removed in favour of "historical"
-# Note: Order matters for preference of first found orthography
-ORTHOGRAPHY_STATUSES = [
-    "primary",
-    "local",
-    "secondary",
-    "historical",
-    "transliteration",
-]
 
 # Those attributes of orthographies that contain non-mark characters
 CHARACTER_ATTRIBUTES = [
@@ -55,7 +113,7 @@
 
 SORTING = {
     "alphabetic": lambda lang: lang.get_name(),
-    "speakers": lambda lang: lang["speakers"]
+    "speakers": lambda lang: lang["speakers"],
 }
 
 SORTING_DIRECTIONS = ["asc", "desc"]

diff --git a/lib/hyperglot/checker.py b/lib/hyperglot/checker.py
@@ -8,7 +8,7 @@
 from hyperglot.language import Language
 from hyperglot.orthography import Orthography
 from hyperglot.parse import parse_chars
-from hyperglot import SUPPORTLEVELS, VALIDITYLEVELS
+from hyperglot import SupportLevel, LanguageValidity
 
 log = logging.getLogger(__name__)
 log.setLevel(logging.WARNING)
@@ -43,25 +43,25 @@ class Checker:
     support checks.
     """
 
-    def __init__(self, fontpath=None, characters=None):
+    def __init__(self, fontpath: str = None, characters: List = None):
         self.fontpath = fontpath
         self.characters = characters
         self.font = None
         self.shaper = None
 
     def get_supported_languages(
         self,
-        supportlevel=list(SUPPORTLEVELS.keys())[0],
-        validity=VALIDITYLEVELS[1],
-        decomposed=False,
-        marks=False,
-        shaping=False,
-        include_all_orthographies=False,
-        include_historical=False,
-        include_constructed=False,
-        report_missing=-1,
-        report_marks=-1,
-        report_joining=-1,
+        supportlevel: str = SupportLevel.BASE.value,
+        validity: str = LanguageValidity.DRAFT.value,
+        decomposed: bool = False,
+        marks: bool = False,
+        shaping: bool = False,
+        include_all_orthographies: bool = False,
+        include_historical: bool = False,
+        include_constructed: bool = False,
+        report_missing: int = -1,
+        report_marks: int = -1,
+        report_joining: int = -1,
     ) -> dict:
         """
         Get all languages supported based on the passed in characters.
@@ -102,7 +102,7 @@ def get_supported_languages(
                 continue
 
             # Skip languages below the currently selected validity level.
-            if VALIDITYLEVELS.index(l["validity"]) < VALIDITYLEVELS.index(validity):
+            if LanguageValidity.index(l["validity"]) < LanguageValidity.index(validity):
                 log.info("Skipping language '%s' which has lower " "'validity'" % iso)
                 continue
 
@@ -151,8 +151,8 @@ def get_supported_languages(
     def supports_language(
         self,
         iso: str,
-        supportlevel: str = "base",
-        validity: str = VALIDITYLEVELS[1],
+        supportlevel: str = SupportLevel.BASE.value,
+        validity: str = LanguageValidity.DRAFT.value,
         decomposed: bool = False,
         marks: bool = False,
         shaping: bool = False,
@@ -206,12 +206,13 @@ def supports_language(
 
         # Exit if validity is not met
         if "validity" not in language or (
-            VALIDITYLEVELS.index(language["validity"]) < VALIDITYLEVELS.index(validity)
+            LanguageValidity.index(language["validity"])
+            < LanguageValidity.index(validity)
         ):
             return False
 
-        if supportlevel not in SUPPORTLEVELS.keys():
-            log.warning(
+        if supportlevel not in [s.value for s in SupportLevel]:
+            raise Exception(
                 "Provided support level '%s' not valid, "
                 "defaulting to 'base'" % supportlevel
             )
@@ -298,7 +299,7 @@ def supports_language(
 
             # If an orthography has no "auxiliary" we consider it supported on
             # "auxiliary" level, too.
-            if supportlevel == "aux" and ort.auxiliary:
+            if supportlevel == SupportLevel.AUX.value and ort.auxiliary:
                 if marks:
                     req_marks_aux = ort.auxiliary_marks
                 else:
@@ -317,9 +318,7 @@ def supports_language(
 
                     # Validation
                     supported = False
-                    logging.info(
-                        f"{language} missing {len(aux_missing)} 'aux'"
-                    )
+                    logging.info(f"{language} missing {len(aux_missing)} 'aux'")
 
                 if shaping:
                     joining_errors, mark_errors = self._check_shaping(

diff --git a/lib/hyperglot/cli.py b/lib/hyperglot/cli.py
@@ -10,8 +10,8 @@
     __version__,
     SORTING_DIRECTIONS,
     DB,
-    SUPPORTLEVELS,
-    VALIDITYLEVELS,
+    SupportLevel,
+    LanguageValidity,
     CHARACTER_ATTRIBUTES,
     MARK_BASE,
     SORTING,
@@ -237,7 +237,7 @@ def hyperglot_options(f):
     @click.option(
         "-s",
         "--support",
-        type=click.Choice(SUPPORTLEVELS.keys(), case_sensitive=False),
+        type=click.Choice([s.value for s in SupportLevel], case_sensitive=False),
         default="base",
         show_default=True,
         help="Option to test only for the language's base charset, or to"
@@ -264,8 +264,8 @@ def hyperglot_options(f):
     )
     @click.option(
         "--validity",
-        type=click.Choice(VALIDITYLEVELS, case_sensitive=False),
-        default=VALIDITYLEVELS[1],
+        type=click.Choice([v.value for v in LanguageValidity], case_sensitive=False),
+        default=LanguageValidity.DRAFT.value,
         show_default=True,
         help="The level of validity for languages matched against the "
         "font. Weaker levels always include more strict levels. The "
@@ -453,7 +453,7 @@ def cli(
             report_joining=report_joining,
         )
 
-        level = SUPPORTLEVELS[support]
+        level = SupportLevel(support).value
 
         # Sort each script's results by the chosen sorting logic
         sorted_entries = {}