Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
kontur committed Apr 8, 2024
2 parents 8fdd804 + 76b44b8 commit 5780c29
Show file tree
Hide file tree
Showing 15 changed files with 258 additions and 100 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ tools/udhr/
*.egg-info
*.egg
venv
.coverage
.coverage
.tox
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# A changelog for the lib/hyperglot language database and CLI tool (dd.mm.yyyy)

## 0.6.3 (08.04.2024)
- FIX: Set correct default values for `Language.status` and `Orthography.preferred_as_group` and provide validation and tests for these.
- TWEAK: Deprecated the plain constants `SUPPORTLEVELS`, `VALIDITYLEVELS`, `STATUSES` and `ORTHOGRAPHY_STATUSES` and replaced them with the `SupportLevel`, `LanguageValidity`, `LanguageStatus` and `OrthographyStatus` enums throughout the code base. The deprecated constants will be removed in the next minor version.
- TESTS: Added a simple tox config for running the tests on all supported minor Python versions

## 0.6.2 (22.3.2024)
- FIX: Fixed type hinting issue causing failure on python 3.8.x
- DATA: Added Banjar (`bjn`) (thanks @mahalisyarifuddin)
Expand Down
2 changes: 1 addition & 1 deletion README_releasing.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This documentation for releasing new versions to PIP is relevant only for reposi

- merge dev to master
- run hyperglot-validate
- run pytest tests
- run pytest tests (with all tox environments)
- manually sanity-check cli font check works
- bump version number
- push dev to github, test install from commit in new environment: pip install git+https://github.com/rosettatype/hyperglot.git@dev
Expand Down
122 changes: 90 additions & 32 deletions lib/hyperglot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,106 @@
Gather a few package wide constants
"""
from os import path
__version__ = "0.6.2"
from enum import Enum
from typing import List

__version__ = "0.6.3"

DB = path.abspath(path.join(path.dirname(__file__), "data"))
DB_EXTRA = path.abspath(path.join(path.dirname(__file__), "extra_data"))

SUPPORTLEVELS = {
"base": "base",
"aux": "auxiliary"
}
# ~~DONE Refactor these levels and statuses as Enums~~
# TODO Eventually remove deprecated "CONSTANTS"

# TODO Refactor these levels and status as Enum's

# Note that order matters, since these may be used like a logging level
VALIDITYLEVELS = [
"todo",
"draft",
"preliminary",
"verified",
]
class SupportLevel(Enum):
    """
    Valid support levels for querying Hyperglot.

    Members:
        BASE: the "base" support level.
        AUX: the "auxiliary" support level.
    """

    BASE = "base"
    AUX = "auxiliary"

    @classmethod
    def values(cls) -> List[str]:
        """
        Return the string values of all support levels in declaration order.

        Mirrors the `values()` helper on the other enums in this module, so
        callers do not need to spell out `[s.value for s in SupportLevel]`.
        """
        return [level.value for level in cls]


# Deprecated: SUPPORTLEVELS will be removed in the future, use SupportLevel!
SUPPORTLEVELS = {"base": "base", "aux": "auxiliary"}


class LanguageValidity(Enum):
    """
    Allowed hyperglot.Language["validity"] values.
    Order from least to most valid matters for comparison!
    """

    TODO = "todo"
    DRAFT = "draft"
    PRELIMINARY = "preliminary"
    VERIFIED = "verified"

    @classmethod
    def values(cls) -> List[str]:
        """
        Return the string values of all validity levels, in declaration
        order (least to most valid).
        """
        return [validity.value for validity in cls]

    @classmethod
    def index(cls, val: str) -> int:
        """
        Get the index of a given value, useful for comparing the validities in
        order.

        Raises:
            ValueError: if `val` is not one of the allowed validity values.
        """
        allowed = cls.values()
        try:
            return allowed.index(val)
        except ValueError:
            # Same exception type as list.index(), but say what was expected
            # instead of the generic "x is not in list".
            raise ValueError(
                f"'{val}' is not a valid validity, expected one of: "
                f"{', '.join(allowed)}"
            ) from None


# Deprecated: VALIDITYLEVELS will be removed in the future, use LanguageValidity!
VALIDITYLEVELS = LanguageValidity.values()


# Note that "secondary" as status is also used, but on orthographies!
STATUSES = [
"historical",
"constructed",
"ancient",
"living",
"extinct",
"deprecated",
]
class LanguageStatus(Enum):
    """
    Allowed hyperglot.Language["status"] values, with LIVING being the default.
    Deprecated values for 'status' previously used are: ancient, extinct and
    deprecated.
    """

    LIVING = "living"
    HISTORICAL = "historical"
    CONSTRUCTED = "constructed"

    @classmethod
    def values(cls) -> List[str]:
        """
        Return the string values of all language statuses in declaration
        order.
        """
        return [status.value for status in cls]


# Deprecated: STATUSES will be removed in the future, use LanguageStatus!
STATUSES = LanguageStatus.values()


class OrthographyStatus(Enum):
    """
    Possible hyperglot.orthography.Orthography["status"] values.
    Note: Order matters for preference of first found orthography.
    Deprecated: "deprecated" orthography status removed in favour of "historical"
    """

    PRIMARY = "primary"
    LOCAL = "local"
    SECONDARY = "secondary"
    HISTORICAL = "historical"
    TRANSLITERATION = "transliteration"

    @classmethod
    def values(cls) -> List[str]:
        """
        Return the string values of all orthography statuses in declaration
        order.
        """
        return [status.value for status in cls]


# Deprecated: ORTHOGRAPHY_STATUSES will be removed in the future, use
# OrthographyStatus!
ORTHOGRAPHY_STATUSES = OrthographyStatus.values()

# Possible orthography statuses, in no meaningful order
# "deprecated" orthography status removed in favour of "historical"
# Note: Order matters for preference of first found orthography
ORTHOGRAPHY_STATUSES = [
"primary",
"local",
"secondary",
"historical",
"transliteration",
]

# Those attributes of orthographies that contain non-mark characters
CHARACTER_ATTRIBUTES = [
Expand All @@ -55,7 +113,7 @@

SORTING = {
"alphabetic": lambda lang: lang.get_name(),
"speakers": lambda lang: lang["speakers"]
"speakers": lambda lang: lang["speakers"],
}

SORTING_DIRECTIONS = ["asc", "desc"]
Expand Down
45 changes: 22 additions & 23 deletions lib/hyperglot/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from hyperglot.language import Language
from hyperglot.orthography import Orthography
from hyperglot.parse import parse_chars
from hyperglot import SUPPORTLEVELS, VALIDITYLEVELS
from hyperglot import SupportLevel, LanguageValidity

log = logging.getLogger(__name__)
log.setLevel(logging.WARNING)
Expand Down Expand Up @@ -43,25 +43,25 @@ class Checker:
support checks.
"""

def __init__(self, fontpath=None, characters=None):
def __init__(self, fontpath: str = None, characters: List = None):
self.fontpath = fontpath
self.characters = characters
self.font = None
self.shaper = None

def get_supported_languages(
self,
supportlevel=list(SUPPORTLEVELS.keys())[0],
validity=VALIDITYLEVELS[1],
decomposed=False,
marks=False,
shaping=False,
include_all_orthographies=False,
include_historical=False,
include_constructed=False,
report_missing=-1,
report_marks=-1,
report_joining=-1,
supportlevel: str = SupportLevel.BASE.value,
validity: str = LanguageValidity.DRAFT.value,
decomposed: bool = False,
marks: bool = False,
shaping: bool = False,
include_all_orthographies: bool = False,
include_historical: bool = False,
include_constructed: bool = False,
report_missing: int = -1,
report_marks: int = -1,
report_joining: int = -1,
) -> dict:
"""
Get all languages supported based on the passed in characters.
Expand Down Expand Up @@ -102,7 +102,7 @@ def get_supported_languages(
continue

# Skip languages below the currently selected validity level.
if VALIDITYLEVELS.index(l["validity"]) < VALIDITYLEVELS.index(validity):
if LanguageValidity.index(l["validity"]) < LanguageValidity.index(validity):
log.info("Skipping language '%s' which has lower " "'validity'" % iso)
continue

Expand Down Expand Up @@ -151,8 +151,8 @@ def get_supported_languages(
def supports_language(
self,
iso: str,
supportlevel: str = "base",
validity: str = VALIDITYLEVELS[1],
supportlevel: str = SupportLevel.BASE.value,
validity: str = LanguageValidity.DRAFT.value,
decomposed: bool = False,
marks: bool = False,
shaping: bool = False,
Expand Down Expand Up @@ -206,12 +206,13 @@ def supports_language(

# Exit if validity is not met
if "validity" not in language or (
VALIDITYLEVELS.index(language["validity"]) < VALIDITYLEVELS.index(validity)
LanguageValidity.index(language["validity"])
< LanguageValidity.index(validity)
):
return False

if supportlevel not in SUPPORTLEVELS.keys():
log.warning(
if supportlevel not in [s.value for s in SupportLevel]:
raise Exception(
"Provided support level '%s' not valid, "
"defaulting to 'base'" % supportlevel
)
Expand Down Expand Up @@ -298,7 +299,7 @@ def supports_language(

# If an orthography has no "auxiliary" we consider it supported on
# "auxiliary" level, too.
if supportlevel == "aux" and ort.auxiliary:
if supportlevel == SupportLevel.AUX.value and ort.auxiliary:
if marks:
req_marks_aux = ort.auxiliary_marks
else:
Expand All @@ -317,9 +318,7 @@ def supports_language(

# Validation
supported = False
logging.info(
f"{language} missing {len(aux_missing)} 'aux'"
)
logging.info(f"{language} missing {len(aux_missing)} 'aux'")

if shaping:
joining_errors, mark_errors = self._check_shaping(
Expand Down
12 changes: 6 additions & 6 deletions lib/hyperglot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
__version__,
SORTING_DIRECTIONS,
DB,
SUPPORTLEVELS,
VALIDITYLEVELS,
SupportLevel,
LanguageValidity,
CHARACTER_ATTRIBUTES,
MARK_BASE,
SORTING,
Expand Down Expand Up @@ -237,7 +237,7 @@ def hyperglot_options(f):
@click.option(
"-s",
"--support",
type=click.Choice(SUPPORTLEVELS.keys(), case_sensitive=False),
type=click.Choice([s.value for s in SupportLevel], case_sensitive=False),
default="base",
show_default=True,
help="Option to test only for the language's base charset, or to"
Expand All @@ -264,8 +264,8 @@ def hyperglot_options(f):
)
@click.option(
"--validity",
type=click.Choice(VALIDITYLEVELS, case_sensitive=False),
default=VALIDITYLEVELS[1],
type=click.Choice([v.value for v in LanguageValidity], case_sensitive=False),
default=LanguageValidity.DRAFT.value,
show_default=True,
help="The level of validity for languages matched against the "
"font. Weaker levels always include more strict levels. The "
Expand Down Expand Up @@ -453,7 +453,7 @@ def cli(
report_joining=report_joining,
)

level = SUPPORTLEVELS[support]
level = SupportLevel(support).value

# Sort each script's results by the chosen sorting logic
sorted_entries = {}
Expand Down
Loading

0 comments on commit 5780c29

Please sign in to comment.