diff --git a/.ci/aptPackagesToInstall.txt b/.ci/aptPackagesToInstall.txt new file mode 100644 index 0000000..e69de29 diff --git a/.ci/pythonPackagesToInstallFromGit.txt b/.ci/pythonPackagesToInstallFromGit.txt new file mode 100644 index 0000000..f9c9b6d --- /dev/null +++ b/.ci/pythonPackagesToInstallFromGit.txt @@ -0,0 +1 @@ +https://github.com/UniGrammar/charRanges.py diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..c9162b9 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +root = true + +[*] +charset = utf-8 +indent_style = tab +indent_size = 4 +insert_final_newline = true +end_of_line = lf + +[*.{yml,yaml}] +indent_style = space +indent_size = 2 diff --git a/.github/.templateMarker b/.github/.templateMarker new file mode 100644 index 0000000..5e3a3e0 --- /dev/null +++ b/.github/.templateMarker @@ -0,0 +1 @@ +KOLANICH/python_project_boilerplate.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..89ff339 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + allow: + - dependency-type: "all" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..7fe33b3 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,15 @@ +name: CI +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-22.04 + steps: + - name: typical python workflow + uses: KOLANICH-GHActions/typical-python-workflow@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..71fb1b6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +__pycache__ +*.py[co] +/*.egg-info +*.srctrlbm +*.srctrldb +build +dist +.eggs +monkeytype.sqlite3 +/.ipynb_checkpoints diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..6fcdcd8 --- /dev/null +++ 
b/.gitlab-ci.yml @@ -0,0 +1,51 @@ +image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest + +variables: + DOCKER_DRIVER: overlay2 + SAST_ANALYZER_IMAGE_TAG: latest + SAST_DISABLE_DIND: "true" + SAST_CONFIDENCE_LEVEL: 5 + CODECLIMATE_VERSION: latest + +include: + - template: SAST.gitlab-ci.yml + - template: Code-Quality.gitlab-ci.yml + - template: License-Management.gitlab-ci.yml + +build: + tags: + - shared + - linux + stage: build + variables: + GIT_DEPTH: "1" + PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages + + before_script: + - export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables` + - apt-get update + # todo: + #- apt-get -y install + #- pip3 install --upgrade + #- python3 ./fix_python_modules_paths.py + + script: + - python3 -m build -nw bdist_wheel + - mv ./dist/*.whl ./dist/escapelib-0.CI-py3-none-any.whl + - pip3 install --upgrade ./dist/*.whl + - coverage run --source=escapelib -m --branch pytest --junitxml=./rspec.xml ./tests/tests.py + - coverage report -m + - coverage xml + + coverage: "/^TOTAL(?:\\s+\\d+){4}\\s+(\\d+%).+/" + + cache: + paths: + - $PYTHONUSERBASE + + artifacts: + paths: + - dist + reports: + junit: ./rspec.xml + cobertura: ./coverage.xml diff --git a/Code_Of_Conduct.md b/Code_Of_Conduct.md new file mode 100644 index 0000000..bcaa2bf --- /dev/null +++ b/Code_Of_Conduct.md @@ -0,0 +1 @@ +No codes of conduct! 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..20f0fa8 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include UNLICENSE +include *.md +include tests +include .editorconfig diff --git a/ReadMe.md b/ReadMe.md new file mode 100644 index 0000000..dd1dc86 --- /dev/null +++ b/ReadMe.md @@ -0,0 +1,11 @@ +escapelib.py [![Unlicensed work](https://raw.githubusercontent.com/unlicense/unlicense.org/master/static/favicon.png)](https://unlicense.org/) +============ +~~[wheel (GitLab)](https://gitlab.com/UniGrammar/escapelib.py/-/jobs/artifacts/master/raw/dist/escapelib-0.CI-py3-none-any.whl?job=build)~~ +[wheel (GHA via `nightly.link`)](https://nightly.link/UniGrammar/escapelib.py/workflows/CI/master/escapelib-0.CI-py3-none-any.whl) +~~![GitLab Build Status](https://gitlab.com/UniGrammar/escapelib.py/badges/master/pipeline.svg)~~ +~~![GitLab Coverage](https://gitlab.com/UniGrammar/escapelib.py/badges/master/coverage.svg)~~ +~~[![GitHub Actions](https://github.com/UniGrammar/escapelib.py/workflows/CI/badge.svg)](https://github.com/UniGrammar/escapelib.py/actions/)~~ +[![Libraries.io Status](https://img.shields.io/librariesio/github/UniGrammar/escapelib.py.svg)](https://libraries.io/github/UniGrammar/escapelib.py) +[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://codeberg.org/KOLANICH-tools/antiflash.py) + +A library to escape characters for various contexts. diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..efb9808 --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. 
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org/>
diff --git a/escapelib/__init__.py b/escapelib/__init__.py
new file mode 100644
index 0000000..f852552
--- /dev/null
+++ b/escapelib/__init__.py
@@ -0,0 +1,199 @@
+import re
+import string
+import typing
+from abc import ABC, abstractmethod
+from ast import literal_eval
+
+from rangeslicetools.tree import RangesTree
+
+from charRanges import multiRSub, stringToRanges
+
+
+def generateEscapeMapForRegExpCharClass() -> typing.Dict[int, str]:
+	"""Build the `str.translate` table used to escape a character for a regexp character class.
+
+	The table is a copy of the private `re._special_chars_map` without the characters in `allowedChars`,
+	which are therefore left unescaped. If `re` has no such table, a small hand-written one is returned."""
+	try:
+		specialCharsMap = dict(re._special_chars_map)  # pylint: disable=protected-access
+	except AttributeError:
+		# `str.translate` looks characters up by code point, so the keys must be ints, not 1-char strings
+		return {
+			ord("\n"): "\\n",
+			ord("\r"): "\\r",
+			ord("."): "\\.",
+			0: "\\0",
+			ord("]"): "\\]",
+		}
+
+	allowedChars = " *+|#~&(){}^[$?"
+	for c in allowedChars:
+		# a key missing from the table is not a reason to throw the whole table away
+		specialCharsMap.pop(ord(c), None)
+	return specialCharsMap
+
+
+ourSpecialCharsMap = generateEscapeMapForRegExpCharClass()
+
+
+def reCharClassEscape(c: str) -> str:
+	"""Escape one character: take its `repr` without the quotes and, if that is still a single character, translate it through `ourSpecialCharsMap`."""
+	c = repr(c)[1:-1]
+	if len(c) == 1:
+		c = c.translate(ourSpecialCharsMap)
+	return c
+
+
+def hexStringizer(x: int) -> str:
+	"""Return the hexadecimal digits of `x` as produced by `hex`, without the `0x` prefix."""
+	return hex(x)[2:]
+
+
+def backslashUHex4CodePointsStringizer(c: int) -> str:
+	"""Return the hexadecimal digits of the code point `c`, left-padded with zeros to at least 4 digits."""
+	return hexStringizer(c).zfill(4)
+
+
+stringizers = {
+	"hex": hexStringizer,
+	"uhex": backslashUHex4CodePointsStringizer,
+	"oct": lambda x: oct(x)[2:],
+	"dec": str,
+}
+
+
+class CharEscaper(ABC):
+	"""Interface of an escaper: a callable that maps one character to its escaped representation."""
+
+	@abstractmethod
+	def __call__(self, c: str) -> str:
+		raise NotImplementedError()
+
+
+def createDefaultCharsToEscape() -> RangesTree:
+	"""Build the default `RangesTree` of code points to escape from what `multiRSub` returns for the visible ASCII characters (printable non-whitespace plus the space) against the base `range(0, 0xff)` - presumably the base minus the visible ranges."""
+	visible = stringToRanges("".join(sorted((set(string.printable) - set(string.whitespace)) | {" "})))
+	# NOTE(review): `range(0, 0xff)` stops at 0xfe, so U+00FF itself is not part of the base - confirm this is intended
+	invisible = tuple(multiRSub(visible, base=range(0, 0xff)))
+	return RangesTree.build(invisible)
+
+
+defaultCharsToEscape = createDefaultCharsToEscape()
+
+
+class UnicodeEscaper(CharEscaper):
+	"""Replaces a character whose code point is found in `ranges` with `template` formatted with the stringized code point."""
+
+	__slots__ = ("range", "template", "stringizer")
+
+	def __init__(self, template: str, ranges: typing.Optional[RangesTree] = None, stringizer: typing.Union[str, typing.Callable[[int], str]] = "hex") -> None:
+		"""
+		:param template: `str.format` template that receives the stringized code point as its only positional argument
+		:param ranges: tree of code point ranges to escape; `defaultCharsToEscape` is used when `None`
+		:param stringizer: a key of `stringizers`, or a callable turning a code point into a string
+		"""
+		self.template = template
+
+		if ranges is None:
+			ranges = defaultCharsToEscape
+
+		self.range = ranges
+		if isinstance(stringizer, str):
+			stringizer = stringizers[stringizer]
+		self.stringizer = stringizer
+
+	def __call__(self, c: str) -> str:
+		cc = ord(c)
+		# a non-empty lookup result means the code point has to be escaped (presumably the lookup yields the ranges containing it)
+		res = tuple(self.range[cc])
+		if res:
+			return self.template.format(self.stringizer(cc))
+		return c
+
+
+class CompositeEscaper(CharEscaper):
+	"""Applies the child escapers in order and stops as soon as the result is no longer a single character."""
+
+	__slots__ = ("children",)
+
+	def __init__(self, *children) -> None:
+		self.children = children
+
+	def __call__(self, c: str) -> str:
+		for e in self.children:
+			c = e(c)
+			if len(c) != 1:
+				# the result is not a single character any more (e.g. an escape sequence), so the remaining children are skipped
+				return c
+		return c
+
+
+def genRemappingEscapeCharsLiterally(s: str) -> typing.Dict[int, str]:
+	"""Build a `str.translate` table that prefixes every character of `s` with a backslash."""
+	return {ord(c): ("\\" + c) for c in s}
+
+
+class RemappingEscaper(CharEscaper):
+	"""Escapes via `str.translate`; a `str` passed as the mapping is turned into a table that backslash-escapes each of its characters."""
+
+	__slots__ = ("mapping",)
+
+	def __init__(self, mapping: typing.Union[str, typing.Dict[int, str]]) -> None:
+		if isinstance(mapping, str):
+			mapping = genRemappingEscapeCharsLiterally(mapping)
+		self.mapping = mapping
+
+	def __call__(self, c: str) -> str:
+		return c.translate(self.mapping)
+
+
+pythonRegExpEscaper = RemappingEscaper(ourSpecialCharsMap)
+
+
+def genCommonRemapping() -> typing.Dict[int, str]:
+	"""Build a `str.translate` table for the backslash itself and for the control characters denoted by the escape letters a, b, t, r and n."""
+	res = genRemappingEscapeCharsLiterally("\\")
+	for cc in "abtrn":
+		escSeq = "\\" + cc
+		# literal_eval turns the two-character escape sequence into the character it denotes
+		res[ord(literal_eval('"' + escSeq + '"'))] = escSeq
+	return res
+
+
+commonCharsEscaper = RemappingEscaper(genCommonRemapping())
+
+
+class PythonReprEscaper(CharEscaper):
+	"""Escapes a character the way `repr` does, with the surrounding quotes stripped."""
+
+	__slots__ = ()
+
+	def __call__(self, c: str) -> str:
+		return repr(c)[1:-1]
+
+
+class PythonREEscaper(CharEscaper):
+	"""Escapes a character with `re.escape`."""
+
+	__slots__ = ()
+
+	def __call__(self, c: str) -> str:
+		return re.escape(c)
+
+
+pythonReprEscaper = PythonReprEscaper()
+pythonREEscaper = PythonREEscaper()
+
+pythonRegexEscaper = CompositeEscaper(pythonReprEscaper, pythonREEscaper)
+
+
+singleTickEscaper = RemappingEscaper("'")
+doubleTickEscaper = RemappingEscaper('"')
+closingSquareBracketEscaper = RemappingEscaper("]")
+backslashUHexEscaper = UnicodeEscaper("\\u{{{}}}", stringizer="uhex")
+backslashXHexEscaper = UnicodeEscaper("\\x{}", stringizer="hex")
+
+commonEscaper = CompositeEscaper(commonCharsEscaper, backslashUHexEscaper)
+
+defaultCharClassEscaper = CompositeEscaper(commonEscaper, closingSquareBracketEscaper)
+defaultStringEscaper = CompositeEscaper(commonEscaper, doubleTickEscaper)
diff --git a/escapelib/py.typed b/escapelib/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..643ab7b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,39 @@
+[build-system]
+requires = ["setuptools>=61.2.0", "wheel", "setuptools_scm[toml]>=3.4.3"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "escapelib"
+readme = "ReadMe.md"
+description = "A library to escape characters for various contexts."
+authors = [{name = "KOLANICH"}]
+classifiers = [
+	"Development Status :: 4 - Beta",
+	"Environment :: Other Environment",
+	"Intended Audience :: Developers",
+	"License :: Public Domain",
+	"Operating System :: OS Independent",
+	"Programming Language :: Python",
+	"Programming Language :: Python :: 3",
+	"Programming Language :: Python :: 3 :: Only",
+	"Topic :: Software Development :: Libraries :: Python Modules",
+]
+keywords = ["escapelib"]
+license = {text = "Unlicense"}
+requires-python = ">=3.4"
+dynamic = ["version"]
+dependencies = [
+	"rangeslicetools", # @ git+https://codeberg.org/KOLANICH-libs/rangeslicetools.py.git
+	"charRanges", # @ git+https://codeberg.org/UniGrammar/charRanges.py
+]
+
+[project.urls]
+Homepage = "https://codeberg.org/UniGrammar/escapelib.py"
+
+[tool.setuptools]
+zip-safe = true
+
+[tool.setuptools.packages.find]
+include = ["escapelib", "escapelib.*"]
+
+[tool.setuptools_scm]
diff --git a/tests/tests.py b/tests/tests.py
new file mode 100755
index 0000000..4bc45e0
--- /dev/null
+++ b/tests/tests.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+import sys
+from pathlib import Path
+import unittest
+import itertools, re
+import colorama  # NOTE(review): unused in this file and not listed in pyproject.toml dependencies - confirm the CI image provides it
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from collections import OrderedDict
+
+dict = OrderedDict  # NOTE(review): shadows the builtin name; the `{...}` literal below still creates a plain dict
+
+import escapelib
+from escapelib import *
+
+class Tests(unittest.TestCase):
+
+	def test_escape(self):
+		testVectors = {
+			("A", PythonREEscaper): "A",
+			("z", PythonREEscaper): "z",
+			("9", PythonREEscaper): "9",
+			("-", PythonREEscaper): "\\-",
+			("+", PythonREEscaper): "\\+"
+		}
+		for (chall, escaperCtor), resp in testVectors.items():
+			with self.subTest(chall=chall, escaperCtor=escaperCtor, resp=resp):
+				escaper = escaperCtor()
+				actual = escaper(chall)
+				self.assertEqual(resp, actual)
+
+
+if __name__ == "__main__":
+	unittest.main()