Skip to content

Commit

Permalink
gh-130453: pygettext: Extend support for specifying custom keywords (G…
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasr8 authored Feb 25, 2025
1 parent 31ef8fd commit 44213bc
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 18 deletions.
45 changes: 45 additions & 0 deletions Lib/test/test_tools/i18n_data/custom_keywords.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"


#: custom_keywords.py:9 custom_keywords.py:10
msgid "bar"
msgstr ""

#: custom_keywords.py:12
msgid "cat"
msgid_plural "cats"
msgstr[0] ""
msgstr[1] ""

#: custom_keywords.py:13
msgid "dog"
msgid_plural "dogs"
msgstr[0] ""
msgstr[1] ""

#: custom_keywords.py:15
msgctxt "context"
msgid "bar"
msgstr ""

#: custom_keywords.py:17
msgctxt "context"
msgid "cat"
msgid_plural "cats"
msgstr[0] ""
msgstr[1] ""

30 changes: 30 additions & 0 deletions Lib/test/test_tools/i18n_data/custom_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from gettext import (
gettext as foo,
ngettext as nfoo,
pgettext as pfoo,
npgettext as npfoo,
gettext as bar,
)

foo('bar')
foo('bar', 'baz')

nfoo('cat', 'cats', 1)
nfoo('dog', 'dogs')

pfoo('context', 'bar')

npfoo('context', 'cat', 'cats', 1)

# This is an unknown keyword and should be ignored
bar('baz')

# 'nfoo' requires at least 2 arguments
nfoo('dog')

# 'pfoo' requires at least 2 arguments
pfoo('context')

# 'npfoo' requires at least 3 arguments
npfoo('context')
npfoo('context', 'cat')
80 changes: 63 additions & 17 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path

from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
from test.test_tools import imports_under_tool, skip_if_missing, toolsdir
from test.support.os_helper import temp_cwd, temp_dir


Expand All @@ -17,6 +17,10 @@
DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'


with imports_under_tool("i18n"):
from pygettext import parse_spec


def normalize_POT_file(pot):
"""Normalize the POT creation timestamp, charset and
file locations to make the POT file easier to compare.
Expand Down Expand Up @@ -377,16 +381,8 @@ class _(object):

def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'):
output_file = input_file.with_suffix('.pot')
with self.subTest(input_file=f'i18n_data/{input_file}'):
contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None):
Path(input_file.name).write_text(contents)
assert_python_ok('-Xutf8', self.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')

for input_file, output_file, output in extract_from_snapshots():
with self.subTest(input_file=input_file):
expected = output_file.read_text(encoding='utf-8')
self.assert_POT_equal(expected, output)

Expand Down Expand Up @@ -485,17 +481,67 @@ def test_comments_not_extracted_without_tags(self):
'''), raw=True)
self.assertNotIn('#.', data)


def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'):
def test_parse_keyword_spec(self):
valid = (
('foo', ('foo', {0: 'msgid'})),
('foo:1', ('foo', {0: 'msgid'})),
('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
)
for spec, expected in valid:
with self.subTest(spec=spec):
self.assertEqual(parse_spec(spec), expected)

invalid = (
('foo:', "Invalid keyword spec 'foo:': missing argument positions"),
('foo:bar', "Invalid keyword spec 'foo:bar': position is not an integer"),
('foo:0', "Invalid keyword spec 'foo:0': argument positions must be strictly positive"),
('foo:-2', "Invalid keyword spec 'foo:-2': argument positions must be strictly positive"),
('foo:1,1', "Invalid keyword spec 'foo:1,1': duplicate positions"),
('foo:1,2,1', "Invalid keyword spec 'foo:1,2,1': duplicate positions"),
('foo:1c,2,1c', "Invalid keyword spec 'foo:1c,2,1c': duplicate positions"),
('foo:1c,2,3c', "Invalid keyword spec 'foo:1c,2,3c': msgctxt can only appear once"),
('foo:1,2,3', "Invalid keyword spec 'foo:1,2,3': too many positions"),
('foo:1c', "Invalid keyword spec 'foo:1c': msgctxt cannot appear without msgid"),
)
for spec, message in invalid:
with self.subTest(spec=spec):
with self.assertRaises(ValueError) as cm:
parse_spec(spec)
self.assertEqual(str(cm.exception), message)


def extract_from_snapshots():
snapshots = {
'messages.py': (),
'fileloc.py': ('--docstrings',),
'docstrings.py': ('--docstrings',),
'comments.py': ('--add-comments=i18n:',),
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
'--keyword=pfoo:1c,2',
'--keyword=npfoo:1c,2,3'),
}

for filename, args in snapshots.items():
input_file = DATA_DIR / filename
output_file = input_file.with_suffix('.pot')
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
assert_python_ok('-Xutf8', Test_pygettext.script, *args,
input_file.name)
yield (input_file, output_file,
Path('messages.pot').read_text(encoding='utf-8'))


def update_POT_snapshots():
for _, output_file, output in extract_from_snapshots():
output = normalize_POT_file(output)
output_file.write_text(output, encoding='utf-8')

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend support for specifying custom keywords in :program:`pygettext`.
88 changes: 87 additions & 1 deletion Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,88 @@ def getFilesForName(name):
}


def parse_spec(spec):
"""Parse a keyword spec string into a dictionary.
The keyword spec format defines the name of the gettext function and the
positions of the arguments that correspond to msgid, msgid_plural, and
msgctxt. The format is as follows:
name - the name of the gettext function, assumed to
have a single argument that is the msgid.
name:pos1 - the name of the gettext function and the position
of the msgid argument.
name:pos1,pos2 - the name of the gettext function and the positions
of the msgid and msgid_plural arguments.
name:pos1,pos2c - the name of the gettext function and the positions
of the msgid and msgctxt arguments.
name:pos1,pos2,pos3c - the name of the gettext function and the
positions of the msgid, msgid_plural, and
msgctxt arguments.
As an example, the spec 'foo:1,2,3c' means that the function foo has three
arguments, the first one is the msgid, the second one is the msgid_plural,
and the third one is the msgctxt. The positions are 1-based.
The msgctxt argument can appear in any position, but it can only appear
once. For example, the keyword specs 'foo:3c,1,2' and 'foo:1,2,3c' are
equivalent.
See https://www.gnu.org/software/gettext/manual/gettext.html
for more information.
"""
parts = spec.strip().split(':', 1)
if len(parts) == 1:
name = parts[0]
return name, {0: 'msgid'}

name, args = parts
if not args:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'missing argument positions')

result = {}
for arg in args.split(','):
arg = arg.strip()
is_context = False
if arg.endswith('c'):
is_context = True
arg = arg[:-1]

try:
pos = int(arg) - 1
except ValueError as e:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'position is not an integer') from e

if pos < 0:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'argument positions must be strictly positive')

if pos in result.values():
raise ValueError(f'Invalid keyword spec {spec!r}: '
'duplicate positions')

if is_context:
if 'msgctxt' in result:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt can only appear once')
result['msgctxt'] = pos
elif 'msgid' not in result:
result['msgid'] = pos
elif 'msgid_plural' not in result:
result['msgid_plural'] = pos
else:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'too many positions')

if 'msgid' not in result and 'msgctxt' in result:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt cannot appear without msgid')

return name, {v: k for k, v in result.items()}


@dataclass(frozen=True)
class Location:
filename: str
Expand Down Expand Up @@ -646,7 +728,11 @@ class Options:
make_escapes(not options.escape)

# calculate all keywords
options.keywords = {kw: {0: 'msgid'} for kw in options.keywords}
try:
options.keywords = dict(parse_spec(spec) for spec in options.keywords)
except ValueError as e:
print(e, file=sys.stderr)
sys.exit(1)
if not no_default_keywords:
options.keywords |= DEFAULTKEYWORDS

Expand Down

0 comments on commit 44213bc

Please sign in to comment.