gh-130453: pygettext: Extend support for specifying custom keywords (GH-130463)
python · Feb 25, 2025 · 44213bc · 44213bc
1 parent 31ef8fd
commit 44213bc
Show file tree

Hide file tree

Showing 5 changed files with 226 additions and 18 deletions.
diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.pot b/Lib/test/test_tools/i18n_data/custom_keywords.pot
@@ -0,0 +1,45 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: custom_keywords.py:9 custom_keywords.py:10
+msgid "bar"
+msgstr ""
+
+#: custom_keywords.py:12
+msgid "cat"
+msgid_plural "cats"
+msgstr[0] ""
+msgstr[1] ""
+
+#: custom_keywords.py:13
+msgid "dog"
+msgid_plural "dogs"
+msgstr[0] ""
+msgstr[1] ""
+
+#: custom_keywords.py:15
+msgctxt "context"
+msgid "bar"
+msgstr ""
+
+#: custom_keywords.py:17
+msgctxt "context"
+msgid "cat"
+msgid_plural "cats"
+msgstr[0] ""
+msgstr[1] ""
+
diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.py b/Lib/test/test_tools/i18n_data/custom_keywords.py
@@ -0,0 +1,30 @@
+from gettext import (
+    gettext as foo,
+    ngettext as nfoo,
+    pgettext as pfoo,
+    npgettext as npfoo,
+    gettext as bar,
+)
+
+foo('bar')
+foo('bar', 'baz')
+
+nfoo('cat', 'cats', 1)
+nfoo('dog', 'dogs')
+
+pfoo('context', 'bar')
+
+npfoo('context', 'cat', 'cats', 1)
+
+# This is an unknown keyword and should be ignored
+bar('baz')
+
+# 'nfoo' requires at least 2 arguments
+nfoo('dog')
+
+# 'pfoo' requires at least 2 arguments
+pfoo('context')
+
+# 'npfoo' requires at least 3 arguments
+npfoo('context')
+npfoo('context', 'cat')
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 
 from test.support.script_helper import assert_python_ok
-from test.test_tools import skip_if_missing, toolsdir
+from test.test_tools import imports_under_tool, skip_if_missing, toolsdir
 from test.support.os_helper import temp_cwd, temp_dir
 
 
@@ -17,6 +17,10 @@
 DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'
 
 
+with imports_under_tool("i18n"):
+    from pygettext import parse_spec
+
+
 def normalize_POT_file(pot):
     """Normalize the POT creation timestamp, charset and
     file locations to make the POT file easier to compare.
@@ -377,16 +381,8 @@ class _(object):
 
     def test_pygettext_output(self):
         """Test that the pygettext output exactly matches snapshots."""
-        for input_file in DATA_DIR.glob('*.py'):
-            output_file = input_file.with_suffix('.pot')
-            with self.subTest(input_file=f'i18n_data/{input_file}'):
-                contents = input_file.read_text(encoding='utf-8')
-                with temp_cwd(None):
-                    Path(input_file.name).write_text(contents)
-                    assert_python_ok('-Xutf8', self.script, '--docstrings',
-                                     '--add-comments=i18n:', input_file.name)
-                    output = Path('messages.pot').read_text(encoding='utf-8')
-
+        for input_file, output_file, output in extract_from_snapshots():
+            with self.subTest(input_file=input_file):
                 expected = output_file.read_text(encoding='utf-8')
                 self.assert_POT_equal(expected, output)
 
@@ -485,17 +481,67 @@ def test_comments_not_extracted_without_tags(self):
         '''), raw=True)
         self.assertNotIn('#.', data)
 
-
-def update_POT_snapshots():
-    for input_file in DATA_DIR.glob('*.py'):
+    def test_parse_keyword_spec(self):
+        valid = (
+            ('foo', ('foo', {0: 'msgid'})),
+            ('foo:1', ('foo', {0: 'msgid'})),
+            ('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
+            ('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
+            ('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})),
+            ('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
+            ('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
+            ('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
+            ('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
+            ('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
+        )
+        for spec, expected in valid:
+            with self.subTest(spec=spec):
+                self.assertEqual(parse_spec(spec), expected)
+
+        invalid = (
+            ('foo:', "Invalid keyword spec 'foo:': missing argument positions"),
+            ('foo:bar', "Invalid keyword spec 'foo:bar': position is not an integer"),
+            ('foo:0', "Invalid keyword spec 'foo:0': argument positions must be strictly positive"),
+            ('foo:-2', "Invalid keyword spec 'foo:-2': argument positions must be strictly positive"),
+            ('foo:1,1', "Invalid keyword spec 'foo:1,1': duplicate positions"),
+            ('foo:1,2,1', "Invalid keyword spec 'foo:1,2,1': duplicate positions"),
+            ('foo:1c,2,1c', "Invalid keyword spec 'foo:1c,2,1c': duplicate positions"),
+            ('foo:1c,2,3c', "Invalid keyword spec 'foo:1c,2,3c': msgctxt can only appear once"),
+            ('foo:1,2,3', "Invalid keyword spec 'foo:1,2,3': too many positions"),
+            ('foo:1c', "Invalid keyword spec 'foo:1c': msgctxt cannot appear without msgid"),
+        )
+        for spec, message in invalid:
+            with self.subTest(spec=spec):
+                with self.assertRaises(ValueError) as cm:
+                    parse_spec(spec)
+                self.assertEqual(str(cm.exception), message)
+
+
+def extract_from_snapshots():
+    snapshots = {
+        'messages.py': (),
+        'fileloc.py': ('--docstrings',),
+        'docstrings.py': ('--docstrings',),
+        'comments.py': ('--add-comments=i18n:',),
+        'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
+                               '--keyword=pfoo:1c,2',
+                               '--keyword=npfoo:1c,2,3'),
+    }
+
+    for filename, args in snapshots.items():
+        input_file = DATA_DIR / filename
         output_file = input_file.with_suffix('.pot')
         contents = input_file.read_bytes()
         with temp_cwd(None):
             Path(input_file.name).write_bytes(contents)
-            assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
-                             '--add-comments=i18n:', input_file.name)
-            output = Path('messages.pot').read_text(encoding='utf-8')
+            assert_python_ok('-Xutf8', Test_pygettext.script, *args,
+                             input_file.name)
+            yield (input_file, output_file,
+                   Path('messages.pot').read_text(encoding='utf-8'))
 
+
+def update_POT_snapshots():
+    for _, output_file, output in extract_from_snapshots():
         output = normalize_POT_file(output)
         output_file.write_text(output, encoding='utf-8')
 

diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst
@@ -0,0 +1 @@
+Extend support for specifying custom keywords in :program:`pygettext`.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
@@ -294,6 +294,88 @@ def getFilesForName(name):
 }
 
 
+def parse_spec(spec):
+    """Parse a keyword spec string into a dictionary.
+
+    The keyword spec format defines the name of the gettext function and the
+    positions of the arguments that correspond to msgid, msgid_plural, and
+    msgctxt. The format is as follows:
+
+        name - the name of the gettext function, assumed to
+               have a single argument that is the msgid.
+        name:pos1 - the name of the gettext function and the position
+                    of the msgid argument.
+        name:pos1,pos2 - the name of the gettext function and the positions
+                         of the msgid and msgid_plural arguments.
+        name:pos1,pos2c - the name of the gettext function and the positions
+                          of the msgid and msgctxt arguments.
+        name:pos1,pos2,pos3c - the name of the gettext function and the
+                               positions of the msgid, msgid_plural, and
+                               msgctxt arguments.
+
+    As an example, the spec 'foo:1,2,3c' means that the function foo has three
+    arguments, the first one is the msgid, the second one is the msgid_plural,
+    and the third one is the msgctxt. The positions are 1-based.
+
+    The msgctxt argument can appear in any position, but it can only appear
+    once. For example, the keyword specs 'foo:3c,1,2' and 'foo:1,2,3c' are
+    equivalent.
+
+    See https://www.gnu.org/software/gettext/manual/gettext.html
+    for more information.
+    """
+    parts = spec.strip().split(':', 1)
+    if len(parts) == 1:
+        name = parts[0]
+        return name, {0: 'msgid'}
+
+    name, args = parts
+    if not args:
+        raise ValueError(f'Invalid keyword spec {spec!r}: '
+                         'missing argument positions')
+
+    result = {}
+    for arg in args.split(','):
+        arg = arg.strip()
+        is_context = False
+        if arg.endswith('c'):
+            is_context = True
+            arg = arg[:-1]
+
+        try:
+            pos = int(arg) - 1
+        except ValueError as e:
+            raise ValueError(f'Invalid keyword spec {spec!r}: '
+                             'position is not an integer') from e
+
+        if pos < 0:
+            raise ValueError(f'Invalid keyword spec {spec!r}: '
+                             'argument positions must be strictly positive')
+
+        if pos in result.values():
+            raise ValueError(f'Invalid keyword spec {spec!r}: '
+                             'duplicate positions')
+
+        if is_context:
+            if 'msgctxt' in result:
+                raise ValueError(f'Invalid keyword spec {spec!r}: '
+                                 'msgctxt can only appear once')
+            result['msgctxt'] = pos
+        elif 'msgid' not in result:
+            result['msgid'] = pos
+        elif 'msgid_plural' not in result:
+            result['msgid_plural'] = pos
+        else:
+            raise ValueError(f'Invalid keyword spec {spec!r}: '
+                             'too many positions')
+
+    if 'msgid' not in result and 'msgctxt' in result:
+        raise ValueError(f'Invalid keyword spec {spec!r}: '
+                         'msgctxt cannot appear without msgid')
+
+    return name, {v: k for k, v in result.items()}
+
+
 @dataclass(frozen=True)
 class Location:
     filename: str
@@ -646,7 +728,11 @@ class Options:
     make_escapes(not options.escape)
 
     # calculate all keywords
-    options.keywords = {kw: {0: 'msgid'} for kw in options.keywords}
+    try:
+        options.keywords = dict(parse_spec(spec) for spec in options.keywords)
+    except ValueError as e:
+        print(e, file=sys.stderr)
+        sys.exit(1)
     if not no_default_keywords:
         options.keywords |= DEFAULTKEYWORDS
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Extend support for specifying custom keywords in :program:`pygettext`.