Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new CLI command to lint PO files #1168

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,18 @@
import sys
import tempfile
import warnings
from collections.abc import Generator
from configparser import RawConfigParser
from dataclasses import dataclass
from io import StringIO
from pathlib import Path
from string import Formatter
from typing import BinaryIO, Iterable, Literal

from babel import Locale, localedata
from babel import __version__ as VERSION
from babel.core import UnknownLocaleError
from babel.messages import Message
from babel.messages.catalog import DEFAULT_HEADER, Catalog
from babel.messages.extract import (
DEFAULT_KEYWORDS,
Expand Down Expand Up @@ -852,6 +857,127 @@
return


# Command class behind the ``lint`` sub-command: it is registered under the
# 'lint' key of CommandLineInterface.command_classes, and ``description``
# matches the help text listed for 'lint' there.
class LintCatalog(CommandMixin):
description = 'check message catalogs for common problems'
# Each entry is a (long option, short option, help text) tuple; the only
# option is ``input-paths``, which has no short form.
user_options = [
('input-paths=', None,
'files or directories that should be checked. Separate multiple '
'files or directories with commas(,)'), # TODO: Support repetition of this argument
]
# NOTE(review): presumably tells the CLI front end to store positional
# arguments in ``input_paths`` -- confirm against CommandMixin / the CLI parser.
as_args = 'input-paths'

@dataclass(frozen=True)
class MessagePair:
    """Immutable pairing of a source string with the translation to check.

    Instances are produced by ``_iter_msg_pairs`` and consumed by
    ``_lint_file``, which compares the placeholders of both strings.
    """

    # Source-language text (the singular or the plural message id).
    original: str
    # Translated text that is compared against ``original``.
    translated: str
    # Index of the ``msgstr[N]`` entry the translation came from when the pair
    # is compared against the plural id; ``None`` when it is compared against
    # the (singular) message id.
    plural_number: int | None = None

def initialize_options(self):
    """Reset the command's options to their unset state."""
    # ``None`` means "nothing supplied yet"; ``finalize_options`` rejects a
    # falsy value, so the annotation has to admit ``None`` as well.
    self.input_paths: list[str] | None = None

def finalize_options(self):
    """Validate and normalise ``input_paths``.

    A single string is split on commas, as promised by the help text of the
    ``input-paths`` option; surrounding whitespace and empty segments are
    dropped. Any other value (for example a list of paths) is left as is.

    :raises OptionError: if no input path remains
    """
    if isinstance(self.input_paths, str):
        # Without this split, ``run`` would iterate over the individual
        # characters of the string instead of over paths.
        self.input_paths = [
            part.strip() for part in self.input_paths.split(',') if part.strip()
        ]

    if not self.input_paths:
        raise OptionError("no input files or directories specified")

def run(self):
    """Lint every configured input path.

    Directories are handed to ``_lint_directory``; anything else is treated
    as a single catalog file and handed to ``_lint_file``.
    """
    for raw_path in self.input_paths:
        target = Path(raw_path)
        if not target.is_dir():
            self._lint_file(target)
            continue
        # NOTE: Codecov (codecov/patch) reported this directory branch
        # (babel/messages/frontend.py#L886) as not covered by tests.
        self._lint_directory(target)

def _lint_directory(self, directory: Path) -> None:
for path in Path(directory).rglob('*.po'):
if path.is_file():
self._lint_file(path)

Check warning on line 893 in babel/messages/frontend.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/frontend.py#L891-L893

Added lines #L891 - L893 were not covered by tests

def _lint_file(self, path: Path) -> None:
    """Check one PO file and print a report for every placeholder mismatch.

    Each report is printed to standard output, prefixed with
    ``<path>:<line number of the message>:``.

    :param path: the PO file to read
    """
    with open(path, 'rb') as po_file:
        catalog = read_po(po_file)

    for message in catalog:
        # Entries with a falsy id (such as an empty ``msgid``) are not checked.
        if not message.id:
            continue

        # Normalise to a tuple so singular and plural messages look alike.
        if isinstance(message.string, tuple):
            translations = message.string
        else:
            translations = (message.string,)
        if not any(translations):
            # Not translated, skip.
            # NOTE: Codecov (codecov/patch) reported this skip
            # (babel/messages/frontend.py#L905) as not covered by tests.
            continue

        for pair in self._iter_msg_pairs(message, num_plurals=catalog.num_plurals):
            expected = self._extract_placeholders(pair.original)
            actual = self._extract_placeholders(pair.translated)
            if expected == actual:
                continue
            report = self._format_message(expected, actual, pair.plural_number)
            print(f'{path}:{message.lineno}: {report}')

def _format_message(
self,
original_placeholders: set[str],
translated_placeholders: set[str],
plural_number: int | None,
) -> str:
def _sort_and_format(placeholders: set[str]) -> str:
return ', '.join(sorted(placeholders))

msgid = 'msgid' if plural_number is None else 'msgid_plural'
msgstr = 'msgstr' if plural_number is None else f'msgstr[{plural_number}]'

msg = f'placeholders in {msgid} differ from placeholders in {msgstr}:\n'
if only_in_msgid := original_placeholders - translated_placeholders:
formatted = _sort_and_format(only_in_msgid)
msg += f'\tplaceholders in {msgid} but missing in {msgstr}: {formatted}'
if only_in_msgstr := translated_placeholders - original_placeholders:
formatted = _sort_and_format(only_in_msgstr)
msg += f'\n\tplaceholders in {msgstr} but missing in {msgid}: {formatted}'
return msg

def _iter_msg_pairs(self, msg: Message, *, num_plurals: int) -> Generator[LintCatalog.MessagePair, None, None]:
"""Iterate over all (original, translated) message pairs in a given message.

For singular messages, this produces a single pair (original, translated).
For plural messages, this produces a pair for each plural form. For example,
for a language with 4 plural forms, this will generate:

(orig_singular, trans_singular),
(orig_plural, trans_plural_1),
(orig_plural, trans_plural_2),
(orig_plural, trans_plural_3)

For languages with nplurals=1, this generates a single pair:

(orig_plural, trans_plural)
"""
if not msg.pluralizable:
yield self.MessagePair(msg.id, msg.string)
elif num_plurals == 1:
# Pluralized messages with nplurals=1 should be compared against the 'msgid_plural'.
yield self.MessagePair(msg.id[1], msg.string[0], plural_number=0)
else:
# Pluralized messages with nplurals>1 should compare 'msgstr[0]' against the singular and
# any other 'msgstr[X]' against 'msgid_plural'.
yield self.MessagePair(msg.id[0], msg.string[0])
for i, string in enumerate(msg.string[1:], start=1):
yield self.MessagePair(msg.id[1], string, plural_number=i)

def _extract_placeholders(self, string: str) -> set[str]:
fmt = Formatter()
try:
parsed = list(fmt.parse(string))
except ValueError:
return set()
return {self._unparse_placeholder(field_name, conversion, format_spec)
for _, field_name, format_spec, conversion in parsed if field_name is not None}

def _unparse_placeholder(
self,
field_name: str,
conversion: str | None = None,
format_spec: str | None = None,
) -> str:
return f'{{{field_name}{"!" + conversion if conversion else ""}{":" + format_spec if format_spec else ""}}}'


class CommandLineInterface:
"""Command-line interface.

Expand All @@ -866,13 +992,15 @@
'extract': 'extract messages from source files and generate a POT file',
'init': 'create new message catalogs from a POT file',
'update': 'update existing message catalogs from a POT file',
'lint': 'check message catalogs for common problems',
}

command_classes = {
'compile': CompileCatalog,
'extract': ExtractMessages,
'init': InitCatalog,
'update': UpdateCatalog,
'lint': LintCatalog,
}

log = None # Replaced on instance level
Expand Down
116 changes: 115 additions & 1 deletion tests/messages/test_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from babel import __version__ as VERSION
from babel.dates import format_datetime
from babel.messages import Catalog, extract, frontend
from babel.messages import Catalog, Message, extract, frontend
from babel.messages.frontend import (
BaseError,
CommandLineInterface,
Expand Down Expand Up @@ -715,6 +715,120 @@ def test_supports_width(self):
assert expected_content == actual_content


MessagePair = frontend.LintCatalog.MessagePair

# Tests for the ``lint`` command implemented by ``frontend.LintCatalog``.
class TestLintCatalog:

# finalize_options() is expected to raise OptionError when the test has not
# assigned any input paths to the command.
def test_no_directory_or_input_file_specified(self):
cmd = frontend.LintCatalog()
with pytest.raises(OptionError):
cmd.finalize_options()

# Table-driven check of placeholder extraction. Strings with an unbalanced
# brace ('{', '}', '{} {') and escaped braces ('{{}}') are expected to yield
# no placeholders; conversions and format specs are expected to be preserved
# in the extracted placeholder text.
@pytest.mark.parametrize(['string', 'expected'], [
('', set()),
('{', set()),
('}', set()),
('{}', {'{}'}),
('{} {', set()),
('{{}}', set()),
('{foo}', {'{foo}'}),
('{foo} {bar}', {'{foo}', '{bar}'}),
('{foo:.2f}', {'{foo:.2f}'}),
('{foo!r:.2f=}', {'{foo!r:.2f=}'}),
])
def test__extract_placeholders(self, string, expected):
cmd = frontend.LintCatalog()
assert cmd._extract_placeholders(string) == expected

# Each case is (num_plurals, (message id, message string), expected pairs):
# a singular message, a plural message with three translations, and a plural
# message in a catalog with a single plural form.
@pytest.mark.parametrize(['num_plurals', 'message', 'expected'], [
(3, ('foo', 'bar'), [MessagePair('foo', 'bar')]),
(3, (['foo', 'foos'], ['bar', 'bars 1', 'bars 2']), [
MessagePair('foo', 'bar'),
MessagePair('foos', 'bars 1', plural_number=1),
MessagePair('foos', 'bars 2', plural_number=2),
]),
(1, (['foo', 'foos'], ['bars']), [MessagePair('foos', 'bars', plural_number=0)]),
])
def test__iter_msg_pairs(self, num_plurals, message, expected):
cmd = frontend.LintCatalog()
msg = Message(id=message[0], string=message[1])
msg_pairs = list(cmd._iter_msg_pairs(msg, num_plurals=num_plurals))
assert msg_pairs == expected

# End-to-end check for singular messages: two entries whose translations use
# different placeholders must each produce a report on stdout and nothing on
# stderr. The PO text starts with a newline, so the first msgid sits on
# line 2 and the second on line 5, matching the expected line numbers.
def test_lint_singular(self, tmp_path, capsys):
cmd = frontend.LintCatalog()
po_file = tmp_path / 'messages.po'
cmd.input_paths = [po_file]
po_file.write_text(r"""
msgid "{foo}"
msgstr "{bar} {baz}"

msgid "{foo} {bar}"
msgstr "{bar} {baz}"
""")

cmd.run()
captured = capsys.readouterr()
assert captured.err == ''
assert captured.out == (f"{po_file}:2: placeholders in msgid differ from placeholders in msgstr:\n"
"\tplaceholders in msgid but missing in msgstr: {foo}\n"
"\tplaceholders in msgstr but missing in msgid: {bar}, {baz}\n"
f"{po_file}:5: placeholders in msgid differ from placeholders in msgstr:\n"
"\tplaceholders in msgid but missing in msgstr: {foo}\n"
"\tplaceholders in msgstr but missing in msgid: {baz}\n")

# End-to-end check for a plural message with three translations: msgstr[0]
# is expected to be reported against msgid, msgstr[1] and msgstr[2] against
# msgid_plural, all with the line number of the msgid (line 6).
# NOTE(review): presumably the "Language: cs_CZ" header makes the catalog
# report more than one plural form -- confirm against the locale data.
def test_lint_many_plurals(self, tmp_path, capsys):
cmd = frontend.LintCatalog()
po_file = tmp_path / 'lint.po'
cmd.input_paths = [po_file]
po_file.write_text(r"""
msgid ""
msgstr ""
"Language: cs_CZ\n"

msgid "You have {count} new message."
msgid_plural "You have {count} new messages."
msgstr[0] "You have {foo} new message."
msgstr[1] "You have {bar} new messages."
msgstr[2] "You have {baz} new messages."
""")

cmd.run()
captured = capsys.readouterr()
assert captured.err == ''
assert captured.out == (f"{po_file}:6: placeholders in msgid differ from placeholders in msgstr:\n"
"\tplaceholders in msgid but missing in msgstr: {count}\n"
"\tplaceholders in msgstr but missing in msgid: {foo}\n"
f"{po_file}:6: placeholders in msgid_plural differ from placeholders in msgstr[1]:\n"
"\tplaceholders in msgid_plural but missing in msgstr[1]: {count}\n"
"\tplaceholders in msgstr[1] but missing in msgid_plural: {bar}\n"
f"{po_file}:6: placeholders in msgid_plural differ from placeholders in msgstr[2]:\n"
"\tplaceholders in msgid_plural but missing in msgstr[2]: {count}\n"
"\tplaceholders in msgstr[2] but missing in msgid_plural: {baz}\n")

# End-to-end check for a plural message with a single translation: the only
# msgstr[0] is expected to be reported against msgid_plural, not msgid.
# NOTE(review): presumably the "Language: zh_TW" header makes the catalog
# report exactly one plural form -- confirm against the locale data.
def test_lint_one_plural(self, tmp_path, capsys):
cmd = frontend.LintCatalog()
po_file = tmp_path / 'lint.po'
cmd.input_paths = [po_file]
po_file.write_text(r"""
msgid ""
msgstr ""
"Language: zh_TW\n"

msgid "You have {count} new message."
msgid_plural "You have {count} new messages."
msgstr[0] "You have {foo} new messages."
""")

cmd.run()
captured = capsys.readouterr()
assert captured.err == ''
assert captured.out == (f"{po_file}:6: placeholders in msgid_plural differ from placeholders in msgstr[0]:\n"
"\tplaceholders in msgid_plural but missing in msgstr[0]: {count}\n"
"\tplaceholders in msgstr[0] but missing in msgid_plural: {foo}\n")



class CommandLineInterfaceTestCase(unittest.TestCase):

def setUp(self):
Expand Down
Loading