633 lines
21 KiB
Python
633 lines
21 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
babel.numbers
|
|
~~~~~~~~~~~~~
|
|
|
|
CLDR Plural support. See UTS #35.
|
|
|
|
:copyright: (c) 2013-2021 by the Babel Team.
|
|
:license: BSD, see LICENSE for more details.
|
|
"""
|
|
import re
|
|
|
|
from babel._compat import decimal
|
|
|
|
|
|
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
|
|
_fallback_tag = 'other'
|
|
|
|
|
|
def extract_operands(source):
|
|
"""Extract operands from a decimal, a float or an int, according to `CLDR rules`_.
|
|
|
|
The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:
|
|
|
|
====== ===============================================================
|
|
Symbol Value
|
|
------ ---------------------------------------------------------------
|
|
n absolute value of the source number (integer and decimals).
|
|
i integer digits of n.
|
|
v number of visible fraction digits in n, with trailing zeros.
|
|
w number of visible fraction digits in n, without trailing zeros.
|
|
f visible fractional digits in n, with trailing zeros.
|
|
t visible fractional digits in n, without trailing zeros.
|
|
====== ===============================================================
|
|
|
|
.. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands
|
|
|
|
:param source: A real number
|
|
:type source: int|float|decimal.Decimal
|
|
:return: A n-i-v-w-f-t tuple
|
|
:rtype: tuple[decimal.Decimal, int, int, int, int, int]
|
|
"""
|
|
n = abs(source)
|
|
i = int(n)
|
|
if isinstance(n, float):
|
|
if i == n:
|
|
n = i
|
|
else:
|
|
# Cast the `float` to a number via the string representation.
|
|
# This is required for Python 2.6 anyway (it will straight out fail to
|
|
# do the conversion otherwise), and it's highly unlikely that the user
|
|
# actually wants the lossless conversion behavior (quoting the Python
|
|
# documentation):
|
|
# > If value is a float, the binary floating point value is losslessly
|
|
# > converted to its exact decimal equivalent.
|
|
# > This conversion can often require 53 or more digits of precision.
|
|
# Should the user want that behavior, they can simply pass in a pre-
|
|
# converted `Decimal` instance of desired accuracy.
|
|
n = decimal.Decimal(str(n))
|
|
|
|
if isinstance(n, decimal.Decimal):
|
|
dec_tuple = n.as_tuple()
|
|
exp = dec_tuple.exponent
|
|
fraction_digits = dec_tuple.digits[exp:] if exp < 0 else ()
|
|
trailing = ''.join(str(d) for d in fraction_digits)
|
|
no_trailing = trailing.rstrip('0')
|
|
v = len(trailing)
|
|
w = len(no_trailing)
|
|
f = int(trailing or 0)
|
|
t = int(no_trailing or 0)
|
|
else:
|
|
v = w = f = t = 0
|
|
return n, i, v, w, f, t
|
|
|
|
|
|
class PluralRule(object):
|
|
"""Represents a set of language pluralization rules. The constructor
|
|
accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
|
|
resulting object is callable and accepts one parameter with a positive or
|
|
negative number (both integer and float) for the number that indicates the
|
|
plural form for a string and returns the tag for the format:
|
|
|
|
>>> rule = PluralRule({'one': 'n is 1'})
|
|
>>> rule(1)
|
|
'one'
|
|
>>> rule(2)
|
|
'other'
|
|
|
|
Currently the CLDR defines these tags: zero, one, two, few, many and
|
|
other where other is an implicit default. Rules should be mutually
|
|
exclusive; for a given numeric value, only one rule should apply (i.e.
|
|
the condition should only be true for one of the plural rule elements.
|
|
|
|
.. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
|
|
"""
|
|
|
|
__slots__ = ('abstract', '_func')
|
|
|
|
def __init__(self, rules):
|
|
"""Initialize the rule instance.
|
|
|
|
:param rules: a list of ``(tag, expr)``) tuples with the rules
|
|
conforming to UTS #35 or a dict with the tags as keys
|
|
and expressions as values.
|
|
:raise RuleError: if the expression is malformed
|
|
"""
|
|
if isinstance(rules, dict):
|
|
rules = rules.items()
|
|
found = set()
|
|
self.abstract = []
|
|
for key, expr in sorted(list(rules)):
|
|
if key not in _plural_tags:
|
|
raise ValueError('unknown tag %r' % key)
|
|
elif key in found:
|
|
raise ValueError('tag %r defined twice' % key)
|
|
found.add(key)
|
|
ast = _Parser(expr).ast
|
|
if ast:
|
|
self.abstract.append((key, ast))
|
|
|
|
def __repr__(self):
|
|
rules = self.rules
|
|
return '<%s %r>' % (
|
|
type(self).__name__,
|
|
', '.join(['%s: %s' % (tag, rules[tag]) for tag in _plural_tags
|
|
if tag in rules])
|
|
)
|
|
|
|
@classmethod
|
|
def parse(cls, rules):
|
|
"""Create a `PluralRule` instance for the given rules. If the rules
|
|
are a `PluralRule` object, that object is returned.
|
|
|
|
:param rules: the rules as list or dict, or a `PluralRule` object
|
|
:raise RuleError: if the expression is malformed
|
|
"""
|
|
if isinstance(rules, cls):
|
|
return rules
|
|
return cls(rules)
|
|
|
|
@property
|
|
def rules(self):
|
|
"""The `PluralRule` as a dict of unicode plural rules.
|
|
|
|
>>> rule = PluralRule({'one': 'n is 1'})
|
|
>>> rule.rules
|
|
{'one': 'n is 1'}
|
|
"""
|
|
_compile = _UnicodeCompiler().compile
|
|
return dict([(tag, _compile(ast)) for tag, ast in self.abstract])
|
|
|
|
tags = property(lambda x: frozenset([i[0] for i in x.abstract]), doc="""
|
|
A set of explicitly defined tags in this rule. The implicit default
|
|
``'other'`` rules is not part of this set unless there is an explicit
|
|
rule for it.""")
|
|
|
|
def __getstate__(self):
|
|
return self.abstract
|
|
|
|
def __setstate__(self, abstract):
|
|
self.abstract = abstract
|
|
|
|
def __call__(self, n):
|
|
if not hasattr(self, '_func'):
|
|
self._func = to_python(self)
|
|
return self._func(n)
|
|
|
|
|
|
def to_javascript(rule):
|
|
"""Convert a list/dict of rules or a `PluralRule` object into a JavaScript
|
|
function. This function depends on no external library:
|
|
|
|
>>> to_javascript({'one': 'n is 1'})
|
|
"(function(n) { return (n == 1) ? 'one' : 'other'; })"
|
|
|
|
Implementation detail: The function generated will probably evaluate
|
|
expressions involved into range operations multiple times. This has the
|
|
advantage that external helper functions are not required and is not a
|
|
big performance hit for these simple calculations.
|
|
|
|
:param rule: the rules as list or dict, or a `PluralRule` object
|
|
:raise RuleError: if the expression is malformed
|
|
"""
|
|
to_js = _JavaScriptCompiler().compile
|
|
result = ['(function(n) { return ']
|
|
for tag, ast in PluralRule.parse(rule).abstract:
|
|
result.append('%s ? %r : ' % (to_js(ast), tag))
|
|
result.append('%r; })' % _fallback_tag)
|
|
return ''.join(result)
|
|
|
|
|
|
def to_python(rule):
|
|
"""Convert a list/dict of rules or a `PluralRule` object into a regular
|
|
Python function. This is useful in situations where you need a real
|
|
function and don't are about the actual rule object:
|
|
|
|
>>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
|
|
>>> func(1)
|
|
'one'
|
|
>>> func(3)
|
|
'few'
|
|
>>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
|
|
>>> func(11)
|
|
'one'
|
|
>>> func(15)
|
|
'few'
|
|
|
|
:param rule: the rules as list or dict, or a `PluralRule` object
|
|
:raise RuleError: if the expression is malformed
|
|
"""
|
|
namespace = {
|
|
'IN': in_range_list,
|
|
'WITHIN': within_range_list,
|
|
'MOD': cldr_modulo,
|
|
'extract_operands': extract_operands,
|
|
}
|
|
to_python_func = _PythonCompiler().compile
|
|
result = [
|
|
'def evaluate(n):',
|
|
' n, i, v, w, f, t = extract_operands(n)',
|
|
]
|
|
for tag, ast in PluralRule.parse(rule).abstract:
|
|
# the str() call is to coerce the tag to the native string. It's
|
|
# a limited ascii restricted set of tags anyways so that is fine.
|
|
result.append(' if (%s): return %r' % (to_python_func(ast), str(tag)))
|
|
result.append(' return %r' % _fallback_tag)
|
|
code = compile('\n'.join(result), '<rule>', 'exec')
|
|
eval(code, namespace)
|
|
return namespace['evaluate']
|
|
|
|
|
|
def to_gettext(rule):
|
|
"""The plural rule as gettext expression. The gettext expression is
|
|
technically limited to integers and returns indices rather than tags.
|
|
|
|
>>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
|
|
'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'
|
|
|
|
:param rule: the rules as list or dict, or a `PluralRule` object
|
|
:raise RuleError: if the expression is malformed
|
|
"""
|
|
rule = PluralRule.parse(rule)
|
|
|
|
used_tags = rule.tags | {_fallback_tag}
|
|
_compile = _GettextCompiler().compile
|
|
_get_index = [tag for tag in _plural_tags if tag in used_tags].index
|
|
|
|
result = ['nplurals=%d; plural=(' % len(used_tags)]
|
|
for tag, ast in rule.abstract:
|
|
result.append('%s ? %d : ' % (_compile(ast), _get_index(tag)))
|
|
result.append('%d)' % _get_index(_fallback_tag))
|
|
return ''.join(result)
|
|
|
|
|
|
def in_range_list(num, range_list):
|
|
"""Integer range list test. This is the callback for the "in" operator
|
|
of the UTS #35 pluralization rule language:
|
|
|
|
>>> in_range_list(1, [(1, 3)])
|
|
True
|
|
>>> in_range_list(3, [(1, 3)])
|
|
True
|
|
>>> in_range_list(3, [(1, 3), (5, 8)])
|
|
True
|
|
>>> in_range_list(1.2, [(1, 4)])
|
|
False
|
|
>>> in_range_list(10, [(1, 4)])
|
|
False
|
|
>>> in_range_list(10, [(1, 4), (6, 8)])
|
|
False
|
|
"""
|
|
return num == int(num) and within_range_list(num, range_list)
|
|
|
|
|
|
def within_range_list(num, range_list):
|
|
"""Float range test. This is the callback for the "within" operator
|
|
of the UTS #35 pluralization rule language:
|
|
|
|
>>> within_range_list(1, [(1, 3)])
|
|
True
|
|
>>> within_range_list(1.0, [(1, 3)])
|
|
True
|
|
>>> within_range_list(1.2, [(1, 4)])
|
|
True
|
|
>>> within_range_list(8.8, [(1, 4), (7, 15)])
|
|
True
|
|
>>> within_range_list(10, [(1, 4)])
|
|
False
|
|
>>> within_range_list(10.5, [(1, 4), (20, 30)])
|
|
False
|
|
"""
|
|
return any(num >= min_ and num <= max_ for min_, max_ in range_list)
|
|
|
|
|
|
def cldr_modulo(a, b):
|
|
"""Javaish modulo. This modulo operator returns the value with the sign
|
|
of the dividend rather than the divisor like Python does:
|
|
|
|
>>> cldr_modulo(-3, 5)
|
|
-3
|
|
>>> cldr_modulo(-3, -5)
|
|
-3
|
|
>>> cldr_modulo(3, 5)
|
|
3
|
|
"""
|
|
reverse = 0
|
|
if a < 0:
|
|
a *= -1
|
|
reverse = 1
|
|
if b < 0:
|
|
b *= -1
|
|
rv = a % b
|
|
if reverse:
|
|
rv *= -1
|
|
return rv
|
|
|
|
|
|
class RuleError(Exception):
|
|
"""Raised if a rule is malformed."""
|
|
|
|
_VARS = 'nivwft'
|
|
|
|
_RULES = [
|
|
(None, re.compile(r'\s+', re.UNICODE)),
|
|
('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
|
|
.format(_VARS))),
|
|
('value', re.compile(r'\d+')),
|
|
('symbol', re.compile(r'%|,|!=|=')),
|
|
('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)) # U+2026: ELLIPSIS
|
|
]
|
|
|
|
|
|
def tokenize_rule(s):
|
|
s = s.split('@')[0]
|
|
result = []
|
|
pos = 0
|
|
end = len(s)
|
|
while pos < end:
|
|
for tok, rule in _RULES:
|
|
match = rule.match(s, pos)
|
|
if match is not None:
|
|
pos = match.end()
|
|
if tok:
|
|
result.append((tok, match.group()))
|
|
break
|
|
else:
|
|
raise RuleError('malformed CLDR pluralization rule. '
|
|
'Got unexpected %r' % s[pos])
|
|
return result[::-1]
|
|
|
|
|
|
def test_next_token(tokens, type_, value=None):
|
|
return tokens and tokens[-1][0] == type_ and \
|
|
(value is None or tokens[-1][1] == value)
|
|
|
|
|
|
def skip_token(tokens, type_, value=None):
|
|
if test_next_token(tokens, type_, value):
|
|
return tokens.pop()
|
|
|
|
|
|
def value_node(value):
|
|
return 'value', (value, )
|
|
|
|
|
|
def ident_node(name):
|
|
return name, ()
|
|
|
|
|
|
def range_list_node(range_list):
|
|
return 'range_list', range_list
|
|
|
|
|
|
def negate(rv):
|
|
return 'not', (rv,)
|
|
|
|
|
|
class _Parser(object):
|
|
"""Internal parser. This class can translate a single rule into an abstract
|
|
tree of tuples. It implements the following grammar::
|
|
|
|
condition = and_condition ('or' and_condition)*
|
|
('@integer' samples)?
|
|
('@decimal' samples)?
|
|
and_condition = relation ('and' relation)*
|
|
relation = is_relation | in_relation | within_relation
|
|
is_relation = expr 'is' ('not')? value
|
|
in_relation = expr (('not')? 'in' | '=' | '!=') range_list
|
|
within_relation = expr ('not')? 'within' range_list
|
|
expr = operand (('mod' | '%') value)?
|
|
operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
|
|
range_list = (range | value) (',' range_list)*
|
|
value = digit+
|
|
digit = 0|1|2|3|4|5|6|7|8|9
|
|
range = value'..'value
|
|
samples = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
|
|
sampleRange = decimalValue '~' decimalValue
|
|
decimalValue = value ('.' value)?
|
|
|
|
- Whitespace can occur between or around any of the above tokens.
|
|
- Rules should be mutually exclusive; for a given numeric value, only one
|
|
rule should apply (i.e. the condition should only be true for one of
|
|
the plural rule elements).
|
|
- The in and within relations can take comma-separated lists, such as:
|
|
'n in 3,5,7..15'.
|
|
- Samples are ignored.
|
|
|
|
The translator parses the expression on instanciation into an attribute
|
|
called `ast`.
|
|
"""
|
|
|
|
def __init__(self, string):
|
|
self.tokens = tokenize_rule(string)
|
|
if not self.tokens:
|
|
# If the pattern is only samples, it's entirely possible
|
|
# no stream of tokens whatsoever is generated.
|
|
self.ast = None
|
|
return
|
|
self.ast = self.condition()
|
|
if self.tokens:
|
|
raise RuleError('Expected end of rule, got %r' %
|
|
self.tokens[-1][1])
|
|
|
|
def expect(self, type_, value=None, term=None):
|
|
token = skip_token(self.tokens, type_, value)
|
|
if token is not None:
|
|
return token
|
|
if term is None:
|
|
term = repr(value is None and type_ or value)
|
|
if not self.tokens:
|
|
raise RuleError('expected %s but end of rule reached' % term)
|
|
raise RuleError('expected %s but got %r' % (term, self.tokens[-1][1]))
|
|
|
|
def condition(self):
|
|
op = self.and_condition()
|
|
while skip_token(self.tokens, 'word', 'or'):
|
|
op = 'or', (op, self.and_condition())
|
|
return op
|
|
|
|
def and_condition(self):
|
|
op = self.relation()
|
|
while skip_token(self.tokens, 'word', 'and'):
|
|
op = 'and', (op, self.relation())
|
|
return op
|
|
|
|
def relation(self):
|
|
left = self.expr()
|
|
if skip_token(self.tokens, 'word', 'is'):
|
|
return skip_token(self.tokens, 'word', 'not') and 'isnot' or 'is', \
|
|
(left, self.value())
|
|
negated = skip_token(self.tokens, 'word', 'not')
|
|
method = 'in'
|
|
if skip_token(self.tokens, 'word', 'within'):
|
|
method = 'within'
|
|
else:
|
|
if not skip_token(self.tokens, 'word', 'in'):
|
|
if negated:
|
|
raise RuleError('Cannot negate operator based rules.')
|
|
return self.newfangled_relation(left)
|
|
rv = 'relation', (method, left, self.range_list())
|
|
return negate(rv) if negated else rv
|
|
|
|
def newfangled_relation(self, left):
|
|
if skip_token(self.tokens, 'symbol', '='):
|
|
negated = False
|
|
elif skip_token(self.tokens, 'symbol', '!='):
|
|
negated = True
|
|
else:
|
|
raise RuleError('Expected "=" or "!=" or legacy relation')
|
|
rv = 'relation', ('in', left, self.range_list())
|
|
return negate(rv) if negated else rv
|
|
|
|
def range_or_value(self):
|
|
left = self.value()
|
|
if skip_token(self.tokens, 'ellipsis'):
|
|
return left, self.value()
|
|
else:
|
|
return left, left
|
|
|
|
def range_list(self):
|
|
range_list = [self.range_or_value()]
|
|
while skip_token(self.tokens, 'symbol', ','):
|
|
range_list.append(self.range_or_value())
|
|
return range_list_node(range_list)
|
|
|
|
def expr(self):
|
|
word = skip_token(self.tokens, 'word')
|
|
if word is None or word[1] not in _VARS:
|
|
raise RuleError('Expected identifier variable')
|
|
name = word[1]
|
|
if skip_token(self.tokens, 'word', 'mod'):
|
|
return 'mod', ((name, ()), self.value())
|
|
elif skip_token(self.tokens, 'symbol', '%'):
|
|
return 'mod', ((name, ()), self.value())
|
|
return ident_node(name)
|
|
|
|
def value(self):
|
|
return value_node(int(self.expect('value')[1]))
|
|
|
|
|
|
def _binary_compiler(tmpl):
|
|
"""Compiler factory for the `_Compiler`."""
|
|
return lambda self, l, r: tmpl % (self.compile(l), self.compile(r))
|
|
|
|
|
|
def _unary_compiler(tmpl):
|
|
"""Compiler factory for the `_Compiler`."""
|
|
return lambda self, x: tmpl % self.compile(x)
|
|
|
|
|
|
compile_zero = lambda x: '0'
|
|
|
|
|
|
class _Compiler(object):
|
|
"""The compilers are able to transform the expressions into multiple
|
|
output formats.
|
|
"""
|
|
|
|
def compile(self, arg):
|
|
op, args = arg
|
|
return getattr(self, 'compile_' + op)(*args)
|
|
|
|
compile_n = lambda x: 'n'
|
|
compile_i = lambda x: 'i'
|
|
compile_v = lambda x: 'v'
|
|
compile_w = lambda x: 'w'
|
|
compile_f = lambda x: 'f'
|
|
compile_t = lambda x: 't'
|
|
compile_value = lambda x, v: str(v)
|
|
compile_and = _binary_compiler('(%s && %s)')
|
|
compile_or = _binary_compiler('(%s || %s)')
|
|
compile_not = _unary_compiler('(!%s)')
|
|
compile_mod = _binary_compiler('(%s %% %s)')
|
|
compile_is = _binary_compiler('(%s == %s)')
|
|
compile_isnot = _binary_compiler('(%s != %s)')
|
|
|
|
def compile_relation(self, method, expr, range_list):
|
|
raise NotImplementedError()
|
|
|
|
|
|
class _PythonCompiler(_Compiler):
|
|
"""Compiles an expression to Python."""
|
|
|
|
compile_and = _binary_compiler('(%s and %s)')
|
|
compile_or = _binary_compiler('(%s or %s)')
|
|
compile_not = _unary_compiler('(not %s)')
|
|
compile_mod = _binary_compiler('MOD(%s, %s)')
|
|
|
|
def compile_relation(self, method, expr, range_list):
|
|
compile_range_list = '[%s]' % ','.join(
|
|
['(%s, %s)' % tuple(map(self.compile, range_))
|
|
for range_ in range_list[1]])
|
|
return '%s(%s, %s)' % (method.upper(), self.compile(expr),
|
|
compile_range_list)
|
|
|
|
|
|
class _GettextCompiler(_Compiler):
|
|
"""Compile into a gettext plural expression."""
|
|
|
|
compile_i = _Compiler.compile_n
|
|
compile_v = compile_zero
|
|
compile_w = compile_zero
|
|
compile_f = compile_zero
|
|
compile_t = compile_zero
|
|
|
|
def compile_relation(self, method, expr, range_list):
|
|
rv = []
|
|
expr = self.compile(expr)
|
|
for item in range_list[1]:
|
|
if item[0] == item[1]:
|
|
rv.append('(%s == %s)' % (
|
|
expr,
|
|
self.compile(item[0])
|
|
))
|
|
else:
|
|
min, max = map(self.compile, item)
|
|
rv.append('(%s >= %s && %s <= %s)' % (
|
|
expr,
|
|
min,
|
|
expr,
|
|
max
|
|
))
|
|
return '(%s)' % ' || '.join(rv)
|
|
|
|
|
|
class _JavaScriptCompiler(_GettextCompiler):
|
|
"""Compiles the expression to plain of JavaScript."""
|
|
|
|
# XXX: presently javascript does not support any of the
|
|
# fraction support and basically only deals with integers.
|
|
compile_i = lambda x: 'parseInt(n, 10)'
|
|
compile_v = compile_zero
|
|
compile_w = compile_zero
|
|
compile_f = compile_zero
|
|
compile_t = compile_zero
|
|
|
|
def compile_relation(self, method, expr, range_list):
|
|
code = _GettextCompiler.compile_relation(
|
|
self, method, expr, range_list)
|
|
if method == 'in':
|
|
expr = self.compile(expr)
|
|
code = '(parseInt(%s, 10) == %s && %s)' % (expr, expr, code)
|
|
return code
|
|
|
|
|
|
class _UnicodeCompiler(_Compiler):
|
|
"""Returns a unicode pluralization rule again."""
|
|
|
|
# XXX: this currently spits out the old syntax instead of the new
|
|
# one. We can change that, but it will break a whole bunch of stuff
|
|
# for users I suppose.
|
|
|
|
compile_is = _binary_compiler('%s is %s')
|
|
compile_isnot = _binary_compiler('%s is not %s')
|
|
compile_and = _binary_compiler('%s and %s')
|
|
compile_or = _binary_compiler('%s or %s')
|
|
compile_mod = _binary_compiler('%s mod %s')
|
|
|
|
def compile_not(self, relation):
|
|
return self.compile_relation(negated=True, *relation[1])
|
|
|
|
def compile_relation(self, method, expr, range_list, negated=False):
|
|
ranges = []
|
|
for item in range_list[1]:
|
|
if item[0] == item[1]:
|
|
ranges.append(self.compile(item[0]))
|
|
else:
|
|
ranges.append('%s..%s' % tuple(map(self.compile, item)))
|
|
return '%s%s %s %s' % (
|
|
self.compile(expr), negated and ' not' or '',
|
|
method, ','.join(ranges)
|
|
)
|