1 /* Unicode CLDR plural rule parser and converter
2 Copyright (C) 2015 Free Software Foundation, Inc.
4 This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
30 #include "cldr-plural-exp.h"
31 #include "cldr-plural.h"
33 /* The grammar of Unicode CLDR plural rules is defined at:
34 http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax
36 This implementation only supports the "preferred" form, which
37 doesn't support obsolete keywords "in", "is", "not", and "within".
39 Unlike gettext, CLDR allows an unsigned decimal value as an
40 operand, in addition to unsigned integers. For simplicity, we
41 treat decimal relations as if they had a constant truth value.
43 The implementation is largely based on the idea of Michele Locati's
44 cldr-to-gettext-plural-rules:
45 https://github.com/mlocati/cldr-to-gettext-plural-rules */
/* Judging by its name, releases RANGE.  The only visible statement compares
   the START and END pointers of RANGE, which suggests that both fields may
   point at one shared operand; the statements that actually release memory
   are not visible in this excerpt.  */
48 cldr_plural_range_free (struct cldr_plural_range_ty *range)
50 if (range->start != range->end)
/* Pass every item of RANGES to cldr_plural_range_free, from the last item
   down to the first.  The loop condition post-decrements RANGES->nitems,
   so that field no longer holds the item count once the loop has run.
   Any further cleanup of RANGES itself is not visible in this excerpt.  */
57 cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges)
59 while (ranges->nitems-- > 0)
60 cldr_plural_range_free (ranges->items[ranges->nitems]);
/* Release what CONDITION refers to, depending on its type: for an AND or
   OR node both sub-conditions are released recursively; for a RELATION
   node the relation is handed to cldr_plural_relation_free.  No other
   condition type is handled in the visible code.  */
66 cldr_plural_condition_free (struct cldr_plural_condition_ty *condition)
68 if (condition->type == CLDR_PLURAL_CONDITION_AND
69 || condition->type == CLDR_PLURAL_CONDITION_OR)
71 cldr_plural_condition_free (condition->value.conditions[0]);
72 cldr_plural_condition_free (condition->value.conditions[1]);
74 else if (condition->type == CLDR_PLURAL_CONDITION_RELATION)
75 cldr_plural_relation_free (condition->value.relation);
/* Release the members of RELATION: the expression with free () and the
   range list with cldr_plural_range_list_free.  Whether RELATION itself
   is released as well is not visible in this excerpt.  */
80 cldr_plural_relation_free (struct cldr_plural_relation_ty *relation)
82 free (relation->expression);
83 cldr_plural_range_list_free (relation->ranges);
/* Release the condition of RULE via cldr_plural_condition_free.  Any other
   cleanup performed on RULE is not visible in this excerpt.  */
88 cldr_plural_rule_free (struct cldr_plural_rule_ty *rule)
91 cldr_plural_condition_free (rule->condition);
/* Pass every item of RULES to cldr_plural_rule_free, from the last item
   down to the first.  The loop condition post-decrements RULES->nitems,
   so that field no longer holds the item count once the loop has run.
   Any further cleanup of RULES itself is not visible in this excerpt.  */
96 cldr_plural_rule_list_free (struct cldr_plural_rule_list_ty *rules)
98 while (rules->nitems-- > 0)
99 cldr_plural_rule_free (rules->items[rules->nitems]);
/* Parse INPUT, a NUL-terminated string, and return a pointer to a rule
   list.

   ARG is zero-filled, its CP_END member is set to the terminating NUL of
   INPUT, and its RESULT member receives a zero-filled list obtained with
   XMALLOC; ARG is then handed to yyparse.  What is returned when yyparse
   reports a nonzero value is not visible in this excerpt.

   NOTE(review): confirm that ARG.result is released on the path taken
   when yyparse returns nonzero.  */
104 struct cldr_plural_rule_list_ty *
105 cldr_plural_parse (const char *input)
107 struct cldr_plural_parse_args arg;
109 memset (&arg, 0, sizeof (struct cldr_plural_parse_args));
/* CP_END points at the terminating NUL of INPUT.  */
111 arg.cp_end = input + strlen (input);
112 arg.result = XMALLOC (struct cldr_plural_rule_list_ty);
113 memset (arg.result, 0, sizeof (struct cldr_plural_rule_list_ty));
/* A nonzero result from yyparse is handled as a separate case.  */
115 if (yyparse (&arg) != 0)
/* True if the operand pointed to by O is an integer operand whose IVAL is
   0, or a decimal operand whose DVAL.D is 0.  O is evaluated more than
   once, so it must not have side effects.  */
121 #define OPERAND_ZERO_P(o) \
122 (((o)->type == CLDR_PLURAL_OPERAND_INTEGER \
123 && (o)->value.ival == 0) \
124 || ((o)->type == CLDR_PLURAL_OPERAND_DECIMAL \
125 && (o)->value.dval.d == 0))
/* Try to reduce RELATION to a constant truth value, dispatching on the
   operand character of its expression (the case labels are not visible in
   this excerpt).

   One branch rewrites decimal range bounds in place as integers: a START
   with a fractional part is rounded up to the next integer, an END is
   truncated.  It then sets the operand of the expression to 'i'.

   Another branch, which per its comment covers the operands 'f', 't', 'v'
   and 'w', yields CLDR_PLURAL_CONDITION_FALSE as soon as some range gives
   an EQUAL relation a nonzero bound or a NOT_EQUAL relation a zero bound,
   and CLDR_PLURAL_CONDITION_TRUE if no range does.

   CLDR_PLURAL_CONDITION_RELATION is the remaining return value;
   presumably it signals that RELATION could not be reduced to a constant.
   TODO confirm against the complete source.  */
127 static enum cldr_plural_condition
128 eval_relation (struct cldr_plural_relation_ty *relation)
130 switch (relation->expression->operand)
134 /* Coerce decimal values in ranges into integers. */
136 for (i = 0; i < relation->ranges->nitems; i++)
138 struct cldr_plural_range_ty *range = relation->ranges->items[i];
139 if (range->start->type == CLDR_PLURAL_OPERAND_DECIMAL)
141 int truncated = (int) range->start->value.dval.d;
142 range->start->type = CLDR_PLURAL_OPERAND_INTEGER;
143 range->start->value.ival
144 = range->start->value.dval.d == truncated
145 ? truncated : truncated + 1;
147 if (range->end->type == CLDR_PLURAL_OPERAND_DECIMAL)
149 range->end->type = CLDR_PLURAL_OPERAND_INTEGER;
150 range->end->value.ival = (int) (range->end->value.dval.d);
153 relation->expression->operand = 'i';
159 /* Since plural expression in gettext only supports unsigned
160 integer, turn relations whose operand is either 'f', 't',
161 'v', or 'w' into a constant truth value. */
162 /* FIXME: check mod? */
164 for (i = 0; i < relation->ranges->nitems; i++)
166 struct cldr_plural_range_ty *range = relation->ranges->items[i];
167 if ((relation->type == CLDR_PLURAL_RELATION_EQUAL
168 && (!OPERAND_ZERO_P (range->start)
169 || !OPERAND_ZERO_P (range->end)))
170 || (relation->type == CLDR_PLURAL_RELATION_NOT_EQUAL
171 && (OPERAND_ZERO_P (range->start)
172 || OPERAND_ZERO_P (range->end))))
173 return CLDR_PLURAL_CONDITION_FALSE;
175 return CLDR_PLURAL_CONDITION_TRUE;
179 return CLDR_PLURAL_CONDITION_RELATION;
/* Simplify CONDITION in place by folding constant truth values.

   For an AND or OR node both sub-conditions are simplified first.  If the
   outcome is then decided (AND with a FALSE operand, OR with a TRUE
   operand, or both operands equal to the neutral constant), the
   sub-conditions are released and CONDITION itself becomes the constant.
   If exactly one operand is the neutral constant (TRUE for AND, FALSE for
   OR), that operand is released and the type and value of the other
   operand are copied into CONDITION.

   The remaining visible code passes the relation of CONDITION to
   eval_relation and, when the result is TRUE or FALSE, releases the
   relation and stores the result as the type of CONDITION.  */
183 eval_condition (struct cldr_plural_condition_ty *condition)
185 if (condition->type == CLDR_PLURAL_CONDITION_AND)
/* Simplify both operands before inspecting their types.  */
187 eval_condition (condition->value.conditions[0]);
188 eval_condition (condition->value.conditions[1]);
/* A FALSE operand makes the whole conjunction FALSE.  */
190 if (condition->value.conditions[0]->type
191 == CLDR_PLURAL_CONDITION_FALSE
192 || condition->value.conditions[1]->type
193 == CLDR_PLURAL_CONDITION_FALSE)
195 cldr_plural_condition_free (condition->value.conditions[0]);
196 cldr_plural_condition_free (condition->value.conditions[1]);
197 condition->type = CLDR_PLURAL_CONDITION_FALSE;
/* Both operands TRUE: the conjunction is TRUE.  */
199 else if (condition->value.conditions[0]->type
200 == CLDR_PLURAL_CONDITION_TRUE
201 && condition->value.conditions[1]->type
202 == CLDR_PLURAL_CONDITION_TRUE)
204 cldr_plural_condition_free (condition->value.conditions[0]);
205 cldr_plural_condition_free (condition->value.conditions[1]);
206 condition->type = CLDR_PLURAL_CONDITION_TRUE;
/* Only the first operand is TRUE: CONDITION takes over the second one.
   ORIGINAL keeps the pointer to that operand because the assignment to
   CONDITION->value below overwrites conditions[1]; how ORIGINAL is used
   afterwards is not visible in this excerpt.  The three similar branches
   further down follow the same pattern.  */
208 else if (condition->value.conditions[0]->type
209 == CLDR_PLURAL_CONDITION_TRUE)
211 struct cldr_plural_condition_ty *original
212 = condition->value.conditions[1];
213 cldr_plural_condition_free (condition->value.conditions[0]);
214 condition->type = condition->value.conditions[1]->type;
215 condition->value = condition->value.conditions[1]->value;
/* Only the second operand is TRUE: CONDITION takes over the first one.  */
218 else if (condition->value.conditions[1]->type
219 == CLDR_PLURAL_CONDITION_TRUE)
221 struct cldr_plural_condition_ty *original
222 = condition->value.conditions[0];
223 cldr_plural_condition_free (condition->value.conditions[1]);
224 condition->type = condition->value.conditions[0]->type;
225 condition->value = condition->value.conditions[0]->value;
229 else if (condition->type == CLDR_PLURAL_CONDITION_OR)
/* Simplify both operands before inspecting their types.  */
231 eval_condition (condition->value.conditions[0]);
232 eval_condition (condition->value.conditions[1]);
/* A TRUE operand makes the whole disjunction TRUE.  */
234 if (condition->value.conditions[0]->type
235 == CLDR_PLURAL_CONDITION_TRUE
236 || condition->value.conditions[1]->type
237 == CLDR_PLURAL_CONDITION_TRUE)
239 cldr_plural_condition_free (condition->value.conditions[0]);
240 cldr_plural_condition_free (condition->value.conditions[1]);
241 condition->type = CLDR_PLURAL_CONDITION_TRUE;
/* Both operands FALSE: the disjunction is FALSE.  */
243 else if (condition->value.conditions[0]->type
244 == CLDR_PLURAL_CONDITION_FALSE
245 && condition->value.conditions[1]->type
246 == CLDR_PLURAL_CONDITION_FALSE)
248 cldr_plural_condition_free (condition->value.conditions[0]);
249 cldr_plural_condition_free (condition->value.conditions[1]);
250 condition->type = CLDR_PLURAL_CONDITION_FALSE;
/* Only the first operand is FALSE: CONDITION takes over the second one.  */
252 else if (condition->value.conditions[0]->type
253 == CLDR_PLURAL_CONDITION_FALSE)
255 struct cldr_plural_condition_ty *original
256 = condition->value.conditions[1];
257 cldr_plural_condition_free (condition->value.conditions[0]);
258 condition->type = condition->value.conditions[1]->type;
259 condition->value = condition->value.conditions[1]->value;
/* Only the second operand is FALSE: CONDITION takes over the first one.  */
262 else if (condition->value.conditions[1]->type
263 == CLDR_PLURAL_CONDITION_FALSE)
265 struct cldr_plural_condition_ty *original
266 = condition->value.conditions[0];
267 cldr_plural_condition_free (condition->value.conditions[1]);
268 condition->type = condition->value.conditions[0]->type;
269 condition->value = condition->value.conditions[0]->value;
/* Relation handling: replace the relation by a constant when
   eval_relation can decide it.  The guard selecting this path is not
   visible in this excerpt.  */
275 enum cldr_plural_condition value =
276 eval_relation (condition->value.relation);
277 if (value == CLDR_PLURAL_CONDITION_TRUE
278 || value == CLDR_PLURAL_CONDITION_FALSE)
280 cldr_plural_relation_free (condition->value.relation);
281 condition->type = value;
/* Larger of A and B.  The selected argument is evaluated twice, so the
   arguments must not have side effects.  */
286 #define MAX(a,b) ((a) > (b) ? (a) : (b))
/* Return the largest modulus found in CONDITION.  For an AND or OR node
   this is the larger of MODULUS0 and MODULUS1, which are presumably the
   results of the two recursive calls (their declarations are not visible
   in this excerpt); for a RELATION node it is the MOD member of the
   expression of the relation.  The result for any other condition type
   is not visible here.  */
289 find_largest_modulus (struct cldr_plural_condition_ty *condition)
291 if (condition->type == CLDR_PLURAL_CONDITION_AND
292 || condition->type == CLDR_PLURAL_CONDITION_OR)
295 find_largest_modulus (condition->value.conditions[0]);
297 find_largest_modulus (condition->value.conditions[1]);
298 return MAX (modulus0, modulus1);
300 else if (condition->type == CLDR_PLURAL_CONDITION_RELATION)
301 return condition->value.relation->expression->mod;
/* Return the largest range end found in CONDITION.  For an AND or OR node
   this is the larger of NUMBER0 and NUMBER1, which are presumably the
   results of the two recursive calls (their declarations are not visible
   in this excerpt).  For a RELATION node only the END operand of each
   range is inspected: an integer END replaces NUMBER when it is larger,
   and a decimal END that is larger is stored truncated to int.  The
   initial value of NUMBER and the return statement of this branch are
   not visible here.  */
307 find_largest_number (struct cldr_plural_condition_ty *condition)
309 if (condition->type == CLDR_PLURAL_CONDITION_AND
310 || condition->type == CLDR_PLURAL_CONDITION_OR)
313 find_largest_number (condition->value.conditions[0]);
315 find_largest_number (condition->value.conditions[1]);
316 return MAX (number0, number1);
318 else if (condition->type == CLDR_PLURAL_CONDITION_RELATION)
322 for (i = 0; i < condition->value.relation->ranges->nitems; i++)
324 struct cldr_plural_operand_ty *operand;
326 operand = condition->value.relation->ranges->items[i]->end;
327 if (operand->type == CLDR_PLURAL_OPERAND_INTEGER
328 && operand->value.ival > number)
329 number = operand->value.ival;
330 else if (operand->type == CLDR_PLURAL_OPERAND_DECIMAL
331 && operand->value.dval.d > number)
332 number = (int) operand->value.dval.d;
/* Evaluate CONDITION for the integer VALUE.

   An AND node holds if both sub-conditions hold, an OR node if either
   does (evaluated with the short-circuit operators).  For a RELATION
   node, NUMBER (presumably initialized from VALUE; its declaration is not
   visible in this excerpt) is reduced modulo the MOD of the expression
   when MOD is positive.  If NUMBER lies within some range, bounds
   included, the result is true exactly for an EQUAL relation; if it lies
   in no range, the result is true exactly for a relation that is not
   EQUAL.

   The range bounds are read through IVAL without checking the operand
   type.  */
341 apply_condition (struct cldr_plural_condition_ty *condition, int value)
343 if (condition->type == CLDR_PLURAL_CONDITION_AND)
344 return apply_condition (condition->value.conditions[0], value)
345 && apply_condition (condition->value.conditions[1], value);
346 else if (condition->type == CLDR_PLURAL_CONDITION_OR)
347 return apply_condition (condition->value.conditions[0], value)
348 || apply_condition (condition->value.conditions[1], value);
349 else if (condition->type == CLDR_PLURAL_CONDITION_RELATION)
351 struct cldr_plural_relation_ty *relation
352 = condition->value.relation;
/* A MOD of zero (or less) means no modulo is applied.  */
356 if (relation->expression->mod > 0)
357 number %= relation->expression->mod;
358 for (i = 0; i < relation->ranges->nitems; i++)
360 struct cldr_plural_range_ty *range = relation->ranges->items[i];
361 if (range->start->value.ival <= number
362 && number <= range->end->value.ival)
363 return relation->type == CLDR_PLURAL_RELATION_EQUAL;
/* NUMBER matched no range.  */
365 return relation->type != CLDR_PLURAL_RELATION_EQUAL;
/* Write EXPRESSION to FP.  The visible fprintf writes the operand n
   followed by a percent sign and the MOD of the expression, with spaces
   around the percent sign when SPACE is true.  What is written when MOD
   is 0 is not visible in this excerpt, and neither is the rest of the
   parameter list.  */
371 print_expression (struct cldr_plural_expression_ty *expression, bool space,
374 if (expression->mod == 0)
377 fprintf (fp, space ? "n %% %d" : "n%%%d", expression->mod);
/* Write RELATION to FP as a C-like expression over its ranges.

   PARENT is the type of the enclosing condition and SPACE selects the
   spaced output form for a single comparison.  Several statements of
   this function (for example the ones acting on the PARENT checks, which
   presumably emit parentheses) are not visible in this excerpt.

   For an EQUAL relation each range is written as one of
     EXPR == START            when START and END hold the same value
                              (the format string is only partly visible),
     EXPR<=END                when START is 0,
     EXPR>=START && EXPR<=END otherwise,
   and " || " is written as a separator.

   For the other relation type each range is written as one of
     EXPR != START            when START and END hold the same value,
     EXPR>END                 when START is 0,
     EXPR<START || EXPR>END   otherwise.

   The range bounds are read through IVAL without checking the operand
   type.  */
381 print_relation (struct cldr_plural_relation_ty *relation,
382 enum cldr_plural_condition parent, bool space,
/* Positive form: the value must fall into one of the ranges.  */
385 if (relation->type == CLDR_PLURAL_RELATION_EQUAL)
388 if (parent == CLDR_PLURAL_CONDITION_AND
389 && relation->ranges->nitems > 1)
391 for (i = 0; i < relation->ranges->nitems; i++)
393 struct cldr_plural_range_ty *range = relation->ranges->items[i];
395 fprintf (fp, " || ");
396 if (range->start->value.ival == range->end->value.ival)
398 print_expression (relation->expression, space, fp);
400 space && relation->ranges->nitems == 1
402 range->start->value.ival);
404 else if (range->start->value.ival == 0)
406 print_expression (relation->expression, false, fp);
407 fprintf (fp, "<=%d", range->end->value.ival);
411 if (parent == CLDR_PLURAL_CONDITION_OR
412 || relation->ranges->nitems > 1)
414 print_expression (relation->expression, false, fp);
415 fprintf (fp, ">=%d", range->start->value.ival);
416 fprintf (fp, " && ");
417 print_expression (relation->expression, false, fp);
418 fprintf (fp, "<=%d", range->end->value.ival);
419 if (parent == CLDR_PLURAL_CONDITION_OR
420 || relation->ranges->nitems > 1)
424 if (parent == CLDR_PLURAL_CONDITION_AND
425 && relation->ranges->nitems > 1)
/* Negative form: the value must fall outside the ranges.  */
431 if (parent == CLDR_PLURAL_CONDITION_OR
432 && relation->ranges->nitems > 1)
434 for (i = 0; i < relation->ranges->nitems; i++)
436 struct cldr_plural_range_ty *range = relation->ranges->items[i];
439 if (range->start->value.ival == range->end->value.ival)
441 print_expression (relation->expression, space, fp);
442 fprintf (fp, space && relation->ranges->nitems == 1
443 ? " != %d" : "!=%d", range->start->value.ival);
445 else if (range->start->value.ival == 0)
447 print_expression (relation->expression, false, fp);
448 fprintf (fp, ">%d", range->end->value.ival);
452 if (parent == CLDR_PLURAL_CONDITION_AND
453 || relation->ranges->nitems > 1)
455 print_expression (relation->expression, false, fp);
456 fprintf (fp, "<%d", range->start->value.ival);
457 fprintf (fp, " || ");
458 print_expression (relation->expression, false, fp);
459 fprintf (fp, ">%d", range->end->value.ival);
460 if (parent == CLDR_PLURAL_CONDITION_AND
461 || relation->ranges->nitems > 1)
465 if (parent == CLDR_PLURAL_CONDITION_OR
466 && relation->ranges->nitems > 1)
/* Write CONDITION to FP.

   An AND node writes its two sub-conditions joined by " && " and an OR
   node joins them with " || "; in both cases the node's own type is
   passed down as the PARENT of the sub-conditions and SPACE is passed as
   false.  Before and after an AND node PARENT is compared with OR, and
   before and after an OR node with AND; the statements guarded by these
   checks (presumably parentheses) are not visible in this excerpt.  A
   RELATION node is delegated to print_relation with PARENT and SPACE
   unchanged.

   The return value is not visible here; a caller in this file uses it as
   a truth value.  */
472 print_condition (struct cldr_plural_condition_ty *condition,
473 enum cldr_plural_condition parent, bool space,
476 if (condition->type == CLDR_PLURAL_CONDITION_AND)
478 if (parent == CLDR_PLURAL_CONDITION_OR)
480 print_condition (condition->value.conditions[0],
481 CLDR_PLURAL_CONDITION_AND, false,
483 fprintf (fp, " && ");
484 print_condition (condition->value.conditions[1],
485 CLDR_PLURAL_CONDITION_AND, false,
487 if (parent == CLDR_PLURAL_CONDITION_OR)
491 else if (condition->type == CLDR_PLURAL_CONDITION_OR)
493 if (parent == CLDR_PLURAL_CONDITION_AND)
495 print_condition (condition->value.conditions[0],
496 CLDR_PLURAL_CONDITION_OR, false,
498 fprintf (fp, " || ");
499 print_condition (condition->value.conditions[1],
500 CLDR_PLURAL_CONDITION_OR, false,
502 if (parent == CLDR_PLURAL_CONDITION_AND)
506 else if (condition->type == CLDR_PLURAL_CONDITION_RELATION)
508 print_relation (condition->value.relation, parent, space, fp);
/* True if the condition of rule R is neither the constant TRUE nor the
   constant FALSE.  R is evaluated twice, so it must not have side
   effects.  */
514 #define RULE_PRINTABLE_P(r) \
515 ((r)->condition->type != CLDR_PLURAL_CONDITION_TRUE \
516 && (r)->condition->type != CLDR_PLURAL_CONDITION_FALSE)
518 /* Convert n == N into n != N. */
/* The heuristic applies when CONDITION is an EQUAL relation with exactly
   one range whose START and END are the same pointer.  In that case a
   two-form line "nplurals=2; plural=(n != N);" is written to FP, N being
   the integer value of START.  The return statements are not visible in
   this excerpt.  */
520 print_condition_negation (struct cldr_plural_condition_ty *condition, FILE *fp)
522 if (condition->type == CLDR_PLURAL_CONDITION_RELATION
523 && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL
524 && condition->value.relation->ranges->nitems == 1
525 && condition->value.relation->ranges->items[0]->start
526 == condition->value.relation->ranges->items[0]->end)
528 fprintf (fp, "nplurals=2; plural=(n != %d);\n",
529 condition->value.relation->ranges->items[0]->start->value.ival);
535 /* Convert n == 0,...,N into n > N. */
/* The heuristic applies when CONDITION is an EQUAL relation.  The loop
   inspects each range in turn: both bounds must be integer operands and
   START must equal LAST + 1, LAST being the END of the previous range
   (its initial value and the statement executed when the check fails are
   not visible in this excerpt).  If the loop index reaches the number of
   ranges, "nplurals=2; plural=(n > N);" is written to FP, N being the END
   of the last range.  The return statements are not visible here.

   NOTE(review): with an empty range list the test I == NITEMS would hold
   at I == 0 and ITEMS[I - 1] would be read; confirm that a relation
   always has at least one range.  */
537 print_condition_greater (struct cldr_plural_condition_ty *condition, FILE *fp)
539 if (condition->type == CLDR_PLURAL_CONDITION_RELATION
540 && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL)
544 for (i = 0; i < condition->value.relation->ranges->nitems; i++)
546 struct cldr_plural_range_ty *range =
547 condition->value.relation->ranges->items[i];
548 if (range->start->type != CLDR_PLURAL_OPERAND_INTEGER
549 || range->end->type != CLDR_PLURAL_OPERAND_INTEGER
550 || range->start->value.ival != last + 1)
552 last = range->end->value.ival;
554 if (i == condition->value.relation->ranges->nitems)
556 struct cldr_plural_range_ty *range =
557 condition->value.relation->ranges->items[i - 1];
558 fprintf (fp, "nplurals=2; plural=(n > %d);\n",
559 range->end->value.ival);
/* Type of a single-rule printing heuristic: it takes a condition (the
   remaining parameters are not visible in this excerpt) and returns a
   bool, which presumably tells the caller whether the heuristic applied.
   TODO confirm against the complete source.  */
566 typedef bool (*print_condition_function_ty) (struct cldr_plural_condition_ty *,
/* The heuristics, listed in the order of this table.  */
568 static print_condition_function_ty print_condition_functions[] =
570 print_condition_negation,
571 print_condition_greater
/* Number of elements of the array A.  Only meaningful for a true array,
   not for a pointer.  */
574 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
/* Write RULES to FP as a line of the form nplurals=N; plural=EXPR;

   Steps visible in this excerpt:
   1. Every rule condition is simplified with eval_condition.
   2. The largest usable modulus over all rules is determined with
      find_largest_modulus and find_largest_number; a rule whose modulus
      is smaller than its largest range end does not contribute.
   3. A bool array with MODULUS_MAX entries is allocated and each rule is
      applied to the values 1 up to MODULUS_MAX with apply_condition.
      Rules at index I and beyond are then released from the tail of the
      list.
   4. The rules that satisfy RULE_PRINTABLE_P are counted.  For an empty
      rule list the line nplurals=1; plural=0; is written.  For a single
      printable rule the heuristics in print_condition_functions are
      tried.  Otherwise a chain of conditional operators is written, one
      condition per printable rule.

   RULES is modified by this function: conditions are simplified in place
   and trailing rules may be released.  Several statements (declarations,
   the bookkeeping inside the loops, the release of the bool array) are
   not visible in this excerpt.  */
577 cldr_plural_rule_list_print (struct cldr_plural_rule_list_ty *rules, FILE *fp)
584 /* Prune trivial conditions. */
585 for (i = 0; i < rules->nitems; i++)
587 struct cldr_plural_rule_ty *rule = rules->items[i];
588 eval_condition (rule->condition);
591 /* Omit trivial rules (e.g., the last rule for "ru") with the
593 1. From all rules, find the largest modulus M
594 2. Prepare a bit vector with M elements and initialize it with zeros
595 3. Loop over the rules, until all bits are set:
596 For each value in the range [1, M], apply a rule, and flip the
597 corresponding bit if it evaluates true */
599 /* Find the largest modulus. */
600 for (i = 0; i < rules->nitems; i++)
602 struct cldr_plural_rule_ty *rule = rules->items[i];
603 int modulus = find_largest_modulus (rule->condition);
604 int number = find_largest_number (rule->condition);
605 /* If the rule contains a range whose end is larger than
606 MODULUS, we can't use MODULUS as the upper bound. Skip
608 if (modulus >= number && modulus > modulus_max)
609 modulus_max = modulus;
614 bool *values = XNMALLOC (modulus_max, bool);
616 memset (values, 0, sizeof (bool) * modulus_max);
617 for (i = 0; i < rules->nitems; i++)
619 struct cldr_plural_rule_ty *rule = rules->items[i];
622 for (j = 0; j < modulus_max; j++)
624 bool result = apply_condition (rule->condition, j + 1);
629 /* Check if all bits are set. Then we can omit one more rule. */
630 for (j = 0; j < modulus_max; j++)
631 if (values[j] == false)
633 if (j == modulus_max)
639 while (i < rules->nitems)
640 cldr_plural_rule_free (rules->items[--rules->nitems]);
643 for (i = 0, nplurals = 1; i < rules->nitems; i++)
644 if (RULE_PRINTABLE_P (rules->items[i]))
647 /* Special case when rules is empty. */
650 fprintf (fp, "nplurals=1; plural=0;\n");
654 /* If we have only one printable rule, apply some heuristics. */
657 struct cldr_plural_condition_ty *condition;
660 for (j = 0; j < rules->nitems; j++)
661 if (RULE_PRINTABLE_P (rules->items[j]))
664 condition = rules->items[j]->condition;
665 for (j = 0; j < SIZEOF (print_condition_functions); j++)
666 if (print_condition_functions[j] (condition, fp))
670 /* If there are more printable rules, build a ternary operator. */
671 fprintf (fp, "nplurals=%zu; plural=(", nplurals);
672 for (i = 0, count = 0; i < rules->nitems; i++)
674 struct cldr_plural_rule_ty *rule = rules->items[i];
675 if (print_condition (rule->condition,
676 CLDR_PLURAL_CONDITION_FALSE,
679 && rules->nitems > 1)
681 bool printable_left = false;
684 for (j = i + 1; j < rules->nitems; j++)
685 if (RULE_PRINTABLE_P (rules->items[j]))
686 printable_left = true;
688 if (i < rules->nitems - 1 && printable_left)
689 fprintf (fp, " ? %zu : ", count++);
692 if (rules->nitems > 1)
693 fprintf (fp, " ? %zu : %zu", count, count + 1);
694 fprintf (fp, ");\n");