regexec: Fix off-by-one bug in weight comparison [BZ #23036]
authorFlorian Weimer <fweimer@redhat.com>
Tue, 10 Jul 2018 09:18:26 +0000 (11:18 +0200)
committerFlorian Weimer <fweimer@redhat.com>
Tue, 10 Jul 2018 09:18:26 +0000 (11:18 +0200)
Each weight is prefixed by its length, and the length does not include
itself in the count.  This can be seen clearly from the find_idx
function in string/strxfrm_l.c, for example.  The old code behaved as if
the length itself counted, thus comparing an additional byte after the
weight, leading to spurious comparison failures and incorrect further
partitioning of character equivalence classes.

ChangeLog
posix/regexec.c

index 8068171e2bbce97913a404dc6125de4386f97ae1..d18c24453f6577a0ac06c64046d4e8354679a73a 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2018-07-10  Florian Weimer  <fweimer@redhat.com>
+
+       [BZ #23036]
+       * posix/regexec.c (check_node_accept_bytes): When comparing
+       weights, do not compare an extra byte after the end of the
+       weights.
+
 2018-07-10  Florian Weimer  <fweimer@redhat.com>
 
        * libio/readline.c: Fix copyright year.
index 63aef9753516d078938c3ff4aec77c5ba385b0a0..73644c2341336e6655980a6eb29bad2110ea73a2 100644 (file)
@@ -3878,30 +3878,27 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
              indirect = (const int32_t *)
                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
              int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
+             int32_t rule = idx >> 24;
+             idx &= 0xffffff;
              if (idx > 0)
-               for (i = 0; i < cset->nequiv_classes; ++i)
-                 {
-                   int32_t equiv_class_idx = cset->equiv_classes[i];
-                   size_t weight_len = weights[idx & 0xffffff];
-                   if (weight_len == weights[equiv_class_idx & 0xffffff]
-                       && (idx >> 24) == (equiv_class_idx >> 24))
-                     {
-                       Idx cnt = 0;
-
-                       idx &= 0xffffff;
-                       equiv_class_idx &= 0xffffff;
-
-                       while (cnt <= weight_len
-                              && (weights[equiv_class_idx + 1 + cnt]
-                                  == weights[idx + 1 + cnt]))
-                         ++cnt;
-                       if (cnt > weight_len)
-                         {
-                           match_len = elem_len;
-                           goto check_node_accept_bytes_match;
-                         }
-                     }
-                 }
+               {
+                 size_t weight_len = weights[idx];
+                 for (i = 0; i < cset->nequiv_classes; ++i)
+                   {
+                     int32_t equiv_class_idx = cset->equiv_classes[i];
+                     int32_t equiv_class_rule = equiv_class_idx >> 24;
+                     equiv_class_idx &= 0xffffff;
+                     if (weights[equiv_class_idx] == weight_len
+                         && equiv_class_rule == rule
+                         && memcmp (weights + idx + 1,
+                                    weights + equiv_class_idx + 1,
+                                    weight_len) == 0)
+                       {
+                         match_len = elem_len;
+                         goto check_node_accept_bytes_match;
+                       }
+                   }
+               }
            }
        }
       else