The code here was asymmetrical. It did not account for Unicode
semantics when ORing \W. For \w, \s, and \S it does. This patch
changes the code to be symmetrical.
I spent a couple hours trying to come up with a test, but could not get
this area of the code to execute, which may explain why there has not
been a field report of it. It may be that it is unreachable; there has
been other code in the routine that wasn't.
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
else {
- for (value = 0; value < 256; value++)
- if (!isALNUM(value))
- ANYOF_BITMAP_SET(data->start_class, value);
+ if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
+ for (value = 0; value < 256; value++) {
+ if (! isWORDCHAR_L1(value)) {
+ ANYOF_BITMAP_SET(data->start_class, value);
+ }
+ }
+ } else {
+ for (value = 0; value < 256; value++) {
+ if (! isALNUM(value)) {
+ ANYOF_BITMAP_SET(data->start_class, value);
+ }
+ }
+ }
}
}
break;