1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
51 #ifdef SUPPORT_LIBREADLINE
55 #include <readline/readline.h>
56 #include <readline/history.h>
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
79 #define fileno _fileno
82 /* A user sent this fix for Borland Builder 5 under Windows. */
85 #define _setmode(handle, mode) setmode(handle, mode)
91 #include <sys/time.h> /* These two includes are needed */
92 #include <sys/resource.h> /* for setrlimit(). */
93 #define INPUT_MODE "rb"
94 #define OUTPUT_MODE "wb"
98 /* We have to include pcre_internal.h because we need the internal info for
99 displaying the results of pcre_study() and we also need to know about the
100 internal macros, structures, and other internal data values; pcretest has
101 "inside information" compared to a program that strictly follows the PCRE API.
103 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105 appropriately for an application, not for building PCRE. */
108 #include "pcre_internal.h"
110 /* We need access to some of the data tables that PCRE uses. So as not to have
111 to keep two copies, we include the source file here, changing the names of the
112 external symbols to prevent clashes. */
114 #define _pcre_ucp_gentype ucp_gentype
115 #define _pcre_utf8_table1 utf8_table1
116 #define _pcre_utf8_table1_size utf8_table1_size
117 #define _pcre_utf8_table2 utf8_table2
118 #define _pcre_utf8_table3 utf8_table3
119 #define _pcre_utf8_table4 utf8_table4
120 #define _pcre_utt utt
121 #define _pcre_utt_size utt_size
122 #define _pcre_utt_names utt_names
123 #define _pcre_OP_lengths OP_lengths
125 #include "pcre_tables.c"
127 /* We also need the pcre_printint() function for printing out compiled
128 patterns. This function is in a separate file so that it can be included in
129 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 know which case is being compiled. */
132 #define COMPILING_PCRETEST
133 #include "pcre_printint.src"
135 /* The definition of the macro PRINTABLE, which determines whether to print an
136 output character as-is or as a hex value when showing compiled patterns, is
137 contained in the printint.src file. We use it here also, in cases when the
138 locale has not been explicitly changed, so as to get consistent output from
139 systems that differ in their output from isprint() even in the "C" locale. */
141 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
143 /* It is possible to compile this test program without including support for
144 testing the POSIX interface, though this is not available via the standard
148 #include "pcreposix.h"
151 /* It is also possible, for the benefit of the version currently imported into
152 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153 interface to the DFA matcher (NODFA), and without the doublecheck of the old
154 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155 UTF8 support if PCRE is built without it. */
164 /* Other parameters */
166 #ifndef CLOCKS_PER_SEC
168 #define CLOCKS_PER_SEC CLK_TCK
170 #define CLOCKS_PER_SEC 100
174 /* This is the default loop count for timing. */
176 #define LOOPREPEAT 500000
178 /* Static variables */
180 static FILE *outfile;
181 static int log_store = 0;
182 static int callout_count;
183 static int callout_extra;
184 static int callout_fail_count;
185 static int callout_fail_id;
186 static int debug_lengths;
187 static int first_callout;
188 static int locale_set = 0;
189 static int show_malloc;
191 static size_t gotten_store;
193 /* The buffers grow automatically if very long input lines are encountered. */
195 static int buffer_size = 50000;
196 static uschar *buffer = NULL;
197 static uschar *dbuffer = NULL;
198 static uschar *pbuffer = NULL;
201 /*************************************************
202 * Alternate character tables *
203 *************************************************/
205 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206 using the default tables of the library. However, the T option can be used to
207 select alternate sets of tables, for different kinds of testing. Note also that
208 the L (locale) option also adjusts the tables. */
210 /* This is the set of tables distributed as default with PCRE. It recognizes
211 only ASCII characters. */
213 static const unsigned char tables0[] = {
215 /* This table is a lower casing table. */
217 0, 1, 2, 3, 4, 5, 6, 7,
218 8, 9, 10, 11, 12, 13, 14, 15,
219 16, 17, 18, 19, 20, 21, 22, 23,
220 24, 25, 26, 27, 28, 29, 30, 31,
221 32, 33, 34, 35, 36, 37, 38, 39,
222 40, 41, 42, 43, 44, 45, 46, 47,
223 48, 49, 50, 51, 52, 53, 54, 55,
224 56, 57, 58, 59, 60, 61, 62, 63,
225 64, 97, 98, 99,100,101,102,103,
226 104,105,106,107,108,109,110,111,
227 112,113,114,115,116,117,118,119,
228 120,121,122, 91, 92, 93, 94, 95,
229 96, 97, 98, 99,100,101,102,103,
230 104,105,106,107,108,109,110,111,
231 112,113,114,115,116,117,118,119,
232 120,121,122,123,124,125,126,127,
233 128,129,130,131,132,133,134,135,
234 136,137,138,139,140,141,142,143,
235 144,145,146,147,148,149,150,151,
236 152,153,154,155,156,157,158,159,
237 160,161,162,163,164,165,166,167,
238 168,169,170,171,172,173,174,175,
239 176,177,178,179,180,181,182,183,
240 184,185,186,187,188,189,190,191,
241 192,193,194,195,196,197,198,199,
242 200,201,202,203,204,205,206,207,
243 208,209,210,211,212,213,214,215,
244 216,217,218,219,220,221,222,223,
245 224,225,226,227,228,229,230,231,
246 232,233,234,235,236,237,238,239,
247 240,241,242,243,244,245,246,247,
248 248,249,250,251,252,253,254,255,
250 /* This table is a case flipping table. */
252 0, 1, 2, 3, 4, 5, 6, 7,
253 8, 9, 10, 11, 12, 13, 14, 15,
254 16, 17, 18, 19, 20, 21, 22, 23,
255 24, 25, 26, 27, 28, 29, 30, 31,
256 32, 33, 34, 35, 36, 37, 38, 39,
257 40, 41, 42, 43, 44, 45, 46, 47,
258 48, 49, 50, 51, 52, 53, 54, 55,
259 56, 57, 58, 59, 60, 61, 62, 63,
260 64, 97, 98, 99,100,101,102,103,
261 104,105,106,107,108,109,110,111,
262 112,113,114,115,116,117,118,119,
263 120,121,122, 91, 92, 93, 94, 95,
264 96, 65, 66, 67, 68, 69, 70, 71,
265 72, 73, 74, 75, 76, 77, 78, 79,
266 80, 81, 82, 83, 84, 85, 86, 87,
267 88, 89, 90,123,124,125,126,127,
268 128,129,130,131,132,133,134,135,
269 136,137,138,139,140,141,142,143,
270 144,145,146,147,148,149,150,151,
271 152,153,154,155,156,157,158,159,
272 160,161,162,163,164,165,166,167,
273 168,169,170,171,172,173,174,175,
274 176,177,178,179,180,181,182,183,
275 184,185,186,187,188,189,190,191,
276 192,193,194,195,196,197,198,199,
277 200,201,202,203,204,205,206,207,
278 208,209,210,211,212,213,214,215,
279 216,217,218,219,220,221,222,223,
280 224,225,226,227,228,229,230,231,
281 232,233,234,235,236,237,238,239,
282 240,241,242,243,244,245,246,247,
283 248,249,250,251,252,253,254,255,
285 /* This table contains bit maps for various character classes. Each map is 32
286 bytes long and the bits run from the least significant end of each byte. The
287 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288 graph, print, punct, and cntrl. Other classes are built from combinations. */
290 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
296 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
316 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
321 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
326 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
331 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340 /* This table identifies various classes of character by individual bits:
341 0x01 white space character
344 0x08 hexadecimal digit
345 0x10 alphanumeric or '_'
346 0x80 regular expression metacharacter or binary zero
349 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
350 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
351 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
352 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
353 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
354 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
355 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
356 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
357 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
358 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
359 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
360 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
361 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
362 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
363 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
364 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
382 /* This is a set of tables that came originally from a Windows user. It seems to
383 be at least an approximation of ISO 8859. In particular, there are characters
384 greater than 128 that are marked as spaces, letters, etc. */
386 static const unsigned char tables1[] = {
388 8,9,10,11,12,13,14,15,
389 16,17,18,19,20,21,22,23,
390 24,25,26,27,28,29,30,31,
391 32,33,34,35,36,37,38,39,
392 40,41,42,43,44,45,46,47,
393 48,49,50,51,52,53,54,55,
394 56,57,58,59,60,61,62,63,
395 64,97,98,99,100,101,102,103,
396 104,105,106,107,108,109,110,111,
397 112,113,114,115,116,117,118,119,
398 120,121,122,91,92,93,94,95,
399 96,97,98,99,100,101,102,103,
400 104,105,106,107,108,109,110,111,
401 112,113,114,115,116,117,118,119,
402 120,121,122,123,124,125,126,127,
403 128,129,130,131,132,133,134,135,
404 136,137,138,139,140,141,142,143,
405 144,145,146,147,148,149,150,151,
406 152,153,154,155,156,157,158,159,
407 160,161,162,163,164,165,166,167,
408 168,169,170,171,172,173,174,175,
409 176,177,178,179,180,181,182,183,
410 184,185,186,187,188,189,190,191,
411 224,225,226,227,228,229,230,231,
412 232,233,234,235,236,237,238,239,
413 240,241,242,243,244,245,246,215,
414 248,249,250,251,252,253,254,223,
415 224,225,226,227,228,229,230,231,
416 232,233,234,235,236,237,238,239,
417 240,241,242,243,244,245,246,247,
418 248,249,250,251,252,253,254,255,
420 8,9,10,11,12,13,14,15,
421 16,17,18,19,20,21,22,23,
422 24,25,26,27,28,29,30,31,
423 32,33,34,35,36,37,38,39,
424 40,41,42,43,44,45,46,47,
425 48,49,50,51,52,53,54,55,
426 56,57,58,59,60,61,62,63,
427 64,97,98,99,100,101,102,103,
428 104,105,106,107,108,109,110,111,
429 112,113,114,115,116,117,118,119,
430 120,121,122,91,92,93,94,95,
431 96,65,66,67,68,69,70,71,
432 72,73,74,75,76,77,78,79,
433 80,81,82,83,84,85,86,87,
434 88,89,90,123,124,125,126,127,
435 128,129,130,131,132,133,134,135,
436 136,137,138,139,140,141,142,143,
437 144,145,146,147,148,149,150,151,
438 152,153,154,155,156,157,158,159,
439 160,161,162,163,164,165,166,167,
440 168,169,170,171,172,173,174,175,
441 176,177,178,179,180,181,182,183,
442 184,185,186,187,188,189,190,191,
443 224,225,226,227,228,229,230,231,
444 232,233,234,235,236,237,238,239,
445 240,241,242,243,244,245,246,215,
446 248,249,250,251,252,253,254,223,
447 192,193,194,195,196,197,198,199,
448 200,201,202,203,204,205,206,207,
449 208,209,210,211,212,213,214,247,
450 216,217,218,219,220,221,222,255,
464 254,255,255,7,0,0,0,0,
466 255,255,127,127,0,0,0,0,
468 0,0,0,0,254,255,255,7,
470 0,0,0,128,255,255,127,255,
472 254,255,255,135,254,255,255,7,
474 255,255,127,255,255,255,127,255,
475 0,0,0,0,254,255,255,255,
476 255,255,255,255,255,255,255,127,
477 0,0,0,0,254,255,255,255,
478 255,255,255,255,255,255,255,255,
479 0,2,0,0,255,255,255,255,
480 255,255,255,255,255,255,255,127,
481 0,0,0,0,255,255,255,255,
482 255,255,255,255,255,255,255,255,
483 0,0,0,0,254,255,0,252,
485 0,0,0,0,254,255,255,255,
487 255,255,255,255,0,0,0,0,
489 255,255,255,255,0,0,0,0,
496 128,128,128,128,0,0,128,0,
497 28,28,28,28,28,28,28,28,
499 0,26,26,26,26,26,26,18,
500 18,18,18,18,18,18,18,18,
501 18,18,18,18,18,18,18,18,
502 18,18,18,128,128,0,128,16,
503 0,26,26,26,26,26,26,18,
504 18,18,18,18,18,18,18,18,
505 18,18,18,18,18,18,18,18,
506 18,18,18,128,128,0,0,0,
515 18,18,18,18,18,18,18,18,
516 18,18,18,18,18,18,18,18,
517 18,18,18,18,18,18,18,0,
518 18,18,18,18,18,18,18,18,
519 18,18,18,18,18,18,18,18,
520 18,18,18,18,18,18,18,18,
521 18,18,18,18,18,18,18,0,
522 18,18,18,18,18,18,18,18
528 #ifndef HAVE_STRERROR
529 /*************************************************
530 * Provide strerror() for non-ANSI libraries *
531 *************************************************/
533 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
534 in their libraries, but can provide the same facility by this simple
535 alternative function. */
538 extern char *sys_errlist[];
543 if (n < 0 || n >= sys_nerr) return "unknown error number";
544 return sys_errlist[n];
546 #endif /* HAVE_STRERROR */
551 /*************************************************
552 * Read or extend an input line *
553 *************************************************/
555 /* Input lines are read into buffer, but both patterns and data lines can be
556 continued over multiple input lines. In addition, if the buffer fills up, we
557 want to automatically expand it so as to be able to handle extremely large
558 lines that are needed for certain stress tests. When the input buffer is
559 expanded, the other two buffers must also be expanded likewise, and the
560 contents of pbuffer, which are a copy of the input for callouts, must be
561 preserved (for when expansion happens for a data line). This is not the most
562 optimal way of handling this, but hey, this is just a test program!
566 start where in buffer to start (this *must* be within buffer)
567 prompt for stdin or readline()
569 Returns: pointer to the start of new data
570 could be a copy of start, or could be moved
571 NULL if no data read and EOF reached
575 extend_inputline(FILE *f, uschar *start, const char *prompt)
577 uschar *here = start;
581 int rlen = (int)(buffer_size - (here - buffer));
587 /* If libreadline support is required, use readline() to read a line if the
588 input is a terminal. Note that readline() removes the trailing newline, so
589 we must put it back again, to be compatible with fgets(). */
591 #ifdef SUPPORT_LIBREADLINE
592 if (isatty(fileno(f)))
595 char *s = readline(prompt);
596 if (s == NULL) return (here == start)? NULL : start;
598 if (len > 0) add_history(s);
599 if (len > rlen - 1) len = rlen - 1;
600 memcpy(here, s, len);
608 /* Read the next line by normal means, prompting if the file is stdin. */
611 if (f == stdin) printf("%s", prompt);
612 if (fgets((char *)here, rlen, f) == NULL)
613 return (here == start)? NULL : start;
616 dlen = (int)strlen((char *)here);
617 if (dlen > 0 && here[dlen - 1] == '\n') return start;
623 int new_buffer_size = 2*buffer_size;
624 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
625 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
626 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
628 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
630 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
634 memcpy(new_buffer, buffer, buffer_size);
635 memcpy(new_pbuffer, pbuffer, buffer_size);
637 buffer_size = new_buffer_size;
639 start = new_buffer + (start - buffer);
640 here = new_buffer + (here - buffer);
647 dbuffer = new_dbuffer;
648 pbuffer = new_pbuffer;
652 return NULL; /* Control never gets here */
661 /*************************************************
662 * Read number from string *
663 *************************************************/
665 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
666 around with conditional compilation, just do the job by hand. It is only used
667 for unpicking arguments, so just keep it simple.
670 str string to be converted
671 endptr where to put the end pointer
673 Returns: the unsigned long
677 get_value(unsigned char *str, unsigned char **endptr)
680 while(*str != 0 && isspace(*str)) str++;
681 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
689 /*************************************************
690 * Convert UTF-8 string to value *
691 *************************************************/
693 /* This function takes one or more bytes that represent a UTF-8 character,
694 and returns the value of the character.
697 utf8bytes a pointer to the byte vector
698 vptr a pointer to an int to receive the value
700 Returns: > 0 => the number of bytes consumed
701 -6 to 0 => malformed UTF-8 character at offset = (-return)
707 utf82ord(unsigned char *utf8bytes, int *vptr)
709 int c = *utf8bytes++;
713 for (i = -1; i < 6; i++) /* i is number of additional bytes */
715 if ((d & 0x80) == 0) break;
719 if (i == -1) { *vptr = c; return 1; } /* ascii character */
720 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
722 /* i now has a value in the range 1-5 */
725 d = (c & utf8_table3[i]) << s;
727 for (j = 0; j < i; j++)
730 if ((c & 0xc0) != 0x80) return -(j+1);
732 d |= (c & 0x3f) << s;
735 /* Check that encoding was the correct unique one */
737 for (j = 0; j < utf8_table1_size; j++)
738 if (d <= utf8_table1[j]) break;
739 if (j != i) return -(i+1);
751 /*************************************************
752 * Convert character value to UTF-8 *
753 *************************************************/
755 /* This function takes an integer value in the range 0 - 0x7fffffff
756 and encodes it as a UTF-8 character in 1 to 6 bytes.
759 cvalue the character value
760 utf8bytes pointer to buffer for result - at least 6 bytes long
762 Returns: number of characters placed in the buffer
768 ord2utf8(int cvalue, uschar *utf8bytes)
771 for (i = 0; i < utf8_table1_size; i++)
772 if (cvalue <= utf8_table1[i]) break;
774 for (j = i; j > 0; j--)
776 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
779 *utf8bytes = utf8_table2[i] | cvalue;
787 /*************************************************
788 * Print character string *
789 *************************************************/
791 /* Character string printing function. Must handle UTF-8 strings in utf8
792 mode. Yields number of characters printed. If handed a NULL file, just counts
793 chars without printing. */
795 static int pchars(unsigned char *p, int length, FILE *f)
805 int rc = utf82ord(p, &c);
807 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
813 if (f != NULL) fprintf(f, "%c", c);
819 if (f != NULL) fprintf(f, "\\x{%02x}", c);
820 yield += (n <= 0x000000ff)? 2 :
821 (n <= 0x00000fff)? 3 :
822 (n <= 0x0000ffff)? 4 :
823 (n <= 0x000fffff)? 5 : 6;
830 /* Not UTF-8, or malformed UTF-8 */
835 if (f != NULL) fprintf(f, "%c", c);
840 if (f != NULL) fprintf(f, "\\x%02x", c);
850 /*************************************************
852 *************************************************/
854 /* Called from PCRE as a result of the (?C) item. We print out where we are in
855 the match. Yield zero unless more callouts than the fail count, or the callout
858 static int callout(pcre_callout_block *cb)
860 FILE *f = (first_callout | callout_extra)? outfile : NULL;
861 int i, pre_start, post_start, subject_length;
865 fprintf(f, "Callout %d: last capture = %d\n",
866 cb->callout_number, cb->capture_last);
868 for (i = 0; i < cb->capture_top * 2; i += 2)
870 if (cb->offset_vector[i] < 0)
871 fprintf(f, "%2d: <unset>\n", i/2);
874 fprintf(f, "%2d: ", i/2);
875 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
876 cb->offset_vector[i+1] - cb->offset_vector[i], f);
882 /* Re-print the subject in canonical form, the first time or if giving full
883 details. On subsequent calls in the same match, we use pchars just to find the
884 printed lengths of the substrings. */
886 if (f != NULL) fprintf(f, "--->");
888 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
889 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
890 cb->current_position - cb->start_match, f);
892 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
894 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
895 cb->subject_length - cb->current_position, f);
897 if (f != NULL) fprintf(f, "\n");
899 /* Always print appropriate indicators, with callout number if not already
900 shown. For automatic callouts, show the pattern offset. */
902 if (cb->callout_number == 255)
904 fprintf(outfile, "%+3d ", cb->pattern_position);
905 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
909 if (callout_extra) fprintf(outfile, " ");
910 else fprintf(outfile, "%3d ", cb->callout_number);
913 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
914 fprintf(outfile, "^");
918 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
919 fprintf(outfile, "^");
922 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
923 fprintf(outfile, " ");
925 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
926 pbuffer + cb->pattern_position);
928 fprintf(outfile, "\n");
931 if (cb->callout_data != NULL)
933 int callout_data = *((int *)(cb->callout_data));
934 if (callout_data != 0)
936 fprintf(outfile, "Callout data = %d\n", callout_data);
941 return (cb->callout_number != callout_fail_id)? 0 :
942 (++callout_count >= callout_fail_count)? 1 : 0;
946 /*************************************************
947 * Local malloc functions *
948 *************************************************/
950 /* Alternative malloc function, to test functionality and show the size of the
953 static void *new_malloc(size_t size)
955 void *block = malloc(size);
958 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
962 static void new_free(void *block)
965 fprintf(outfile, "free %p\n", block);
970 /* For recursion malloc/free, to test stacking calls */
972 static void *stack_malloc(size_t size)
974 void *block = malloc(size);
976 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
980 static void stack_free(void *block)
983 fprintf(outfile, "stack_free %p\n", block);
988 /*************************************************
989 * Call pcre_fullinfo() *
990 *************************************************/
992 /* Get one piece of information from the pcre_fullinfo() function */
994 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
997 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
998 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1003 /*************************************************
1004 * Byte flipping function *
1005 *************************************************/
1007 static unsigned long int
1008 byteflip(unsigned long int value, int n)
1010 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1011 return ((value & 0x000000ff) << 24) |
1012 ((value & 0x0000ff00) << 8) |
1013 ((value & 0x00ff0000) >> 8) |
1014 ((value & 0xff000000) >> 24);
1020 /*************************************************
1021 * Check match or recursion limit *
1022 *************************************************/
1025 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1026 int start_offset, int options, int *use_offsets, int use_size_offsets,
1027 int flag, unsigned long int *limit, int errnumber, const char *msg)
1034 extra->flags |= flag;
1040 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1041 use_offsets, use_size_offsets);
1043 if (count == errnumber)
1045 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1047 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1050 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1051 count == PCRE_ERROR_PARTIAL)
1055 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1058 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1060 mid = (min + mid)/2;
1062 else break; /* Some other error */
1065 extra->flags &= ~flag;
1071 /*************************************************
1072 * Case-independent strncmp() function *
1073 *************************************************/
1079 n number of characters to compare
1081 Returns: < 0, = 0, or > 0, according to the comparison
1085 strncmpic(uschar *s, uschar *t, int n)
1089 int c = tolower(*s++) - tolower(*t++);
1097 /*************************************************
1098 * Check newline indicator *
1099 *************************************************/
1101 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102 a message and return 0 if there is no match.
1105 p points after the leading '<'
1106 f file for error message
1108 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1112 check_newline(uschar *p, FILE *f)
1114 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1115 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1116 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1117 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1118 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1119 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1120 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1121 fprintf(f, "Unknown newline type at: <%s\n", p);
1127 /*************************************************
1129 *************************************************/
1134 printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
1135 printf("Input and output default to stdin and stdout.\n");
1136 #ifdef SUPPORT_LIBREADLINE
1137 printf("If input is a terminal, readline() is used to read from it.\n");
1139 printf("This version of pcretest is not linked with readline().\n");
1141 printf("\nOptions:\n");
1142 printf(" -b show compiled code (bytecode)\n");
1143 printf(" -C show PCRE compile-time options and exit\n");
1144 printf(" -d debug: show compiled code and information (-b and -i)\n");
1146 printf(" -dfa force DFA matching for all subjects\n");
1148 printf(" -help show usage information\n");
1149 printf(" -i show information about compiled patterns\n"
1150 " -M find MATCH_LIMIT minimum for each subject\n"
1151 " -m output memory used information\n"
1152 " -o <n> set size of offsets vector to <n>\n");
1153 #if !defined NOPOSIX
1154 printf(" -p use POSIX interface\n");
1156 printf(" -q quiet: do not output PCRE version number at start\n");
1157 printf(" -S <n> set stack size to <n> megabytes\n");
1158 printf(" -s output store (memory) used information\n"
1159 " -t time compilation and execution\n");
1160 printf(" -t <n> time compilation and execution, repeating <n> times\n");
1161 printf(" -tm time execution (matching) only\n");
1162 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
1167 /*************************************************
1169 *************************************************/
1171 /* Read lines from named file or stdin and write to named file or stdout; lines
1172 consist of a regular expression, in delimiters and optionally followed by
1173 options, followed by a set of test data, terminated by an empty line. */
1175 int main(int argc, char **argv)
1177 FILE *infile = stdin;
1179 int study_options = 0;
1180 int default_find_match_limit = FALSE;
1187 int size_offsets = 45;
1188 int size_offsets_max;
1189 int *offsets = NULL;
1190 #if !defined NOPOSIX
1195 int all_use_dfa = 0;
1199 /* These vectors store, end-to-end, a list of captured substring names. Assume
1200 that 1024 is plenty long enough for the few names we'll be testing. */
1202 uschar copynames[1024];
1203 uschar getnames[1024];
1205 uschar *copynamesptr;
1206 uschar *getnamesptr;
1208 /* Get buffers from malloc() so that Electric Fence will check their misuse
1209 when I am debugging. They grow automatically when very long lines are read. */
1211 buffer = (unsigned char *)malloc(buffer_size);
1212 dbuffer = (unsigned char *)malloc(buffer_size);
1213 pbuffer = (unsigned char *)malloc(buffer_size);
1215 /* The outfile variable is static so that new_malloc can use it. */
1219 /* The following _setmode() stuff is some Windows magic that tells its runtime
1220 library to translate CRLF into a single LF character. At least, that's what
1221 I've been told: never having used Windows I take this all on trust. Originally
1222 it set 0x8000, but then I was advised that _O_BINARY was better. */
1224 #if defined(_WIN32) || defined(WIN32)
1225 _setmode( _fileno( stdout ), _O_BINARY );
1230 while (argc > 1 && argv[op][0] == '-')
1232 unsigned char *endptr;
1234 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1236 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1237 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1238 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1239 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1240 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1242 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1244 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1245 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1251 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1253 int both = argv[op][2] == 0;
1255 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1262 else timeitm = LOOPREPEAT;
1263 if (both) timeit = timeitm;
1265 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1266 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1269 #if defined(_WIN32) || defined(WIN32)
1270 printf("PCRE: -S not supported on this OS\n");
1275 getrlimit(RLIMIT_STACK, &rlim);
1276 rlim.rlim_cur = stack_size * 1024 * 1024;
1277 rc = setrlimit(RLIMIT_STACK, &rlim);
1280 printf("PCRE: setrlimit() failed with error %d\n", rc);
1287 #if !defined NOPOSIX
1288 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1290 else if (strcmp(argv[op], "-C") == 0)
1293 unsigned long int lrc;
1294 printf("PCRE version %s\n", pcre_version());
1295 printf("Compiled with\n");
1296 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1297 printf(" %sUTF-8 support\n", rc? "" : "No ");
1298 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1299 printf(" %sUnicode properties support\n", rc? "" : "No ");
1300 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1301 /* Note that these values are always the ASCII values, even
1302 in EBCDIC environments. CR is 13 and NL is 10. */
1303 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1304 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1305 (rc == -2)? "ANYCRLF" :
1306 (rc == -1)? "ANY" : "???");
1307 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1308 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1309 "all Unicode newlines");
1310 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1311 printf(" Internal link size = %d\n", rc);
1312 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1313 printf(" POSIX malloc threshold = %d\n", rc);
1314 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1315 printf(" Default match limit = %ld\n", lrc);
1316 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1317 printf(" Default recursion depth limit = %ld\n", lrc);
1318 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1319 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1322 else if (strcmp(argv[op], "-help") == 0 ||
1323 strcmp(argv[op], "--help") == 0)
1330 printf("** Unknown or malformed option %s\n", argv[op]);
1339 /* Get the store for the offsets vector, and remember what it was */
1341 size_offsets_max = size_offsets;
1342 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1343 if (offsets == NULL)
1345 printf("** Failed to get %d bytes of memory for offsets vector\n",
1346 (int)(size_offsets_max * sizeof(int)));
1351 /* Sort out the input and output files */
1355 infile = fopen(argv[op], INPUT_MODE);
1358 printf("** Failed to open %s\n", argv[op]);
1366 outfile = fopen(argv[op+1], OUTPUT_MODE);
1367 if (outfile == NULL)
1369 printf("** Failed to open %s\n", argv[op+1]);
1375 /* Set alternative malloc function */
1377 pcre_malloc = new_malloc;
1378 pcre_free = new_free;
1379 pcre_stack_malloc = stack_malloc;
1380 pcre_stack_free = stack_free;
1382 /* Heading line unless quiet, then prompt for first regex if stdin */
1384 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1391 pcre_extra *extra = NULL;
1393 #if !defined NOPOSIX /* There are still compilers that require no indent */
1399 unsigned char *markptr;
1400 unsigned char *p, *pp, *ppp;
1401 unsigned char *to_file = NULL;
1402 const unsigned char *tables = NULL;
1403 unsigned long int true_size, true_study_size = 0;
1404 size_t size, regex_gotten_store;
1407 int do_debug = debug;
1410 int do_showinfo = showinfo;
1411 int do_showrest = 0;
1413 int erroroffset, len, delimiter, poffset;
1418 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1419 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1423 while (isspace(*p)) p++;
1424 if (*p == 0) continue;
1426 /* See if the pattern is to be loaded pre-compiled from a file. */
1428 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1430 unsigned long int magic, get_options;
1435 pp = p + (int)strlen((char *)p);
1436 while (isspace(pp[-1])) pp--;
1439 f = fopen((char *)p, "rb");
1442 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1446 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1449 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1451 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1453 re = (real_pcre *)new_malloc(true_size);
1454 regex_gotten_store = gotten_store;
1456 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1458 magic = ((real_pcre *)re)->magic_number;
1459 if (magic != MAGIC_NUMBER)
1461 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1467 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1473 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1474 do_flip? " (byte-inverted)" : "", p);
1476 /* Need to know if UTF-8 for printing data strings */
1478 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1479 use_utf8 = (get_options & PCRE_UTF8) != 0;
1481 /* Now see if there is any following study data */
1483 if (true_study_size != 0)
1485 pcre_study_data *psd;
1487 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1488 extra->flags = PCRE_EXTRA_STUDY_DATA;
1490 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1491 extra->study_data = psd;
1493 if (fread(psd, 1, true_study_size, f) != true_study_size)
1496 fprintf(outfile, "Failed to read data from %s\n", p);
1497 if (extra != NULL) new_free(extra);
1498 if (re != NULL) new_free(re);
1502 fprintf(outfile, "Study data loaded from %s\n", p);
1503 do_study = 1; /* To get the data output if requested */
1505 else fprintf(outfile, "No study data\n");
1511 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1512 the pattern; if it isn't complete, read more. */
1516 if (isalnum(delimiter) || delimiter == '\\')
1518 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1523 poffset = (int)(p - buffer);
1529 if (*pp == '\\' && pp[1] != 0) pp++;
1530 else if (*pp == delimiter) break;
1533 if (*pp != 0) break;
1534 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1536 fprintf(outfile, "** Unexpected EOF\n");
1540 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1543 /* The buffer may have moved while being extended; reset the start of data
1544 pointer to the correct relative point in the buffer. */
1546 p = buffer + poffset;
1548 /* If the first character after the delimiter is backslash, make
1549 the pattern end with backslash. This is purely to provide a way
1550 of testing for the error message when a pattern ends with backslash. */
1552 if (pp[1] == '\\') *pp++ = '\\';
1554 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1558 strcpy((char *)pbuffer, (char *)p);
1560 /* Look for options after final delimiter */
1564 log_store = showstore; /* default from command line */
1570 case 'f': options |= PCRE_FIRSTLINE; break;
1571 case 'g': do_g = 1; break;
1572 case 'i': options |= PCRE_CASELESS; break;
1573 case 'm': options |= PCRE_MULTILINE; break;
1574 case 's': options |= PCRE_DOTALL; break;
1575 case 'x': options |= PCRE_EXTENDED; break;
1577 case '+': do_showrest = 1; break;
1578 case 'A': options |= PCRE_ANCHORED; break;
1579 case 'B': do_debug = 1; break;
1580 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1581 case 'D': do_debug = do_showinfo = 1; break;
1582 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1583 case 'F': do_flip = 1; break;
1584 case 'G': do_G = 1; break;
1585 case 'I': do_showinfo = 1; break;
1586 case 'J': options |= PCRE_DUPNAMES; break;
1587 case 'K': do_mark = 1; break;
1588 case 'M': log_store = 1; break;
1589 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1591 #if !defined NOPOSIX
1592 case 'P': do_posix = 1; break;
1595 case 'S': do_study = 1; break;
1596 case 'U': options |= PCRE_UNGREEDY; break;
1597 case 'W': options |= PCRE_UCP; break;
1598 case 'X': options |= PCRE_EXTRA; break;
1599 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600 case 'Z': debug_lengths = 0; break;
1601 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1607 case '0': tables = tables0; break;
1608 case '1': tables = tables1; break;
1614 fprintf(outfile, "** Missing table number after /T\n");
1618 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1625 /* The '\r' test here is so that it works on Windows. */
1626 /* The zero (NUL) test is just in case this is an unterminated line. */
1627 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1629 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1631 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1635 tables = pcre_maketables();
1641 while (*pp != 0) pp++;
1642 while (isspace(pp[-1])) pp--;
1648 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1650 options |= PCRE_JAVASCRIPT_COMPAT;
1655 int x = check_newline(pp, outfile);
1656 if (x == 0) goto SKIP_DATA;
1658 while (*pp++ != '>');
1663 case '\r': /* So that it works in Windows */
1669 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1674 /* Handle compiling via the POSIX interface, which doesn't support the
1675 timing, showing, or debugging options, nor the ability to pass over
1676 local character tables. */
1678 #if !defined NOPOSIX
1679 if (posix || do_posix)
1684 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1685 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1686 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1692 rc = regcomp(&preg, (char *)p, cflags);
1694 /* Compilation failed; go back for another re, skipping to blank line
1695 if non-interactive. */
1699 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1700 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1705 /* Handle compiling via the native interface */
1708 #endif /* !defined NOPOSIX */
1711 unsigned long int get_options;
1717 clock_t start_time = clock();
1718 for (i = 0; i < timeit; i++)
1720 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1721 if (re != NULL) free(re);
1723 time_taken = clock() - start_time;
1724 fprintf(outfile, "Compile time %.4f milliseconds\n",
1725 (((double)time_taken * 1000.0) / (double)timeit) /
1726 (double)CLOCKS_PER_SEC);
1729 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1731 /* Compilation failed; go back for another re, skipping to blank line
1732 if non-interactive. */
1736 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1738 if (infile != stdin)
1742 if (extend_inputline(infile, buffer, NULL) == NULL)
1747 len = (int)strlen((char *)buffer);
1748 while (len > 0 && isspace(buffer[len-1])) len--;
1749 if (len == 0) break;
1751 fprintf(outfile, "\n");
1756 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757 within the regex; check for this so that we know how to process the data
1760 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1763 /* Print information if required. There are now two info-returning
1764 functions. The old one has a limited interface and returns only limited
1765 data. Check that it agrees with the newer one. */
1768 fprintf(outfile, "Memory allocation (code space): %d\n",
1769 (int)(gotten_store -
1771 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1773 /* Extract the size for possible writing before possibly flipping it,
1774 and remember the store that was got. */
1776 true_size = ((real_pcre *)re)->size;
1777 regex_gotten_store = gotten_store;
1779 /* If /S was present, study the regexp to generate additional info to
1780 help with the matching. */
1788 clock_t start_time = clock();
1789 for (i = 0; i < timeit; i++)
1790 extra = pcre_study(re, study_options, &error);
1791 time_taken = clock() - start_time;
1792 if (extra != NULL) free(extra);
1793 fprintf(outfile, " Study time %.4f milliseconds\n",
1794 (((double)time_taken * 1000.0) / (double)timeit) /
1795 (double)CLOCKS_PER_SEC);
1797 extra = pcre_study(re, study_options, &error);
1799 fprintf(outfile, "Failed to study: %s\n", error);
1800 else if (extra != NULL)
1801 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1804 /* If /K was present, we set up for handling MARK data. */
1810 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1813 extra->mark = &markptr;
1814 extra->flags |= PCRE_EXTRA_MARK;
1817 /* If the 'F' option was present, we flip the bytes of all the integer
1818 fields in the regex data block and the study block. This is to make it
1819 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1820 compiled on a different architecture. */
1824 real_pcre *rre = (real_pcre *)re;
1826 byteflip(rre->magic_number, sizeof(rre->magic_number));
1827 rre->size = byteflip(rre->size, sizeof(rre->size));
1828 rre->options = byteflip(rre->options, sizeof(rre->options));
1829 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1831 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1833 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1835 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1837 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1838 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1839 sizeof(rre->name_table_offset));
1840 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1841 sizeof(rre->name_entry_size));
1842 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1843 sizeof(rre->name_count));
1847 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1854 /* Extract information from the compiled data if required */
1860 fprintf(outfile, "------------------------------------------------------------------\n");
1861 pcre_printint(re, outfile, debug_lengths);
1864 /* We already have the options in get_options (see above) */
1868 unsigned long int all_options;
1869 #if !defined NOINFOCHECK
1870 int old_first_char, old_options, old_count;
1872 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1874 int nameentrysize, namecount;
1875 const uschar *nametable;
1877 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1880 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1881 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1882 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1883 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1884 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1885 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1886 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1887 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1889 #if !defined NOINFOCHECK
1890 old_count = pcre_info(re, &old_options, &old_first_char);
1891 if (count < 0) fprintf(outfile,
1892 "Error %d from pcre_info()\n", count);
1895 if (old_count != count) fprintf(outfile,
1896 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1899 if (old_first_char != first_char) fprintf(outfile,
1900 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1901 first_char, old_first_char);
1903 if (old_options != (int)get_options) fprintf(outfile,
1904 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1905 get_options, old_options);
1909 if (size != regex_gotten_store) fprintf(outfile,
1910 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1911 (int)size, (int)regex_gotten_store);
1913 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1915 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1919 fprintf(outfile, "Named capturing subpatterns:\n");
1920 while (namecount-- > 0)
1922 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1923 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1924 GET2(nametable, 0));
1925 nametable += nameentrysize;
1929 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1930 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1932 all_options = ((real_pcre *)re)->options;
1933 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1935 if (get_options == 0) fprintf(outfile, "No options\n");
1936 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1940 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1941 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1942 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1943 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1944 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1945 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1946 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1947 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1955 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1957 switch (get_options & PCRE_NEWLINE_BITS)
1959 case PCRE_NEWLINE_CR:
1960 fprintf(outfile, "Forced newline sequence: CR\n");
1963 case PCRE_NEWLINE_LF:
1964 fprintf(outfile, "Forced newline sequence: LF\n");
1967 case PCRE_NEWLINE_CRLF:
1968 fprintf(outfile, "Forced newline sequence: CRLF\n");
1971 case PCRE_NEWLINE_ANYCRLF:
1972 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1975 case PCRE_NEWLINE_ANY:
1976 fprintf(outfile, "Forced newline sequence: ANY\n");
1983 if (first_char == -1)
1985 fprintf(outfile, "First char at start or follows newline\n");
1987 else if (first_char < 0)
1989 fprintf(outfile, "No first char\n");
1993 int ch = first_char & 255;
1994 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1997 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1999 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2004 fprintf(outfile, "No need char\n");
2008 int ch = need_char & 255;
2009 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2012 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2014 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2017 /* Don't output study size; at present it is in any case a fixed
2018 value, but it varies, depending on the computer architecture, and
2019 so messes up the test suite. (And with the /F option, it might be
2025 fprintf(outfile, "Study returned NULL\n");
2028 uschar *start_bits = NULL;
2031 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2034 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035 if (start_bits == NULL)
2036 fprintf(outfile, "No set of starting bytes\n");
2041 fprintf(outfile, "Starting byte set: ");
2042 for (i = 0; i < 256; i++)
2044 if ((start_bits[i/8] & (1<<(i&7))) != 0)
2048 fprintf(outfile, "\n ");
2051 if (PRINTHEX(i) && i != ' ')
2053 fprintf(outfile, "%c ", i);
2058 fprintf(outfile, "\\x%02x ", i);
2063 fprintf(outfile, "\n");
2069 /* If the '>' option was present, we write out the regex to a file, and
2070 that is all. The first 8 bytes of the file are the regex length and then
2071 the study length, in big-endian order. */
2073 if (to_file != NULL)
2075 FILE *f = fopen((char *)to_file, "wb");
2078 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2083 sbuf[0] = (uschar)((true_size >> 24) & 255);
2084 sbuf[1] = (uschar)((true_size >> 16) & 255);
2085 sbuf[2] = (uschar)((true_size >> 8) & 255);
2086 sbuf[3] = (uschar)((true_size) & 255);
2088 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2089 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2090 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2091 sbuf[7] = (uschar)((true_study_size) & 255);
2093 if (fwrite(sbuf, 1, 8, f) < 8 ||
2094 fwrite(re, 1, true_size, f) < true_size)
2096 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2100 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2103 if (fwrite(extra->study_data, 1, true_study_size, f) <
2106 fprintf(outfile, "Write error on %s: %s\n", to_file,
2109 else fprintf(outfile, "Study data written to %s\n", to_file);
2117 if (extra != NULL) new_free(extra);
2120 new_free((void *)tables);
2121 setlocale(LC_CTYPE, "C");
2124 continue; /* With next regex */
2126 } /* End of non-POSIX compile */
2128 /* Read data lines and test them */
2134 int *use_offsets = offsets;
2135 int use_size_offsets = size_offsets;
2136 int callout_data = 0;
2137 int callout_data_set = 0;
2139 int copystrings = 0;
2140 int find_match_limit = default_find_match_limit;
2144 int start_offset = 0;
2145 int start_offset_sign = 1;
2154 copynamesptr = copynames;
2155 getnamesptr = getnames;
2157 pcre_callout = callout;
2161 callout_fail_count = 999999;
2162 callout_fail_id = -1;
2165 if (extra != NULL) extra->flags &=
2166 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2171 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2173 if (len > 0) /* Reached EOF without hitting a newline */
2175 fprintf(outfile, "\n");
2181 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2182 len = (int)strlen((char *)buffer);
2183 if (buffer[len-1] == '\n') break;
2186 while (len > 0 && isspace(buffer[len-1])) len--;
2188 if (len == 0) break;
2191 while (isspace(*p)) p++;
2194 while ((c = *p++) != 0)
2199 if (c == '\\') switch ((c = *p++))
2201 case 'a': c = 7; break;
2202 case 'b': c = '\b'; break;
2203 case 'e': c = 27; break;
2204 case 'f': c = '\f'; break;
2205 case 'n': c = '\n'; break;
2206 case 'r': c = '\r'; break;
2207 case 't': c = '\t'; break;
2208 case 'v': c = '\v'; break;
2210 case '0': case '1': case '2': case '3':
2211 case '4': case '5': case '6': case '7':
2213 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2214 c = c * 8 + *p++ - '0';
2217 if (use_utf8 && c > 255)
2219 unsigned char buff8[8];
2221 utn = ord2utf8(c, buff8);
2222 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2223 c = buff8[ii]; /* Last byte */
2230 /* Handle \x{..} specially - new Perl thing for utf8 */
2235 unsigned char *pt = p;
2237 while (isxdigit(*(++pt)))
2238 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2241 unsigned char buff8[8];
2245 utn = ord2utf8(c, buff8);
2246 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2247 c = buff8[ii]; /* Last byte */
2252 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2253 "UTF-8 mode is not enabled.\n"
2254 "** Truncation will probably give the wrong result.\n", c);
2259 /* Not correct form; fall through */
2266 while (i++ < 2 && isxdigit(*p))
2268 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2273 case 0: /* \ followed by EOF allows for an empty line */
2280 start_offset_sign = -1;
2283 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284 start_offset *= start_offset_sign;
2287 case 'A': /* Option setting */
2288 options |= PCRE_ANCHORED;
2292 options |= PCRE_NOTBOL;
2296 if (isdigit(*p)) /* Set copy string */
2298 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2299 copystrings |= 1 << n;
2301 else if (isalnum(*p))
2303 uschar *npp = copynamesptr;
2304 while (isalnum(*p)) *npp++ = *p++;
2307 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2309 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2319 pcre_callout = NULL;
2324 callout_fail_id = 0;
2327 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2328 callout_fail_count = 0;
2333 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2340 if (*(++p) == '-') { sign = -1; p++; }
2342 callout_data = callout_data * 10 + *p++ - '0';
2343 callout_data *= sign;
2344 callout_data_set = 1;
2350 #if !defined NOPOSIX
2351 if (posix || do_posix)
2352 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2361 options |= PCRE_DFA_SHORTEST;
2368 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2369 getstrings |= 1 << n;
2371 else if (isalnum(*p))
2373 uschar *npp = getnamesptr;
2374 while (isalnum(*p)) *npp++ = *p++;
2377 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2379 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2389 find_match_limit = 1;
2393 if ((options & PCRE_NOTEMPTY) != 0)
2394 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2396 options |= PCRE_NOTEMPTY;
2400 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2401 if (n > size_offsets_max)
2403 size_offsets_max = n;
2405 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2406 if (offsets == NULL)
2408 printf("** Failed to get %d bytes of memory for offsets vector\n",
2409 (int)(size_offsets_max * sizeof(int)));
2414 use_size_offsets = n;
2415 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2419 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2424 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2427 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2430 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2431 extra->match_limit_recursion = n;
2435 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2438 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2441 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2442 extra->match_limit = n;
2447 options |= PCRE_DFA_RESTART;
2456 options |= PCRE_NO_START_OPTIMIZE;
2460 options |= PCRE_NOTEOL;
2464 options |= PCRE_NO_UTF8_CHECK;
2469 int x = check_newline(p, outfile);
2470 if (x == 0) goto NEXT_DATA;
2472 while (*p++ != '>');
2479 len = (int)(q - dbuffer);
2481 /* Move the data to the end of the buffer so that a read over the end of
2482 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2483 we are using the POSIX interface, we must include the terminating zero. */
2485 #if !defined NOPOSIX
2486 if (posix || do_posix)
2488 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2489 bptr += buffer_size - len - 1;
2494 memmove(bptr + buffer_size - len, bptr, len);
2495 bptr += buffer_size - len;
2498 if ((all_use_dfa || use_dfa) && find_match_limit)
2500 printf("**Match limit not relevant for DFA matching: ignored\n");
2501 find_match_limit = 0;
2504 /* Handle matching via the POSIX interface, which does not
2505 support timing or playing with the match limit or callout data. */
2507 #if !defined NOPOSIX
2508 if (posix || do_posix)
2512 regmatch_t *pmatch = NULL;
2513 if (use_size_offsets > 0)
2514 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2515 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2516 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2517 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2519 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2523 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2524 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2526 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2529 fprintf(outfile, "Matched with REG_NOSUB\n");
2534 for (i = 0; i < (size_t)use_size_offsets; i++)
2536 if (pmatch[i].rm_so >= 0)
2538 fprintf(outfile, "%2d: ", (int)i);
2539 (void)pchars(dbuffer + pmatch[i].rm_so,
2540 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2541 fprintf(outfile, "\n");
2542 if (i == 0 && do_showrest)
2544 fprintf(outfile, " 0+ ");
2545 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2547 fprintf(outfile, "\n");
2555 /* Handle matching via the native interface - repeats for /g and /G */
2558 #endif /* !defined NOPOSIX */
2560 for (;; gmatched++) /* Loop for /g or /G */
2568 clock_t start_time = clock();
2571 if (all_use_dfa || use_dfa)
2573 int workspace[1000];
2574 for (i = 0; i < timeitm; i++)
2575 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576 options | g_notempty, use_offsets, use_size_offsets, workspace,
2577 sizeof(workspace)/sizeof(int));
2582 for (i = 0; i < timeitm; i++)
2583 count = pcre_exec(re, extra, (char *)bptr, len,
2584 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2586 time_taken = clock() - start_time;
2587 fprintf(outfile, "Execute time %.4f milliseconds\n",
2588 (((double)time_taken * 1000.0) / (double)timeitm) /
2589 (double)CLOCKS_PER_SEC);
2592 /* If find_match_limit is set, we want to do repeated matches with
2593 varying limits in order to find the minimum value for the match limit and
2594 for the recursion limit. */
2596 if (find_match_limit)
2600 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2604 (void)check_match_limit(re, extra, bptr, len, start_offset,
2605 options|g_notempty, use_offsets, use_size_offsets,
2606 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2607 PCRE_ERROR_MATCHLIMIT, "match()");
2609 count = check_match_limit(re, extra, bptr, len, start_offset,
2610 options|g_notempty, use_offsets, use_size_offsets,
2611 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2612 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2615 /* If callout_data is set, use the interface with additional data */
2617 else if (callout_data_set)
2621 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2624 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2625 extra->callout_data = &callout_data;
2626 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2627 options | g_notempty, use_offsets, use_size_offsets);
2628 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2631 /* The normal case is just to do the match once, with the default
2632 value of match_limit. */
2635 else if (all_use_dfa || use_dfa)
2637 int workspace[1000];
2638 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639 options | g_notempty, use_offsets, use_size_offsets, workspace,
2640 sizeof(workspace)/sizeof(int));
2643 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2644 count = use_size_offsets/2;
2651 count = pcre_exec(re, extra, (char *)bptr, len,
2652 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2655 fprintf(outfile, "Matched, but too many substrings\n");
2656 count = use_size_offsets/3;
2667 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2669 maxcount = use_size_offsets/3;
2671 /* This is a check against a lunatic return value. */
2673 if (count > maxcount)
2676 "** PCRE error: returned count %d is too big for offset size %d\n",
2677 count, use_size_offsets);
2678 count = use_size_offsets/3;
2681 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2682 do_g = do_G = FALSE; /* Break g/G loop */
2686 for (i = 0; i < count * 2; i += 2)
2688 if (use_offsets[i] < 0)
2689 fprintf(outfile, "%2d: <unset>\n", i/2);
2692 fprintf(outfile, "%2d: ", i/2);
2693 (void)pchars(bptr + use_offsets[i],
2694 use_offsets[i+1] - use_offsets[i], outfile);
2695 fprintf(outfile, "\n");
2700 fprintf(outfile, " 0+ ");
2701 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2703 fprintf(outfile, "\n");
2709 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2711 for (i = 0; i < 32; i++)
2713 if ((copystrings & (1 << i)) != 0)
2715 char copybuffer[256];
2716 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2717 i, copybuffer, sizeof(copybuffer));
2719 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2721 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2725 for (copynamesptr = copynames;
2727 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2729 char copybuffer[256];
2730 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2731 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2733 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2735 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2738 for (i = 0; i < 32; i++)
2740 if ((getstrings & (1 << i)) != 0)
2742 const char *substring;
2743 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2746 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2749 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2750 pcre_free_substring(substring);
2755 for (getnamesptr = getnames;
2757 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2759 const char *substring;
2760 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2761 count, (char *)getnamesptr, &substring);
2763 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2766 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2767 pcre_free_substring(substring);
2773 const char **stringlist;
2774 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2777 fprintf(outfile, "get substring list failed %d\n", rc);
2780 for (i = 0; i < count; i++)
2781 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2782 if (stringlist[i] != NULL)
2783 fprintf(outfile, "string list not terminated by NULL\n");
2784 /* free((void *)stringlist); */
2785 pcre_free_substring_list(stringlist);
2790 /* There was a partial match */
2792 else if (count == PCRE_ERROR_PARTIAL)
2794 if (markptr == NULL) fprintf(outfile, "Partial match");
2795 else fprintf(outfile, "Partial match, mark=%s", markptr);
2796 if (use_size_offsets > 1)
2798 fprintf(outfile, ": ");
2799 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2802 fprintf(outfile, "\n");
2803 break; /* Out of the /g loop */
2806 /* Failed to match. If this is a /g or /G loop and we previously set
2807 g_notempty after a null match, this is not necessarily the end. We want
2808 to advance the start offset, and continue. We won't be at the end of the
2809 string - that was checked before setting g_notempty.
2811 Complication arises in the case when the newline convention is "any",
2812 "crlf", or "anycrlf". If the previous match was at the end of a line
2813 terminated by CRLF, an advance of one character just passes the \r,
2814 whereas we should prefer the longer newline sequence, as does the code in
2815 pcre_exec(). Fudge the offset value to achieve this. We check for a
2816 newline setting in the pattern; if none was set, use pcre_config() to
2819 Otherwise, in the case of UTF-8 matching, the advance must be one
2820 character, not one byte. */
2824 if (g_notempty != 0)
2827 unsigned int obits = ((real_pcre *)re)->options;
2828 use_offsets[0] = start_offset;
2829 if ((obits & PCRE_NEWLINE_BITS) == 0)
2832 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2833 /* Note that these values are always the ASCII ones, even in
2834 EBCDIC environments. CR = 13, NL = 10. */
2835 obits = (d == 13)? PCRE_NEWLINE_CR :
2836 (d == 10)? PCRE_NEWLINE_LF :
2837 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2838 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2839 (d == -1)? PCRE_NEWLINE_ANY : 0;
2841 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2845 start_offset < len - 1 &&
2846 bptr[start_offset] == '\r' &&
2847 bptr[start_offset+1] == '\n')
2851 while (start_offset + onechar < len)
2853 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2857 use_offsets[1] = start_offset + onechar;
2861 if (count == PCRE_ERROR_NOMATCH)
2865 if (markptr == NULL) fprintf(outfile, "No match\n");
2866 else fprintf(outfile, "No match, mark = %s\n", markptr);
2869 else fprintf(outfile, "Error %d\n", count);
2870 break; /* Out of the /g loop */
2874 /* If not /g or /G we are done */
2876 if (!do_g && !do_G) break;
2878 /* If we have matched an empty string, first check to see if we are at
2879 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880 Perl's /g option does. This turns out to be rather cunning. First we set
2881 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882 same point. If this fails (picked up above) we advance to the next
2887 if (use_offsets[0] == use_offsets[1])
2889 if (use_offsets[0] == len) break;
2890 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2893 /* For /g, update the start offset, leaving the rest alone */
2895 if (do_g) start_offset = use_offsets[1];
2897 /* For /G, update the pointer and length */
2901 bptr += use_offsets[1];
2902 len -= use_offsets[1];
2904 } /* End of loop for /g and /G */
2906 NEXT_DATA: continue;
2907 } /* End of loop for data lines */
2911 #if !defined NOPOSIX
2912 if (posix || do_posix) regfree(&preg);
2915 if (re != NULL) new_free(re);
2916 if (extra != NULL) new_free(extra);
2919 new_free((void *)tables);
2920 setlocale(LC_CTYPE, "C");
2925 if (infile == stdin) fprintf(outfile, "\n");
2929 if (infile != NULL && infile != stdin) fclose(infile);
2930 if (outfile != NULL && outfile != stdout) fclose(outfile);
2940 /* End of pcretest.c */