1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
40 #include "../config.h"
51 #ifdef SUPPORT_LIBREADLINE
53 #include <readline/readline.h>
54 #include <readline/history.h>
58 /* A number of things vary for Windows builds. Originally, pcretest opened its
59 input and output without "b"; then I was told that "b" was needed in some
60 environments, so it was added for release 5.0 to both the input and output. (It
61 makes no difference on Unix-like systems.) Later I was told that it is wrong
62 for the input on Windows. I've now abstracted the modes into two macros that
63 are set here, to make it easier to fiddle with them, and removed "b" from the
64 input mode under Windows. */
66 #if defined(_WIN32) || defined(WIN32)
67 #include <io.h> /* For _setmode() */
68 #include <fcntl.h> /* For _O_BINARY */
69 #define INPUT_MODE "r"
70 #define OUTPUT_MODE "wb"
73 #include <sys/time.h> /* These two includes are needed */
74 #include <sys/resource.h> /* for setrlimit(). */
75 #define INPUT_MODE "rb"
76 #define OUTPUT_MODE "wb"
80 /* We have to include pcre_internal.h because we need the internal info for
81 displaying the results of pcre_study() and we also need to know about the
82 internal macros, structures, and other internal data values; pcretest has
83 "inside information" compared to a program that strictly follows the PCRE API.
85 Although pcre_internal.h does itself include pcre.h, we explicitly include it
86 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87 appropriately for an application, not for building PCRE. */
90 #include "src/pcre_internal.h"
92 /* We need access to the data tables that PCRE uses. So as not to have to keep
93 two copies, we include the source file here, changing the names of the external
94 symbols to prevent clashes. */
96 #define _pcre_utf8_table1 utf8_table1
97 #define _pcre_utf8_table1_size utf8_table1_size
98 #define _pcre_utf8_table2 utf8_table2
99 #define _pcre_utf8_table3 utf8_table3
100 #define _pcre_utf8_table4 utf8_table4
101 #define _pcre_utt utt
102 #define _pcre_utt_size utt_size
103 #define _pcre_utt_names utt_names
104 #define _pcre_OP_lengths OP_lengths
106 #include "src/pcre_tables.c"
108 /* We also need the pcre_printint() function for printing out compiled
109 patterns. This function is in a separate file so that it can be included in
110 pcre_compile.c when that module is compiled with debugging enabled.
112 The definition of the macro PRINTABLE, which determines whether to print an
113 output character as-is or as a hex value when showing compiled patterns, is
114 contained in this file. We use it here also, in cases when the locale has not
115 been explicitly changed, so as to get consistent output from systems that
116 differ in their output from isprint() even in the "C" locale. */
118 #include "../pcre_printint.src"
120 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
123 /* It is possible to compile this test program without including support for
124 testing the POSIX interface, though this is not available via the standard
128 #include "src/pcreposix.h"
131 /* It is also possible, for the benefit of the version currently imported into
132 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133 interface to the DFA matcher (NODFA), and without the doublecheck of the old
134 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135 UTF8 support if PCRE is built without it. */
144 /* Other parameters */
146 #ifndef CLOCKS_PER_SEC
148 #define CLOCKS_PER_SEC CLK_TCK
150 #define CLOCKS_PER_SEC 100
154 /* This is the default loop count for timing. */
156 #define LOOPREPEAT 500000
158 /* Static variables */
/* Stream that the helper functions in this file (callout(), new_malloc(),
new_info(), ...) write their reports to. */
160 static FILE *outfile;
/* Per-pattern flag: main() resets it from the command-line default and the 'M'
pattern modifier sets it. */
161 static int log_store = 0;
/* Incremented by callout() each time the callout number equals
callout_fail_id. */
162 static int callout_count;
/* When non-zero, callout() writes its detailed output to outfile on every
call and leaves the callout number column blank. */
163 static int callout_extra;
/* callout() yields 1 once callout_count reaches this value ... */
164 static int callout_fail_count;
/* ... but only for callouts whose number equals this id. */
165 static int callout_fail_id;
/* Cleared by the 'Z' pattern modifier in main(). */
166 static int debug_lengths;
/* When non-zero, callout() re-prints the subject to outfile. */
167 static int first_callout;
/* Selects isprint() rather than PRINTABLE() in the PRINTHEX macro. */
168 static int locale_set = 0;
/* NOTE(review): not referenced on any line visible in this listing; presumably
gates the trace output of new_malloc()/new_free() - confirm. */
169 static int show_malloc;
/* Read by main() straight after new_malloc() and after pcre_compile() to learn
how much store was obtained; presumably written by new_malloc() - confirm. */
171 static size_t gotten_store;
173 /* The buffers grow automatically if very long input lines are encountered. */
/* Current size in bytes of each of the three buffers below; doubled by
extend_inputline(). */
175 static int buffer_size = 50000;
/* Input line buffer filled by extend_inputline(). */
176 static uschar *buffer = NULL;
/* Reallocated together with buffer; the visible growth code does not copy its
old contents across. */
177 static uschar *dbuffer = NULL;
/* Copy of the current pattern (main() strcpy's into it); callout() prints the
next pattern item from it. Contents are preserved when the buffers grow. */
178 static uschar *pbuffer = NULL;
182 /*************************************************
183 * Read or extend an input line *
184 *************************************************/
186 /* Input lines are read into buffer, but both patterns and data lines can be
187 continued over multiple input lines. In addition, if the buffer fills up, we
188 want to automatically expand it so as to be able to handle extremely large
189 lines that are needed for certain stress tests. When the input buffer is
190 expanded, the other two buffers must also be expanded likewise, and the
191 contents of pbuffer, which are a copy of the input for callouts, must be
192 preserved (for when expansion happens for a data line). This is not the most
193 optimal way of handling this, but hey, this is just a test program!
197 start where in buffer to start (this *must* be within buffer)
198 prompt for stdin or readline()
200 Returns: pointer to the start of new data
201 could be a copy of start, or could be moved
202 NULL if no data read and EOF reached
/* Read one more line of input into buffer at the current fill point. The
visible growth path doubles all three buffers. Several short lines of this
function (return type, braces, loop header, some statements) are not visible in
this listing; the comments added here describe only the visible code. */
206 extend_inputline(FILE *f, uschar *start, const char *prompt)
208 uschar *here = start;
/* Space left between the fill point and the end of buffer. */
212 int rlen = buffer_size - (here - buffer);
218 /* If libreadline support is required, use readline() to read a line if the
219 input is a terminal. Note that readline() removes the trailing newline, so
220 we must put it back again, to be compatible with fgets(). */
222 #ifdef SUPPORT_LIBREADLINE
223 if (isatty(fileno(f)))
226 char *s = readline(prompt);
/* EOF: report "no data" only if nothing was read on earlier iterations. */
227 if (s == NULL) return (here == start)? NULL : start;
229 if (len > 0) add_history(s);
/* NOTE(review): this clamp leaves room for len bytes plus ONE more. If the
lines not visible here store both the restored '\n' and a terminating NUL, the
limit should be rlen - 2 to stay inside buffer - confirm. */
230 if (len > rlen - 1) len = rlen - 1;
231 memcpy(here, s, len);
239 /* Read the next line by normal means, prompting if the file is stdin. */
/* The prompt is data, not a format: print it through "%s" so that a '%' in it
can never be interpreted as a conversion specification. */
242 if (f == stdin) printf("%s", prompt);
243 if (fgets((char *)here, rlen, f) == NULL)
244 return (here == start)? NULL : start;
/* A line ending in '\n' is complete; hand back the start of the new data. */
247 dlen = (int)strlen((char *)here);
248 if (dlen > 0 && here[dlen - 1] == '\n') return start;
/* Growth path: allocate three new buffers of twice the current size. */
254 int new_buffer_size = 2*buffer_size;
255 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
259 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
261 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
/* Carry over the input read so far and the saved pattern copy; the old
contents of dbuffer are not copied. */
265 memcpy(new_buffer, buffer, buffer_size);
266 memcpy(new_pbuffer, pbuffer, buffer_size);
268 buffer_size = new_buffer_size;
/* Rebase both cursors onto the new block, using the old buffer address. */
270 start = new_buffer + (start - buffer);
271 here = new_buffer + (here - buffer);
278 dbuffer = new_dbuffer;
279 pbuffer = new_pbuffer;
283 return NULL; /* Control never gets here */
292 /*************************************************
293 * Read number from string *
294 *************************************************/
296 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
297 around with conditional compilation, just do the job by hand. It is only used
298 for unpicking arguments, so just keep it simple.
301 str string to be converted
302 endptr where to put the end pointer
304 Returns: the unsigned long
/* Parse a run of decimal digits at str (no sign handling). The return type,
the declaration of result and the code that uses endptr are on lines not
visible in this listing. */
308 get_value(unsigned char *str, unsigned char **endptr)
/* Skip leading white space, stopping at the terminating zero. */
311 while(*str != 0 && isspace(*str)) str++;
/* Accumulate consecutive digits. There is no overflow check, so an over-long
digit string is not detected here. */
312 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
320 /*************************************************
321 * Convert UTF-8 string to value *
322 *************************************************/
324 /* This function takes one or more bytes that represent a UTF-8 character,
325 and returns the value of the character.
328 utf8bytes a pointer to the byte vector
329 vptr a pointer to an int to receive the value
331 Returns: > 0 => the number of bytes consumed
332 -6 to 0 => malformed UTF-8 character at offset = (-return)
/* Decode one UTF-8 character starting at utf8bytes and store its value through
vptr. Declarations of i, j, d and s, and some statements, are on lines not
visible in this listing. */
338 utf82ord(unsigned char *utf8bytes, int *vptr)
/* Fetch the lead byte and step past it. */
340 int c = *utf8bytes++;
/* Each pass tests the top bit of d; presumably d starts as the lead byte and
is shifted left once per pass on a line not visible here - confirm. */
344 for (i = -1; i < 6; i++) /* i is number of additional bytes */
346 if ((d & 0x80) == 0) break;
350 if (i == -1) { *vptr = c; return 1; } /* ascii character */
351 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
353 /* i now has a value in the range 1-5 */
/* Seed the result with the payload bits of the lead byte, selected by the
utf8_table3 mask for this length. */
356 d = (c & utf8_table3[i]) << s;
358 for (j = 0; j < i; j++)
/* Every following byte must have the form 10xxxxxx; otherwise yield the
negated 1-based index of the offending byte. */
361 if ((c & 0xc0) != 0x80) return -(j+1);
/* Merge the six payload bits of this byte. */
363 d |= (c & 0x3f) << s;
366 /* Check that encoding was the correct unique one */
/* Find the first utf8_table1 entry that is >= d; its index must equal the
number of additional bytes actually used, or the encoding is rejected. */
368 for (j = 0; j < utf8_table1_size; j++)
369 if (d <= utf8_table1[j]) break;
370 if (j != i) return -(i+1);
382 /*************************************************
383 * Convert character value to UTF-8 *
384 *************************************************/
386 /* This function takes an integer value in the range 0 - 0x7fffffff
387 and encodes it as a UTF-8 character in 1 to 6 bytes.
390 cvalue the character value
391 utf8bytes pointer to buffer for result - at least 6 bytes long
393 Returns: number of characters placed in the buffer
/* Encode cvalue as UTF-8 into utf8bytes. Declarations, the return statement
and some other statements are on lines not visible in this listing. */
399 ord2utf8(int cvalue, uschar *utf8bytes)
/* i becomes the index of the first utf8_table1 entry that is >= cvalue; the
loops below use it as the number of continuation bytes. */
402 for (i = 0; i < utf8_table1_size; i++)
403 if (cvalue <= utf8_table1[i]) break;
/* Continuation bytes are written back to front (the pointer is decremented).
Presumably utf8bytes is first advanced by i and cvalue is shifted right by six
bits per pass on lines not visible here - confirm. */
405 for (j = i; j > 0; j--)
407 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
/* Lead byte: the utf8_table2 marker for this length combined with what is left
of cvalue. */
410 *utf8bytes = utf8_table2[i] | cvalue;
418 /*************************************************
419 * Print character string *
420 *************************************************/
422 /* Character string printing function. Must handle UTF-8 strings in utf8
423 mode. Yields number of characters printed. If handed a NULL file, just counts
424 chars without printing. */
/* Print (or, when f is NULL, only measure) the string at p. The loop header,
the declarations of c, n and yield, and the branch conditions are on lines not
visible in this listing. */
426 static int pchars(unsigned char *p, int length, FILE *f)
/* Try to decode a UTF-8 character at p; rc is the byte count on success. */
436 int rc = utf82ord(p, &c);
438 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
/* Character written as itself. */
444 if (f != NULL) fprintf(f, "%c", c);
/* Character written as a \x{...} escape. */
450 if (f != NULL) fprintf(f, "\\x{%02x}", c);
/* NOTE(review): the width added to yield is chosen from n, whereas the value
printed above is c; n is defined on a line not visible here - confirm that the
count matches what is printed. */
451 yield += (n <= 0x000000ff)? 2 :
452 (n <= 0x00000fff)? 3 :
453 (n <= 0x0000ffff)? 4 :
454 (n <= 0x000fffff)? 5 : 6;
461 /* Not UTF-8, or malformed UTF-8 */
/* Single byte written as itself ... */
466 if (f != NULL) fprintf(f, "%c", c);
/* ... or as a two-digit \x escape. */
471 if (f != NULL) fprintf(f, "\\x%02x", c);
481 /*************************************************
483 *************************************************/
485 /* Called from PCRE as a result of the (?C) item. We print out where we are in
486 the match. Yield zero unless more callouts than the fail count, or the callout
/* Callout handler: shows where the match has reached and decides whether the
match should continue. Braces, else lines and some guards are on lines not
visible in this listing. */
489 static int callout(pcre_callout_block *cb)
/* f is the detail stream: outfile on the first callout or when extra output
was requested, otherwise NULL so that pchars() only counts. */
491 FILE *f = (first_callout | callout_extra)? outfile : NULL;
492 int i, pre_start, post_start, subject_length;
/* NOTE(review): f can be NULL (see its initializer). The fprintf(f, ...) calls
in the capture dump below are only safe if a guard such as a test on
callout_extra sits on a line not visible here - confirm. */
496 fprintf(f, "Callout %d: last capture = %d\n",
497 cb->callout_number, cb->capture_last);
/* Walk the offset pairs of the captures set so far. */
499 for (i = 0; i < cb->capture_top * 2; i += 2)
501 if (cb->offset_vector[i] < 0)
502 fprintf(f, "%2d: <unset>\n", i/2);
505 fprintf(f, "%2d: ", i/2);
506 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
507 cb->offset_vector[i+1] - cb->offset_vector[i], f);
513 /* Re-print the subject in canonical form, the first time or if giving full
514 details. On subsequent calls in the same match, we use pchars just to find the
515 printed lengths of the substrings. */
517 if (f != NULL) fprintf(f, "--->");
/* Printed width of the text before the match start, and of the text matched
so far. */
519 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
520 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
521 cb->current_position - cb->start_match, f);
/* Printed width of the whole subject (count only, nothing is written). */
523 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
/* Remainder of the subject after the current position. */
525 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
526 cb->subject_length - cb->current_position, f);
528 if (f != NULL) fprintf(f, "\n");
530 /* Always print appropriate indicators, with callout number if not already
531 shown. For automatic callouts, show the pattern offset. */
533 if (cb->callout_number == 255)
535 fprintf(outfile, "%+3d ", cb->pattern_position);
536 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
540 if (callout_extra) fprintf(outfile, " ");
541 else fprintf(outfile, "%3d ", cb->callout_number);
/* Pad to the match start and mark it with a caret. */
544 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
545 fprintf(outfile, "^");
/* Pad to the current position and mark it with a second caret. */
549 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
550 fprintf(outfile, "^");
/* Pad past the end of the subject, then show the next pattern item taken from
the saved pattern copy (one character when the item length is zero). */
553 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
554 fprintf(outfile, " ");
556 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
557 pbuffer + cb->pattern_position);
559 fprintf(outfile, "\n");
/* Report the caller-supplied callout data when it is present and non-zero. */
562 if (cb->callout_data != NULL)
564 int callout_data = *((int *)(cb->callout_data));
565 if (callout_data != 0)
567 fprintf(outfile, "Callout data = %d\n", callout_data);
/* Yield 0 for any callout other than callout_fail_id; for that one, count the
call and yield 1 once the count reaches callout_fail_count. */
572 return (cb->callout_number != callout_fail_id)? 0 :
573 (++callout_count >= callout_fail_count)? 1 : 0;
577 /*************************************************
578 * Local malloc functions *
579 *************************************************/
581 /* Alternative malloc function, to test functionality and show the size of the
/* malloc() wrapper that reports each allocation on outfile. main() installs
it as pcre_malloc. Braces, the return and any guard on the trace are on lines
not visible in this listing. */
584 static void *new_malloc(size_t size)
586 void *block = malloc(size);
/* The size is narrowed to int for display only. */
589 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
/* Counterpart of new_malloc(): reports the block address on outfile. main()
installs it as pcre_free. The release of the block itself is presumably on a
line not visible in this listing - confirm. */
593 static void new_free(void *block)
596 fprintf(outfile, "free %p\n", block);
601 /* For recursion malloc/free, to test stacking calls */
/* malloc() wrapper that main() installs as pcre_stack_malloc; reports the
request on outfile. Braces, the return and any guard on the trace are on lines
not visible in this listing. */
603 static void *stack_malloc(size_t size)
605 void *block = malloc(size);
/* The size is narrowed to int for display only. */
607 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
/* Counterpart of stack_malloc(), installed by main() as pcre_stack_free;
reports the block address on outfile. The release of the block itself is
presumably on a line not visible in this listing - confirm. */
611 static void stack_free(void *block)
614 fprintf(outfile, "stack_free %p\n", block);
619 /*************************************************
620 * Call pcre_fullinfo() *
621 *************************************************/
623 /* Get one piece of information from the pcre_fullinfo() function */
/* Fetch one item with pcre_fullinfo(), storing it through ptr, and report a
negative return code on outfile. The declaration of rc is on a line not visible
in this listing.
  re      compiled pattern
  study   study data, or NULL
  option  the PCRE_INFO_xxx item requested
  ptr     where pcre_fullinfo() is to place the result */
625 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
628 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
629 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
634 /*************************************************
635 * Byte flipping function *
636 *************************************************/
/* Reverse the byte order of an integer field.
  value   the field's value
  n       the field's size in bytes: 2 selects the 16-bit swap, any other
          value takes the 32-bit path
Returns:  the byte-swapped value */
638 static unsigned long int
639 byteflip(unsigned long int value, int n)
641 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
/* Only the low 32 bits take part; each term isolates one byte and moves it to
its mirrored position, so any higher bits of value are dropped. */
642 return ((value & 0x000000ff) << 24) |
643 ((value & 0x0000ff00) << 8) |
644 ((value & 0x00ff0000) >> 8) |
645 ((value & 0xff000000) >> 24);
651 /*************************************************
652 * Check match or recursion limit *
653 *************************************************/
/* Run pcre_exec() repeatedly with different values of one limit and report the
minimum value that works. The return type, the declarations of count, min, mid
and max, the loop header and several assignments are on lines not visible in
this listing.
  flag       the extra->flags bit that enables the limit being tested
  limit      presumably points at the limit field inside extra - confirm
  errnumber  the pcre_exec() result that means "limit too small"
  msg        name of the limit, used in the report */
656 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657 int start_offset, int options, int *use_offsets, int use_size_offsets,
658 int flag, unsigned long int *limit, int errnumber, const char *msg)
/* Enable the limit for the duration of the search. */
665 extra->flags |= flag;
671 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672 use_offsets, use_size_offsets);
/* Limit hit: try a larger value. With no upper bound yet (max <= 0) the trial
value doubles; otherwise it moves to the midpoint, or straight to max when the
two are adjacent. */
674 if (count == errnumber)
676 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
678 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
/* The run completed (match, no match or partial match), so this value is large
enough. */
681 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682 count == PCRE_ERROR_PARTIAL)
686 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
689 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
693 else break; /* Some other error */
/* Switch the limit off again before returning. */
696 extra->flags &= ~flag;
702 /*************************************************
703 * Case-independent strncmp() function *
704 *************************************************/
710 n number of characters to compare
712 Returns: < 0, = 0, or > 0, according to the comparison
/* Compare s and t ignoring case, using at most n characters. The return type,
the loop around the comparison and the return statements are on lines not
visible in this listing. */
716 strncmpic(uschar *s, uschar *t, int n)
/* Difference of the lower-cased characters; both pointers advance. */
720 int c = tolower(*s++) - tolower(*t++);
728 /*************************************************
729 * Check newline indicator *
730 *************************************************/
732 /* This is used both at compile and run-time to check for <xxx> escapes, where
733 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
737 p points after the leading '<'
738 f file for error message
740 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
/* Map the text after a '<' to a PCRE option bit, ignoring case. As well as the
newline names, the two bsr_ names are accepted and yield PCRE_BSR_xxx flags.
The return type and the final return are on lines not visible in this
listing. */
744 check_newline(uschar *p, FILE *f)
/* Each test includes the closing '>' in the comparison, so "cr>" cannot match
the start of "crlf>". */
746 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
/* Nothing matched: report the unrecognized text on f. */
753 fprintf(f, "Unknown newline type at: <%s\n", p);
759 /*************************************************
761 *************************************************/
/* Usage text written to stdout. The enclosing function header, its braces and
the #else/#endif lines of the conditional sections are not visible in this
listing. */
766 printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
767 printf("Input and output default to stdin and stdout.\n");
/* Exactly one of the next two messages is presumably compiled in, depending on
SUPPORT_LIBREADLINE (the #else is not visible) - confirm. */
768 #ifdef SUPPORT_LIBREADLINE
769 printf("If input is a terminal, readline() is used to read from it.\n");
771 printf("This version of pcretest is not linked with readline().\n");
773 printf("\nOptions:\n");
774 printf(" -b show compiled code (bytecode)\n");
775 printf(" -C show PCRE compile-time options and exit\n");
776 printf(" -d debug: show compiled code and information (-b and -i)\n");
/* NOTE(review): the -dfa and -p lines are presumably wrapped in
conditional-compilation guards on lines not visible here - confirm. */
778 printf(" -dfa force DFA matching for all subjects\n");
780 printf(" -help show usage information\n");
781 printf(" -i show information about compiled patterns\n"
782 " -m output memory used information\n"
783 " -o <n> set size of offsets vector to <n>\n");
785 printf(" -p use POSIX interface\n");
787 printf(" -q quiet: do not output PCRE version number at start\n");
788 printf(" -S <n> set stack size to <n> megabytes\n");
789 printf(" -s output store (memory) used information\n"
790 " -t time compilation and execution\n");
791 printf(" -t <n> time compilation and execution, repeating <n> times\n");
792 printf(" -tm time execution (matching) only\n");
793 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
798 /*************************************************
800 *************************************************/
802 /* Read lines from named file or stdin and write to named file or stdout; lines
803 consist of a regular expression, in delimiters and optionally followed by
804 options, followed by a set of test data, terminated by an empty line. */
806 int main(int argc, char **argv)
808 FILE *infile = stdin;
810 int study_options = 0;
817 int size_offsets = 45;
818 int size_offsets_max;
829 /* These vectors store, end-to-end, a list of captured substring names. Assume
830 that 1024 is plenty long enough for the few names we'll be testing. */
832 uschar copynames[1024];
833 uschar getnames[1024];
835 uschar *copynamesptr;
838 /* Get buffers from malloc() so that Electric Fence will check their misuse
839 when I am debugging. They grow automatically when very long lines are read. */
841 buffer = (unsigned char *)malloc(buffer_size);
842 dbuffer = (unsigned char *)malloc(buffer_size);
843 pbuffer = (unsigned char *)malloc(buffer_size);
845 /* The outfile variable is static so that new_malloc can use it. */
849 /* The following _setmode() stuff is some Windows magic that tells its runtime
850 library to translate CRLF into a single LF character. At least, that's what
851 I've been told: never having used Windows I take this all on trust. Originally
852 it set 0x8000, but then I was advised that _O_BINARY was better. */
854 #if defined(_WIN32) || defined(WIN32)
855 _setmode( _fileno( stdout ), _O_BINARY );
860 while (argc > 1 && argv[op][0] == '-')
862 unsigned char *endptr;
864 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
866 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867 else if (strcmp(argv[op], "-b") == 0) debug = 1;
868 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
871 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
873 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
880 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
882 int both = argv[op][2] == 0;
884 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
891 else timeitm = LOOPREPEAT;
892 if (both) timeit = timeitm;
894 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
898 #if defined(_WIN32) || defined(WIN32)
899 printf("PCRE: -S not supported on this OS\n");
904 getrlimit(RLIMIT_STACK, &rlim);
905 rlim.rlim_cur = stack_size * 1024 * 1024;
906 rc = setrlimit(RLIMIT_STACK, &rlim);
909 printf("PCRE: setrlimit() failed with error %d\n", rc);
917 else if (strcmp(argv[op], "-p") == 0) posix = 1;
919 else if (strcmp(argv[op], "-C") == 0)
922 printf("PCRE version %s\n", pcre_version());
923 printf("Compiled with\n");
924 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
925 printf(" %sUTF-8 support\n", rc? "" : "No ");
926 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927 printf(" %sUnicode properties support\n", rc? "" : "No ");
928 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
930 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931 (rc == -2)? "ANYCRLF" :
932 (rc == -1)? "ANY" : "???");
933 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935 "all Unicode newlines");
936 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937 printf(" Internal link size = %d\n", rc);
938 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939 printf(" POSIX malloc threshold = %d\n", rc);
940 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941 printf(" Default match limit = %d\n", rc);
942 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943 printf(" Default recursion depth limit = %d\n", rc);
944 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
948 else if (strcmp(argv[op], "-help") == 0 ||
949 strcmp(argv[op], "--help") == 0)
956 printf("** Unknown or malformed option %s\n", argv[op]);
965 /* Get the store for the offsets vector, and remember what it was */
967 size_offsets_max = size_offsets;
968 offsets = (int *)malloc(size_offsets_max * sizeof(int));
971 printf("** Failed to get %d bytes of memory for offsets vector\n",
972 (int)(size_offsets_max * sizeof(int)));
977 /* Sort out the input and output files */
981 infile = fopen(argv[op], INPUT_MODE);
984 printf("** Failed to open %s\n", argv[op]);
992 outfile = fopen(argv[op+1], OUTPUT_MODE);
995 printf("** Failed to open %s\n", argv[op+1]);
1001 /* Set alternative malloc function */
1003 pcre_malloc = new_malloc;
1004 pcre_free = new_free;
1005 pcre_stack_malloc = stack_malloc;
1006 pcre_stack_free = stack_free;
1008 /* Heading line unless quiet, then prompt for first regex if stdin */
1010 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1017 pcre_extra *extra = NULL;
1019 #if !defined NOPOSIX /* There are still compilers that require no indent */
1025 unsigned char *p, *pp, *ppp;
1026 unsigned char *to_file = NULL;
1027 const unsigned char *tables = NULL;
1028 unsigned long int true_size, true_study_size = 0;
1029 size_t size, regex_gotten_store;
1031 int do_debug = debug;
1034 int do_showinfo = showinfo;
1035 int do_showrest = 0;
1037 int erroroffset, len, delimiter, poffset;
1042 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1043 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1047 while (isspace(*p)) p++;
1048 if (*p == 0) continue;
1050 /* See if the pattern is to be loaded pre-compiled from a file. */
1052 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1054 unsigned long int magic, get_options;
1059 pp = p + (int)strlen((char *)p);
1060 while (isspace(pp[-1])) pp--;
1063 f = fopen((char *)p, "rb");
1066 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1070 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1073 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1075 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1077 re = (real_pcre *)new_malloc(true_size);
1078 regex_gotten_store = gotten_store;
1080 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1082 magic = ((real_pcre *)re)->magic_number;
1083 if (magic != MAGIC_NUMBER)
1085 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1091 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1097 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1098 do_flip? " (byte-inverted)" : "", p);
1100 /* Need to know if UTF-8 for printing data strings */
1102 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103 use_utf8 = (get_options & PCRE_UTF8) != 0;
1105 /* Now see if there is any following study data */
1107 if (true_study_size != 0)
1109 pcre_study_data *psd;
1111 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1112 extra->flags = PCRE_EXTRA_STUDY_DATA;
1114 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1115 extra->study_data = psd;
1117 if (fread(psd, 1, true_study_size, f) != true_study_size)
1120 fprintf(outfile, "Failed to read data from %s\n", p);
1121 if (extra != NULL) new_free(extra);
1122 if (re != NULL) new_free(re);
1126 fprintf(outfile, "Study data loaded from %s\n", p);
1127 do_study = 1; /* To get the data output if requested */
1129 else fprintf(outfile, "No study data\n");
1135 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1136 the pattern; if it isn't complete, read more. */
1140 if (isalnum(delimiter) || delimiter == '\\')
1142 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1147 poffset = p - buffer;
1153 if (*pp == '\\' && pp[1] != 0) pp++;
1154 else if (*pp == delimiter) break;
1157 if (*pp != 0) break;
1158 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1160 fprintf(outfile, "** Unexpected EOF\n");
1164 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1167 /* The buffer may have moved while being extended; reset the start of data
1168 pointer to the correct relative point in the buffer. */
1170 p = buffer + poffset;
1172 /* If the first character after the delimiter is backslash, make
1173 the pattern end with backslash. This is purely to provide a way
1174 of testing for the error message when a pattern ends with backslash. */
1176 if (pp[1] == '\\') *pp++ = '\\';
1178 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1182 strcpy((char *)pbuffer, (char *)p);
1184 /* Look for options after final delimiter */
1188 log_store = showstore; /* default from command line */
1194 case 'f': options |= PCRE_FIRSTLINE; break;
1195 case 'g': do_g = 1; break;
1196 case 'i': options |= PCRE_CASELESS; break;
1197 case 'm': options |= PCRE_MULTILINE; break;
1198 case 's': options |= PCRE_DOTALL; break;
1199 case 'x': options |= PCRE_EXTENDED; break;
1201 case '+': do_showrest = 1; break;
1202 case 'A': options |= PCRE_ANCHORED; break;
1203 case 'B': do_debug = 1; break;
1204 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205 case 'D': do_debug = do_showinfo = 1; break;
1206 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207 case 'F': do_flip = 1; break;
1208 case 'G': do_G = 1; break;
1209 case 'I': do_showinfo = 1; break;
1210 case 'J': options |= PCRE_DUPNAMES; break;
1211 case 'M': log_store = 1; break;
1212 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1214 #if !defined NOPOSIX
1215 case 'P': do_posix = 1; break;
1218 case 'S': do_study = 1; break;
1219 case 'U': options |= PCRE_UNGREEDY; break;
1220 case 'X': options |= PCRE_EXTRA; break;
1221 case 'Z': debug_lengths = 0; break;
1222 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1227 /* The '\r' test here is so that it works on Windows. */
1228 /* The '0' test is just in case this is an unterminated line. */
1229 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1231 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1233 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1237 tables = pcre_maketables();
1243 while (*pp != 0) pp++;
1244 while (isspace(pp[-1])) pp--;
1250 int x = check_newline(pp, outfile);
1251 if (x == 0) goto SKIP_DATA;
1253 while (*pp++ != '>');
1257 case '\r': /* So that it works in Windows */
1263 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1268 /* Handle compiling via the POSIX interface, which doesn't support the
1269 timing, showing, or debugging options, nor the ability to pass over
1270 local character tables. */
1272 #if !defined NOPOSIX
1273 if (posix || do_posix)
1278 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1279 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1280 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1281 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1282 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1284 rc = regcomp(&preg, (char *)p, cflags);
1286 /* Compilation failed; go back for another re, skipping to blank line
1287 if non-interactive. */
1291 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1292 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1297 /* Handle compiling via the native interface */
1300 #endif /* !defined NOPOSIX */
1307 clock_t start_time = clock();
1308 for (i = 0; i < timeit; i++)
1310 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1311 if (re != NULL) free(re);
1313 time_taken = clock() - start_time;
1314 fprintf(outfile, "Compile time %.4f milliseconds\n",
1315 (((double)time_taken * 1000.0) / (double)timeit) /
1316 (double)CLOCKS_PER_SEC);
1319 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1321 /* Compilation failed; go back for another re, skipping to blank line
1322 if non-interactive. */
1326 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1328 if (infile != stdin)
1332 if (extend_inputline(infile, buffer, NULL) == NULL)
1337 len = (int)strlen((char *)buffer);
1338 while (len > 0 && isspace(buffer[len-1])) len--;
1339 if (len == 0) break;
1341 fprintf(outfile, "\n");
1346 /* Compilation succeeded; print data if required. There are now two
1347 info-returning functions. The old one has a limited interface and
1348 returns only limited data. Check that it agrees with the newer one. */
1351 fprintf(outfile, "Memory allocation (code space): %d\n",
1352 (int)(gotten_store -
1354 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1356 /* Extract the size for possible writing before possibly flipping it,
1357 and remember the store that was got. */
1359 true_size = ((real_pcre *)re)->size;
1360 regex_gotten_store = gotten_store;
1362 /* If /S was present, study the regexp to generate additional info to
1363 help with the matching. */
1371 clock_t start_time = clock();
1372 for (i = 0; i < timeit; i++)
1373 extra = pcre_study(re, study_options, &error);
1374 time_taken = clock() - start_time;
1375 if (extra != NULL) free(extra);
1376 fprintf(outfile, " Study time %.4f milliseconds\n",
1377 (((double)time_taken * 1000.0) / (double)timeit) /
1378 (double)CLOCKS_PER_SEC);
1380 extra = pcre_study(re, study_options, &error);
1382 fprintf(outfile, "Failed to study: %s\n", error);
1383 else if (extra != NULL)
1384 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1387 /* If the 'F' option was present, we flip the bytes of all the integer
1388 fields in the regex data block and the study block. This is to make it
1389 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1390 compiled on a different architecture. */
1394 real_pcre *rre = (real_pcre *)re;
1396 byteflip(rre->magic_number, sizeof(rre->magic_number));
1397 rre->size = byteflip(rre->size, sizeof(rre->size));
1398 rre->options = byteflip(rre->options, sizeof(rre->options));
1399 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1401 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1403 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1405 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1407 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1408 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1409 sizeof(rre->name_table_offset));
1410 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1411 sizeof(rre->name_entry_size));
1412 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1413 sizeof(rre->name_count));
1417 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1418 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1419 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1423 /* Extract information from the compiled data if required */
1429 fprintf(outfile, "------------------------------------------------------------------\n");
1430 pcre_printint(re, outfile, debug_lengths);
1435 unsigned long int get_options, all_options;
1436 #if !defined NOINFOCHECK
1437 int old_first_char, old_options, old_count;
1439 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1441 int nameentrysize, namecount;
1442 const uschar *nametable;
1444 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1445 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1446 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1447 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1448 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1449 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1450 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1451 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1452 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1453 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1454 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1455 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1457 #if !defined NOINFOCHECK
1458 old_count = pcre_info(re, &old_options, &old_first_char);
1459 if (count < 0) fprintf(outfile,
1460 "Error %d from pcre_info()\n", count);
1463 if (old_count != count) fprintf(outfile,
1464 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1467 if (old_first_char != first_char) fprintf(outfile,
1468 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1469 first_char, old_first_char);
1471 if (old_options != (int)get_options) fprintf(outfile,
1472 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1473 get_options, old_options);
1477 if (size != regex_gotten_store) fprintf(outfile,
1478 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1479 (int)size, (int)regex_gotten_store);
1481 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1483 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1487 fprintf(outfile, "Named capturing subpatterns:\n");
1488 while (namecount-- > 0)
1490 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1491 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1492 GET2(nametable, 0));
1493 nametable += nameentrysize;
1497 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1498 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1500 all_options = ((real_pcre *)re)->options;
1501 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1503 if (get_options == 0) fprintf(outfile, "No options\n");
1504 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1505 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1506 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1507 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1508 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1509 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1510 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1511 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1512 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1513 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1514 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1515 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1516 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1517 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1518 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1519 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1521 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1523 switch (get_options & PCRE_NEWLINE_BITS)
1525 case PCRE_NEWLINE_CR:
1526 fprintf(outfile, "Forced newline sequence: CR\n");
1529 case PCRE_NEWLINE_LF:
1530 fprintf(outfile, "Forced newline sequence: LF\n");
1533 case PCRE_NEWLINE_CRLF:
1534 fprintf(outfile, "Forced newline sequence: CRLF\n");
1537 case PCRE_NEWLINE_ANYCRLF:
1538 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1541 case PCRE_NEWLINE_ANY:
1542 fprintf(outfile, "Forced newline sequence: ANY\n");
1549 if (first_char == -1)
1551 fprintf(outfile, "First char at start or follows newline\n");
1553 else if (first_char < 0)
1555 fprintf(outfile, "No first char\n");
1559 int ch = first_char & 255;
1560 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1563 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1565 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1570 fprintf(outfile, "No need char\n");
1574 int ch = need_char & 255;
1575 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1578 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1580 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1583 /* Don't output study size; at present it is in any case a fixed
1584 value, but it varies, depending on the computer architecture, and
1585 so messes up the test suite. (And with the /F option, it might be
1591 fprintf(outfile, "Study returned NULL\n");
1594 uschar *start_bits = NULL;
1595 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1597 if (start_bits == NULL)
1598 fprintf(outfile, "No starting byte set\n");
1603 fprintf(outfile, "Starting byte set: ");
1604 for (i = 0; i < 256; i++)
1606 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1610 fprintf(outfile, "\n ");
1613 if (PRINTHEX(i) && i != ' ')
1615 fprintf(outfile, "%c ", i);
1620 fprintf(outfile, "\\x%02x ", i);
1625 fprintf(outfile, "\n");
1631 /* If the '>' option was present, we write out the regex to a file, and
1632 that is all. The first 8 bytes of the file are the regex length and then
1633 the study length, in big-endian order. */
1635 if (to_file != NULL)
1637 FILE *f = fopen((char *)to_file, "wb");
1640 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1645 sbuf[0] = (uschar)((true_size >> 24) & 255);
1646 sbuf[1] = (uschar)((true_size >> 16) & 255);
1647 sbuf[2] = (uschar)((true_size >> 8) & 255);
1648 sbuf[3] = (uschar)((true_size) & 255);
1650 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1651 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1652 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1653 sbuf[7] = (uschar)((true_study_size) & 255);
1655 if (fwrite(sbuf, 1, 8, f) < 8 ||
1656 fwrite(re, 1, true_size, f) < true_size)
1658 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1662 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1665 if (fwrite(extra->study_data, 1, true_study_size, f) <
1668 fprintf(outfile, "Write error on %s: %s\n", to_file,
1671 else fprintf(outfile, "Study data written to %s\n", to_file);
1679 if (extra != NULL) new_free(extra);
1680 if (tables != NULL) new_free((void *)tables);
1681 continue; /* With next regex */
1683 } /* End of non-POSIX compile */
1685 /* Read data lines and test them */
1691 int *use_offsets = offsets;
1692 int use_size_offsets = size_offsets;
1693 int callout_data = 0;
1694 int callout_data_set = 0;
1696 int copystrings = 0;
1697 int find_match_limit = 0;
1701 int start_offset = 0;
1710 copynamesptr = copynames;
1711 getnamesptr = getnames;
1713 pcre_callout = callout;
1717 callout_fail_count = 999999;
1718 callout_fail_id = -1;
1721 if (extra != NULL) extra->flags &=
1722 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1727 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1733 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1734 len = (int)strlen((char *)buffer);
1735 if (buffer[len-1] == '\n') break;
1738 while (len > 0 && isspace(buffer[len-1])) len--;
1740 if (len == 0) break;
1743 while (isspace(*p)) p++;
1746 while ((c = *p++) != 0)
1751 if (c == '\\') switch ((c = *p++))
1753 case 'a': c = 7; break;
1754 case 'b': c = '\b'; break;
1755 case 'e': c = 27; break;
1756 case 'f': c = '\f'; break;
1757 case 'n': c = '\n'; break;
1758 case 'r': c = '\r'; break;
1759 case 't': c = '\t'; break;
1760 case 'v': c = '\v'; break;
1762 case '0': case '1': case '2': case '3':
1763 case '4': case '5': case '6': case '7':
1765 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1766 c = c * 8 + *p++ - '0';
1769 if (use_utf8 && c > 255)
1771 unsigned char buff8[8];
1773 utn = ord2utf8(c, buff8);
1774 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1775 c = buff8[ii]; /* Last byte */
1782 /* Handle \x{..} specially - new Perl thing for utf8 */
1787 unsigned char *pt = p;
1789 while (isxdigit(*(++pt)))
1790 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1793 unsigned char buff8[8];
1795 utn = ord2utf8(c, buff8);
1796 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1797 c = buff8[ii]; /* Last byte */
1801 /* Not correct form; fall through */
1808 while (i++ < 2 && isxdigit(*p))
1810 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1815 case 0: /* \ followed by EOF allows for an empty line */
1820 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1823 case 'A': /* Option setting */
1824 options |= PCRE_ANCHORED;
1828 options |= PCRE_NOTBOL;
1832 if (isdigit(*p)) /* Set copy string */
1834 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835 copystrings |= 1 << n;
1837 else if (isalnum(*p))
1839 uschar *npp = copynamesptr;
1840 while (isalnum(*p)) *npp++ = *p++;
1843 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1845 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1855 pcre_callout = NULL;
1860 callout_fail_id = 0;
1863 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1864 callout_fail_count = 0;
1869 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1876 if (*(++p) == '-') { sign = -1; p++; }
1878 callout_data = callout_data * 10 + *p++ - '0';
1879 callout_data *= sign;
1880 callout_data_set = 1;
1886 #if !defined NOPOSIX
1887 if (posix || do_posix)
1888 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1895 options |= PCRE_DFA_SHORTEST;
1902 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1903 getstrings |= 1 << n;
1905 else if (isalnum(*p))
1907 uschar *npp = getnamesptr;
1908 while (isalnum(*p)) *npp++ = *p++;
1911 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1913 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1923 find_match_limit = 1;
1927 options |= PCRE_NOTEMPTY;
1931 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1932 if (n > size_offsets_max)
1934 size_offsets_max = n;
1936 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1937 if (offsets == NULL)
1939 printf("** Failed to get %d bytes of memory for offsets vector\n",
1940 (int)(size_offsets_max * sizeof(int)));
1945 use_size_offsets = n;
1946 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1950 options |= PCRE_PARTIAL;
1954 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1957 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1960 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1961 extra->match_limit_recursion = n;
1965 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1968 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1971 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1972 extra->match_limit = n;
1977 options |= PCRE_DFA_RESTART;
1986 options |= PCRE_NOTEOL;
1990 options |= PCRE_NO_UTF8_CHECK;
1995 int x = check_newline(p, outfile);
1996 if (x == 0) goto NEXT_DATA;
1998 while (*p++ != '>');
2007 if ((all_use_dfa || use_dfa) && find_match_limit)
2009 printf("**Match limit not relevant for DFA matching: ignored\n");
2010 find_match_limit = 0;
2013 /* Handle matching via the POSIX interface, which does not
2014 support timing or playing with the match limit or callout data. */
2016 #if !defined NOPOSIX
2017 if (posix || do_posix)
2021 regmatch_t *pmatch = NULL;
2022 if (use_size_offsets > 0)
2023 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2024 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2025 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2027 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2031 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2032 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2034 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2037 fprintf(outfile, "Matched with REG_NOSUB\n");
2042 for (i = 0; i < (size_t)use_size_offsets; i++)
2044 if (pmatch[i].rm_so >= 0)
2046 fprintf(outfile, "%2d: ", (int)i);
2047 (void)pchars(dbuffer + pmatch[i].rm_so,
2048 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2049 fprintf(outfile, "\n");
2050 if (i == 0 && do_showrest)
2052 fprintf(outfile, " 0+ ");
2053 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2055 fprintf(outfile, "\n");
2063 /* Handle matching via the native interface - repeats for /g and /G */
2066 #endif /* !defined NOPOSIX */
2068 for (;; gmatched++) /* Loop for /g or /G */
2074 clock_t start_time = clock();
2077 if (all_use_dfa || use_dfa)
2079 int workspace[1000];
2080 for (i = 0; i < timeitm; i++)
2081 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2082 options | g_notempty, use_offsets, use_size_offsets, workspace,
2083 sizeof(workspace)/sizeof(int));
2088 for (i = 0; i < timeitm; i++)
2089 count = pcre_exec(re, extra, (char *)bptr, len,
2090 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2092 time_taken = clock() - start_time;
2093 fprintf(outfile, "Execute time %.4f milliseconds\n",
2094 (((double)time_taken * 1000.0) / (double)timeitm) /
2095 (double)CLOCKS_PER_SEC);
2098 /* If find_match_limit is set, we want to do repeated matches with
2099 varying limits in order to find the minimum value for the match limit and
2100 for the recursion limit. */
2102 if (find_match_limit)
2106 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2110 (void)check_match_limit(re, extra, bptr, len, start_offset,
2111 options|g_notempty, use_offsets, use_size_offsets,
2112 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2113 PCRE_ERROR_MATCHLIMIT, "match()");
2115 count = check_match_limit(re, extra, bptr, len, start_offset,
2116 options|g_notempty, use_offsets, use_size_offsets,
2117 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2118 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2121 /* If callout_data is set, use the interface with additional data */
2123 else if (callout_data_set)
2127 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2130 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2131 extra->callout_data = &callout_data;
2132 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2133 options | g_notempty, use_offsets, use_size_offsets);
2134 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2137 /* The normal case is just to do the match once, with the default
2138 value of match_limit. */
2141 else if (all_use_dfa || use_dfa)
2143 int workspace[1000];
2144 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2145 options | g_notempty, use_offsets, use_size_offsets, workspace,
2146 sizeof(workspace)/sizeof(int));
2149 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2150 count = use_size_offsets/2;
2157 count = pcre_exec(re, extra, (char *)bptr, len,
2158 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2161 fprintf(outfile, "Matched, but too many substrings\n");
2162 count = use_size_offsets/3;
2173 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2175 maxcount = use_size_offsets/3;
2177 /* This is a check against a lunatic return value. */
2179 if (count > maxcount)
2182 "** PCRE error: returned count %d is too big for offset size %d\n",
2183 count, use_size_offsets);
2184 count = use_size_offsets/3;
2187 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2188 do_g = do_G = FALSE; /* Break g/G loop */
2192 for (i = 0; i < count * 2; i += 2)
2194 if (use_offsets[i] < 0)
2195 fprintf(outfile, "%2d: <unset>\n", i/2);
2198 fprintf(outfile, "%2d: ", i/2);
2199 (void)pchars(bptr + use_offsets[i],
2200 use_offsets[i+1] - use_offsets[i], outfile);
2201 fprintf(outfile, "\n");
2206 fprintf(outfile, " 0+ ");
2207 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2209 fprintf(outfile, "\n");
2215 for (i = 0; i < 32; i++)
2217 if ((copystrings & (1 << i)) != 0)
2219 char copybuffer[256];
2220 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2221 i, copybuffer, sizeof(copybuffer));
2223 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2225 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2229 for (copynamesptr = copynames;
2231 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2233 char copybuffer[256];
2234 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2235 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2237 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2239 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2242 for (i = 0; i < 32; i++)
2244 if ((getstrings & (1 << i)) != 0)
2246 const char *substring;
2247 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2250 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2253 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2254 pcre_free_substring(substring);
2259 for (getnamesptr = getnames;
2261 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2263 const char *substring;
2264 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2265 count, (char *)getnamesptr, &substring);
2267 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2270 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2271 pcre_free_substring(substring);
2277 const char **stringlist;
2278 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2281 fprintf(outfile, "get substring list failed %d\n", rc);
2284 for (i = 0; i < count; i++)
2285 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2286 if (stringlist[i] != NULL)
2287 fprintf(outfile, "string list not terminated by NULL\n");
2288 /* free((void *)stringlist); */
2289 pcre_free_substring_list(stringlist);
2294 /* There was a partial match */
2296 else if (count == PCRE_ERROR_PARTIAL)
2298 fprintf(outfile, "Partial match");
2300 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2301 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2302 bptr + use_offsets[0]);
2304 fprintf(outfile, "\n");
2305 break; /* Out of the /g loop */
2308 /* Failed to match. If this is a /g or /G loop and we previously set
2309 g_notempty after a null match, this is not necessarily the end. We want
2310 to advance the start offset, and continue. We won't be at the end of the
2311 string - that was checked before setting g_notempty.
2313 Complication arises in the case when the newline option is "any" or
2314 "anycrlf". If the previous match was at the end of a line terminated by
2315 CRLF, an advance of one character just passes the \r, whereas we should
2316 prefer the longer newline sequence, as does the code in pcre_exec().
2317 Fudge the offset value to achieve this.
2319 Otherwise, in the case of UTF-8 matching, the advance must be one
2320 character, not one byte. */
2324 if (g_notempty != 0)
2327 unsigned int obits = ((real_pcre *)re)->options;
2328 use_offsets[0] = start_offset;
2329 if ((obits & PCRE_NEWLINE_BITS) == 0)
2332 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2333 obits = (d == '\r')? PCRE_NEWLINE_CR :
2334 (d == '\n')? PCRE_NEWLINE_LF :
2335 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2336 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2337 (d == -1)? PCRE_NEWLINE_ANY : 0;
2339 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2340 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2342 start_offset < len - 1 &&
2343 bptr[start_offset] == '\r' &&
2344 bptr[start_offset+1] == '\n')
2348 while (start_offset + onechar < len)
2350 int tb = bptr[start_offset+onechar];
2351 if (tb <= 127) break;
2353 if (tb != 0 && tb != 0xc0) onechar++;
2356 use_offsets[1] = start_offset + onechar;
2360 if (count == PCRE_ERROR_NOMATCH)
2362 if (gmatched == 0) fprintf(outfile, "No match\n");
2364 else fprintf(outfile, "Error %d\n", count);
2365 break; /* Out of the /g loop */
2369 /* If not /g or /G we are done */
2371 if (!do_g && !do_G) break;
2373 /* If we have matched an empty string, first check to see if we are at
2374 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2375 what Perl's /g option does. This turns out to be rather cunning. First
2376 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2377 same point. If this fails (picked up above) we advance to the next
2382 if (use_offsets[0] == use_offsets[1])
2384 if (use_offsets[0] == len) break;
2385 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2388 /* For /g, update the start offset, leaving the rest alone */
2390 if (do_g) start_offset = use_offsets[1];
2392 /* For /G, update the pointer and length */
2396 bptr += use_offsets[1];
2397 len -= use_offsets[1];
2399 } /* End of loop for /g and /G */
2401 NEXT_DATA: continue;
2402 } /* End of loop for data lines */
2406 #if !defined NOPOSIX
2407 if (posix || do_posix) regfree(&preg);
2410 if (re != NULL) new_free(re);
2411 if (extra != NULL) new_free(extra);
2414 new_free((void *)tables);
2415 setlocale(LC_CTYPE, "C");
2420 if (infile == stdin) fprintf(outfile, "\n");
2424 if (infile != NULL && infile != stdin) fclose(infile);
2425 if (outfile != NULL && outfile != stdout) fclose(outfile);
2435 /* End of pcretest.c */