1 // -*- coding: utf-8 -*-
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat
34 // TODO: Test extractions for PartialMatch/Consume
45 using pcrecpp::StringPiece;
47 using pcrecpp::RE_Options;
50 using pcrecpp::CRadix;
52 static bool VERBOSE_TEST = false;
54 // CHECK dies with a fatal error if condition is not true. It is *not*
55 // controlled by NDEBUG, so the check will be executed regardless of
56 // compilation mode. Therefore, it is safe to do things like:
57 // CHECK_EQ(fp->Write(x), 4)
// A failed check reports "<file>:<line>: Check failed: <condition text>"
// on stderr; the condition text is the stringified macro argument.
58 #define CHECK(condition) do { \
60 fprintf(stderr, "%s:%d: Check failed: %s\n", \
61 __FILE__, __LINE__, #condition); \
// CHECK_EQ(a, b) is CHECK applied to the equality of a and b. Both arguments
// are parenthesized so that operands containing operators of lower precedence
// than == (for example CHECK_EQ(x & 1, 0)) are compared as written.
#define CHECK_EQ(a, b) CHECK((a) == (b))
// Timing1: compiles one RE and full-matches it against the same StringPiece
// num_iters times. Every iteration must match (CHECK).
68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
// Timing2: like Timing1, but the pattern has a capture group and each of the
// num_iters matches also stores the captured digits through &i.
77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
// Timing3: builds a text of num_iters copies of "this is another line\n",
// then repeatedly Consume()s one ".*\n" match from the front of it until
// Consume fails, and finally prints the value of counter.
87 static void Timing3(int num_iters) {
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
93 RE line_matcher(".*\n");
95 StringPiece text(text_string);
97 while (line_matcher.Consume(&text)) {
100 printf("Matched %d lines\n", counter);
103 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
// LeakTest: runs a 100000-iteration loop, samples VirtualProcessSize() once
// inside the loop and once after it, and CHECKs that the relative growth
// between the two samples stays below 2%.
105 static void LeakTest() {
106 // Check for memory leaks
107 unsigned long long initial_size = 0;
108 for (int i = 0; i < 100000; i++) {
110 initial_size = VirtualProcessSize();
111 printf("Size after 50000: %llu\n", initial_size);
// buf receives "pat" followed by i zero-padded to nine digits.
113 char buf[100]; // definitely big enough
114 sprintf(buf, "pat%09d", i);
117 uint64 final_size = VirtualProcessSize();
118 printf("Size after 100000: %llu\n", final_size);
// Growth is expressed as a fraction of the final size.
119 const double growth = double(final_size - initial_size) / final_size;
120 printf("Growth: %0.2f%%", growth * 100);
121 CHECK(growth < 0.02); // Allow < 2% growth
// RadixTests: checks that numeric captures are parsed correctly as hex,
// octal and decimal for a range of integer types, both through the
// radix-specific wrappers (Hex, Octal, plain pointer) and through CRadix.
126 static void RadixTests() {
127 printf("Testing hex\n");
// CHECK_HEX(type, value): matches the spelling of `value` with Hex(&v) and,
// prefixed with "0x", with CRadix(&v); each time v must equal the literal
// 0x<value>. The trailing [uUlL]* lets the pattern accept the literal's
// type suffix without capturing it.
129 #define CHECK_HEX(type, value) \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
153 printf("Testing octal\n");
// CHECK_OCTAL(type, value): same scheme as CHECK_HEX, using Octal(&v) and a
// "0" prefix for the CRadix case; v must equal the literal 0<value>.
155 #define CHECK_OCTAL(type, value) \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
179 printf("Testing decimal\n");
// CHECK_DECIMAL(type, value): matches the spelling of `value` (optionally
// negative) into a plain &v and into CRadix(&v); v must equal `value`.
181 #define CHECK_DECIMAL(type, value) \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
// TestReplace: table-driven test of RE::Replace and RE::GlobalReplace,
// followed by two extra GlobalReplace checks on "\r\n" input under the
// PCRE_NEWLINE_CR and PCRE_NEWLINE_LF options.
207 static void TestReplace() {
208 printf("Testing Replace\n");
213 const char *original;
216 int global_count; // the expected return value from GlobalReplace
// Table of cases; the entry whose `original` is NULL terminates it.
218 static const ReplaceTest tests[] = {
219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221 "the quick brown fox jumps over the lazy dogs.",
222 "ethay quick brown fox jumps over the lazy dogs.",
223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
227 "paul.haahr@google.com",
228 "paul-NOSPAM.haahr@google.com",
229 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
283 "bbabbabb\nbbabbabb\nbb",
289 "bbabbabb\rbbabbabb\rbb",
295 "bbabbabb\r\nbbabbabb\r\nbb",
297 // Check empty-string matching (it's tricky!)
313 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
314 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
315 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
319 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
320 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
321 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
322 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
325 { "", NULL, NULL, NULL, NULL, 0 }
329 const bool support_utf8 = true;
331 const bool support_utf8 = false;
// For every table entry: the string produced by Replace() must equal
// t->single, the string produced by GlobalReplace() must equal t->global,
// and GlobalReplace() must return t->global_count.
334 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
335 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
336 assert(re.error().empty());
337 string one(t->original);
338 CHECK(re.Replace(t->rewrite, &one));
339 CHECK_EQ(one, t->single);
340 string all(t->original);
341 const int replace_count = re.GlobalReplace(t->rewrite, &all);
342 CHECK_EQ(all, t->global);
343 CHECK_EQ(replace_count, t->global_count);
346 // One final test: test \r\n replacement when we're not in CRLF mode
// In both non-CRLF modes below the expected output has "bb" inserted between
// '\r' and '\n' as well, for nine replacements in total.
348 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
349 assert(re.error().empty());
350 string all("aa\r\naa\r\n");
351 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
352 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
355 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
356 assert(re.error().empty());
357 string all("aa\r\naa\r\n");
358 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
359 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
362 // Alas, the answer depends on how pcre was compiled.
// TestExtract: checks that RE::Extract writes the rewrite string, with
// \N group references substituted, into s on a match.
365 static void TestExtract() {
366 printf("Testing Extract\n");
370 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
371 CHECK_EQ(s, "kremvax!boris");
373 // check the RE interface as well
374 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
375 CHECK_EQ(s, "'foo'");
// A failed Extract must leave s unchanged: it still holds the previous result.
376 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
377 CHECK_EQ(s, "'foo'");
// TestConsume: repeatedly Consume()s words from the front of the input and
// checks that consumption stops once the remaining input no longer starts
// with a match.
380 static void TestConsume() {
381 printf("Testing Consume\n");
385 string s(" aaa b!@#$@#$cccc");
386 StringPiece input(s);
388 RE r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
389 CHECK(r.Consume(&input, &word));
390 CHECK_EQ(word, "aaa");
391 CHECK(r.Consume(&input, &word));
393 CHECK(! r.Consume(&input, &word));
// TestFindAndConsume: same input as TestConsume, but FindAndConsume may skip
// non-matching text, so three words are found before the call fails.
396 static void TestFindAndConsume() {
397 printf("Testing FindAndConsume\n");
401 string s(" aaa b!@#$@#$cccc");
402 StringPiece input(s);
404 RE r("(\\w+)"); // matches a word
405 CHECK(r.FindAndConsume(&input, &word));
406 CHECK_EQ(word, "aaa");
407 CHECK(r.FindAndConsume(&input, &word));
409 CHECK(r.FindAndConsume(&input, &word));
410 CHECK_EQ(word, "cccc");
411 CHECK(! r.FindAndConsume(&input, &word));
// TestMatchNumberPeculiarity: with one capture group per alternative, the
// output argument that corresponds to the alternative that matched must
// receive the matched text.
414 static void TestMatchNumberPeculiarity() {
415 printf("Testing match-number peculiaraity\n");
421 RE r("(foo)|(bar)|(baz)");
422 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
423 CHECK_EQ(word1, "foo");
426 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428 CHECK_EQ(word2, "bar");
430 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
433 CHECK_EQ(word3, "baz");
434 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
// The match must still succeed when the only capture group is in the
// alternative that did not match.
437 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
// TestRecursion: checks that the match_limit and match_limit_recursion
// options turn an otherwise successful partial match into a failure once
// the limit is set low enough.
441 static void TestRecursion() {
442 printf("Testing recursion\n");
444 // Get one string that passes (sometimes), one that never does.
445 string text_good("abcdefghijk");
446 string text_bad("acdefghijkl");
448 // According to pcretest, matching text_good against (\w+)*b
449 // requires match_limit of at least 8192, and match_recursion_limit
// match_limit = 8192: text_good partially matches.
452 RE_Options options_ml;
453 options_ml.set_match_limit(8192);
454 RE re("(\\w+)*b", options_ml);
455 CHECK(re.PartialMatch(text_good) == true);
456 CHECK(re.PartialMatch(text_bad) == false);
457 CHECK(re.FullMatch(text_good) == false);
458 CHECK(re.FullMatch(text_bad) == false);
// match_limit = 1024: the same partial match is expected to fail.
460 options_ml.set_match_limit(1024);
461 RE re2("(\\w+)*b", options_ml);
462 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
463 CHECK(re2.PartialMatch(text_bad) == false);
464 CHECK(re2.FullMatch(text_good) == false);
465 CHECK(re2.FullMatch(text_bad) == false);
// match_limit_recursion = 50: text_good partially matches.
467 RE_Options options_mlr;
468 options_mlr.set_match_limit_recursion(50);
469 RE re3("(\\w+)*b", options_mlr);
470 CHECK(re3.PartialMatch(text_good) == true);
471 CHECK(re3.PartialMatch(text_bad) == false);
472 CHECK(re3.FullMatch(text_good) == false);
473 CHECK(re3.FullMatch(text_bad) == false);
// match_limit_recursion = 10: the same partial match is expected to fail.
475 options_mlr.set_match_limit_recursion(10);
476 RE re4("(\\w+)*b", options_mlr);
477 CHECK(re4.PartialMatch(text_good) == false);
478 CHECK(re4.PartialMatch(text_bad) == false);
479 CHECK(re4.FullMatch(text_good) == false);
480 CHECK(re4.FullMatch(text_bad) == false);
483 // A meta-quoted string, interpreted as a pattern, should always match
484 // the original unquoted string.
//   unquoted: the text to quote with RE::QuoteMeta and then match.
//   options:  options used to compile the quoted pattern (default-constructed
//             RE_Options when omitted).
485 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
486 string quoted = RE::QuoteMeta(unquoted);
487 RE re(quoted, options);
488 CHECK(re.FullMatch(unquoted));
491 // A string containing meaningful regexp characters, which is then meta-
492 // quoted, should not generally match a string the unquoted string does.
//   unquoted:         the text to quote with RE::QuoteMeta.
//   should_not_match: a string the quoted pattern must fail to full-match.
//   options:          options used to compile the quoted pattern.
493 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
494 RE_Options options = RE_Options()) {
495 string quoted = RE::QuoteMeta(unquoted);
496 RE re(quoted, options);
497 CHECK(!re.FullMatch(should_not_match));
500 // Tests that quoted meta characters match their original strings,
501 // and that a few things that shouldn't match indeed do not.
// This function holds the positive cases: each string, once quoted, must
// match itself.
502 static void TestQuotaMetaSimple() {
503 TestQuoteMeta("foo");
504 TestQuoteMeta("foo.bar");
505 TestQuoteMeta("foo\\.bar");
506 TestQuoteMeta("[1-9]");
507 TestQuoteMeta("1.5-2.0?");
508 TestQuoteMeta("\\d");
509 TestQuoteMeta("Who doesn't like ice cream?");
510 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
511 TestQuoteMeta("((?!)xxx).*yyy");
// Explicit length 7 so the embedded NUL byte is part of the string.
513 TestQuoteMeta(string("foo\0bar", 7));
// Negative cases: after quoting, the first string must be treated literally,
// so it must not match the second string (which the unquoted pattern could
// match, or which merely resembles it).
516 static void TestQuoteMetaSimpleNegative() {
517 NegativeTestQuoteMeta("foo", "bar");
518 NegativeTestQuoteMeta("...", "bar");
519 NegativeTestQuoteMeta("\\.", ".");
520 NegativeTestQuoteMeta("\\.", "..");
521 NegativeTestQuoteMeta("(a)", "a");
522 NegativeTestQuoteMeta("(a|b)", "a");
523 NegativeTestQuoteMeta("(a|b)", "(a)");
524 NegativeTestQuoteMeta("(a|b)", "a|b");
525 NegativeTestQuoteMeta("[0-9]", "0");
526 NegativeTestQuoteMeta("[0-9]", "0-9");
527 NegativeTestQuoteMeta("[0-9]", "[9]");
528 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
// Quoting must also work for a string containing a byte above 0x7F
// (0xB2, SUPERSCRIPT TWO in Latin-1) with default options.
531 static void TestQuoteMetaLatin1() {
532 TestQuoteMeta("3\xb2 = 9");
// Quoting must round-trip strings containing 2-, 3- and 4-byte UTF-8
// sequences when the pattern is compiled with pcrecpp::UTF8().
535 static void TestQuoteMetaUtf8() {
537 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
538 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
539 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
540 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character
541 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime)
542 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
543 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
544 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol)
// Entry point for the QuoteMeta tests: prints a banner and runs the
// individual QuoteMeta test functions.
550 static void TestQuoteMetaAll() {
551 printf("Testing QuoteMeta\n");
552 TestQuotaMetaSimple();
553 TestQuoteMetaSimpleNegative();
554 TestQuoteMetaLatin1();
559 // Options tests contributed by
560 // Giuseppe Maxia, CTO, Stardata s.r.l.
// GetOneOptionResult: runs `regex` compiled with `options` against `str`
// using FullMatch or PartialMatch, storing the first capture in `captured`,
// and CHECKs that the captured text equals `expected`. The boolean result of
// the match call itself is not checked; only the capture is.
563 static void GetOneOptionResult(
564 const char *option_name,
571 printf("Testing Option <%s>\n", option_name);
573 printf("/%s/ finds \"%s\" within \"%s\" \n",
579 RE(regex,options).FullMatch(str, &captured);
581 RE(regex,options).PartialMatch(str, &captured);
582 CHECK_EQ(captured, expected);
// TestOneOption: CHECKs the outcome of FullMatch or PartialMatch for `regex`
// compiled with `options` against `str`.
//   assertive: defaults to true. The progress message prints "matches" when
//              it is true and "doesn't match" otherwise; presumably it also
//              selects between the CHECK(...) and CHECK(!...) calls below.
585 static void TestOneOption(
586 const char *option_name,
591 bool assertive = true) {
593 printf("Testing Option <%s>\n", option_name);
595 printf("'%s' %s /%s/ \n",
597 (assertive? "matches" : "doesn't match"),
601 CHECK(RE(regex,options).FullMatch(str));
603 CHECK(RE(regex,options).PartialMatch(str));
606 CHECK(!RE(regex,options).FullMatch(str));
608 CHECK(!RE(regex,options).PartialMatch(str));
// Test_CASELESS: exercises the caseless option set three ways: via
// set_caseless(true) on an existing RE_Options, via the value returned by
// set_caseless(true), and via pcrecpp::CASELESS(). The last call clears the
// option and passes assertive = false (the "doesn't match" case).
612 static void Test_CASELESS() {
616 options.set_caseless(true);
617 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
618 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
619 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
621 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
622 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
623 options.set_caseless(false);
624 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
// Test_MULTILINE: "^cruel$" must match the middle line of a three-line
// string when the multiline option is set (three ways of setting it), and
// the final call, with the option cleared, passes assertive = false.
627 static void Test_MULTILINE() {
630 const char *str = "HELLO\n" "cruel\n" "world\n";
632 options.set_multiline(true);
633 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
634 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
635 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
636 options.set_multiline(false);
637 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
// Test_DOTALL: "HELLO.*world" must match across the newlines in str when the
// dotall option is set (three ways of setting it); the final call, with the
// option cleared, passes assertive = false.
640 static void Test_DOTALL() {
643 const char *str = "HELLO\n" "cruel\n" "world";
645 options.set_dotall(true);
646 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
647 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
648 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
649 options.set_dotall(false);
650 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
// Test_DOLLAR_ENDONLY: "world$" against a string ending in "world\n". The
// first call (option not yet set) uses the default assertive = true; the
// calls made with dollar_endonly set pass assertive = false.
653 static void Test_DOLLAR_ENDONLY() {
656 const char *str = "HELLO world\n";
658 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
659 options.set_dollar_endonly(true);
660 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
661 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
// Test_EXTRA: the pattern "\HELL\O" contains backslashes before ordinary
// letters. With the extra option set the calls pass assertive = false; with
// it cleared the default assertive = true is used.
664 static void Test_EXTRA() {
666 const char *str = "HELLO";
668 options.set_extra(true);
669 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
670 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
671 options.set_extra(false);
672 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
// Test_EXTENDED: with the extended option set, the pattern "HELLO world"
// (containing a literal space) is tested with assertive = false against the
// identical string; with the option cleared the default assertive = true
// applies.
675 static void Test_EXTENDED() {
678 const char *str = "HELLO world";
680 options.set_extended(true);
681 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
682 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
683 TestOneOption("EXTENDED (class)",
691 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
692 TestOneOption("EXTENDED (function)",
700 options.set_extended(false);
701 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
// Test_NO_AUTO_CAPTURE: first checks that, without the option, the
// parenthesized group captures "world"; then sets no_auto_capture on the
// options object and checks `captured` again.
704 static void Test_NO_AUTO_CAPTURE() {
706 const char *str = "HELLO world";
709 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711 printf("parentheses capture text\n");
712 RE re("(world|universe)$", options);
713 CHECK(re.Extract("\\1", str , &captured));
714 CHECK_EQ(captured, "world");
// NOTE(review): `re` was constructed before set_no_auto_capture(true) is
// called on `options`, and the Extract result below is not checked; the
// final CHECK_EQ may only be observing the value left by the earlier
// Extract. Confirm this step tests what its banner announces.
715 options.set_no_auto_capture(true);
716 printf("testing Option <NO_AUTO_CAPTURE>\n");
718 printf("parentheses do not capture text\n");
719 re.Extract("\\1",str, &captured );
720 CHECK_EQ(captured, "world");
// Test_UNGREEDY: the expected captures show that the ungreedy option swaps
// the meaning of the quantifiers: with it set, '.*' captures the shortest
// quoted span and '.*?' the longest; with it cleared, the reverse.
723 static void Test_UNGREEDY() {
725 const char *str = "HELLO, 'this' is the 'world'";
727 options.set_ungreedy(true);
728 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
729 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
730 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732 options.set_ungreedy(false);
733 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
734 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
// Test_all_options: sets several PCRE_* flags at once through
// set_all_options() (and through the RE_Options constructor), and after each
// combination resets the flags to 0 and repeats the check with
// assertive = false.
737 static void Test_all_options() {
738 const char *str = "HELLO\n" "cruel\n" "world";
740 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
743 options.set_all_options(0);
744 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
745 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
748 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
751 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
754 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
762 options.set_all_options(0);
763 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
// TestOptions: prints a banner and runs the per-option Test_* functions.
767 static void TestOptions() {
768 printf("Testing Options\n");
772 Test_DOLLAR_ENDONLY();
774 Test_NO_AUTO_CAPTURE();
// TestConstructors: builds `orig` with the dotall option so that it matches
// the multi-line str, then checks that copy1 and copy2 match as well. copy2
// starts out as a non-matching pattern and must match in the later checks.
780 static void TestConstructors() {
781 printf("Testing constructors\n");
784 options.set_dotall(true);
785 const char *str = "HELLO\n" "cruel\n" "world";
787 RE orig("HELLO.*world", options);
788 CHECK(orig.FullMatch(str));
791 CHECK(copy1.FullMatch(str));
793 RE copy2("not a match");
794 CHECK(!copy2.FullMatch(str));
796 CHECK(copy2.FullMatch(str));
798 CHECK(copy2.FullMatch(str));
800 // Make sure when we assign to ourselves, nothing bad happens
804 CHECK(orig.FullMatch(str));
805 CHECK(copy1.FullMatch(str));
806 CHECK(copy2.FullMatch(str));
809 int main(int argc, char** argv) {
810 // Treat any flag as --help
811 if (argc > 1 && argv[1][0] == '-') {
812 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
813 " If 'timingX ###' is specified, run the given timing test\n"
814 " with the given number of iterations, rather than running\n"
815 " the default corectness test.\n", argv[0]);
820 if ( argc == 2 || atoi(argv[2]) == 0) {
821 printf("timing mode needs a num-iters argument\n");
824 if (!strcmp(argv[1], "timing1"))
825 Timing1(atoi(argv[2]));
826 else if (!strcmp(argv[1], "timing2"))
827 Timing2(atoi(argv[2]));
828 else if (!strcmp(argv[1], "timing3"))
829 Timing3(atoi(argv[2]));
831 printf("Unknown argument '%s'\n", argv[1]);
835 printf("Testing FullMatch\n");
840 /***** FullMatch with no args *****/
842 CHECK(RE("h.*o").FullMatch("hello"));
843 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
844 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
845 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
846 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
847 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
849 /***** FullMatch with args *****/
852 CHECK(RE("\\d+").FullMatch("1001"));
855 CHECK(RE("(\\d+)").FullMatch("1001", &i));
857 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
859 CHECK(!RE("()\\d+").FullMatch("10", &i));
860 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863 // Digits surrounding integer-arg
864 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
866 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
868 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
870 CHECK(RE("(\\d)").PartialMatch("1234", &i));
872 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
876 CHECK(RE("h(.*)o").FullMatch("hello", &s));
877 CHECK_EQ(s, string("ell"));
881 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
882 CHECK_EQ(sp.size(), 4);
883 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
887 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
888 CHECK_EQ(s, string("ruby"));
891 // Ignore non-void* NULL arg
892 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
893 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
894 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
895 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
896 #ifdef HAVE_LONG_LONG
897 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
899 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
900 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
902 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
903 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
904 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
905 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
906 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
907 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
910 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
911 CHECK_EQ(s, string("ruby"));
917 CHECK(RE("(H)ello").FullMatch("Hello", &c));
922 CHECK(RE("(H)ello").FullMatch("Hello", &c));
923 CHECK_EQ(c, static_cast<unsigned char>('H'));
927 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
928 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
929 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
930 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
931 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
932 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
936 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
937 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
938 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
939 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
943 static const int max_value = 0x7fffffff;
944 static const int min_value = -max_value - 1;
945 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
946 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
947 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
948 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
949 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
950 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
954 static const unsigned int max_value = 0xfffffffful;
955 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
956 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
957 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
959 #ifdef HAVE_LONG_LONG
960 # if defined(__MINGW__) || defined(__MINGW32__)
969 static const long long max_value = 0x7fffffffffffffffLL;
970 static const long long min_value = -max_value - 1;
971 char buf[32]; // definitely big enough for a long long
973 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
974 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
976 sprintf(buf, LLD, max_value);
977 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
979 sprintf(buf, LLD, min_value);
980 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
982 sprintf(buf, LLD, max_value);
983 assert(buf[strlen(buf)-1] != '9');
984 buf[strlen(buf)-1]++;
985 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
987 sprintf(buf, LLD, min_value);
988 assert(buf[strlen(buf)-1] != '9');
989 buf[strlen(buf)-1]++;
990 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
995 unsigned long long v;
997 static const unsigned long long max_value = 0xffffffffffffffffULL;
998 char buf[32]; // definitely big enough for a unsigned long long
1000 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1001 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1003 sprintf(buf, LLU, max_value);
1004 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1006 assert(buf[strlen(buf)-1] != '9');
1007 buf[strlen(buf)-1]++;
1008 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1013 CHECK(RE("(.*)").FullMatch("100", &v));
1014 CHECK(RE("(.*)").FullMatch("-100.", &v));
1015 CHECK(RE("(.*)").FullMatch("1e23", &v));
1019 CHECK(RE("(.*)").FullMatch("100", &v));
1020 CHECK(RE("(.*)").FullMatch("-100.", &v));
1021 CHECK(RE("(.*)").FullMatch("1e23", &v));
1024 // Check that matching is fully anchored
1025 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1026 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1027 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1028 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1031 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1032 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1033 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1036 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1037 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1039 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1041 // Check full-match handling (needs '$' tacked on internally)
1042 CHECK(RE("fo|foo").FullMatch("fo"));
1043 CHECK(RE("fo|foo").FullMatch("foo"));
1044 CHECK(RE("fo|foo$").FullMatch("fo"));
1045 CHECK(RE("fo|foo$").FullMatch("foo"));
1046 CHECK(RE("foo$").FullMatch("foo"));
1047 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1048 CHECK(!RE("fo|bar").FullMatch("fox"));
1050 // Uncomment the following if we change the handling of '$' to
1051 // prevent it from matching a trailing newline
1053 // Check that we don't get bitten by pcre's special handling of a
1054 // '\n' at the end of the string matching '$'
1055 CHECK(!RE("foo$").PartialMatch("foo\n"));
1060 CHECK(RE("").FullMatch(""));
1062 memset(a, 0, sizeof(0));
1063 CHECK(RE("(\\d){1}").FullMatch("1",
1067 memset(a, 0, sizeof(0));
1068 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1073 memset(a, 0, sizeof(0));
1074 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1075 &a[0], &a[1], &a[2]));
1080 memset(a, 0, sizeof(0));
1081 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1082 &a[0], &a[1], &a[2], &a[3]));
1088 memset(a, 0, sizeof(0));
1089 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1090 &a[0], &a[1], &a[2],
1098 memset(a, 0, sizeof(0));
1099 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1100 &a[0], &a[1], &a[2],
1101 &a[3], &a[4], &a[5]));
1109 memset(a, 0, sizeof(0));
1110 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1111 &a[0], &a[1], &a[2], &a[3],
1112 &a[4], &a[5], &a[6]));
1121 memset(a, 0, sizeof(0));
1122 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1123 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1125 &a[0], &a[1], &a[2], &a[3],
1126 &a[4], &a[5], &a[6], &a[7],
1127 &a[8], &a[9], &a[10], &a[11],
1128 &a[12], &a[13], &a[14], &a[15]));
1146 /***** PartialMatch *****/
1148 printf("Testing PartialMatch\n");
1150 CHECK(RE("h.*o").PartialMatch("hello"));
1151 CHECK(RE("h.*o").PartialMatch("othello"));
1152 CHECK(RE("h.*o").PartialMatch("hello!"));
1153 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1155 /***** other tests *****/
1161 TestFindAndConsume();
1163 TestMatchNumberPeculiarity();
1165 // Check the pattern() accessor
1167 const string kPattern = "http://([^/]+)/.*";
1168 const RE re(kPattern);
1169 CHECK_EQ(kPattern, re.pattern());
1172 // Check RE error field.
1175 CHECK(re.error().empty()); // Must have no error
1179 // Check UTF-8 handling
1181 printf("Testing UTF-8 handling\n");
1183 // Three Japanese characters (nihongo)
1184 const unsigned char utf8_string[] = {
1185 0xe6, 0x97, 0xa5, // 65e5
1186 0xe6, 0x9c, 0xac, // 672c
1187 0xe8, 0xaa, 0x9e, // 8a9e
1190 const unsigned char utf8_pattern[] = {
1192 0xe6, 0x9c, 0xac, // 672c
1197 // Both should match in either mode, bytes or UTF-8
1198 RE re_test1(".........");
1199 CHECK(re_test1.FullMatch(utf8_string));
1200 RE re_test2("...", pcrecpp::UTF8());
1201 CHECK(re_test2.FullMatch(utf8_string));
1203 // Check that '.' matches one byte or UTF-8 character
1204 // according to the mode.
1207 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1208 CHECK_EQ(ss, string("\xe6"));
1209 RE re_test4("(.)", pcrecpp::UTF8());
1210 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1211 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1213 // Check that string matches itself in either mode
1214 RE re_test5(utf8_string);
1215 CHECK(re_test5.FullMatch(utf8_string));
1216 RE re_test6(utf8_string, pcrecpp::UTF8());
1217 CHECK(re_test6.FullMatch(utf8_string));
1219 // Check that pattern matches string only in UTF8 mode
1220 RE re_test7(utf8_pattern);
1221 CHECK(!re_test7.FullMatch(utf8_string));
1222 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1223 CHECK(re_test8.FullMatch(utf8_string));
1226 // Check that ungreedy, UTF8 regular expressions don't match when they
1227 // oughtn't -- see bug 82246.
1229 // This code always worked.
1230 const char* pattern = "\\w+X";
1231 const string target = "a aX";
1232 RE match_sentence(pattern);
1233 RE match_sentence_re(pattern, pcrecpp::UTF8());
1235 CHECK(!match_sentence.FullMatch(target));
1236 CHECK(!match_sentence_re.FullMatch(target));
1240 const char* pattern = "(?U)\\w+X";
1241 const string target = "a aX";
1242 RE match_sentence(pattern);
1243 RE match_sentence_re(pattern, pcrecpp::UTF8());
1245 CHECK(!match_sentence.FullMatch(target));
1246 CHECK(!match_sentence_re.FullMatch(target));
1248 #endif /* def SUPPORT_UTF8 */
1250 printf("Testing error reporting\n");
1252 { RE re("a\\1"); CHECK(!re.error().empty()); }
1255 CHECK(!re.error().empty());
1259 CHECK(!re.error().empty());
1262 RE re("a[[:foobar:]]");
1263 CHECK(!re.error().empty());
1267 CHECK(!re.error().empty());
1271 CHECK(!re.error().empty());
1274 // Test that recursion is stopped
1278 if (getenv("VERBOSE_TEST") != NULL)
1279 VERBOSE_TEST = true;
1282 // Test the constructors