1 // -*- coding: utf-8 -*-
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat
34 // TODO: Test extractions for PartialMatch/Consume
45 using pcrecpp::StringPiece;
47 using pcrecpp::RE_Options;
50 using pcrecpp::CRadix;
52 static bool VERBOSE_TEST = false;
54 // CHECK dies with a fatal error if condition is not true. It is *not*
55 // controlled by NDEBUG, so the check will be executed regardless of
56 // compilation mode. Therefore, it is safe to do things like:
57 // CHECK_EQ(fp->Write(x), 4)
// CHECK(condition): the visible part of the macro reports a failed check on
// stderr as "<file>:<line>: Check failed: <condition text>".
// NOTE(review): the macro lines that test the condition and terminate the
// program are not visible in this view; confirm against the full file.
// CHECK_EQ(a, b) is shorthand for CHECK(a == b); the operands are pasted
// unparenthesized into the comparison.
58 #define CHECK(condition) do { \
60 fprintf(stderr, "%s:%d: Check failed: %s\n", \
61 __FILE__, __LINE__, #condition); \
66 #define CHECK_EQ(a, b) CHECK(a == b)
// Timing benchmark: builds the RE "ruby:\d+" once and runs FullMatch against
// the fixed StringPiece "ruby:1234" num_iters times, CHECKing every match.
68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
// Timing benchmark: same loop shape as Timing1, but the pattern has a capture
// group and each FullMatch call also passes &i as an output argument.
// NOTE(review): the declaration of i is not visible in this view.
77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
// Timing benchmark: appends "this is another line\n" to text_string num_iters
// times, then repeatedly calls Consume() on a StringPiece over that text with
// the pattern ".*\n", and finally prints counter as the number of lines.
// NOTE(review): the declarations of text_string and counter, and the loop body
// that updates counter, are not visible in this view.
87 static void Timing3(int num_iters) {
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
93 RE line_matcher(".*\n");
95 StringPiece text(text_string);
97 while (line_matcher.Consume(&text)) {
100 printf("Matched %d lines\n", counter);
103 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
// Memory-growth check. Loops 100000 times, samples VirtualProcessSize() into
// initial_size (reported as "Size after 50000"), formats a distinct string
// "pat%09d" per iteration into buf, then samples the size again and CHECKs
// that the relative growth (final - initial) / final is below 2%.
// NOTE(review): the guard around the initial_size sample and the statement
// that consumes buf are not visible in this view; presumably the sample is
// taken at i == 50000 and buf is compiled as a pattern -- confirm.
105 static void LeakTest() {
106 // Check for memory leaks
107 unsigned long long initial_size = 0;
108 for (int i = 0; i < 100000; i++) {
110 initial_size = VirtualProcessSize();
111 printf("Size after 50000: %llu\n", initial_size);
113 char buf[100]; // definitely big enough
114 sprintf(buf, "pat%09d", i);
117 uint64 final_size = VirtualProcessSize();
118 printf("Size after 100000: %llu\n", final_size);
119 const double growth = double(final_size - initial_size) / final_size;
120 printf("Growth: %0.2f%%", growth * 100);
121 CHECK(growth < 0.02); // Allow < 2% growth
// Exercises numeric parsing through the Hex(), Octal() and CRadix() argument
// wrappers and through plain decimal pointers, for each integer type listed.
// Each CHECK_* helper macro matches the stringified literal, then compares the
// parsed value v against the same literal compiled as a C++ constant.
// NOTE(review): the declaration of v inside each macro and the matching
// #undef / #endif lines are not visible in this view.
126 static void RadixTests() {
127 printf("Testing hex\n");
// CHECK_HEX: parses `value` once via Hex() and once, with a "0x" prefix, via
// CRadix(); both results must equal 0x<value>.
129 #define CHECK_HEX(type, value) \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
153 printf("Testing octal\n");
// CHECK_OCTAL: parses `value` once via Octal() and once, with a "0" prefix,
// via CRadix(); both results must equal 0<value>.
155 #define CHECK_OCTAL(type, value) \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
179 printf("Testing decimal\n");
// CHECK_DECIMAL: parses `value` once through a plain pointer and once via
// CRadix() with no prefix; both results must equal the literal itself.
181 #define CHECK_DECIMAL(type, value) \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
// Table-driven test of RE::Replace and RE::GlobalReplace. For every entry of
// tests[] it compiles t->regexp with PCRE_NEWLINE_CRLF (plus UTF-8 when
// support_utf8 is set), applies t->rewrite to a copy of t->original, and
// checks the single-replacement result, the global result and the count
// returned by GlobalReplace. Two follow-up cases then run "b*" -> "bb" over
// "aa\r\naa\r\n" under PCRE_NEWLINE_CR and PCRE_NEWLINE_LF.
// NOTE(review): the ReplaceTest struct header, most table rows and the
// preprocessor guard that picks one of the two support_utf8 definitions are
// not visible in this view.
207 static void TestReplace() {
208 printf("Testing Replace\n");
213 const char *original;
216 int global_count; // the expected return value from ReplaceAll
218 static const ReplaceTest tests[] = {
219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221 "the quick brown fox jumps over the lazy dogs.",
222 "ethay quick brown fox jumps over the lazy dogs.",
223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
227 "paul.haahr@google.com",
228 "paul-NOSPAM.haahr@google.com",
229 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
283 "bbabbabb\nbbabbabb\nbb",
289 "bbabbabb\rbbabbabb\rbb",
295 "bbabbabb\r\nbbabbabb\r\nbb",
300 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
301 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
306 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
307 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
// Sentinel row: the loop below stops at the first entry whose original is NULL.
312 { "", NULL, NULL, NULL, NULL, 0 }
316 const bool support_utf8 = true;
318 const bool support_utf8 = false;
321 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string one(t->original);
325 CHECK(re.Replace(t->rewrite, &one));
326 CHECK_EQ(one, t->single);
327 string all(t->original);
328 const int replace_count = re.GlobalReplace(t->rewrite, &all);
329 CHECK_EQ(all, t->global);
330 CHECK_EQ(replace_count, t->global_count);
333 // One final test: test \r\n replacement when we're not in CRLF mode
335 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336 assert(re.error().empty());
337 string all("aa\r\naa\r\n");
338 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
342 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343 assert(re.error().empty());
344 string all("aa\r\naa\r\n");
345 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
348 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349 // Alas, the answer depends on how pcre was compiled.
// Tests RE::Extract: a rewrite string with \1 and \2 reorders two captured
// groups, \0 stands for the whole match, and a failed Extract is expected to
// leave the output string s holding its previous value ("'foo'").
// NOTE(review): the declaration of s is not visible in this view.
352 static void TestExtract() {
353 printf("Testing Extract\n");
357 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
358 CHECK_EQ(s, "kremvax!boris");
360 // check the RE interface as well
361 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
362 CHECK_EQ(s, "'foo'");
363 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
364 CHECK_EQ(s, "'foo'");
// Tests RE::Consume on the input " aaa b!@#$@#$cccc": two successive calls
// must succeed (the first yielding "aaa") and the third must fail.
// NOTE(review): the declaration of word and the check on the second word are
// not visible in this view.
367 static void TestConsume() {
368 printf("Testing Consume\n");
372 string s(" aaa b!@#$@#$cccc");
373 StringPiece input(s);
375 RE r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
376 CHECK(r.Consume(&input, &word));
377 CHECK_EQ(word, "aaa");
378 CHECK(r.Consume(&input, &word));
380 CHECK(! r.Consume(&input, &word));
// Tests RE::FindAndConsume on the input " aaa b!@#$@#$cccc": three successive
// calls must succeed (the first yielding "aaa", the third "cccc") and the
// fourth must fail.
// NOTE(review): the declaration of word and the check on the second word are
// not visible in this view.
383 static void TestFindAndConsume() {
384 printf("Testing FindAndConsume\n");
388 string s(" aaa b!@#$@#$cccc");
389 StringPiece input(s);
391 RE r("(\\w+)"); // matches a word
392 CHECK(r.FindAndConsume(&input, &word));
393 CHECK_EQ(word, "aaa");
394 CHECK(r.FindAndConsume(&input, &word));
396 CHECK(r.FindAndConsume(&input, &word));
397 CHECK_EQ(word, "cccc");
398 CHECK(! r.FindAndConsume(&input, &word));
// Tests PartialMatch with three output arguments against an alternation of
// three capture groups, "(foo)|(bar)|(baz)": whichever alternative matches,
// the corresponding word must hold the matched text. Also checks that a
// non-matching input fails, and that FullMatch succeeds when the only capture
// group belongs to an alternative that did not participate in the match.
// NOTE(review): the declarations of word1..word3 and a, and the checks on the
// non-participating groups, are not visible in this view.
401 static void TestMatchNumberPeculiarity() {
402 printf("Testing match-number peculiaraity\n");
408 RE r("(foo)|(bar)|(baz)");
409 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
410 CHECK_EQ(word1, "foo");
413 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
415 CHECK_EQ(word2, "bar");
417 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
420 CHECK_EQ(word3, "baz");
421 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
424 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
// Tests the match_limit and match_limit_recursion options with the
// backtracking-heavy pattern "(\w+)*b". With a generous limit (8192 / 50)
// text_good partially matches; with a tight limit (1024 / 10) the same match
// is expected to fail. text_bad and every FullMatch are expected to fail in
// all four configurations.
428 static void TestRecursion() {
429 printf("Testing recursion\n");
431 // Get one string that passes (sometimes), one that never does.
432 string text_good("abcdefghijk");
433 string text_bad("acdefghijkl");
435 // According to pcretest, matching text_good against (\w+)*b
436 // requires match_limit of at least 8192, and match_recursion_limit
439 RE_Options options_ml;
440 options_ml.set_match_limit(8192);
441 RE re("(\\w+)*b", options_ml);
442 CHECK(re.PartialMatch(text_good) == true);
443 CHECK(re.PartialMatch(text_bad) == false);
444 CHECK(re.FullMatch(text_good) == false);
445 CHECK(re.FullMatch(text_bad) == false);
// Same options object, lowered limit; re2 is compiled after the change.
447 options_ml.set_match_limit(1024);
448 RE re2("(\\w+)*b", options_ml);
449 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
450 CHECK(re2.PartialMatch(text_bad) == false);
451 CHECK(re2.FullMatch(text_good) == false);
452 CHECK(re2.FullMatch(text_bad) == false);
454 RE_Options options_mlr;
455 options_mlr.set_match_limit_recursion(50);
456 RE re3("(\\w+)*b", options_mlr);
457 CHECK(re3.PartialMatch(text_good) == true);
458 CHECK(re3.PartialMatch(text_bad) == false);
459 CHECK(re3.FullMatch(text_good) == false);
460 CHECK(re3.FullMatch(text_bad) == false);
// Same options object, lowered recursion limit; re4 is compiled afterwards.
462 options_mlr.set_match_limit_recursion(10);
463 RE re4("(\\w+)*b", options_mlr);
464 CHECK(re4.PartialMatch(text_good) == false);
465 CHECK(re4.PartialMatch(text_bad) == false);
466 CHECK(re4.FullMatch(text_good) == false);
467 CHECK(re4.FullMatch(text_bad) == false);
470 // A meta-quoted string, interpreted as a pattern, should always match
471 // the original unquoted string.
// Quotes `unquoted` with RE::QuoteMeta, compiles the result with `options`
// (default-constructed RE_Options when omitted) and CHECKs that the compiled
// pattern fully matches `unquoted`.
472 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473 string quoted = RE::QuoteMeta(unquoted);
474 RE re(quoted, options);
475 CHECK(re.FullMatch(unquoted));
478 // A string containing meaningful regexp characters, once meta-quoted,
479 // should generally not match a string that the unquoted pattern matches.
// Quotes `unquoted` with RE::QuoteMeta, compiles the result with `options`
// and CHECKs that the compiled pattern does NOT fully match
// `should_not_match`.
480 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481 RE_Options options = RE_Options()) {
482 string quoted = RE::QuoteMeta(unquoted);
483 RE re(quoted, options);
484 CHECK(!re.FullMatch(should_not_match));
487 // Tests that quoted meta characters match their original strings,
488 // and that a few things that shouldn't match indeed do not.
// Runs TestQuoteMeta with default options over plain text and over strings
// containing regexp metacharacters (dots, backslashes, brackets, groups,
// quantifiers, a negative lookahead).
// NOTE(review): the name is spelled "Quota" rather than "Quote"; it is called
// under this name elsewhere in the file, so it is left unchanged.
489 static void TestQuotaMetaSimple() {
490 TestQuoteMeta("foo");
491 TestQuoteMeta("foo.bar");
492 TestQuoteMeta("foo\\.bar");
493 TestQuoteMeta("[1-9]");
494 TestQuoteMeta("1.5-2.0?");
495 TestQuoteMeta("\\d");
496 TestQuoteMeta("Who doesn't like ice cream?");
497 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498 TestQuoteMeta("((?!)xxx).*yyy");
// Runs NegativeTestQuoteMeta with default options: each first argument, once
// quoted, must not fully match the second argument. Most pairs are strings
// that the unquoted pattern would have matched.
502 static void TestQuoteMetaSimpleNegative() {
503 NegativeTestQuoteMeta("foo", "bar");
504 NegativeTestQuoteMeta("...", "bar");
505 NegativeTestQuoteMeta("\\.", ".");
506 NegativeTestQuoteMeta("\\.", "..");
507 NegativeTestQuoteMeta("(a)", "a");
508 NegativeTestQuoteMeta("(a|b)", "a");
509 NegativeTestQuoteMeta("(a|b)", "(a)");
510 NegativeTestQuoteMeta("(a|b)", "a|b");
511 NegativeTestQuoteMeta("[0-9]", "0");
512 NegativeTestQuoteMeta("[0-9]", "0-9");
513 NegativeTestQuoteMeta("[0-9]", "[9]");
514 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
// Runs TestQuoteMeta with default options on a string containing the single
// high byte 0xb2 (presumably Latin-1 superscript two, given the function
// name).
517 static void TestQuoteMetaLatin1() {
518 TestQuoteMeta("3\xb2 = 9");
// Runs TestQuoteMeta under pcrecpp::UTF8() on strings containing 2-, 3- and
// 4-byte UTF-8 sequences, then once without UTF8() on the same bytes, and
// ends with a NegativeTestQuoteMeta call.
// NOTE(review): the remaining arguments of the final call and any
// preprocessor guard around this body are not visible in this view.
521 static void TestQuoteMetaUtf8() {
523 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
524 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
525 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
526 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character
527 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime)
528 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
529 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
530 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol)
// Entry point for the QuoteMeta tests: prints a banner and runs the simple,
// simple-negative and Latin-1 groups.
// NOTE(review): any further calls (e.g. the UTF-8 group) are not visible in
// this view.
536 static void TestQuoteMetaAll() {
537 printf("Testing QuoteMeta\n");
538 TestQuotaMetaSimple();
539 TestQuoteMetaSimpleNegative();
540 TestQuoteMetaLatin1();
545 // Options tests contributed by
546 // Giuseppe Maxia, CTO, Stardata s.r.l.
// Helper for the option tests: prints which option is under test, runs
// RE(regex, options) against str with either FullMatch or PartialMatch,
// storing the first capture in `captured`, and CHECKs that it equals
// `expected`.
// NOTE(review): most of the parameter list, the declaration of `captured`
// and the conditions selecting FullMatch vs. PartialMatch and guarding the
// printf calls are not visible in this view.
549 static void GetOneOptionResult(
550 const char *option_name,
557 printf("Testing Option <%s>\n", option_name);
559 printf("/%s/ finds \"%s\" within \"%s\" \n",
// The boolean result of the match is not checked here; only the captured
// text is compared below.
565 RE(regex,options).FullMatch(str, &captured);
567 RE(regex,options).PartialMatch(str, &captured);
568 CHECK_EQ(captured, expected);
// Helper for the option tests: prints which option is under test, then CHECKs
// that RE(regex, options) does (or does not) match str, using FullMatch or
// PartialMatch. `assertive` (default true) appears to select between the
// "must match" and "must not match" forms, as the printf wording below
// suggests.
// NOTE(review): most of the parameter list and the if/else structure that
// picks among the four CHECKs are not visible in this view.
571 static void TestOneOption(
572 const char *option_name,
577 bool assertive = true) {
579 printf("Testing Option <%s>\n", option_name);
581 printf("'%s' %s /%s/ \n",
583 (assertive? "matches" : "doesn't match"),
587 CHECK(RE(regex,options).FullMatch(str));
589 CHECK(RE(regex,options).PartialMatch(str));
592 CHECK(!RE(regex,options).FullMatch(str));
594 CHECK(!RE(regex,options).PartialMatch(str));
// Tests the caseless option set three ways: set_caseless(true) on an existing
// RE_Options, chained on options2, and through the pcrecpp::CASELESS()
// function. The final call turns the option off and passes assertive=false.
// NOTE(review): the declarations of options and options2 are not visible in
// this view.
598 static void Test_CASELESS() {
602 options.set_caseless(true);
603 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
604 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
605 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
607 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
608 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
609 options.set_caseless(false);
610 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
// Tests the multiline option with "^cruel$" against a three-line string, set
// via set_multiline(true), via a chained call on options2 and via
// pcrecpp::MULTILINE(). The final call turns the option off and passes
// assertive=false.
// NOTE(review): the declarations of options and options2 are not visible in
// this view.
613 static void Test_MULTILINE() {
616 const char *str = "HELLO\n" "cruel\n" "world\n";
618 options.set_multiline(true);
619 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
620 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
621 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
622 options.set_multiline(false);
623 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
// Tests the dotall option with "HELLO.*world" against a string containing
// newlines, set via set_dotall(true), via a chained call on options2 and via
// pcrecpp::DOTALL(). The final call turns the option off and passes
// assertive=false.
// NOTE(review): the declarations of options and options2 are not visible in
// this view.
626 static void Test_DOTALL() {
629 const char *str = "HELLO\n" "cruel\n" "world";
631 options.set_dotall(true);
632 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
633 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
634 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
635 options.set_dotall(false);
636 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
// Tests the dollar_endonly option with "world$" against "HELLO world\n". The
// first call runs before the option is set; the two calls after
// set_dollar_endonly(true) pass assertive=false.
// NOTE(review): the declarations of options and options2 are not visible in
// this view.
639 static void Test_DOLLAR_ENDONLY() {
642 const char *str = "HELLO world\n";
644 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
645 options.set_dollar_endonly(true);
646 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
647 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
// Tests the extra option with the pattern "\HELL\O" against "HELLO". With
// set_extra(true) (on options and on a fresh RE_Options) the calls pass
// assertive=false; with the option off the default assertive=true is used.
// NOTE(review): the declaration of options is not visible in this view.
650 static void Test_EXTRA() {
652 const char *str = "HELLO";
654 options.set_extra(true);
655 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
656 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
657 options.set_extra(false);
658 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
// Tests the extended option with "HELLO world" against the same text. With
// the option on (via set_extended(true), a chained call on options2, or
// pcrecpp::EXTENDED()) the literal-space pattern is passed with
// assertive=false; with the option off the default assertive=true is used.
// NOTE(review): the declarations of options/options2 and the arguments of the
// two multi-line TestOneOption calls are not visible in this view.
661 static void Test_EXTENDED() {
664 const char *str = "HELLO world";
666 options.set_extended(true);
667 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
668 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
669 TestOneOption("EXTENDED (class)",
677 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
678 TestOneOption("EXTENDED (function)",
686 options.set_extended(false);
687 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
// Tests capture behaviour around the no_auto_capture option: first extracts
// group 1 of "(world|universe)$" from "HELLO world" and expects "world"; then
// sets no_auto_capture on `options` and calls Extract again on the same `re`.
// NOTE(review): in the visible lines `re` is not rebuilt after the option is
// changed and the second Extract's result is ignored, so the final CHECK_EQ
// may only be observing the value left by the first Extract -- confirm
// against the full file. The declarations of options and captured are not
// visible in this view.
690 static void Test_NO_AUTO_CAPTURE() {
692 const char *str = "HELLO world";
695 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
697 printf("parentheses capture text\n");
698 RE re("(world|universe)$", options);
699 CHECK(re.Extract("\\1", str , &captured));
700 CHECK_EQ(captured, "world");
701 options.set_no_auto_capture(true);
702 printf("testing Option <NO_AUTO_CAPTURE>\n");
704 printf("parentheses do not capture text\n");
705 re.Extract("\\1",str, &captured );
706 CHECK_EQ(captured, "world");
// Tests the ungreedy option via GetOneOptionResult. With the option on,
// "('.*')" is expected to capture the short "'this'" and "('.*?')" the long
// "'this' is the 'world'"; with the option off the expectations are swapped.
// NOTE(review): the declaration of options is not visible in this view.
709 static void Test_UNGREEDY() {
711 const char *str = "HELLO, 'this' is the 'world'";
713 options.set_ungreedy(true);
714 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
715 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
716 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
718 options.set_ungreedy(false);
719 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
720 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
// Tests set_all_options() with raw PCRE_* flag combinations: CASELESS|DOTALL,
// then 0 (assertive=false), then MULTILINE|EXTENDED (also supplied through
// the RE_Options constructor and through a chained form), and 0 again
// (assertive=false).
// NOTE(review): the declaration of options and several arguments of the
// multi-line TestOneOption calls are not visible in this view.
723 static void Test_all_options() {
724 const char *str = "HELLO\n" "cruel\n" "world";
726 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
728 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
729 options.set_all_options(0);
730 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
731 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
733 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
734 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
737 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
740 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
748 options.set_all_options(0);
749 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
// Entry point for the option tests: prints a banner and runs the individual
// Test_* functions.
// NOTE(review): only two of the calls are visible in this view.
753 static void TestOptions() {
754 printf("Testing Options\n");
758 Test_DOLLAR_ENDONLY();
760 Test_NO_AUTO_CAPTURE();
// Tests RE construction, copying and assignment: `orig` is built with dotall
// and must match str; copy1 must match as well; copy2 starts as a
// non-matching pattern and must match after being reassigned. All three must
// still match after the self-assignment step.
// NOTE(review): the declaration of options, the construction of copy1, the
// assignments to copy2 and the self-assignment statements are not visible in
// this view.
766 static void TestConstructors() {
767 printf("Testing constructors\n");
770 options.set_dotall(true);
771 const char *str = "HELLO\n" "cruel\n" "world";
773 RE orig("HELLO.*world", options);
774 CHECK(orig.FullMatch(str));
777 CHECK(copy1.FullMatch(str));
779 RE copy2("not a match");
780 CHECK(!copy2.FullMatch(str));
782 CHECK(copy2.FullMatch(str));
784 CHECK(copy2.FullMatch(str));
786 // Make sure when we assign to ourselves, nothing bad happens
790 CHECK(orig.FullMatch(str));
791 CHECK(copy1.FullMatch(str));
792 CHECK(copy2.FullMatch(str));
795 int main(int argc, char** argv) {
796 // Treat any flag as --help
797 if (argc > 1 && argv[1][0] == '-') {
798 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
799 " If 'timingX ###' is specified, run the given timing test\n"
800 " with the given number of iterations, rather than running\n"
801 " the default corectness test.\n", argv[0]);
806 if ( argc == 2 || atoi(argv[2]) == 0) {
807 printf("timing mode needs a num-iters argument\n");
810 if (!strcmp(argv[1], "timing1"))
811 Timing1(atoi(argv[2]));
812 else if (!strcmp(argv[1], "timing2"))
813 Timing2(atoi(argv[2]));
814 else if (!strcmp(argv[1], "timing3"))
815 Timing3(atoi(argv[2]));
817 printf("Unknown argument '%s'\n", argv[1]);
821 printf("Testing FullMatch\n");
826 /***** FullMatch with no args *****/
828 CHECK(RE("h.*o").FullMatch("hello"));
829 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
830 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
831 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
832 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
833 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
835 /***** FullMatch with args *****/
838 CHECK(RE("\\d+").FullMatch("1001"));
841 CHECK(RE("(\\d+)").FullMatch("1001", &i));
843 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
845 CHECK(!RE("()\\d+").FullMatch("10", &i));
846 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
849 // Digits surrounding integer-arg
850 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
852 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
854 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
856 CHECK(RE("(\\d)").PartialMatch("1234", &i));
858 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
862 CHECK(RE("h(.*)o").FullMatch("hello", &s));
863 CHECK_EQ(s, string("ell"));
867 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
868 CHECK_EQ(sp.size(), 4);
869 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
873 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
874 CHECK_EQ(s, string("ruby"));
877 // Ignore non-void* NULL arg
878 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
879 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
880 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
881 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
882 #ifdef HAVE_LONG_LONG
883 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
885 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
886 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
888 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
889 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
890 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
891 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
892 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
893 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
896 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
897 CHECK_EQ(s, string("ruby"));
903 CHECK(RE("(H)ello").FullMatch("Hello", &c));
908 CHECK(RE("(H)ello").FullMatch("Hello", &c));
909 CHECK_EQ(c, static_cast<unsigned char>('H'));
913 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
914 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
915 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
916 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
917 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
918 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
922 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
923 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
924 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
925 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
929 static const int max_value = 0x7fffffff;
930 static const int min_value = -max_value - 1;
931 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
932 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
933 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
934 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
935 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
936 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
940 static const unsigned int max_value = 0xfffffffful;
941 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
942 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
943 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
945 #ifdef HAVE_LONG_LONG
946 # if defined(__MINGW__) || defined(__MINGW32__)
955 static const long long max_value = 0x7fffffffffffffffLL;
956 static const long long min_value = -max_value - 1;
957 char buf[32]; // definitely big enough for a long long
959 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
960 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
962 sprintf(buf, LLD, max_value);
963 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
965 sprintf(buf, LLD, min_value);
966 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
968 sprintf(buf, LLD, max_value);
969 assert(buf[strlen(buf)-1] != '9');
970 buf[strlen(buf)-1]++;
971 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
973 sprintf(buf, LLD, min_value);
974 assert(buf[strlen(buf)-1] != '9');
975 buf[strlen(buf)-1]++;
976 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
979 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
981 unsigned long long v;
983 static const unsigned long long max_value = 0xffffffffffffffffULL;
984 char buf[32]; // definitely big enough for a unsigned long long
986 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
987 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
989 sprintf(buf, LLU, max_value);
990 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
992 assert(buf[strlen(buf)-1] != '9');
993 buf[strlen(buf)-1]++;
994 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
999 CHECK(RE("(.*)").FullMatch("100", &v));
1000 CHECK(RE("(.*)").FullMatch("-100.", &v));
1001 CHECK(RE("(.*)").FullMatch("1e23", &v));
1005 CHECK(RE("(.*)").FullMatch("100", &v));
1006 CHECK(RE("(.*)").FullMatch("-100.", &v));
1007 CHECK(RE("(.*)").FullMatch("1e23", &v));
1010 // Check that matching is fully anchored
1011 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1012 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1013 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1014 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1017 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1018 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1019 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1022 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1023 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1024 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1025 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1027 // Check full-match handling (needs '$' tacked on internally)
1028 CHECK(RE("fo|foo").FullMatch("fo"));
1029 CHECK(RE("fo|foo").FullMatch("foo"));
1030 CHECK(RE("fo|foo$").FullMatch("fo"));
1031 CHECK(RE("fo|foo$").FullMatch("foo"));
1032 CHECK(RE("foo$").FullMatch("foo"));
1033 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1034 CHECK(!RE("fo|bar").FullMatch("fox"));
1036 // Uncomment the following if we change the handling of '$' to
1037 // prevent it from matching a trailing newline
1039 // Check that we don't get bitten by pcre's special handling of a
1040 // '\n' at the end of the string matching '$'
1041 CHECK(!RE("foo$").PartialMatch("foo\n"));
1046 CHECK(RE("").FullMatch(""));
1048 memset(a, 0, sizeof(0));
1049 CHECK(RE("(\\d){1}").FullMatch("1",
1053 memset(a, 0, sizeof(0));
1054 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1059 memset(a, 0, sizeof(0));
1060 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1061 &a[0], &a[1], &a[2]));
1066 memset(a, 0, sizeof(0));
1067 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1068 &a[0], &a[1], &a[2], &a[3]));
1074 memset(a, 0, sizeof(0));
1075 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1076 &a[0], &a[1], &a[2],
1084 memset(a, 0, sizeof(0));
1085 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1086 &a[0], &a[1], &a[2],
1087 &a[3], &a[4], &a[5]));
1095 memset(a, 0, sizeof(0));
1096 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1097 &a[0], &a[1], &a[2], &a[3],
1098 &a[4], &a[5], &a[6]));
1107 memset(a, 0, sizeof(0));
1108 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1109 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1111 &a[0], &a[1], &a[2], &a[3],
1112 &a[4], &a[5], &a[6], &a[7],
1113 &a[8], &a[9], &a[10], &a[11],
1114 &a[12], &a[13], &a[14], &a[15]));
1132 /***** PartialMatch *****/
1134 printf("Testing PartialMatch\n");
1136 CHECK(RE("h.*o").PartialMatch("hello"));
1137 CHECK(RE("h.*o").PartialMatch("othello"));
1138 CHECK(RE("h.*o").PartialMatch("hello!"));
1139 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1141 /***** other tests *****/
1147 TestFindAndConsume();
1149 TestMatchNumberPeculiarity();
1151 // Check the pattern() accessor
1153 const string kPattern = "http://([^/]+)/.*";
1154 const RE re(kPattern);
1155 CHECK_EQ(kPattern, re.pattern());
1158 // Check RE error field.
1161 CHECK(re.error().empty()); // Must have no error
1165 // Check UTF-8 handling
1167 printf("Testing UTF-8 handling\n");
1169 // Three Japanese characters (nihongo)
1170 const unsigned char utf8_string[] = {
1171 0xe6, 0x97, 0xa5, // 65e5
1172 0xe6, 0x9c, 0xac, // 672c
1173 0xe8, 0xaa, 0x9e, // 8a9e
1176 const unsigned char utf8_pattern[] = {
1178 0xe6, 0x9c, 0xac, // 672c
1183 // Both should match in either mode, bytes or UTF-8
1184 RE re_test1(".........");
1185 CHECK(re_test1.FullMatch(utf8_string));
1186 RE re_test2("...", pcrecpp::UTF8());
1187 CHECK(re_test2.FullMatch(utf8_string));
1189 // Check that '.' matches one byte or UTF-8 character
1190 // according to the mode.
1193 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1194 CHECK_EQ(ss, string("\xe6"));
1195 RE re_test4("(.)", pcrecpp::UTF8());
1196 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1197 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1199 // Check that string matches itself in either mode
1200 RE re_test5(utf8_string);
1201 CHECK(re_test5.FullMatch(utf8_string));
1202 RE re_test6(utf8_string, pcrecpp::UTF8());
1203 CHECK(re_test6.FullMatch(utf8_string));
1205 // Check that pattern matches string only in UTF8 mode
1206 RE re_test7(utf8_pattern);
1207 CHECK(!re_test7.FullMatch(utf8_string));
1208 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1209 CHECK(re_test8.FullMatch(utf8_string));
1212 // Check that ungreedy, UTF8 regular expressions don't match when they
1213 // oughtn't -- see bug 82246.
1215 // This code always worked.
1216 const char* pattern = "\\w+X";
1217 const string target = "a aX";
1218 RE match_sentence(pattern);
1219 RE match_sentence_re(pattern, pcrecpp::UTF8());
1221 CHECK(!match_sentence.FullMatch(target));
1222 CHECK(!match_sentence_re.FullMatch(target));
1226 const char* pattern = "(?U)\\w+X";
1227 const string target = "a aX";
1228 RE match_sentence(pattern);
1229 RE match_sentence_re(pattern, pcrecpp::UTF8());
1231 CHECK(!match_sentence.FullMatch(target));
1232 CHECK(!match_sentence_re.FullMatch(target));
1234 #endif /* def SUPPORT_UTF8 */
1236 printf("Testing error reporting\n");
1238 { RE re("a\\1"); CHECK(!re.error().empty()); }
1241 CHECK(!re.error().empty());
1245 CHECK(!re.error().empty());
1248 RE re("a[[:foobar:]]");
1249 CHECK(!re.error().empty());
1253 CHECK(!re.error().empty());
1257 CHECK(!re.error().empty());
1260 // Test that recursion is stopped
1264 if (getenv("VERBOSE_TEST") != NULL)
1265 VERBOSE_TEST = true;
1268 // Test the constructors