1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
19 * More complete testing of regular expression functionality is done with the C++ tests.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
/*
 * TEST_ASSERT_SUCCESS(status)
 *   When `status` holds an ICU failure code, log a data error naming the
 *   current file, line and the symbolic error name. Execution continues
 *   either way; nothing is returned.
 *   The expansion is a plain brace block (not do/while) because some call
 *   sites invoke the assertion macros without a trailing semicolon.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
/*
 * TEST_ASSERT(expr)
 *   Log a test failure (file, line and the stringized expression) when
 *   `expr` compares equal to FALSE. Execution continues either way.
 *   The expansion is a plain brace block (not do/while) because some call
 *   sites invoke it without a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
42 * TEST_SETUP and TEST_TEARDOWN
43 * macros to handle the boilerplate around setting up regex test cases.
44 * parameters to setup:
45 * pattern: The regex pattern, a (char *) null terminated C string.
46 * testString: The string data, also a (char *) C string.
47 * flags: Regex flags to set when compiling the pattern
49 * Put arbitrary test code between SETUP and TEARDOWN.
50 * 're' is the compiled, ready-to-go regular expression.
/*
 * TEST_SETUP(pattern, testString, flags)
 *   Opens a brace block, compiles `pattern` into the caller's `re` with
 *   uregex_openC(), converts `testString` into a freshly malloc'ed UChar
 *   buffer (`srcString`) and installs it as the regex input text.
 *   Uses the caller's `status` and `re` variables; `testString` is evaluated
 *   more than once, so it must be free of side effects.
 *   If the allocation fails, `status` is set to U_MEMORY_ALLOCATION_ERROR
 *   (unless it already holds a failure) and the copy is skipped, so the test
 *   body is bypassed instead of writing through a NULL pointer.
 *   The expansion deliberately ends inside `if (U_SUCCESS(status)) {`;
 *   the matching TEST_TEARDOWN closes the open blocks.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)(strlen(testString)+1)); \
    } \
    uregex_setText(re, srcString, -1, &status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {
63 #define TEST_TEARDOWN \
65 TEST_ASSERT_SUCCESS(status); \
72 * @param expected utf-8 array of bytes to be expected
74 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
75 char buf_inside_macro[120];
76 int32_t len = (int32_t)strlen(expected);
79 u_austrncpy(buf_inside_macro, (actual), len+1);
80 buf_inside_macro[len+2] = 0;
81 success = (strcmp((expected), buf_inside_macro) == 0);
83 u_austrncpy(buf_inside_macro, (actual), len);
84 buf_inside_macro[len+1] = 0;
85 success = (strncmp((expected), buf_inside_macro, len) == 0);
87 if (success == FALSE) {
88 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
89 file, line, (expected), buf_inside_macro);
93 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
96 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
100 UBool stringsEqual = TRUE;
101 utext_setNativeIndex(utext, 0);
103 U8_NEXT_UNSAFE(utf8, u8i, u8c);
104 utc = utext_next32(utext);
105 if (u8c == 0 && utc == U_SENTINEL) {
108 if (u8c != utc || u8c == 0) {
109 stringsEqual = FALSE;
/*
 * Check that the UText `actual` holds the same text as the UTF-8 C string
 * `expected` (compared with equals_utf8_utext). On a mismatch, log a failure
 * tagged with the caller's `file` and `line`, then walk `actual` from native
 * index 0 to report what it really contains.
 */
117 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
118 utext_setNativeIndex(actual, 0);
119 if (!equals_utf8_utext(expected, actual)) {
/* The message deliberately ends with an opening quote; the dump of `actual` follows it. */
121 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
/* Re-read `actual` from the start, one code point per iteration, until U_SENTINEL. */
122 c = utext_next32From(actual, 0);
123 while (c != U_SENTINEL) {
/* Code points strictly between 0x20 and 0x7e are singled out by this test. */
124 if (0x20<c && c <0x7e) {
129 c = UTEXT_NEXT32(actual);
136 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
137 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
/* Call-site wrapper: forwards to test_assert_utext() with the caller's file and line. */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext((expected), (actual), __FILE__, __LINE__)
141 static UBool testUTextEqual(UText *uta, UText *utb) {
144 utext_setNativeIndex(uta, 0);
145 utext_setNativeIndex(utb, 0);
147 ca = utext_next32(uta);
148 cb = utext_next32(utb);
152 } while (ca != U_SENTINEL);
159 static void TestRegexCAPI(void);
160 static void TestBug4315(void);
161 static void TestUTextAPI(void);
162 static void TestRefreshInput(void);
163 static void TestBug8421(void);
164 static void TestBug10815(void);
166 void addURegexTest(TestNode** root);
168 void addURegexTest(TestNode** root)
170 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
171 addTest(root, &TestBug4315, "regex/TestBug4315");
172 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
173 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
174 addTest(root, &TestBug8421, "regex/TestBug8421");
175 addTest(root, &TestBug10815, "regex/TestBug10815");
179 * Call back function and context struct used for testing
180 * regular expression user callbacks. This test is mostly the same as
181 * the corresponding C++ test in intltest.
183 typedef struct callBackContext {
/*
 * Callback used when testing regular expression user callbacks.
 *
 * context  Really a callBackContext; the const is cast away because the
 *          callback records progress in it.
 * steps    Step count reported for this invocation. It is expected to be
 *          exactly one more than the value seen on the previous call;
 *          anything else is logged as an error.
 *
 * Returns TRUE while the context's numCalls is still below maxCalls,
 * FALSE otherwise.
 */
189 static UBool U_EXPORT2 U_CALLCONV
190 TestCallbackFn(const void *context, int32_t steps) {
191 callBackContext *info = (callBackContext *)context;
/* Successive invocations must report consecutive step counts. */
192 if (info->lastSteps+1 != steps) {
193 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
/* Remember this count so the next invocation can be checked against it. */
195 info->lastSteps = steps;
197 return (info->numCalls < info->maxCalls);
201 * Regular Expression C API Tests
203 static void TestRegexCAPI(void) {
204 UErrorCode status = U_ZERO_ERROR;
205 URegularExpression *re;
209 memset(&minus1, -1, sizeof(minus1));
212 /* Minimalist open/close */
212 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
213 re = uregex_open(pat, -1, 0, 0, &status);
214 if (U_FAILURE(status)) {
215 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
220 /* Open with all flag values set */
221 status = U_ZERO_ERROR;
222 re = uregex_open(pat, -1,
223 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
225 TEST_ASSERT_SUCCESS(status);
228 /* Open with an invalid flag */
229 status = U_ZERO_ERROR;
230 re = uregex_open(pat, -1, 0x40000000, 0, &status);
231 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
234 /* Open with an unimplemented flag */
235 status = U_ZERO_ERROR;
236 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
237 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
240 /* openC with an invalid parameter */
241 status = U_ZERO_ERROR;
242 re = uregex_openC(NULL,
243 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
244 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
246 /* openC with an incoming failure status: status must be left unchanged and NULL returned */
247 status = U_USELESS_COLLATOR_ERROR;
248 re = uregex_openC(NULL,
249 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
250 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
252 /* openC open from a C string */
256 status = U_ZERO_ERROR;
257 re = uregex_openC("abc*", 0, 0, &status);
258 TEST_ASSERT_SUCCESS(status);
259 p = uregex_pattern(re, &len, &status);
260 TEST_ASSERT_SUCCESS(status);
262 /* The TEST_ASSERT_SUCCESS above should change too... */
263 if(U_SUCCESS(status)) {
264 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
265 TEST_ASSERT(u_strcmp(pat, p) == 0);
266 TEST_ASSERT(len==(int32_t)strlen("abc*"));
271 /* TODO: Open with ParseError parameter */
278 URegularExpression *clone1;
279 URegularExpression *clone2;
280 URegularExpression *clone3;
281 UChar testString1[30];
282 UChar testString2[30];
286 status = U_ZERO_ERROR;
287 re = uregex_openC("abc*", 0, 0, &status);
288 TEST_ASSERT_SUCCESS(status);
289 clone1 = uregex_clone(re, &status);
290 TEST_ASSERT_SUCCESS(status);
291 TEST_ASSERT(clone1 != NULL);
293 status = U_ZERO_ERROR;
294 clone2 = uregex_clone(re, &status);
295 TEST_ASSERT_SUCCESS(status);
296 TEST_ASSERT(clone2 != NULL);
299 status = U_ZERO_ERROR;
300 clone3 = uregex_clone(clone2, &status);
301 TEST_ASSERT_SUCCESS(status);
302 TEST_ASSERT(clone3 != NULL);
304 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
305 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
307 status = U_ZERO_ERROR;
308 uregex_setText(clone1, testString1, -1, &status);
309 TEST_ASSERT_SUCCESS(status);
310 result = uregex_lookingAt(clone1, 0, &status);
311 TEST_ASSERT_SUCCESS(status);
312 TEST_ASSERT(result==TRUE);
314 status = U_ZERO_ERROR;
315 uregex_setText(clone2, testString2, -1, &status);
316 TEST_ASSERT_SUCCESS(status);
317 result = uregex_lookingAt(clone2, 0, &status);
318 TEST_ASSERT_SUCCESS(status);
319 TEST_ASSERT(result==FALSE);
320 result = uregex_find(clone2, 0, &status);
321 TEST_ASSERT_SUCCESS(status);
322 TEST_ASSERT(result==TRUE);
324 uregex_close(clone1);
325 uregex_close(clone2);
326 uregex_close(clone3);
334 const UChar *resultPat;
336 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
337 status = U_ZERO_ERROR;
338 re = uregex_open(pat, -1, 0, NULL, &status);
339 resultPat = uregex_pattern(re, &resultLen, &status);
340 TEST_ASSERT_SUCCESS(status);
342 /* The TEST_ASSERT_SUCCESS above should change too... */
343 if (U_SUCCESS(status)) {
344 TEST_ASSERT(resultLen == -1);
345 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
350 status = U_ZERO_ERROR;
351 re = uregex_open(pat, 3, 0, NULL, &status);
352 resultPat = uregex_pattern(re, &resultLen, &status);
353 TEST_ASSERT_SUCCESS(status);
354 TEST_ASSERT_SUCCESS(status);
356 /* The TEST_ASSERT_SUCCESS above should change too... */
357 if (U_SUCCESS(status)) {
358 TEST_ASSERT(resultLen == 3);
359 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
360 TEST_ASSERT(u_strlen(resultPat) == 3);
372 status = U_ZERO_ERROR;
373 re = uregex_open(pat, -1, 0, NULL, &status);
374 t = uregex_flags(re, &status);
375 TEST_ASSERT_SUCCESS(status);
379 status = U_ZERO_ERROR;
380 re = uregex_open(pat, -1, 0, NULL, &status);
381 t = uregex_flags(re, &status);
382 TEST_ASSERT_SUCCESS(status);
386 status = U_ZERO_ERROR;
387 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
388 t = uregex_flags(re, &status);
389 TEST_ASSERT_SUCCESS(status);
390 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
395 * setText() and lookingAt()
402 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
403 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
404 status = U_ZERO_ERROR;
405 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
406 re = uregex_open(pat, -1, 0, NULL, &status);
407 TEST_ASSERT_SUCCESS(status);
409 /* Operation before doing a setText should fail... */
410 status = U_ZERO_ERROR;
411 uregex_lookingAt(re, 0, &status);
412 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
414 status = U_ZERO_ERROR;
415 uregex_setText(re, text1, -1, &status);
416 result = uregex_lookingAt(re, 0, &status);
417 TEST_ASSERT(result == TRUE);
418 TEST_ASSERT_SUCCESS(status);
420 status = U_ZERO_ERROR;
421 uregex_setText(re, text2, -1, &status);
422 result = uregex_lookingAt(re, 0, &status);
423 TEST_ASSERT(result == FALSE);
424 TEST_ASSERT_SUCCESS(status);
426 status = U_ZERO_ERROR;
427 uregex_setText(re, text1, -1, &status);
428 result = uregex_lookingAt(re, 0, &status);
429 TEST_ASSERT(result == TRUE);
430 TEST_ASSERT_SUCCESS(status);
432 status = U_ZERO_ERROR;
433 uregex_setText(re, text1, 5, &status);
434 result = uregex_lookingAt(re, 0, &status);
435 TEST_ASSERT(result == FALSE);
436 TEST_ASSERT_SUCCESS(status);
438 status = U_ZERO_ERROR;
439 uregex_setText(re, text1, 6, &status);
440 result = uregex_lookingAt(re, 0, &status);
441 TEST_ASSERT(result == TRUE);
442 TEST_ASSERT_SUCCESS(status);
457 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
458 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
459 status = U_ZERO_ERROR;
460 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
461 re = uregex_open(pat, -1, 0, NULL, &status);
463 uregex_setText(re, text1, -1, &status);
464 result = uregex_getText(re, &textLength, &status);
465 TEST_ASSERT(result == text1);
466 TEST_ASSERT(textLength == -1);
467 TEST_ASSERT_SUCCESS(status);
469 status = U_ZERO_ERROR;
470 uregex_setText(re, text2, 7, &status);
471 result = uregex_getText(re, &textLength, &status);
472 TEST_ASSERT(result == text2);
473 TEST_ASSERT(textLength == 7);
474 TEST_ASSERT_SUCCESS(status);
476 status = U_ZERO_ERROR;
477 uregex_setText(re, text2, 4, &status);
478 result = uregex_getText(re, &textLength, &status);
479 TEST_ASSERT(result == text2);
480 TEST_ASSERT(textLength == 4);
481 TEST_ASSERT_SUCCESS(status);
492 UChar nullString[] = {0,0,0};
494 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
495 status = U_ZERO_ERROR;
496 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
497 re = uregex_open(pat, -1, 0, NULL, &status);
499 uregex_setText(re, text1, -1, &status);
500 result = uregex_matches(re, 0, &status);
501 TEST_ASSERT(result == FALSE);
502 TEST_ASSERT_SUCCESS(status);
504 status = U_ZERO_ERROR;
505 uregex_setText(re, text1, 6, &status);
506 result = uregex_matches(re, 0, &status);
507 TEST_ASSERT(result == TRUE);
508 TEST_ASSERT_SUCCESS(status);
510 status = U_ZERO_ERROR;
511 uregex_setText(re, text1, 6, &status);
512 result = uregex_matches(re, 1, &status);
513 TEST_ASSERT(result == FALSE);
514 TEST_ASSERT_SUCCESS(status);
517 status = U_ZERO_ERROR;
518 re = uregex_openC(".?", 0, NULL, &status);
519 uregex_setText(re, text1, -1, &status);
520 len = u_strlen(text1);
521 result = uregex_matches(re, len, &status);
522 TEST_ASSERT(result == TRUE);
523 TEST_ASSERT_SUCCESS(status);
525 status = U_ZERO_ERROR;
526 uregex_setText(re, nullString, -1, &status);
527 TEST_ASSERT_SUCCESS(status);
528 result = uregex_matches(re, 0, &status);
529 TEST_ASSERT(result == TRUE);
530 TEST_ASSERT_SUCCESS(status);
536 * lookingAt() Used in setText test.
541 * find(), findNext, start, end, reset
546 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
547 status = U_ZERO_ERROR;
548 re = uregex_openC("rx", 0, NULL, &status);
550 uregex_setText(re, text1, -1, &status);
551 result = uregex_find(re, 0, &status);
552 TEST_ASSERT(result == TRUE);
553 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
554 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
555 TEST_ASSERT_SUCCESS(status);
557 result = uregex_find(re, 9, &status);
558 TEST_ASSERT(result == TRUE);
559 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
560 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
561 TEST_ASSERT_SUCCESS(status);
563 result = uregex_find(re, 14, &status);
564 TEST_ASSERT(result == FALSE);
565 TEST_ASSERT_SUCCESS(status);
567 status = U_ZERO_ERROR;
568 uregex_reset(re, 0, &status);
570 result = uregex_findNext(re, &status);
571 TEST_ASSERT(result == TRUE);
572 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
573 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
574 TEST_ASSERT_SUCCESS(status);
576 result = uregex_findNext(re, &status);
577 TEST_ASSERT(result == TRUE);
578 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
579 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
580 TEST_ASSERT_SUCCESS(status);
582 status = U_ZERO_ERROR;
583 uregex_reset(re, 12, &status);
585 result = uregex_findNext(re, &status);
586 TEST_ASSERT(result == TRUE);
587 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
588 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
589 TEST_ASSERT_SUCCESS(status);
591 result = uregex_findNext(re, &status);
592 TEST_ASSERT(result == FALSE);
593 TEST_ASSERT_SUCCESS(status);
604 status = U_ZERO_ERROR;
605 re = uregex_openC("abc", 0, NULL, &status);
606 result = uregex_groupCount(re, &status);
607 TEST_ASSERT_SUCCESS(status);
608 TEST_ASSERT(result == 0);
611 status = U_ZERO_ERROR;
612 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
613 result = uregex_groupCount(re, &status);
614 TEST_ASSERT_SUCCESS(status);
615 TEST_ASSERT(result == 3);
629 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
631 status = U_ZERO_ERROR;
632 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
633 TEST_ASSERT_SUCCESS(status);
636 uregex_setText(re, text1, -1, &status);
637 result = uregex_find(re, 0, &status);
638 TEST_ASSERT(result==TRUE);
640 /* Capture Group 0, the full match. Should succeed. */
641 status = U_ZERO_ERROR;
642 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
643 TEST_ASSERT_SUCCESS(status);
644 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
645 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
647 /* Capture group #1. Should succeed. */
648 status = U_ZERO_ERROR;
649 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
650 TEST_ASSERT_SUCCESS(status);
651 TEST_ASSERT_STRING(" interior ", buf, TRUE);
652 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
654 /* Capture group out of range. Error. */
655 status = U_ZERO_ERROR;
656 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
657 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
659 /* NULL buffer, pure pre-flight */
660 status = U_ZERO_ERROR;
661 resultSz = uregex_group(re, 0, NULL, 0, &status);
662 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
663 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
665 /* Too small buffer, truncated string */
666 status = U_ZERO_ERROR;
667 memset(buf, -1, sizeof(buf));
668 resultSz = uregex_group(re, 0, buf, 5, &status);
669 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
670 TEST_ASSERT_STRING("abc i", buf, FALSE);
671 TEST_ASSERT(buf[5] == (UChar)0xffff);
672 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
674 /* Output string just fits buffer, no NUL term. */
675 status = U_ZERO_ERROR;
676 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
677 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
678 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
679 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
680 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
691 /* SetRegion(), getRegion() do something */
692 TEST_SETUP(".*", "0123456789ABCDEF", 0)
693 UChar resultString[40];
694 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
695 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
696 uregex_setRegion(re, 3, 6, &status);
697 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
698 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
699 TEST_ASSERT(uregex_findNext(re, &status));
700 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
701 TEST_ASSERT_STRING("345", resultString, TRUE);
704 /* find(start=-1) uses regions */
705 TEST_SETUP(".*", "0123456789ABCDEF", 0);
706 uregex_setRegion(re, 4, 6, &status);
707 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
708 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
709 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
712 /* find (start >=0) does not use regions */
713 TEST_SETUP(".*", "0123456789ABCDEF", 0);
714 uregex_setRegion(re, 4, 6, &status);
715 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
716 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
717 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
720 /* findNext() obeys regions */
721 TEST_SETUP(".", "0123456789ABCDEF", 0);
722 uregex_setRegion(re, 4, 6, &status);
723 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
724 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
725 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
726 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
727 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
730 /* matches(start=-1) uses regions */
731 /* Also, verify that non-greedy *? succeeds in finding the full match. */
732 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
733 uregex_setRegion(re, 4, 6, &status);
734 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
735 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
736 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
739 /* matches (start >=0) does not use regions */
740 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
741 uregex_setRegion(re, 4, 6, &status);
742 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
743 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
744 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
747 /* lookingAt(start=-1) uses regions */
748 /* Also, verify that non-greedy *? finds the first (shortest) match. */
749 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
750 uregex_setRegion(re, 4, 6, &status);
751 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
752 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
753 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
756 /* lookingAt (start >=0) does not use regions */
757 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
758 uregex_setRegion(re, 4, 6, &status);
759 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
760 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
761 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
765 TEST_SETUP("[a-f]*", "abcdefghij", 0);
766 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
767 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
770 TEST_SETUP("[a-f]*", "abcdef", 0);
771 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
772 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
776 TEST_SETUP("abcd", "abcd", 0);
777 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
778 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
781 TEST_SETUP("abcd$", "abcd", 0);
782 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
783 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
786 /* anchoringBounds */
787 TEST_SETUP("abc$", "abcdef", 0);
788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
789 uregex_useAnchoringBounds(re, FALSE, &status);
790 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
792 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
793 uregex_useAnchoringBounds(re, TRUE, &status);
794 uregex_setRegion(re, 0, 3, &status);
795 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
796 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
799 /* Transparent Bounds */
800 TEST_SETUP("abc(?=def)", "abcdef", 0);
801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
802 uregex_useTransparentBounds(re, TRUE, &status);
803 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
805 uregex_useTransparentBounds(re, FALSE, &status);
806 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
807 uregex_setRegion(re, 0, 3, &status);
808 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
809 uregex_useTransparentBounds(re, TRUE, &status);
810 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
811 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
824 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
825 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
826 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
828 status = U_ZERO_ERROR;
829 re = uregex_openC("x(.*?)x", 0, NULL, &status);
830 TEST_ASSERT_SUCCESS(status);
832 /* Normal case, with match */
833 uregex_setText(re, text1, -1, &status);
834 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
835 TEST_ASSERT_SUCCESS(status);
836 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
837 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
839 /* No match. Text should copy to output with no changes. */
840 status = U_ZERO_ERROR;
841 uregex_setText(re, text2, -1, &status);
842 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
843 TEST_ASSERT_SUCCESS(status);
844 TEST_ASSERT_STRING("No match here.", buf, TRUE);
845 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
847 /* Match, output just fills buffer, leaving no room for NUL termination (warning expected). */
848 status = U_ZERO_ERROR;
849 uregex_setText(re, text1, -1, &status);
850 memset(buf, -1, sizeof(buf));
851 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
852 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
853 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
854 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
855 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
857 /* Do the replaceFirst again, without first resetting anything.
858 * Should give the same results.
860 status = U_ZERO_ERROR;
861 memset(buf, -1, sizeof(buf));
862 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
863 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
864 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
865 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
866 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
868 /* NULL buffer, zero buffer length */
869 status = U_ZERO_ERROR;
870 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
871 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
872 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
874 /* Buffer too small by one */
875 status = U_ZERO_ERROR;
876 memset(buf, -1, sizeof(buf));
877 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
878 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
879 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
880 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
881 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
891 UChar text1[80]; /* "Replace xaax x1x x...x." */
892 UChar text2[80]; /* "No match Here" */
893 UChar replText[80]; /* "<$1>" */
894 UChar replText2[80]; /* "<<$1>>" */
895 const char * pattern = "x(.*?)x";
896 const char * expectedResult = "Replace <aa> <1> <...>.";
897 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
900 int32_t expectedResultSize;
901 int32_t expectedResultSize2;
904 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
905 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
906 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
907 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
908 expectedResultSize = strlen(expectedResult);
909 expectedResultSize2 = strlen(expectedResult2);
911 status = U_ZERO_ERROR;
912 re = uregex_openC(pattern, 0, NULL, &status);
913 TEST_ASSERT_SUCCESS(status);
915 /* Normal case, with match */
916 uregex_setText(re, text1, -1, &status);
917 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
918 TEST_ASSERT_SUCCESS(status);
919 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
920 TEST_ASSERT(resultSize == expectedResultSize);
922 /* No match. Text should copy to output with no changes. */
923 status = U_ZERO_ERROR;
924 uregex_setText(re, text2, -1, &status);
925 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
926 TEST_ASSERT_SUCCESS(status);
927 TEST_ASSERT_STRING("No match here.", buf, TRUE);
928 TEST_ASSERT(resultSize == u_strlen(text2));
930 /* Match, output just fills buffer, leaving no room for NUL termination (warning expected). */
931 status = U_ZERO_ERROR;
932 uregex_setText(re, text1, -1, &status);
933 memset(buf, -1, sizeof(buf));
934 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
935 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
936 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
937 TEST_ASSERT(resultSize == expectedResultSize);
938 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
940 /* Do the replaceAll again, without first resetting anything.
941 * Should give the same results.
943 status = U_ZERO_ERROR;
944 memset(buf, -1, sizeof(buf));
945 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
946 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
947 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
948 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
949 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
951 /* NULL buffer, zero buffer length */
952 status = U_ZERO_ERROR;
953 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
954 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
955 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
957 /* Buffer too small. Try every size, which will tickle edge cases
958 * in uregex_appendReplacement (used by replaceAll) */
959 for (i=0; i<expectedResultSize; i++) {
961 status = U_ZERO_ERROR;
962 memset(buf, -1, sizeof(buf));
963 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
964 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
965 strcpy(expected, expectedResult);
967 TEST_ASSERT_STRING(expected, buf, FALSE);
968 TEST_ASSERT(resultSize == expectedResultSize);
969 TEST_ASSERT(buf[i] == (UChar)0xffff);
972 /* Buffer too small. Same as previous test, except this time the replacement
973 * text is longer than the match capture group, making the length of the complete
974 * replacement longer than the original string.
976 for (i=0; i<expectedResultSize2; i++) {
978 status = U_ZERO_ERROR;
979 memset(buf, -1, sizeof(buf));
980 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
981 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
982 strcpy(expected, expectedResult2);
984 TEST_ASSERT_STRING(expected, buf, FALSE);
985 TEST_ASSERT(resultSize == expectedResultSize2);
986 TEST_ASSERT(buf[i] == (UChar)0xffff);
995 * appendReplacement()
1005 status = U_ZERO_ERROR;
1006 re = uregex_openC(".*", 0, 0, &status);
1007 TEST_ASSERT_SUCCESS(status);
1009 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1010 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1011 uregex_setText(re, text, -1, &status);
1013 /* match covers whole target string */
1014 uregex_find(re, 0, &status);
1015 TEST_ASSERT_SUCCESS(status);
1017 bufCap = UPRV_LENGTHOF(buf);
1018 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1019 TEST_ASSERT_SUCCESS(status);
1020 TEST_ASSERT_STRING("some other", buf, TRUE);
1022 /* Replacement text has \u \U escapes */
1023 uregex_find(re, 0, &status);
1024 TEST_ASSERT_SUCCESS(status);
1026 bufCap = UPRV_LENGTHOF(buf);
1027 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1028 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1029 TEST_ASSERT_SUCCESS(status);
1030 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1032 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1033 status = U_ZERO_ERROR;
1034 uregex_find(re, 0, &status);
1035 TEST_ASSERT_SUCCESS(status);
1037 status = U_BUFFER_OVERFLOW_ERROR;
1038 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1039 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1046 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1053 UChar textToSplit[80];
1058 int32_t requiredCapacity;
1059 int32_t spaceNeeded;
1062 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1063 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1065 status = U_ZERO_ERROR;
1066 re = uregex_openC(":", 0, NULL, &status);
1071 uregex_setText(re, textToSplit, -1, &status);
1072 TEST_ASSERT_SUCCESS(status);
1074 /* The TEST_ASSERT_SUCCESS call above should change too... */
1075 if (U_SUCCESS(status)) {
1076 memset(fields, -1, sizeof(fields));
1078 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1079 TEST_ASSERT_SUCCESS(status);
1081 /* The TEST_ASSERT_SUCCESS call above should change too... */
1082 if(U_SUCCESS(status)) {
1083 TEST_ASSERT(numFields == 3);
1084 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1085 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1086 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1087 TEST_ASSERT(fields[3] == NULL);
1089 spaceNeeded = u_strlen(textToSplit) -
1090 (numFields - 1) + /* Field delimiters do not appear in output */
1091 numFields; /* Each field gets a NUL terminator */
1093 TEST_ASSERT(spaceNeeded == requiredCapacity);
1100 /* Split with too few output strings available */
1101 status = U_ZERO_ERROR;
1102 re = uregex_openC(":", 0, NULL, &status);
1103 uregex_setText(re, textToSplit, -1, &status);
1104 TEST_ASSERT_SUCCESS(status);
1106 /* The TEST_ASSERT_SUCCESS call above should change too... */
1107 if(U_SUCCESS(status)) {
1108 memset(fields, -1, sizeof(fields));
1110 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1111 TEST_ASSERT_SUCCESS(status);
1113 /* The TEST_ASSERT_SUCCESS call above should change too... */
1114 if(U_SUCCESS(status)) {
1115 TEST_ASSERT(numFields == 2);
1116 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1117 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1118 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1120 spaceNeeded = u_strlen(textToSplit) -
1121 (numFields - 1) + /* Field delimiters do not appear in output */
1122 numFields; /* Each field gets a NUL terminator */
1124 TEST_ASSERT(spaceNeeded == requiredCapacity);
1126 /* Split with a range of output buffer sizes. */
1127 spaceNeeded = u_strlen(textToSplit) -
1128 (numFields - 1) + /* Field delimiters do not appear in output */
1129 numFields; /* Each field gets a NUL terminator */
1131 for (sz=0; sz < spaceNeeded+1; sz++) {
1132 memset(fields, -1, sizeof(fields));
1133 status = U_ZERO_ERROR;
1135 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1136 if (sz >= spaceNeeded) {
1137 TEST_ASSERT_SUCCESS(status);
1138 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1139 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1140 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1142 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1144 TEST_ASSERT(numFields == 3);
1145 TEST_ASSERT(fields[3] == NULL);
1146 TEST_ASSERT(spaceNeeded == requiredCapacity);
1157 /* Split(), part 2. Patterns with capture groups. The capture group text
1158 * comes out as additional fields. */
1160 UChar textToSplit[80];
1164 int32_t requiredCapacity;
1165 int32_t spaceNeeded;
1168 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1170 status = U_ZERO_ERROR;
1171 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1173 uregex_setText(re, textToSplit, -1, &status);
1174 TEST_ASSERT_SUCCESS(status);
1176 /* The TEST_ASSERT_SUCCESS call above should change too... */
1177 if(U_SUCCESS(status)) {
1178 memset(fields, -1, sizeof(fields));
1180 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1181 TEST_ASSERT_SUCCESS(status);
1183 /* The TEST_ASSERT_SUCCESS call above should change too... */
1184 if(U_SUCCESS(status)) {
1185 TEST_ASSERT(numFields == 5);
1186 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1187 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1188 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1189 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1190 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1191 TEST_ASSERT(fields[5] == NULL);
1192 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1193 TEST_ASSERT(spaceNeeded == requiredCapacity);
1197 /* Split with too few output strings available (2) */
1198 status = U_ZERO_ERROR;
1199 memset(fields, -1, sizeof(fields));
1201 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1202 TEST_ASSERT_SUCCESS(status);
1204 /* The TEST_ASSERT_SUCCESS call above should change too... */
1205 if(U_SUCCESS(status)) {
1206 TEST_ASSERT(numFields == 2);
1207 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1208 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1209 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1211 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1212 TEST_ASSERT(spaceNeeded == requiredCapacity);
1215 /* Split with too few output strings available (3) */
1216 status = U_ZERO_ERROR;
1217 memset(fields, -1, sizeof(fields));
1219 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1220 TEST_ASSERT_SUCCESS(status);
1222 /* The TEST_ASSERT_SUCCESS call above should change too... */
1223 if(U_SUCCESS(status)) {
1224 TEST_ASSERT(numFields == 3);
1225 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1226 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1227 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1228 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1230 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1231 TEST_ASSERT(spaceNeeded == requiredCapacity);
1234 /* Split with just enough output strings available (5) */
1235 status = U_ZERO_ERROR;
1236 memset(fields, -1, sizeof(fields));
1238 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1239 TEST_ASSERT_SUCCESS(status);
1241 /* The TEST_ASSERT_SUCCESS call above should change too... */
1242 if(U_SUCCESS(status)) {
1243 TEST_ASSERT(numFields == 5);
1244 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1245 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1246 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1247 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1248 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1249 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1251 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1252 TEST_ASSERT(spaceNeeded == requiredCapacity);
1255 /* Split, end of text is a field delimiter. */
1256 status = U_ZERO_ERROR;
1257 sz = strlen("first <tag-a> second<tag-b>");
1258 uregex_setText(re, textToSplit, sz, &status);
1259 TEST_ASSERT_SUCCESS(status);
1261 /* The TEST_ASSERT_SUCCESS call above should change too... */
1262 if(U_SUCCESS(status)) {
1263 memset(fields, -1, sizeof(fields));
1265 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1266 TEST_ASSERT_SUCCESS(status);
1268 /* The TEST_ASSERT_SUCCESS call above should change too... */
1269 if(U_SUCCESS(status)) {
1270 TEST_ASSERT(numFields == 5);
1271 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1272 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1273 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1274 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1275 TEST_ASSERT_STRING("", fields[4], TRUE);
1276 TEST_ASSERT(fields[5] == NULL);
1277 TEST_ASSERT(fields[8] == NULL);
1278 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1279 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1280 TEST_ASSERT(spaceNeeded == requiredCapacity);
1290 TEST_SETUP("abc$", "abcdef", 0);
1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1292 uregex_setTimeLimit(re, 1000, &status);
1293 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1294 TEST_ASSERT_SUCCESS(status);
1295 uregex_setTimeLimit(re, -1, &status);
1296 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1297 status = U_ZERO_ERROR;
1298 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1302 * set/get Stack Limit
1304 TEST_SETUP("abc$", "abcdef", 0);
1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1306 uregex_setStackLimit(re, 40000, &status);
1307 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1308 TEST_ASSERT_SUCCESS(status);
1309 uregex_setStackLimit(re, -1, &status);
1310 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1311 status = U_ZERO_ERROR;
1312 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1317 * Get/Set callback functions
1318 * This test is copied from intltest regex/Callbacks
1319 * The pattern and test data will run long enough to cause the callback
1320 * to be invoked. The nested '+' operators give exponential time
1321 * behavior with increasing string length.
1323 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1324 callBackContext cbInfo = {4, 0, 0};
1325 const void *pContext = &cbInfo;
1326 URegexMatchCallback *returnedFn = &TestCallbackFn;
1328 /* Getting the callback fn when it hasn't been set must return NULL */
1329 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1330 TEST_ASSERT_SUCCESS(status);
1331 TEST_ASSERT(returnedFn == NULL);
1332 TEST_ASSERT(pContext == NULL);
1334 /* Set the callback and do a match. */
1335 /* The callback function should record that it has been called. */
1336 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1337 TEST_ASSERT_SUCCESS(status);
1338 TEST_ASSERT(cbInfo.numCalls == 0);
1339 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1340 TEST_ASSERT_SUCCESS(status);
1341 TEST_ASSERT(cbInfo.numCalls > 0);
1343 /* Getting the callback should return the values that were set above. */
1344 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1345 TEST_ASSERT(returnedFn == &TestCallbackFn);
1346 TEST_ASSERT(pContext == &cbInfo);
/*
 * TestBug4315
 *
 * Exercises uregex_split() in two passes over the same regular expression
 * and input text (presumably the regression test for ICU bug 4315, going by
 * the function name -- confirm against the bug tracker):
 *
 *   1. Split with a NULL destination buffer and a capacity of 0. The test
 *      asserts that the status becomes U_BUFFER_OVERFLOW_ERROR, that the
 *      returned field count is 3, and keeps the reported required capacity
 *      in neededLength1.
 *   2. If the overflow was reported, reset the status, malloc() a buffer of
 *      (neededLength1 + 1) UChars and split again into it. The test asserts
 *      success, a field count of 3, that the required capacity reported by
 *      the second call (neededLength2) equals neededLength1, the text of
 *      the three fields, and that destFields[3] is NULL.
 *
 * The regular expression is closed with uregex_close() at the end.
 * Failures are reported through the TEST_ASSERT* macros; nothing is returned.
 */
1353 static void TestBug4315(void) {
1354 UErrorCode theICUError = U_ZERO_ERROR;
1355 URegularExpression *theRegEx;
/* Pattern string handed to uregex_openC() below. */
1357 const char *thePattern;
1358 UChar theString[100];
/* Output field pointers filled in by uregex_split(). */
1359 UChar *destFields[24];
/* Required capacity reported by the first (NULL buffer) and second (real buffer) split. */
1360 int32_t neededLength1;
1361 int32_t neededLength2;
1363 int32_t wordCount = 0;
/* Must stay equal to the element count of destFields above. */
1364 int32_t destFieldsSize = 24;
/* Text that will be split; theString[100] is large enough for this literal. */
1367 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
/* NOTE(review): the expected fields asserted at the end of this function imply
 * that thePattern matches "ck " -- confirm against the thePattern assignment. */
1370 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1371 TEST_ASSERT_SUCCESS(theICUError);
1373 /* set the input string */
1374 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1375 TEST_ASSERT_SUCCESS(theICUError);
1378 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1380 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1381 destFieldsSize, &theICUError);
1383 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1384 TEST_ASSERT(wordCount==3);
1386 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1388 theICUError = U_ZERO_ERROR;
1389 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1390 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1391 destFields, destFieldsSize, &theICUError);
1392 TEST_ASSERT(wordCount==3);
1393 TEST_ASSERT_SUCCESS(theICUError);
1394 TEST_ASSERT(neededLength1 == neededLength2);
1395 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1396 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1397 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1398 TEST_ASSERT(destFields[3] == NULL);
1401 uregex_close(theRegEx);
1404 /* Based on TestRegexCAPI() */
1405 static void TestUTextAPI(void) {
1406 UErrorCode status = U_ZERO_ERROR;
1407 URegularExpression *re;
1408 UText patternText = UTEXT_INITIALIZER;
1410 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1412 /* Minimalist open/close */
1413 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1414 re = uregex_openUText(&patternText, 0, 0, &status);
1415 if (U_FAILURE(status)) {
1416 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1417 utext_close(&patternText);
1422 /* Open with all flag values set */
1423 status = U_ZERO_ERROR;
1424 re = uregex_openUText(&patternText,
1425 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1427 TEST_ASSERT_SUCCESS(status);
1430 /* Open with an invalid flag */
1431 status = U_ZERO_ERROR;
1432 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1433 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1436 /* open with an invalid parameter */
1437 status = U_ZERO_ERROR;
1438 re = uregex_openUText(NULL,
1439 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1440 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1446 URegularExpression *clone1;
1447 URegularExpression *clone2;
1448 URegularExpression *clone3;
1449 UChar testString1[30];
1450 UChar testString2[30];
1454 status = U_ZERO_ERROR;
1455 re = uregex_openUText(&patternText, 0, 0, &status);
1456 TEST_ASSERT_SUCCESS(status);
1457 clone1 = uregex_clone(re, &status);
1458 TEST_ASSERT_SUCCESS(status);
1459 TEST_ASSERT(clone1 != NULL);
1461 status = U_ZERO_ERROR;
1462 clone2 = uregex_clone(re, &status);
1463 TEST_ASSERT_SUCCESS(status);
1464 TEST_ASSERT(clone2 != NULL);
1467 status = U_ZERO_ERROR;
1468 clone3 = uregex_clone(clone2, &status);
1469 TEST_ASSERT_SUCCESS(status);
1470 TEST_ASSERT(clone3 != NULL);
1472 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1473 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1475 status = U_ZERO_ERROR;
1476 uregex_setText(clone1, testString1, -1, &status);
1477 TEST_ASSERT_SUCCESS(status);
1478 result = uregex_lookingAt(clone1, 0, &status);
1479 TEST_ASSERT_SUCCESS(status);
1480 TEST_ASSERT(result==TRUE);
1482 status = U_ZERO_ERROR;
1483 uregex_setText(clone2, testString2, -1, &status);
1484 TEST_ASSERT_SUCCESS(status);
1485 result = uregex_lookingAt(clone2, 0, &status);
1486 TEST_ASSERT_SUCCESS(status);
1487 TEST_ASSERT(result==FALSE);
1488 result = uregex_find(clone2, 0, &status);
1489 TEST_ASSERT_SUCCESS(status);
1490 TEST_ASSERT(result==TRUE);
1492 uregex_close(clone1);
1493 uregex_close(clone2);
1494 uregex_close(clone3);
1499 * pattern() and patternText()
1502 const UChar *resultPat;
1505 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1506 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1507 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1508 status = U_ZERO_ERROR;
1510 utext_openUTF8(&patternText, str_hello, -1, &status);
1511 re = uregex_open(pat, -1, 0, NULL, &status);
1512 resultPat = uregex_pattern(re, &resultLen, &status);
1513 TEST_ASSERT_SUCCESS(status);
1515 /* The TEST_ASSERT_SUCCESS above should change too... */
1516 if (U_SUCCESS(status)) {
1517 TEST_ASSERT(resultLen == -1);
1518 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1521 resultText = uregex_patternUText(re, &status);
1522 TEST_ASSERT_SUCCESS(status);
1523 TEST_ASSERT_UTEXT(str_hello, resultText);
1527 status = U_ZERO_ERROR;
1528 re = uregex_open(pat, 3, 0, NULL, &status);
1529 resultPat = uregex_pattern(re, &resultLen, &status);
1530 TEST_ASSERT_SUCCESS(status);
1532 /* The TEST_ASSERT_SUCCESS above should change too... */
1533 if (U_SUCCESS(status)) {
1534 TEST_ASSERT(resultLen == 3);
1535 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1536 TEST_ASSERT(u_strlen(resultPat) == 3);
1539 resultText = uregex_patternUText(re, &status);
1540 TEST_ASSERT_SUCCESS(status);
1541 TEST_ASSERT_UTEXT(str_hel, resultText);
1547 * setUText() and lookingAt()
1550 UText text1 = UTEXT_INITIALIZER;
1551 UText text2 = UTEXT_INITIALIZER;
1553 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1554 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1555 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1556 status = U_ZERO_ERROR;
1557 utext_openUTF8(&text1, str_abcccd, -1, &status);
1558 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1560 utext_openUTF8(&patternText, str_abcd, -1, &status);
1561 re = uregex_openUText(&patternText, 0, NULL, &status);
1562 TEST_ASSERT_SUCCESS(status);
1564 /* Operation before doing a setText should fail... */
1565 status = U_ZERO_ERROR;
1566 uregex_lookingAt(re, 0, &status);
1567 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1569 status = U_ZERO_ERROR;
1570 uregex_setUText(re, &text1, &status);
1571 result = uregex_lookingAt(re, 0, &status);
1572 TEST_ASSERT(result == TRUE);
1573 TEST_ASSERT_SUCCESS(status);
1575 status = U_ZERO_ERROR;
1576 uregex_setUText(re, &text2, &status);
1577 result = uregex_lookingAt(re, 0, &status);
1578 TEST_ASSERT(result == FALSE);
1579 TEST_ASSERT_SUCCESS(status);
1581 status = U_ZERO_ERROR;
1582 uregex_setUText(re, &text1, &status);
1583 result = uregex_lookingAt(re, 0, &status);
1584 TEST_ASSERT(result == TRUE);
1585 TEST_ASSERT_SUCCESS(status);
1588 utext_close(&text1);
1589 utext_close(&text2);
1594 * getText() and getUText()
1597 UText text1 = UTEXT_INITIALIZER;
1598 UText text2 = UTEXT_INITIALIZER;
1599 UChar text2Chars[20];
1601 const UChar *result;
1603 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1604 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1605 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1608 status = U_ZERO_ERROR;
1609 utext_openUTF8(&text1, str_abcccd, -1, &status);
1610 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1611 utext_openUChars(&text2, text2Chars, -1, &status);
1613 utext_openUTF8(&patternText, str_abcd, -1, &status);
1614 re = uregex_openUText(&patternText, 0, NULL, &status);
1616 /* First set a UText */
1617 uregex_setUText(re, &text1, &status);
1618 resultText = uregex_getUText(re, NULL, &status);
1619 TEST_ASSERT_SUCCESS(status);
1620 TEST_ASSERT(resultText != &text1);
1621 utext_setNativeIndex(resultText, 0);
1622 utext_setNativeIndex(&text1, 0);
1623 TEST_ASSERT(testUTextEqual(resultText, &text1));
1624 utext_close(resultText);
1626 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1627 (void)result; /* Suppress set but not used warning. */
1628 TEST_ASSERT(textLength == -1 || textLength == 6);
1629 resultText = uregex_getUText(re, NULL, &status);
1630 TEST_ASSERT_SUCCESS(status);
1631 TEST_ASSERT(resultText != &text1);
1632 utext_setNativeIndex(resultText, 0);
1633 utext_setNativeIndex(&text1, 0);
1634 TEST_ASSERT(testUTextEqual(resultText, &text1));
1635 utext_close(resultText);
1637 /* Then set a UChar * */
1638 uregex_setText(re, text2Chars, 7, &status);
1639 resultText = uregex_getUText(re, NULL, &status);
1640 TEST_ASSERT_SUCCESS(status);
1641 utext_setNativeIndex(resultText, 0);
1642 utext_setNativeIndex(&text2, 0);
1643 TEST_ASSERT(testUTextEqual(resultText, &text2));
1644 utext_close(resultText);
1645 result = uregex_getText(re, &textLength, &status);
1646 TEST_ASSERT(textLength == 7);
1649 utext_close(&text1);
1650 utext_close(&text2);
1657 UText text1 = UTEXT_INITIALIZER;
1659 UText nullText = UTEXT_INITIALIZER;
1660 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1661 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1663 status = U_ZERO_ERROR;
1664 utext_openUTF8(&text1, str_abcccde, -1, &status);
1665 utext_openUTF8(&patternText, str_abcd, -1, &status);
1666 re = uregex_openUText(&patternText, 0, NULL, &status);
1668 uregex_setUText(re, &text1, &status);
1669 result = uregex_matches(re, 0, &status);
1670 TEST_ASSERT(result == FALSE);
1671 TEST_ASSERT_SUCCESS(status);
1674 status = U_ZERO_ERROR;
1675 re = uregex_openC(".?", 0, NULL, &status);
1676 uregex_setUText(re, &text1, &status);
1677 result = uregex_matches(re, 7, &status);
1678 TEST_ASSERT(result == TRUE);
1679 TEST_ASSERT_SUCCESS(status);
1681 status = U_ZERO_ERROR;
1682 utext_openUTF8(&nullText, "", -1, &status);
1683 uregex_setUText(re, &nullText, &status);
1684 TEST_ASSERT_SUCCESS(status);
1685 result = uregex_matches(re, 0, &status);
1686 TEST_ASSERT(result == TRUE);
1687 TEST_ASSERT_SUCCESS(status);
1690 utext_close(&text1);
1691 utext_close(&nullText);
1696 * lookingAt() Used in setText test.
1701 * find(), findNext, start, end, reset
1706 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1707 status = U_ZERO_ERROR;
1708 re = uregex_openC("rx", 0, NULL, &status);
1710 uregex_setText(re, text1, -1, &status);
1711 result = uregex_find(re, 0, &status);
1712 TEST_ASSERT(result == TRUE);
1713 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1714 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1715 TEST_ASSERT_SUCCESS(status);
1717 result = uregex_find(re, 9, &status);
1718 TEST_ASSERT(result == TRUE);
1719 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1720 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1721 TEST_ASSERT_SUCCESS(status);
1723 result = uregex_find(re, 14, &status);
1724 TEST_ASSERT(result == FALSE);
1725 TEST_ASSERT_SUCCESS(status);
1727 status = U_ZERO_ERROR;
1728 uregex_reset(re, 0, &status);
1730 result = uregex_findNext(re, &status);
1731 TEST_ASSERT(result == TRUE);
1732 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1733 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1734 TEST_ASSERT_SUCCESS(status);
1736 result = uregex_findNext(re, &status);
1737 TEST_ASSERT(result == TRUE);
1738 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1739 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1740 TEST_ASSERT_SUCCESS(status);
1742 status = U_ZERO_ERROR;
1743 uregex_reset(re, 12, &status);
1745 result = uregex_findNext(re, &status);
1746 TEST_ASSERT(result == TRUE);
1747 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1748 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1749 TEST_ASSERT_SUCCESS(status);
1751 result = uregex_findNext(re, &status);
1752 TEST_ASSERT(result == FALSE);
1753 TEST_ASSERT_SUCCESS(status);
1765 int64_t groupLen = 0;
1768 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1770 status = U_ZERO_ERROR;
1771 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1772 TEST_ASSERT_SUCCESS(status);
1774 uregex_setText(re, text1, -1, &status);
1775 result = uregex_find(re, 0, &status);
1776 TEST_ASSERT(result==TRUE);
1778 /* Capture Group 0 with shallow clone API. Should succeed. */
1779 status = U_ZERO_ERROR;
1780 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1781 TEST_ASSERT_SUCCESS(status);
1783 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1784 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1785 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1787 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1788 utext_close(actual);
1790 /* Capture group #1. Should succeed. */
1791 status = U_ZERO_ERROR;
1793 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1794 TEST_ASSERT_SUCCESS(status);
1795 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1796 /* (within the string text1) */
1797 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1798 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1799 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1801 utext_close(actual);
1803 /* Capture group out of range. Error. */
1804 status = U_ZERO_ERROR;
1805 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1806 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1807 utext_close(actual);
1818 UText replText = UTEXT_INITIALIZER;
1820 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1821 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1822 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1823 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1824 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1825 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1826 status = U_ZERO_ERROR;
1827 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1828 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1829 utext_openUTF8(&replText, str_1x, -1, &status);
1831 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1832 TEST_ASSERT_SUCCESS(status);
1834 /* Normal case, with match */
1835 uregex_setText(re, text1, -1, &status);
1836 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1837 TEST_ASSERT_SUCCESS(status);
1838 TEST_ASSERT_UTEXT(str_Replxxx, result);
1839 utext_close(result);
1841 /* No match. Text should copy to output with no changes. */
1842 uregex_setText(re, text2, -1, &status);
1843 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1844 TEST_ASSERT_SUCCESS(status);
1845 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1846 utext_close(result);
1848 /* Unicode escapes */
1849 uregex_setText(re, text1, -1, &status);
1850 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1851 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1852 TEST_ASSERT_SUCCESS(status);
1853 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1854 utext_close(result);
1857 utext_close(&replText);
1867 UText replText = UTEXT_INITIALIZER;
1869 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1870 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1871 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1872 status = U_ZERO_ERROR;
1873 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1874 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1875 utext_openUTF8(&replText, str_1, -1, &status);
1877 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1878 TEST_ASSERT_SUCCESS(status);
1880 /* Normal case, with match */
1881 uregex_setText(re, text1, -1, &status);
1882 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1883 TEST_ASSERT_SUCCESS(status);
1884 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1885 utext_close(result);
1887 /* No match. Text should copy to output with no changes. */
1888 uregex_setText(re, text2, -1, &status);
1889 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1890 TEST_ASSERT_SUCCESS(status);
1891 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1892 utext_close(result);
1895 utext_close(&replText);
1900 * appendReplacement()
1909 status = U_ZERO_ERROR;
1910 re = uregex_openC(".*", 0, 0, &status);
1911 TEST_ASSERT_SUCCESS(status);
1913 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1914 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1915 uregex_setText(re, text, -1, &status);
1917 /* match covers whole target string */
1918 uregex_find(re, 0, &status);
1919 TEST_ASSERT_SUCCESS(status);
1921 bufCap = UPRV_LENGTHOF(buf);
1922 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1923 TEST_ASSERT_SUCCESS(status);
1924 TEST_ASSERT_STRING("some other", buf, TRUE);
1926 /* Match has \u \U escapes */
1927 uregex_find(re, 0, &status);
1928 TEST_ASSERT_SUCCESS(status);
1930 bufCap = UPRV_LENGTHOF(buf);
1931 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1932 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1933 TEST_ASSERT_SUCCESS(status);
1934 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1941 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1948 UChar textToSplit[80];
1954 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1955 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1957 status = U_ZERO_ERROR;
1958 re = uregex_openC(":", 0, NULL, &status);
1963 uregex_setText(re, textToSplit, -1, &status);
1964 TEST_ASSERT_SUCCESS(status);
1966 /* The TEST_ASSERT_SUCCESS call above should change too... */
1967 if (U_SUCCESS(status)) {
1968 memset(fields, 0, sizeof(fields));
1969 numFields = uregex_splitUText(re, fields, 10, &status);
1970 TEST_ASSERT_SUCCESS(status);
1972 /* The TEST_ASSERT_SUCCESS call above should change too... */
1973 if(U_SUCCESS(status)) {
1974 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1975 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1976 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1977 TEST_ASSERT(numFields == 3);
1978 TEST_ASSERT_UTEXT(str_first, fields[0]);
1979 TEST_ASSERT_UTEXT(str_second, fields[1]);
1980 TEST_ASSERT_UTEXT(str_third, fields[2]);
1981 TEST_ASSERT(fields[3] == NULL);
1983 for(i = 0; i < numFields; i++) {
1984 utext_close(fields[i]);
1991 /* Split with too few output strings available */
1992 status = U_ZERO_ERROR;
1993 re = uregex_openC(":", 0, NULL, &status);
1994 uregex_setText(re, textToSplit, -1, &status);
1995 TEST_ASSERT_SUCCESS(status);
1997 /* The TEST_ASSERT_SUCCESS call above should change too... */
1998 if(U_SUCCESS(status)) {
2001 fields[2] = &patternText;
2002 numFields = uregex_splitUText(re, fields, 2, &status);
2003 TEST_ASSERT_SUCCESS(status);
2005 /* The TEST_ASSERT_SUCCESS call above should change too... */
2006 if(U_SUCCESS(status)) {
2007 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2008 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2009 TEST_ASSERT(numFields == 2);
2010 TEST_ASSERT_UTEXT(str_first, fields[0]);
2011 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2012 TEST_ASSERT(fields[2] == &patternText);
2014 for(i = 0; i < numFields; i++) {
2015 utext_close(fields[i]);
2022 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2023 * comes out as additional fields. */
2025 UChar textToSplit[80];
2030 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2032 status = U_ZERO_ERROR;
2033 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2035 uregex_setText(re, textToSplit, -1, &status);
2036 TEST_ASSERT_SUCCESS(status);
2038 /* The TEST_ASSERT_SUCCESS call above should change too... */
2039 if(U_SUCCESS(status)) {
2040 memset(fields, 0, sizeof(fields));
2041 numFields = uregex_splitUText(re, fields, 10, &status);
2042 TEST_ASSERT_SUCCESS(status);
2044 /* The TEST_ASSERT_SUCCESS call above should change too... */
2045 if(U_SUCCESS(status)) {
2046 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2047 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2048 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2049 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2050 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2052 TEST_ASSERT(numFields == 5);
2053 TEST_ASSERT_UTEXT(str_first, fields[0]);
2054 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2055 TEST_ASSERT_UTEXT(str_second, fields[2]);
2056 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2057 TEST_ASSERT_UTEXT(str_third, fields[4]);
2058 TEST_ASSERT(fields[5] == NULL);
2060 for(i = 0; i < numFields; i++) {
2061 utext_close(fields[i]);
2065 /* Split with too few output strings available (2) */
2066 status = U_ZERO_ERROR;
2069 fields[2] = &patternText;
2070 numFields = uregex_splitUText(re, fields, 2, &status);
2071 TEST_ASSERT_SUCCESS(status);
2073 /* The TEST_ASSERT_SUCCESS call above should change too... */
2074 if(U_SUCCESS(status)) {
2075 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2076 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2077 TEST_ASSERT(numFields == 2);
2078 TEST_ASSERT_UTEXT(str_first, fields[0]);
2079 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2080 TEST_ASSERT(fields[2] == &patternText);
2082 for(i = 0; i < numFields; i++) {
2083 utext_close(fields[i]);
2087 /* Split with too few output strings available (3) */
2088 status = U_ZERO_ERROR;
2092 fields[3] = &patternText;
2093 numFields = uregex_splitUText(re, fields, 3, &status);
2094 TEST_ASSERT_SUCCESS(status);
2096 /* The TEST_ASSERT_SUCCESS call above should change too... */
2097 if(U_SUCCESS(status)) {
2098 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2099 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2100 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2101 TEST_ASSERT(numFields == 3);
2102 TEST_ASSERT_UTEXT(str_first, fields[0]);
2103 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2104 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2105 TEST_ASSERT(fields[3] == &patternText);
2107 for(i = 0; i < numFields; i++) {
2108 utext_close(fields[i]);
2111 /* Split with just enough output strings available (5) */
2112 status = U_ZERO_ERROR;
2118 fields[5] = &patternText;
2119 numFields = uregex_splitUText(re, fields, 5, &status);
2120 TEST_ASSERT_SUCCESS(status);
2122 /* The TEST_ASSERT_SUCCESS call above should change too... */
2123 if(U_SUCCESS(status)) {
2124 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2125 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2126 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2127 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2128 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2130 TEST_ASSERT(numFields == 5);
2131 TEST_ASSERT_UTEXT(str_first, fields[0]);
2132 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2133 TEST_ASSERT_UTEXT(str_second, fields[2]);
2134 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2135 TEST_ASSERT_UTEXT(str_third, fields[4]);
2136 TEST_ASSERT(fields[5] == &patternText);
2138 for(i = 0; i < numFields; i++) {
2139 utext_close(fields[i]);
2142 /* Split, end of text is a field delimiter. */
2143 status = U_ZERO_ERROR;
2144 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2145 TEST_ASSERT_SUCCESS(status);
2147 /* The TEST_ASSERT_SUCCESS call above should change too... */
2148 if(U_SUCCESS(status)) {
2149 memset(fields, 0, sizeof(fields));
2150 fields[9] = &patternText;
2151 numFields = uregex_splitUText(re, fields, 9, &status);
2152 TEST_ASSERT_SUCCESS(status);
2154 /* The TEST_ASSERT_SUCCESS call above should change too... */
2155 if(U_SUCCESS(status)) {
2156 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2157 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2158 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2159 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2160 const char str_empty[] = { 0x00 };
2162 TEST_ASSERT(numFields == 5);
2163 TEST_ASSERT_UTEXT(str_first, fields[0]);
2164 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2165 TEST_ASSERT_UTEXT(str_second, fields[2]);
2166 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2167 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2168 TEST_ASSERT(fields[5] == NULL);
2169 TEST_ASSERT(fields[8] == NULL);
2170 TEST_ASSERT(fields[9] == &patternText);
2172 for(i = 0; i < numFields; i++) {
2173 utext_close(fields[i]);
2179 utext_close(&patternText);
2183 static void TestRefreshInput(void) {
2185 * RefreshInput changes out the input of a URegularExpression without
2186 * changing anything else in the match state. Used with Java JNI,
2187 * when Java moves the underlying string storage. This test
2188 * runs a find() loop, moving the text after the first match.
2189 * The right number of matches should still be found.
2191 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2192 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2193 UErrorCode status = U_ZERO_ERROR;
2194 URegularExpression *re;
2195 UText ut1 = UTEXT_INITIALIZER;
2196 UText ut2 = UTEXT_INITIALIZER;
2198 re = uregex_openC("[ABC]", 0, 0, &status);
2199 TEST_ASSERT_SUCCESS(status);
2201 utext_openUChars(&ut1, testStr, -1, &status);
2202 TEST_ASSERT_SUCCESS(status);
2203 uregex_setUText(re, &ut1, &status);
2204 TEST_ASSERT_SUCCESS(status);
2206 /* Find the first match "A" in the original string */
2207 TEST_ASSERT(uregex_findNext(re, &status));
2208 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2210 /* Move the string, kill the original string. */
2211 u_strcpy(movedStr, testStr);
2212 u_memset(testStr, 0, u_strlen(testStr));
2213 utext_openUChars(&ut2, movedStr, -1, &status);
2214 TEST_ASSERT_SUCCESS(status);
2215 uregex_refreshUText(re, &ut2, &status);
2216 TEST_ASSERT_SUCCESS(status);
2218 /* Find the following two matches, now working in the moved string. */
2219 TEST_ASSERT(uregex_findNext(re, &status));
2220 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2221 TEST_ASSERT(uregex_findNext(re, &status));
2222 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2223 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2229 static void TestBug8421(void) {
2230 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2233 URegularExpression *re;
2234 UErrorCode status = U_ZERO_ERROR;
2237 re = uregex_openC("abc", 0, 0, &status);
2238 TEST_ASSERT_SUCCESS(status);
2240 limit = uregex_getTimeLimit(re, &status);
2241 TEST_ASSERT_SUCCESS(status);
2242 TEST_ASSERT(limit == 0);
2244 uregex_setTimeLimit(re, 100, &status);
2245 TEST_ASSERT_SUCCESS(status);
2246 limit = uregex_getTimeLimit(re, &status);
2247 TEST_ASSERT_SUCCESS(status);
2248 TEST_ASSERT(limit == 100);
2253 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2257 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2261 static void TestBug10815() {
2262 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2263 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2265 URegularExpression *re;
2266 UErrorCode status = U_ZERO_ERROR;
2270 // findNext() with a find progress callback function.
2272 re = uregex_openC(".z", 0, 0, &status);
2273 TEST_ASSERT_SUCCESS(status);
2275 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2276 uregex_setText(re, text, -1, &status);
2277 TEST_ASSERT_SUCCESS(status);
2279 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2280 TEST_ASSERT_SUCCESS(status);
2282 uregex_findNext(re, &status);
2283 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2287 // findNext() with a match progress callback function.
2289 status = U_ZERO_ERROR;
2290 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2291 TEST_ASSERT_SUCCESS(status);
2293 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2294 // it will appear to be stuck in a (near) infinite loop.
2295 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2296 uregex_setText(re, text, -1, &status);
2297 TEST_ASSERT_SUCCESS(status);
2299 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2300 TEST_ASSERT_SUCCESS(status);
2302 uregex_findNext(re, &status);
2303 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2309 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */