source/test/cintltst/reapits.c

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 2004-2015, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8 /********************************************************************************
   9 *
  10 * File reapits.c
  11 *
  12 *********************************************************************************/
  13 /*C API TEST FOR Regular Expressions */
  14 /**
  15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
  16 *   try to test the full functionality.  It just calls each function and verifies that it
  17 *   works on a basic level.
  18 *
  19 *   More complete testing of regular expression functionality is done with the C++ tests.
  20 **/
  21
  22 #include "unicode/utypes.h"
  23
  24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include "unicode/uloc.h"
  29 #include "unicode/uregex.h"
  30 #include "unicode/ustring.h"
  31 #include "unicode/utext.h"
  32 #include "cintltst.h"
  33 #include "cmemory.h"
  34
  35 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
  36 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
  37
  38 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
  39 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
  40
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 */
  52 #define TEST_SETUP(pattern, testString, flags) {  \
  53     UChar   *srcString = NULL;  \
  54     status = U_ZERO_ERROR; \
  55     re = uregex_openC(pattern, flags, NULL, &status);  \
  56     TEST_ASSERT_SUCCESS(status);   \
  57     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
  58     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
  59     uregex_setText(re, srcString, -1, &status); \
  60     TEST_ASSERT_SUCCESS(status);  \
  61     if (U_SUCCESS(status)) {
  62
  63 #define TEST_TEARDOWN  \
  64     }  \
  65     TEST_ASSERT_SUCCESS(status);  \
  66     uregex_close(re);  \
  67     free(srcString);   \
  68     }
  69
  70
  71 /**
  72  * @param expected utf-8 array of bytes to be expected
  73  */
  74 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
  75      char     buf_inside_macro[120];
  76      int32_t  len = (int32_t)strlen(expected);
  77      UBool    success;
  78      if (nulTerm) {
  79          u_austrncpy(buf_inside_macro, (actual), len+1);
  80          buf_inside_macro[len+2] = 0;
  81          success = (strcmp((expected), buf_inside_macro) == 0);
  82      } else {
  83          u_austrncpy(buf_inside_macro, (actual), len);
  84          buf_inside_macro[len+1] = 0;
  85          success = (strncmp((expected), buf_inside_macro, len) == 0);
  86      }
  87      if (success == FALSE) {
  88          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
  89              file, line, (expected), buf_inside_macro);
  90      }
  91 }
  92
  93 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
  94
  95
  96 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
  97     int32_t u8i = 0;
  98     UChar32 u8c = 0;
  99     UChar32 utc = 0;
 100     UBool   stringsEqual = TRUE;
 101     utext_setNativeIndex(utext, 0);
 102     for (;;) {
 103         U8_NEXT_UNSAFE(utf8, u8i, u8c);
 104         utc = utext_next32(utext);
 105         if (u8c == 0 && utc == U_SENTINEL) {
 106             break;
 107         }
 108         if (u8c != utc || u8c == 0) {
 109             stringsEqual = FALSE;
 110             break;
 111         }
 112     }
 113     return stringsEqual;
 114 }
 115
 116
 117 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
 118     utext_setNativeIndex(actual, 0);
 119     if (!equals_utf8_utext(expected, actual)) {
 120         UChar32 c;
 121         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
 122         c = utext_next32From(actual, 0);
 123         while (c != U_SENTINEL) {
 124             if (0x20<c && c <0x7e) {
 125                 log_err("%c", c);
 126             } else {
 127                 log_err("%#x", c);
 128             }
 129             c = UTEXT_NEXT32(actual);
 130         }
 131         log_err("\"\n");
 132     }
 133 }
 134
 135 /*
 136  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 137  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 138  */
 139 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
 140
 141 static UBool testUTextEqual(UText *uta, UText *utb) {
 142     UChar32 ca = 0;
 143     UChar32 cb = 0;
 144     utext_setNativeIndex(uta, 0);
 145     utext_setNativeIndex(utb, 0);
 146     do {
 147         ca = utext_next32(uta);
 148         cb = utext_next32(utb);
 149         if (ca != cb) {
 150             break;
 151         }
 152     } while (ca != U_SENTINEL);
 153     return ca == cb;
 154 }
 155
 156
 157
 158
 159 static void TestRegexCAPI(void);
 160 static void TestBug4315(void);
 161 static void TestUTextAPI(void);
 162 static void TestRefreshInput(void);
 163 static void TestBug8421(void);
 164 static void TestBug10815(void);
 165
 166 void addURegexTest(TestNode** root);
 167
 168 void addURegexTest(TestNode** root)
 169 {
 170     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
 171     addTest(root, &TestBug4315,   "regex/TestBug4315");
 172     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
 173     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
 174     addTest(root, &TestBug8421,   "regex/TestBug8421");
 175     addTest(root, &TestBug10815,   "regex/TestBug10815");
 176 }
 177
 178 /*
 179  * Call back function and context struct used for testing
 180  *    regular expression user callbacks.  This test is mostly the same as
 181  *   the corresponding C++ test in intltest.
 182  */
 183 typedef struct callBackContext {
 184     int32_t          maxCalls;
 185     int32_t          numCalls;
 186     int32_t          lastSteps;
 187 } callBackContext;
 188
 189 static UBool U_EXPORT2 U_CALLCONV
 190 TestCallbackFn(const void *context, int32_t steps) {
 191   callBackContext  *info = (callBackContext *)context;
 192   if (info->lastSteps+1 != steps) {
 193       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
 194   }
 195   info->lastSteps = steps;
 196   info->numCalls++;
 197   return (info->numCalls < info->maxCalls);
 198 }
 199
 200 /*
 201  *   Regular Expression C API Tests
 202  */
 203 static void TestRegexCAPI(void) {
 204     UErrorCode           status = U_ZERO_ERROR;
 205     URegularExpression  *re;
 206     UChar                pat[200];
 207     UChar               *minus1;
 208
 209     memset(&minus1, -1, sizeof(minus1));
 210
 211     /* Mimimalist open/close */
 212     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
 213     re = uregex_open(pat, -1, 0, 0, &status);
 214     if (U_FAILURE(status)) {
 215          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
 216          return;
 217     }
 218     uregex_close(re);
 219
 220     /* Open with all flag values set */
 221     status = U_ZERO_ERROR;
 222     re = uregex_open(pat, -1,
 223         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
 224         0, &status);
 225     TEST_ASSERT_SUCCESS(status);
 226     uregex_close(re);
 227
 228     /* Open with an invalid flag */
 229     status = U_ZERO_ERROR;
 230     re = uregex_open(pat, -1, 0x40000000, 0, &status);
 231     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
 232     uregex_close(re);
 233
 234     /* Open with an unimplemented flag */
 235     status = U_ZERO_ERROR;
 236     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
 237     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
 238     uregex_close(re);
 239
 240     /* openC with an invalid parameter */
 241     status = U_ZERO_ERROR;
 242     re = uregex_openC(NULL,
 243         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 244     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
 245
 246     /* openC with an invalid parameter */
 247     status = U_USELESS_COLLATOR_ERROR;
 248     re = uregex_openC(NULL,
 249         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 250     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
 251
 252     /* openC   open from a C string */
 253     {
 254         const UChar   *p;
 255         int32_t  len;
 256         status = U_ZERO_ERROR;
 257         re = uregex_openC("abc*", 0, 0, &status);
 258         TEST_ASSERT_SUCCESS(status);
 259         p = uregex_pattern(re, &len, &status);
 260         TEST_ASSERT_SUCCESS(status);
 261
 262         /* The TEST_ASSERT_SUCCESS above should change too... */
 263         if(U_SUCCESS(status)) {
 264             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
 265             TEST_ASSERT(u_strcmp(pat, p) == 0);
 266             TEST_ASSERT(len==(int32_t)strlen("abc*"));
 267         }
 268
 269         uregex_close(re);
 270
 271         /*  TODO:  Open with ParseError parameter */
 272     }
 273
 274     /*
 275      *  clone
 276      */
 277     {
 278         URegularExpression *clone1;
 279         URegularExpression *clone2;
 280         URegularExpression *clone3;
 281         UChar  testString1[30];
 282         UChar  testString2[30];
 283         UBool  result;
 284
 285
 286         status = U_ZERO_ERROR;
 287         re = uregex_openC("abc*", 0, 0, &status);
 288         TEST_ASSERT_SUCCESS(status);
 289         clone1 = uregex_clone(re, &status);
 290         TEST_ASSERT_SUCCESS(status);
 291         TEST_ASSERT(clone1 != NULL);
 292
 293         status = U_ZERO_ERROR;
 294         clone2 = uregex_clone(re, &status);
 295         TEST_ASSERT_SUCCESS(status);
 296         TEST_ASSERT(clone2 != NULL);
 297         uregex_close(re);
 298
 299         status = U_ZERO_ERROR;
 300         clone3 = uregex_clone(clone2, &status);
 301         TEST_ASSERT_SUCCESS(status);
 302         TEST_ASSERT(clone3 != NULL);
 303
 304         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
 305         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
 306
 307         status = U_ZERO_ERROR;
 308         uregex_setText(clone1, testString1, -1, &status);
 309         TEST_ASSERT_SUCCESS(status);
 310         result = uregex_lookingAt(clone1, 0, &status);
 311         TEST_ASSERT_SUCCESS(status);
 312         TEST_ASSERT(result==TRUE);
 313
 314         status = U_ZERO_ERROR;
 315         uregex_setText(clone2, testString2, -1, &status);
 316         TEST_ASSERT_SUCCESS(status);
 317         result = uregex_lookingAt(clone2, 0, &status);
 318         TEST_ASSERT_SUCCESS(status);
 319         TEST_ASSERT(result==FALSE);
 320         result = uregex_find(clone2, 0, &status);
 321         TEST_ASSERT_SUCCESS(status);
 322         TEST_ASSERT(result==TRUE);
 323
 324         uregex_close(clone1);
 325         uregex_close(clone2);
 326         uregex_close(clone3);
 327
 328     }
 329
 330     /*
 331      *  pattern()
 332     */
 333     {
 334         const UChar  *resultPat;
 335         int32_t       resultLen;
 336         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
 337         status = U_ZERO_ERROR;
 338         re = uregex_open(pat, -1, 0, NULL, &status);
 339         resultPat = uregex_pattern(re, &resultLen, &status);
 340         TEST_ASSERT_SUCCESS(status);
 341
 342         /* The TEST_ASSERT_SUCCESS above should change too... */
 343         if (U_SUCCESS(status)) {
 344             TEST_ASSERT(resultLen == -1);
 345             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
 346         }
 347
 348         uregex_close(re);
 349
 350         status = U_ZERO_ERROR;
 351         re = uregex_open(pat, 3, 0, NULL, &status);
 352         resultPat = uregex_pattern(re, &resultLen, &status);
 353         TEST_ASSERT_SUCCESS(status);
 354         TEST_ASSERT_SUCCESS(status);
 355
 356         /* The TEST_ASSERT_SUCCESS above should change too... */
 357         if (U_SUCCESS(status)) {
 358             TEST_ASSERT(resultLen == 3);
 359             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
 360             TEST_ASSERT(u_strlen(resultPat) == 3);
 361         }
 362
 363         uregex_close(re);
 364     }
 365
 366     /*
 367      *  flags()
 368      */
 369     {
 370         int32_t  t;
 371
 372         status = U_ZERO_ERROR;
 373         re = uregex_open(pat, -1, 0, NULL, &status);
 374         t  = uregex_flags(re, &status);
 375         TEST_ASSERT_SUCCESS(status);
 376         TEST_ASSERT(t == 0);
 377         uregex_close(re);
 378
 379         status = U_ZERO_ERROR;
 380         re = uregex_open(pat, -1, 0, NULL, &status);
 381         t  = uregex_flags(re, &status);
 382         TEST_ASSERT_SUCCESS(status);
 383         TEST_ASSERT(t == 0);
 384         uregex_close(re);
 385
 386         status = U_ZERO_ERROR;
 387         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
 388         t  = uregex_flags(re, &status);
 389         TEST_ASSERT_SUCCESS(status);
 390         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
 391         uregex_close(re);
 392     }
 393
 394     /*
 395      *  setText() and lookingAt()
 396      */
 397     {
 398         UChar  text1[50];
 399         UChar  text2[50];
 400         UBool  result;
 401
 402         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
 403         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
 404         status = U_ZERO_ERROR;
 405         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 406         re = uregex_open(pat, -1, 0, NULL, &status);
 407         TEST_ASSERT_SUCCESS(status);
 408
 409         /* Operation before doing a setText should fail... */
 410         status = U_ZERO_ERROR;
 411         uregex_lookingAt(re, 0, &status);
 412         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
 413
 414         status = U_ZERO_ERROR;
 415         uregex_setText(re, text1, -1, &status);
 416         result = uregex_lookingAt(re, 0, &status);
 417         TEST_ASSERT(result == TRUE);
 418         TEST_ASSERT_SUCCESS(status);
 419
 420         status = U_ZERO_ERROR;
 421         uregex_setText(re, text2, -1, &status);
 422         result = uregex_lookingAt(re, 0, &status);
 423         TEST_ASSERT(result == FALSE);
 424         TEST_ASSERT_SUCCESS(status);
 425
 426         status = U_ZERO_ERROR;
 427         uregex_setText(re, text1, -1, &status);
 428         result = uregex_lookingAt(re, 0, &status);
 429         TEST_ASSERT(result == TRUE);
 430         TEST_ASSERT_SUCCESS(status);
 431
 432         status = U_ZERO_ERROR;
 433         uregex_setText(re, text1, 5, &status);
 434         result = uregex_lookingAt(re, 0, &status);
 435         TEST_ASSERT(result == FALSE);
 436         TEST_ASSERT_SUCCESS(status);
 437
 438         status = U_ZERO_ERROR;
 439         uregex_setText(re, text1, 6, &status);
 440         result = uregex_lookingAt(re, 0, &status);
 441         TEST_ASSERT(result == TRUE);
 442         TEST_ASSERT_SUCCESS(status);
 443
 444         uregex_close(re);
 445     }
 446
 447
 448     /*
 449      *  getText()
 450      */
 451     {
 452         UChar    text1[50];
 453         UChar    text2[50];
 454         const UChar   *result;
 455         int32_t  textLength;
 456
 457         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
 458         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
 459         status = U_ZERO_ERROR;
 460         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 461         re = uregex_open(pat, -1, 0, NULL, &status);
 462
 463         uregex_setText(re, text1, -1, &status);
 464         result = uregex_getText(re, &textLength, &status);
 465         TEST_ASSERT(result == text1);
 466         TEST_ASSERT(textLength == -1);
 467         TEST_ASSERT_SUCCESS(status);
 468
 469         status = U_ZERO_ERROR;
 470         uregex_setText(re, text2, 7, &status);
 471         result = uregex_getText(re, &textLength, &status);
 472         TEST_ASSERT(result == text2);
 473         TEST_ASSERT(textLength == 7);
 474         TEST_ASSERT_SUCCESS(status);
 475
 476         status = U_ZERO_ERROR;
 477         uregex_setText(re, text2, 4, &status);
 478         result = uregex_getText(re, &textLength, &status);
 479         TEST_ASSERT(result == text2);
 480         TEST_ASSERT(textLength == 4);
 481         TEST_ASSERT_SUCCESS(status);
 482         uregex_close(re);
 483     }
 484
 485     /*
 486      *  matches()
 487      */
 488     {
 489         UChar   text1[50];
 490         UBool   result;
 491         int     len;
 492         UChar   nullString[] = {0,0,0};
 493
 494         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
 495         status = U_ZERO_ERROR;
 496         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 497         re = uregex_open(pat, -1, 0, NULL, &status);
 498
 499         uregex_setText(re, text1, -1, &status);
 500         result = uregex_matches(re, 0, &status);
 501         TEST_ASSERT(result == FALSE);
 502         TEST_ASSERT_SUCCESS(status);
 503
 504         status = U_ZERO_ERROR;
 505         uregex_setText(re, text1, 6, &status);
 506         result = uregex_matches(re, 0, &status);
 507         TEST_ASSERT(result == TRUE);
 508         TEST_ASSERT_SUCCESS(status);
 509
 510         status = U_ZERO_ERROR;
 511         uregex_setText(re, text1, 6, &status);
 512         result = uregex_matches(re, 1, &status);
 513         TEST_ASSERT(result == FALSE);
 514         TEST_ASSERT_SUCCESS(status);
 515         uregex_close(re);
 516
 517         status = U_ZERO_ERROR;
 518         re = uregex_openC(".?", 0, NULL, &status);
 519         uregex_setText(re, text1, -1, &status);
 520         len = u_strlen(text1);
 521         result = uregex_matches(re, len, &status);
 522         TEST_ASSERT(result == TRUE);
 523         TEST_ASSERT_SUCCESS(status);
 524
 525         status = U_ZERO_ERROR;
 526         uregex_setText(re, nullString, -1, &status);
 527         TEST_ASSERT_SUCCESS(status);
 528         result = uregex_matches(re, 0, &status);
 529         TEST_ASSERT(result == TRUE);
 530         TEST_ASSERT_SUCCESS(status);
 531         uregex_close(re);
 532     }
 533
 534
 535     /*
 536      *  lookingAt()    Used in setText test.
 537      */
 538
 539
 540     /*
 541      *  find(), findNext, start, end, reset
 542      */
 543     {
 544         UChar    text1[50];
 545         UBool    result;
 546         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
 547         status = U_ZERO_ERROR;
 548         re = uregex_openC("rx", 0, NULL, &status);
 549
 550         uregex_setText(re, text1, -1, &status);
 551         result = uregex_find(re, 0, &status);
 552         TEST_ASSERT(result == TRUE);
 553         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 554         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 555         TEST_ASSERT_SUCCESS(status);
 556
 557         result = uregex_find(re, 9, &status);
 558         TEST_ASSERT(result == TRUE);
 559         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
 560         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
 561         TEST_ASSERT_SUCCESS(status);
 562
 563         result = uregex_find(re, 14, &status);
 564         TEST_ASSERT(result == FALSE);
 565         TEST_ASSERT_SUCCESS(status);
 566
 567         status = U_ZERO_ERROR;
 568         uregex_reset(re, 0, &status);
 569
 570         result = uregex_findNext(re, &status);
 571         TEST_ASSERT(result == TRUE);
 572         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 573         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 574         TEST_ASSERT_SUCCESS(status);
 575
 576         result = uregex_findNext(re, &status);
 577         TEST_ASSERT(result == TRUE);
 578         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
 579         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
 580         TEST_ASSERT_SUCCESS(status);
 581
 582         status = U_ZERO_ERROR;
 583         uregex_reset(re, 12, &status);
 584
 585         result = uregex_findNext(re, &status);
 586         TEST_ASSERT(result == TRUE);
 587         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
 588         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
 589         TEST_ASSERT_SUCCESS(status);
 590
 591         result = uregex_findNext(re, &status);
 592         TEST_ASSERT(result == FALSE);
 593         TEST_ASSERT_SUCCESS(status);
 594
 595         uregex_close(re);
 596     }
 597
 598     /*
 599      *  groupCount
 600      */
 601     {
 602         int32_t result;
 603
 604         status = U_ZERO_ERROR;
 605         re = uregex_openC("abc", 0, NULL, &status);
 606         result = uregex_groupCount(re, &status);
 607         TEST_ASSERT_SUCCESS(status);
 608         TEST_ASSERT(result == 0);
 609         uregex_close(re);
 610
 611         status = U_ZERO_ERROR;
 612         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
 613         result = uregex_groupCount(re, &status);
 614         TEST_ASSERT_SUCCESS(status);
 615         TEST_ASSERT(result == 3);
 616         uregex_close(re);
 617
 618     }
 619
 620
 621     /*
 622      *  group()
 623      */
 624     {
 625         UChar    text1[80];
 626         UChar    buf[80];
 627         UBool    result;
 628         int32_t  resultSz;
 629         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
 630
 631         status = U_ZERO_ERROR;
 632         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
 633         TEST_ASSERT_SUCCESS(status);
 634
 635
 636         uregex_setText(re, text1, -1, &status);
 637         result = uregex_find(re, 0, &status);
 638         TEST_ASSERT(result==TRUE);
 639
 640         /*  Capture Group 0, the full match.  Should succeed.  */
 641         status = U_ZERO_ERROR;
 642         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
 643         TEST_ASSERT_SUCCESS(status);
 644         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
 645         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 646
 647         /*  Capture group #1.  Should succeed. */
 648         status = U_ZERO_ERROR;
 649         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
 650         TEST_ASSERT_SUCCESS(status);
 651         TEST_ASSERT_STRING(" interior ", buf, TRUE);
 652         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
 653
 654         /*  Capture group out of range.  Error. */
 655         status = U_ZERO_ERROR;
 656         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
 657         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 658
 659         /* NULL buffer, pure pre-flight */
 660         status = U_ZERO_ERROR;
 661         resultSz = uregex_group(re, 0, NULL, 0, &status);
 662         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 663         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 664
 665         /* Too small buffer, truncated string */
 666         status = U_ZERO_ERROR;
 667         memset(buf, -1, sizeof(buf));
 668         resultSz = uregex_group(re, 0, buf, 5, &status);
 669         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 670         TEST_ASSERT_STRING("abc i", buf, FALSE);
 671         TEST_ASSERT(buf[5] == (UChar)0xffff);
 672         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 673
 674         /* Output string just fits buffer, no NUL term. */
 675         status = U_ZERO_ERROR;
 676         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
 677         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 678         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
 679         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 680         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
 681
 682         uregex_close(re);
 683
 684     }
 685
 686     /*
 687      *  Regions
 688      */
 689
 690
 691         /* SetRegion(), getRegion() do something  */
 692         TEST_SETUP(".*", "0123456789ABCDEF", 0)
 693         UChar resultString[40];
 694         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
 695         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
 696         uregex_setRegion(re, 3, 6, &status);
 697         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
 698         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
 699         TEST_ASSERT(uregex_findNext(re, &status));
 700         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
 701         TEST_ASSERT_STRING("345", resultString, TRUE);
 702         TEST_TEARDOWN;
 703
 704         /* find(start=-1) uses regions   */
 705         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 706         uregex_setRegion(re, 4, 6, &status);
 707         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 708         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 709         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 710         TEST_TEARDOWN;
 711
 712         /* find (start >=0) does not use regions   */
 713         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 714         uregex_setRegion(re, 4, 6, &status);
 715         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 716         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 717         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 718         TEST_TEARDOWN;
 719
 720         /* findNext() obeys regions    */
 721         TEST_SETUP(".", "0123456789ABCDEF", 0);
 722         uregex_setRegion(re, 4, 6, &status);
 723         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
 724         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 725         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
 726         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
 727         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
 728         TEST_TEARDOWN;
 729
 730         /* matches(start=-1) uses regions                                           */
 731         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
 732         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 733         uregex_setRegion(re, 4, 6, &status);
 734         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
 735         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 736         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 737         TEST_TEARDOWN;
 738
 739         /* matches (start >=0) does not use regions       */
 740         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 741         uregex_setRegion(re, 4, 6, &status);
 742         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
 743         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 744         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 745         TEST_TEARDOWN;
 746
 747         /* lookingAt(start=-1) uses regions                                         */
 748         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
 749         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 750         uregex_setRegion(re, 4, 6, &status);
 751         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
 752         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 753         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
 754         TEST_TEARDOWN;
 755
 756         /* lookingAt (start >=0) does not use regions  */
 757         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 758         uregex_setRegion(re, 4, 6, &status);
 759         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
 760         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 761         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
 762         TEST_TEARDOWN;
 763
 764         /* hitEnd()       */
 765         TEST_SETUP("[a-f]*", "abcdefghij", 0);
 766         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 767         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
 768         TEST_TEARDOWN;
 769
 770         TEST_SETUP("[a-f]*", "abcdef", 0);
 771         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 772         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
 773         TEST_TEARDOWN;
 774
 775         /* requireEnd   */
 776         TEST_SETUP("abcd", "abcd", 0);
 777         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 778         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
 779         TEST_TEARDOWN;
 780
 781         TEST_SETUP("abcd$", "abcd", 0);
 782         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 783         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
 784         TEST_TEARDOWN;
 785
 786         /* anchoringBounds        */
 787         TEST_SETUP("abc$", "abcdef", 0);
 788         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
 789         uregex_useAnchoringBounds(re, FALSE, &status);
 790         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
 791
 792         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
 793         uregex_useAnchoringBounds(re, TRUE, &status);
 794         uregex_setRegion(re, 0, 3, &status);
 795         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 796         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 797         TEST_TEARDOWN;
 798
 799         /* Transparent Bounds      */
 800         TEST_SETUP("abc(?=def)", "abcdef", 0);
 801         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
 802         uregex_useTransparentBounds(re, TRUE, &status);
 803         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
 804
 805         uregex_useTransparentBounds(re, FALSE, &status);
 806         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
 807         uregex_setRegion(re, 0, 3, &status);
 808         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
 809         uregex_useTransparentBounds(re, TRUE, &status);
 810         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
 811         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 812         TEST_TEARDOWN;
 813
 814
 815     /*
 816      *  replaceFirst()
 817      */
 818     {
 819         UChar    text1[80];
 820         UChar    text2[80];
 821         UChar    replText[80];
 822         UChar    buf[80];
 823         int32_t  resultSz;
 824         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
 825         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
 826         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
 827
 828         status = U_ZERO_ERROR;
 829         re = uregex_openC("x(.*?)x", 0, NULL, &status);
 830         TEST_ASSERT_SUCCESS(status);
 831
 832         /*  Normal case, with match */
 833         uregex_setText(re, text1, -1, &status);
 834         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 835         TEST_ASSERT_SUCCESS(status);
 836         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
 837         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 838
 839         /* No match.  Text should copy to output with no changes.  */
 840         status = U_ZERO_ERROR;
 841         uregex_setText(re, text2, -1, &status);
 842         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 843         TEST_ASSERT_SUCCESS(status);
 844         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 845         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
 846
 847         /*  Match, output just fills buffer, no termination warning. */
 848         status = U_ZERO_ERROR;
 849         uregex_setText(re, text1, -1, &status);
 850         memset(buf, -1, sizeof(buf));
 851         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 852         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 853         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 854         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 855         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 856
 857         /* Do the replaceFirst again, without first resetting anything.
 858          *  Should give the same results.
 859          */
 860         status = U_ZERO_ERROR;
 861         memset(buf, -1, sizeof(buf));
 862         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 863         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 864         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 865         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 866         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 867
 868         /* NULL buffer, zero buffer length */
 869         status = U_ZERO_ERROR;
 870         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
 871         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 872         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 873
 874         /* Buffer too small by one */
 875         status = U_ZERO_ERROR;
 876         memset(buf, -1, sizeof(buf));
 877         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
 878         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 879         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
 880         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 881         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 882
 883         uregex_close(re);
 884     }
 885
 886
 887     /*
 888      *  replaceAll()
 889      */
 890     {
 891         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
 892         UChar    text2[80];          /*  "No match Here"           */
 893         UChar    replText[80];       /*  "<$1>"                    */
 894         UChar    replText2[80];      /*  "<<$1>>"                  */
 895         const char * pattern = "x(.*?)x";
 896         const char * expectedResult = "Replace <aa> <1> <...>.";
 897         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
 898         UChar    buf[80];
 899         int32_t  resultSize;
 900         int32_t  expectedResultSize;
 901         int32_t  expectedResultSize2;
 902         int32_t  i;
 903
 904         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
 905         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
 906         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
 907         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
 908         expectedResultSize = strlen(expectedResult);
 909         expectedResultSize2 = strlen(expectedResult2);
 910
 911         status = U_ZERO_ERROR;
 912         re = uregex_openC(pattern, 0, NULL, &status);
 913         TEST_ASSERT_SUCCESS(status);
 914
 915         /*  Normal case, with match */
 916         uregex_setText(re, text1, -1, &status);
 917         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 918         TEST_ASSERT_SUCCESS(status);
 919         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
 920         TEST_ASSERT(resultSize == expectedResultSize);
 921
 922         /* No match.  Text should copy to output with no changes.  */
 923         status = U_ZERO_ERROR;
 924         uregex_setText(re, text2, -1, &status);
 925         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 926         TEST_ASSERT_SUCCESS(status);
 927         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 928         TEST_ASSERT(resultSize == u_strlen(text2));
 929
 930         /*  Match, output just fills buffer, no termination warning. */
 931         status = U_ZERO_ERROR;
 932         uregex_setText(re, text1, -1, &status);
 933         memset(buf, -1, sizeof(buf));
 934         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
 935         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 936         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
 937         TEST_ASSERT(resultSize == expectedResultSize);
 938         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 939
        /* Do the replaceAll again, without first resetting anything.
 941          *  Should give the same results.
 942          */
 943         status = U_ZERO_ERROR;
 944         memset(buf, -1, sizeof(buf));
 945         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
 946         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 947         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
 948         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 949         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 950
 951         /* NULL buffer, zero buffer length */
 952         status = U_ZERO_ERROR;
 953         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
 954         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 955         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 956
 957         /* Buffer too small.  Try every size, which will tickle edge cases
 958          * in uregex_appendReplacement (used by replaceAll)   */
 959         for (i=0; i<expectedResultSize; i++) {
 960             char  expected[80];
 961             status = U_ZERO_ERROR;
 962             memset(buf, -1, sizeof(buf));
 963             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
 964             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 965             strcpy(expected, expectedResult);
 966             expected[i] = 0;
 967             TEST_ASSERT_STRING(expected, buf, FALSE);
 968             TEST_ASSERT(resultSize == expectedResultSize);
 969             TEST_ASSERT(buf[i] == (UChar)0xffff);
 970         }
 971
 972         /* Buffer too small.  Same as previous test, except this time the replacement
 973          * text is longer than the match capture group, making the length of the complete
 974          * replacement longer than the original string.
 975          */
 976         for (i=0; i<expectedResultSize2; i++) {
 977             char  expected[80];
 978             status = U_ZERO_ERROR;
 979             memset(buf, -1, sizeof(buf));
 980             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
 981             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 982             strcpy(expected, expectedResult2);
 983             expected[i] = 0;
 984             TEST_ASSERT_STRING(expected, buf, FALSE);
 985             TEST_ASSERT(resultSize == expectedResultSize2);
 986             TEST_ASSERT(buf[i] == (UChar)0xffff);
 987         }
 988
 989
 990         uregex_close(re);
 991     }
 992
 993
 994     /*
 995      *  appendReplacement()
 996      */
 997     {
 998         UChar    text[100];
 999         UChar    repl[100];
1000         UChar    buf[100];
1001         UChar   *bufPtr;
1002         int32_t  bufCap;
1003
1004
1005         status = U_ZERO_ERROR;
1006         re = uregex_openC(".*", 0, 0, &status);
1007         TEST_ASSERT_SUCCESS(status);
1008
1009         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1010         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1011         uregex_setText(re, text, -1, &status);
1012
1013         /* match covers whole target string */
1014         uregex_find(re, 0, &status);
1015         TEST_ASSERT_SUCCESS(status);
1016         bufPtr = buf;
1017         bufCap = UPRV_LENGTHOF(buf);
1018         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1019         TEST_ASSERT_SUCCESS(status);
1020         TEST_ASSERT_STRING("some other", buf, TRUE);
1021
1022         /* Match has \u \U escapes */
1023         uregex_find(re, 0, &status);
1024         TEST_ASSERT_SUCCESS(status);
1025         bufPtr = buf;
1026         bufCap = UPRV_LENGTHOF(buf);
1027         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1028         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1029         TEST_ASSERT_SUCCESS(status);
1030         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1031
1032         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1033         status = U_ZERO_ERROR;
1034         uregex_find(re, 0, &status);
1035         TEST_ASSERT_SUCCESS(status);
1036         bufPtr = buf;
1037         status = U_BUFFER_OVERFLOW_ERROR;
1038         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1039         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1040
1041         uregex_close(re);
1042     }
1043
1044
1045     /*
1046      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1047      */
1048
1049     /*
1050      *  split()
1051      */
1052     {
1053         UChar    textToSplit[80];
1054         UChar    text2[80];
1055         UChar    buf[200];
1056         UChar    *fields[10];
1057         int32_t  numFields;
1058         int32_t  requiredCapacity;
1059         int32_t  spaceNeeded;
1060         int32_t  sz;
1061
1062         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1063         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1064
1065         status = U_ZERO_ERROR;
1066         re = uregex_openC(":", 0, NULL, &status);
1067
1068
1069         /*  Simple split */
1070
1071         uregex_setText(re, textToSplit, -1, &status);
1072         TEST_ASSERT_SUCCESS(status);
1073
1074         /* The TEST_ASSERT_SUCCESS call above should change too... */
1075         if (U_SUCCESS(status)) {
1076             memset(fields, -1, sizeof(fields));
1077             numFields =
1078                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1079             TEST_ASSERT_SUCCESS(status);
1080
1081             /* The TEST_ASSERT_SUCCESS call above should change too... */
1082             if(U_SUCCESS(status)) {
1083                 TEST_ASSERT(numFields == 3);
1084                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1085                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1086                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1087                 TEST_ASSERT(fields[3] == NULL);
1088
1089                 spaceNeeded = u_strlen(textToSplit) -
1090                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1091                             numFields;          /* Each field gets a NUL terminator */
1092
1093                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1094             }
1095         }
1096
1097         uregex_close(re);
1098
1099
1100         /*  Split with too few output strings available */
1101         status = U_ZERO_ERROR;
1102         re = uregex_openC(":", 0, NULL, &status);
1103         uregex_setText(re, textToSplit, -1, &status);
1104         TEST_ASSERT_SUCCESS(status);
1105
1106         /* The TEST_ASSERT_SUCCESS call above should change too... */
1107         if(U_SUCCESS(status)) {
1108             memset(fields, -1, sizeof(fields));
1109             numFields =
1110                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1111             TEST_ASSERT_SUCCESS(status);
1112
1113             /* The TEST_ASSERT_SUCCESS call above should change too... */
1114             if(U_SUCCESS(status)) {
1115                 TEST_ASSERT(numFields == 2);
1116                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1117                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1118                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1119
1120                 spaceNeeded = u_strlen(textToSplit) -
1121                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1122                             numFields;          /* Each field gets a NUL terminator */
1123
1124                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1125
1126                 /* Split with a range of output buffer sizes.  */
1127                 spaceNeeded = u_strlen(textToSplit) -
1128                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1129                     numFields;          /* Each field gets a NUL terminator */
1130
1131                 for (sz=0; sz < spaceNeeded+1; sz++) {
1132                     memset(fields, -1, sizeof(fields));
1133                     status = U_ZERO_ERROR;
1134                     numFields =
1135                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1136                     if (sz >= spaceNeeded) {
1137                         TEST_ASSERT_SUCCESS(status);
1138                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1139                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1140                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1141                     } else {
1142                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1143                     }
1144                     TEST_ASSERT(numFields == 3);
1145                     TEST_ASSERT(fields[3] == NULL);
1146                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1147                 }
1148             }
1149         }
1150
1151         uregex_close(re);
1152     }
1153
1154
1155
1156
1157     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1158      *                   comes out as additional fields.  */
1159     {
1160         UChar    textToSplit[80];
1161         UChar    buf[200];
1162         UChar    *fields[10];
1163         int32_t  numFields;
1164         int32_t  requiredCapacity;
1165         int32_t  spaceNeeded;
1166         int32_t  sz;
1167
1168         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1169
1170         status = U_ZERO_ERROR;
1171         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1172
1173         uregex_setText(re, textToSplit, -1, &status);
1174         TEST_ASSERT_SUCCESS(status);
1175
1176         /* The TEST_ASSERT_SUCCESS call above should change too... */
1177         if(U_SUCCESS(status)) {
1178             memset(fields, -1, sizeof(fields));
1179             numFields =
1180                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1181             TEST_ASSERT_SUCCESS(status);
1182
1183             /* The TEST_ASSERT_SUCCESS call above should change too... */
1184             if(U_SUCCESS(status)) {
1185                 TEST_ASSERT(numFields == 5);
1186                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1187                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1188                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1189                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1190                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1191                 TEST_ASSERT(fields[5] == NULL);
1192                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1193                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1194             }
1195         }
1196
1197         /*  Split with too few output strings available (2) */
1198         status = U_ZERO_ERROR;
1199         memset(fields, -1, sizeof(fields));
1200         numFields =
1201             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1202         TEST_ASSERT_SUCCESS(status);
1203
1204         /* The TEST_ASSERT_SUCCESS call above should change too... */
1205         if(U_SUCCESS(status)) {
1206             TEST_ASSERT(numFields == 2);
1207             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1208             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1209             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1210
1211             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1212             TEST_ASSERT(spaceNeeded == requiredCapacity);
1213         }
1214
1215         /*  Split with too few output strings available (3) */
1216         status = U_ZERO_ERROR;
1217         memset(fields, -1, sizeof(fields));
1218         numFields =
1219             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1220         TEST_ASSERT_SUCCESS(status);
1221
1222         /* The TEST_ASSERT_SUCCESS call above should change too... */
1223         if(U_SUCCESS(status)) {
1224             TEST_ASSERT(numFields == 3);
1225             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1226             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1227             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1228             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1229
1230             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1231             TEST_ASSERT(spaceNeeded == requiredCapacity);
1232         }
1233
1234         /*  Split with just enough output strings available (5) */
1235         status = U_ZERO_ERROR;
1236         memset(fields, -1, sizeof(fields));
1237         numFields =
1238             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1239         TEST_ASSERT_SUCCESS(status);
1240
1241         /* The TEST_ASSERT_SUCCESS call above should change too... */
1242         if(U_SUCCESS(status)) {
1243             TEST_ASSERT(numFields == 5);
1244             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1245             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1246             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1247             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1248             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1249             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1250
1251             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1252             TEST_ASSERT(spaceNeeded == requiredCapacity);
1253         }
1254
1255         /* Split, end of text is a field delimiter.   */
1256         status = U_ZERO_ERROR;
1257         sz = strlen("first <tag-a> second<tag-b>");
1258         uregex_setText(re, textToSplit, sz, &status);
1259         TEST_ASSERT_SUCCESS(status);
1260
1261         /* The TEST_ASSERT_SUCCESS call above should change too... */
1262         if(U_SUCCESS(status)) {
1263             memset(fields, -1, sizeof(fields));
1264             numFields =
1265                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1266             TEST_ASSERT_SUCCESS(status);
1267
1268             /* The TEST_ASSERT_SUCCESS call above should change too... */
1269             if(U_SUCCESS(status)) {
1270                 TEST_ASSERT(numFields == 5);
1271                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1272                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1273                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1274                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1275                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1276                 TEST_ASSERT(fields[5] == NULL);
1277                 TEST_ASSERT(fields[8] == NULL);
1278                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1279                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1280                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1281             }
1282         }
1283
1284         uregex_close(re);
1285     }
1286
1287     /*
1288      * set/getTimeLimit
1289      */
1290      TEST_SETUP("abc$", "abcdef", 0);
1291      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1292      uregex_setTimeLimit(re, 1000, &status);
1293      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1294      TEST_ASSERT_SUCCESS(status);
1295      uregex_setTimeLimit(re, -1, &status);
1296      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1297      status = U_ZERO_ERROR;
1298      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1299      TEST_TEARDOWN;
1300
1301      /*
1302       * set/get Stack Limit
1303       */
1304      TEST_SETUP("abc$", "abcdef", 0);
1305      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1306      uregex_setStackLimit(re, 40000, &status);
1307      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1308      TEST_ASSERT_SUCCESS(status);
1309      uregex_setStackLimit(re, -1, &status);
1310      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1311      status = U_ZERO_ERROR;
1312      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1313      TEST_TEARDOWN;
1314
1315
1316      /*
1317       * Get/Set callback functions
1318       *     This test is copied from intltest regex/Callbacks
1319       *     The pattern and test data will run long enough to cause the callback
1320       *       to be invoked.  The nested '+' operators give exponential time
1321       *       behavior with increasing string length.
1322       */
1323      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1324      callBackContext cbInfo = {4, 0, 0};
1325      const void     *pContext   = &cbInfo;
1326      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1327
1328      /*  Getting the callback fn when it hasn't been set must return NULL  */
1329      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1330      TEST_ASSERT_SUCCESS(status);
1331      TEST_ASSERT(returnedFn == NULL);
1332      TEST_ASSERT(pContext == NULL);
1333
     /* Set the callback and do a match.                                  */
1335      /* The callback function should record that it has been called.      */
1336      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1337      TEST_ASSERT_SUCCESS(status);
1338      TEST_ASSERT(cbInfo.numCalls == 0);
1339      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1340      TEST_ASSERT_SUCCESS(status);
1341      TEST_ASSERT(cbInfo.numCalls > 0);
1342
1343      /* Getting the callback should return the values that were set above.  */
1344      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1345      TEST_ASSERT(returnedFn == &TestCallbackFn);
1346      TEST_ASSERT(pContext == &cbInfo);
1347
1348      TEST_TEARDOWN;
1349 }
1350
1351
1352
1353 static void TestBug4315(void) {
1354     UErrorCode      theICUError = U_ZERO_ERROR;
1355     URegularExpression *theRegEx;
1356     UChar           *textBuff;
1357     const char      *thePattern;
1358     UChar            theString[100];
1359     UChar           *destFields[24];
1360     int32_t         neededLength1;
1361     int32_t         neededLength2;
1362
1363     int32_t         wordCount = 0;
1364     int32_t         destFieldsSize = 24;
1365
1366     thePattern  = "ck ";
1367     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1368
1369     /* open a regex */
1370     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1371     TEST_ASSERT_SUCCESS(theICUError);
1372
1373     /* set the input string */
1374     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1375     TEST_ASSERT_SUCCESS(theICUError);
1376
1377     /* split */
1378     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1379      *  error occurs! */
1380     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1381         destFieldsSize, &theICUError);
1382
1383     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1384     TEST_ASSERT(wordCount==3);
1385
1386     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1387     {
1388         theICUError = U_ZERO_ERROR;
1389         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1390         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1391             destFields, destFieldsSize, &theICUError);
1392         TEST_ASSERT(wordCount==3);
1393         TEST_ASSERT_SUCCESS(theICUError);
1394         TEST_ASSERT(neededLength1 == neededLength2);
1395         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1396         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1397         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1398         TEST_ASSERT(destFields[3] == NULL);
1399         free(textBuff);
1400     }
1401     uregex_close(theRegEx);
1402 }
1403
1404 /* Based on TestRegexCAPI() */
1405 static void TestUTextAPI(void) {
1406     UErrorCode           status = U_ZERO_ERROR;
1407     URegularExpression  *re;
1408     UText                patternText = UTEXT_INITIALIZER;
1409     UChar                pat[200];
1410     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1411
    /* Minimalist open/close */
1413     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1414     re = uregex_openUText(&patternText, 0, 0, &status);
1415     if (U_FAILURE(status)) {
1416          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1417          utext_close(&patternText);
1418          return;
1419     }
1420     uregex_close(re);
1421
1422     /* Open with all flag values set */
1423     status = U_ZERO_ERROR;
1424     re = uregex_openUText(&patternText,
1425         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1426         0, &status);
1427     TEST_ASSERT_SUCCESS(status);
1428     uregex_close(re);
1429
1430     /* Open with an invalid flag */
1431     status = U_ZERO_ERROR;
1432     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1433     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1434     uregex_close(re);
1435
1436     /* open with an invalid parameter */
1437     status = U_ZERO_ERROR;
1438     re = uregex_openUText(NULL,
1439         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1440     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1441
1442     /*
1443      *  clone
1444      */
1445     {
1446         URegularExpression *clone1;
1447         URegularExpression *clone2;
1448         URegularExpression *clone3;
1449         UChar  testString1[30];
1450         UChar  testString2[30];
1451         UBool  result;
1452
1453
1454         status = U_ZERO_ERROR;
1455         re = uregex_openUText(&patternText, 0, 0, &status);
1456         TEST_ASSERT_SUCCESS(status);
1457         clone1 = uregex_clone(re, &status);
1458         TEST_ASSERT_SUCCESS(status);
1459         TEST_ASSERT(clone1 != NULL);
1460
1461         status = U_ZERO_ERROR;
1462         clone2 = uregex_clone(re, &status);
1463         TEST_ASSERT_SUCCESS(status);
1464         TEST_ASSERT(clone2 != NULL);
1465         uregex_close(re);
1466
1467         status = U_ZERO_ERROR;
1468         clone3 = uregex_clone(clone2, &status);
1469         TEST_ASSERT_SUCCESS(status);
1470         TEST_ASSERT(clone3 != NULL);
1471
1472         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1473         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1474
1475         status = U_ZERO_ERROR;
1476         uregex_setText(clone1, testString1, -1, &status);
1477         TEST_ASSERT_SUCCESS(status);
1478         result = uregex_lookingAt(clone1, 0, &status);
1479         TEST_ASSERT_SUCCESS(status);
1480         TEST_ASSERT(result==TRUE);
1481
1482         status = U_ZERO_ERROR;
1483         uregex_setText(clone2, testString2, -1, &status);
1484         TEST_ASSERT_SUCCESS(status);
1485         result = uregex_lookingAt(clone2, 0, &status);
1486         TEST_ASSERT_SUCCESS(status);
1487         TEST_ASSERT(result==FALSE);
1488         result = uregex_find(clone2, 0, &status);
1489         TEST_ASSERT_SUCCESS(status);
1490         TEST_ASSERT(result==TRUE);
1491
1492         uregex_close(clone1);
1493         uregex_close(clone2);
1494         uregex_close(clone3);
1495
1496     }
1497
1498     /*
1499      *  pattern() and patternText()
1500      */
1501     {
1502         const UChar  *resultPat;
1503         int32_t       resultLen;
1504         UText        *resultText;
1505         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1506         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1507         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1508         status = U_ZERO_ERROR;
1509
1510         utext_openUTF8(&patternText, str_hello, -1, &status);
1511         re = uregex_open(pat, -1, 0, NULL, &status);
1512         resultPat = uregex_pattern(re, &resultLen, &status);
1513         TEST_ASSERT_SUCCESS(status);
1514
1515         /* The TEST_ASSERT_SUCCESS above should change too... */
1516         if (U_SUCCESS(status)) {
1517             TEST_ASSERT(resultLen == -1);
1518             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1519         }
1520
1521         resultText = uregex_patternUText(re, &status);
1522         TEST_ASSERT_SUCCESS(status);
1523         TEST_ASSERT_UTEXT(str_hello, resultText);
1524
1525         uregex_close(re);
1526
1527         status = U_ZERO_ERROR;
1528         re = uregex_open(pat, 3, 0, NULL, &status);
1529         resultPat = uregex_pattern(re, &resultLen, &status);
1530         TEST_ASSERT_SUCCESS(status);
1531
1532         /* The TEST_ASSERT_SUCCESS above should change too... */
1533         if (U_SUCCESS(status)) {
1534             TEST_ASSERT(resultLen == 3);
1535             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1536             TEST_ASSERT(u_strlen(resultPat) == 3);
1537         }
1538
1539         resultText = uregex_patternUText(re, &status);
1540         TEST_ASSERT_SUCCESS(status);
1541         TEST_ASSERT_UTEXT(str_hel, resultText);
1542
1543         uregex_close(re);
1544     }
1545
1546     /*
1547      *  setUText() and lookingAt()
1548      */
1549     {
1550         UText  text1 = UTEXT_INITIALIZER;
1551         UText  text2 = UTEXT_INITIALIZER;
1552         UBool  result;
1553         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1554         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1555         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1556         status = U_ZERO_ERROR;
1557         utext_openUTF8(&text1, str_abcccd, -1, &status);
1558         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1559
1560         utext_openUTF8(&patternText, str_abcd, -1, &status);
1561         re = uregex_openUText(&patternText, 0, NULL, &status);
1562         TEST_ASSERT_SUCCESS(status);
1563
1564         /* Operation before doing a setText should fail... */
1565         status = U_ZERO_ERROR;
1566         uregex_lookingAt(re, 0, &status);
1567         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1568
1569         status = U_ZERO_ERROR;
1570         uregex_setUText(re, &text1, &status);
1571         result = uregex_lookingAt(re, 0, &status);
1572         TEST_ASSERT(result == TRUE);
1573         TEST_ASSERT_SUCCESS(status);
1574
1575         status = U_ZERO_ERROR;
1576         uregex_setUText(re, &text2, &status);
1577         result = uregex_lookingAt(re, 0, &status);
1578         TEST_ASSERT(result == FALSE);
1579         TEST_ASSERT_SUCCESS(status);
1580
1581         status = U_ZERO_ERROR;
1582         uregex_setUText(re, &text1, &status);
1583         result = uregex_lookingAt(re, 0, &status);
1584         TEST_ASSERT(result == TRUE);
1585         TEST_ASSERT_SUCCESS(status);
1586
1587         uregex_close(re);
1588         utext_close(&text1);
1589         utext_close(&text2);
1590     }
1591
1592
1593     /*
1594      *  getText() and getUText()
1595      */
1596     {
1597         UText  text1 = UTEXT_INITIALIZER;
1598         UText  text2 = UTEXT_INITIALIZER;
1599         UChar  text2Chars[20];
1600         UText  *resultText;
1601         const UChar   *result;
1602         int32_t  textLength;
1603         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1604         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1605         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1606
1607
1608         status = U_ZERO_ERROR;
1609         utext_openUTF8(&text1, str_abcccd, -1, &status);
1610         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1611         utext_openUChars(&text2, text2Chars, -1, &status);
1612
1613         utext_openUTF8(&patternText, str_abcd, -1, &status);
1614         re = uregex_openUText(&patternText, 0, NULL, &status);
1615
1616         /* First set a UText */
1617         uregex_setUText(re, &text1, &status);
1618         resultText = uregex_getUText(re, NULL, &status);
1619         TEST_ASSERT_SUCCESS(status);
1620         TEST_ASSERT(resultText != &text1);
1621         utext_setNativeIndex(resultText, 0);
1622         utext_setNativeIndex(&text1, 0);
1623         TEST_ASSERT(testUTextEqual(resultText, &text1));
1624         utext_close(resultText);
1625
1626         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1627         (void)result;    /* Suppress set but not used warning. */
1628         TEST_ASSERT(textLength == -1 || textLength == 6);
1629         resultText = uregex_getUText(re, NULL, &status);
1630         TEST_ASSERT_SUCCESS(status);
1631         TEST_ASSERT(resultText != &text1);
1632         utext_setNativeIndex(resultText, 0);
1633         utext_setNativeIndex(&text1, 0);
1634         TEST_ASSERT(testUTextEqual(resultText, &text1));
1635         utext_close(resultText);
1636
1637         /* Then set a UChar * */
1638         uregex_setText(re, text2Chars, 7, &status);
1639         resultText = uregex_getUText(re, NULL, &status);
1640         TEST_ASSERT_SUCCESS(status);
1641         utext_setNativeIndex(resultText, 0);
1642         utext_setNativeIndex(&text2, 0);
1643         TEST_ASSERT(testUTextEqual(resultText, &text2));
1644         utext_close(resultText);
1645         result = uregex_getText(re, &textLength, &status);
1646         TEST_ASSERT(textLength == 7);
1647
1648         uregex_close(re);
1649         utext_close(&text1);
1650         utext_close(&text2);
1651     }
1652
1653     /*
1654      *  matches()
1655      */
1656     {
1657         UText   text1 = UTEXT_INITIALIZER;
1658         UBool   result;
1659         UText   nullText = UTEXT_INITIALIZER;
1660         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1661         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1662
1663         status = U_ZERO_ERROR;
1664         utext_openUTF8(&text1, str_abcccde, -1, &status);
1665         utext_openUTF8(&patternText, str_abcd, -1, &status);
1666         re = uregex_openUText(&patternText, 0, NULL, &status);
1667
1668         uregex_setUText(re, &text1, &status);
1669         result = uregex_matches(re, 0, &status);
1670         TEST_ASSERT(result == FALSE);
1671         TEST_ASSERT_SUCCESS(status);
1672         uregex_close(re);
1673
1674         status = U_ZERO_ERROR;
1675         re = uregex_openC(".?", 0, NULL, &status);
1676         uregex_setUText(re, &text1, &status);
1677         result = uregex_matches(re, 7, &status);
1678         TEST_ASSERT(result == TRUE);
1679         TEST_ASSERT_SUCCESS(status);
1680
1681         status = U_ZERO_ERROR;
1682         utext_openUTF8(&nullText, "", -1, &status);
1683         uregex_setUText(re, &nullText, &status);
1684         TEST_ASSERT_SUCCESS(status);
1685         result = uregex_matches(re, 0, &status);
1686         TEST_ASSERT(result == TRUE);
1687         TEST_ASSERT_SUCCESS(status);
1688
1689         uregex_close(re);
1690         utext_close(&text1);
1691         utext_close(&nullText);
1692     }
1693
1694
1695     /*
1696      *  lookingAt()    Used in setText test.
1697      */
1698
1699
1700     /*
1701      *  find(), findNext, start, end, reset
1702      */
1703     {
1704         UChar    text1[50];
1705         UBool    result;
1706         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1707         status = U_ZERO_ERROR;
1708         re = uregex_openC("rx", 0, NULL, &status);
1709
1710         uregex_setText(re, text1, -1, &status);
1711         result = uregex_find(re, 0, &status);
1712         TEST_ASSERT(result == TRUE);
1713         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1714         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1715         TEST_ASSERT_SUCCESS(status);
1716
1717         result = uregex_find(re, 9, &status);
1718         TEST_ASSERT(result == TRUE);
1719         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1720         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1721         TEST_ASSERT_SUCCESS(status);
1722
1723         result = uregex_find(re, 14, &status);
1724         TEST_ASSERT(result == FALSE);
1725         TEST_ASSERT_SUCCESS(status);
1726
1727         status = U_ZERO_ERROR;
1728         uregex_reset(re, 0, &status);
1729
1730         result = uregex_findNext(re, &status);
1731         TEST_ASSERT(result == TRUE);
1732         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1733         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1734         TEST_ASSERT_SUCCESS(status);
1735
1736         result = uregex_findNext(re, &status);
1737         TEST_ASSERT(result == TRUE);
1738         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1739         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1740         TEST_ASSERT_SUCCESS(status);
1741
1742         status = U_ZERO_ERROR;
1743         uregex_reset(re, 12, &status);
1744
1745         result = uregex_findNext(re, &status);
1746         TEST_ASSERT(result == TRUE);
1747         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1748         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1749         TEST_ASSERT_SUCCESS(status);
1750
1751         result = uregex_findNext(re, &status);
1752         TEST_ASSERT(result == FALSE);
1753         TEST_ASSERT_SUCCESS(status);
1754
1755         uregex_close(re);
1756     }
1757
1758     /*
1759      *  groupUText()
1760      */
1761     {
1762         UChar    text1[80];
1763         UText   *actual;
1764         UBool    result;
1765         int64_t  groupLen = 0;
1766         UChar    groupBuf[20];
1767
1768         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1769
1770         status = U_ZERO_ERROR;
1771         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1772         TEST_ASSERT_SUCCESS(status);
1773
1774         uregex_setText(re, text1, -1, &status);
1775         result = uregex_find(re, 0, &status);
1776         TEST_ASSERT(result==TRUE);
1777
1778         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1779         status = U_ZERO_ERROR;
1780         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1781         TEST_ASSERT_SUCCESS(status);
1782
1783         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1784         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1785         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1786
1787         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1788         utext_close(actual);
1789
1790         /*  Capture group #1.  Should succeed. */
1791         status = U_ZERO_ERROR;
1792
1793         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1794         TEST_ASSERT_SUCCESS(status);
1795         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1796                                                            /*    (within the string text1)           */
1797         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1798         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1799         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1800
1801         utext_close(actual);
1802
1803         /*  Capture group out of range.  Error. */
1804         status = U_ZERO_ERROR;
1805         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1806         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1807         utext_close(actual);
1808
1809         uregex_close(re);
1810     }
1811
1812     /*
1813      *  replaceFirst()
1814      */
1815     {
1816         UChar    text1[80];
1817         UChar    text2[80];
1818         UText    replText = UTEXT_INITIALIZER;
1819         UText   *result;
1820         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1821         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1822         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1823                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1824         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1825         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1826         status = U_ZERO_ERROR;
1827         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1828         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1829         utext_openUTF8(&replText, str_1x, -1, &status);
1830
1831         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1832         TEST_ASSERT_SUCCESS(status);
1833
1834         /*  Normal case, with match */
1835         uregex_setText(re, text1, -1, &status);
1836         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1837         TEST_ASSERT_SUCCESS(status);
1838         TEST_ASSERT_UTEXT(str_Replxxx, result);
1839         utext_close(result);
1840
1841         /* No match.  Text should copy to output with no changes.  */
1842         uregex_setText(re, text2, -1, &status);
1843         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1844         TEST_ASSERT_SUCCESS(status);
1845         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1846         utext_close(result);
1847
1848         /* Unicode escapes */
1849         uregex_setText(re, text1, -1, &status);
1850         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1851         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1852         TEST_ASSERT_SUCCESS(status);
1853         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1854         utext_close(result);
1855
1856         uregex_close(re);
1857         utext_close(&replText);
1858     }
1859
1860
1861     /*
1862      *  replaceAll()
1863      */
1864     {
1865         UChar    text1[80];
1866         UChar    text2[80];
1867         UText    replText = UTEXT_INITIALIZER;
1868         UText   *result;
1869         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1870         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1871         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1872         status = U_ZERO_ERROR;
1873         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1874         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1875         utext_openUTF8(&replText, str_1, -1, &status);
1876
1877         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1878         TEST_ASSERT_SUCCESS(status);
1879
1880         /*  Normal case, with match */
1881         uregex_setText(re, text1, -1, &status);
1882         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1883         TEST_ASSERT_SUCCESS(status);
1884         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1885         utext_close(result);
1886
1887         /* No match.  Text should copy to output with no changes.  */
1888         uregex_setText(re, text2, -1, &status);
1889         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1890         TEST_ASSERT_SUCCESS(status);
1891         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1892         utext_close(result);
1893
1894         uregex_close(re);
1895         utext_close(&replText);
1896     }
1897
1898
1899     /*
1900      *  appendReplacement()
1901      */
1902     {
1903         UChar    text[100];
1904         UChar    repl[100];
1905         UChar    buf[100];
1906         UChar   *bufPtr;
1907         int32_t  bufCap;
1908
1909         status = U_ZERO_ERROR;
1910         re = uregex_openC(".*", 0, 0, &status);
1911         TEST_ASSERT_SUCCESS(status);
1912
1913         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1914         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1915         uregex_setText(re, text, -1, &status);
1916
1917         /* match covers whole target string */
1918         uregex_find(re, 0, &status);
1919         TEST_ASSERT_SUCCESS(status);
1920         bufPtr = buf;
1921         bufCap = UPRV_LENGTHOF(buf);
1922         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1923         TEST_ASSERT_SUCCESS(status);
1924         TEST_ASSERT_STRING("some other", buf, TRUE);
1925
1926         /* Match has \u \U escapes */
1927         uregex_find(re, 0, &status);
1928         TEST_ASSERT_SUCCESS(status);
1929         bufPtr = buf;
1930         bufCap = UPRV_LENGTHOF(buf);
1931         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1932         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1933         TEST_ASSERT_SUCCESS(status);
1934         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1935
1936         uregex_close(re);
1937     }
1938
1939
1940     /*
1941      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1942      */
1943
1944     /*
1945      *  splitUText()
1946      */
1947     {
1948         UChar    textToSplit[80];
1949         UChar    text2[80];
1950         UText    *fields[10];
1951         int32_t  numFields;
1952         int32_t i;
1953
1954         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1955         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1956
1957         status = U_ZERO_ERROR;
1958         re = uregex_openC(":", 0, NULL, &status);
1959
1960
1961         /*  Simple split */
1962
1963         uregex_setText(re, textToSplit, -1, &status);
1964         TEST_ASSERT_SUCCESS(status);
1965
1966         /* The TEST_ASSERT_SUCCESS call above should change too... */
1967         if (U_SUCCESS(status)) {
1968             memset(fields, 0, sizeof(fields));
1969             numFields = uregex_splitUText(re, fields, 10, &status);
1970             TEST_ASSERT_SUCCESS(status);
1971
1972             /* The TEST_ASSERT_SUCCESS call above should change too... */
1973             if(U_SUCCESS(status)) {
1974               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1975               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1976               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1977                 TEST_ASSERT(numFields == 3);
1978                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1979                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1980                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1981                 TEST_ASSERT(fields[3] == NULL);
1982             }
1983             for(i = 0; i < numFields; i++) {
1984                 utext_close(fields[i]);
1985             }
1986         }
1987
1988         uregex_close(re);
1989
1990
1991         /*  Split with too few output strings available */
1992         status = U_ZERO_ERROR;
1993         re = uregex_openC(":", 0, NULL, &status);
1994         uregex_setText(re, textToSplit, -1, &status);
1995         TEST_ASSERT_SUCCESS(status);
1996
1997         /* The TEST_ASSERT_SUCCESS call above should change too... */
1998         if(U_SUCCESS(status)) {
1999             fields[0] = NULL;
2000             fields[1] = NULL;
2001             fields[2] = &patternText;
2002             numFields = uregex_splitUText(re, fields, 2, &status);
2003             TEST_ASSERT_SUCCESS(status);
2004
2005             /* The TEST_ASSERT_SUCCESS call above should change too... */
2006             if(U_SUCCESS(status)) {
2007                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2008                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2009                 TEST_ASSERT(numFields == 2);
2010                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2011                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2012                 TEST_ASSERT(fields[2] == &patternText);
2013             }
2014             for(i = 0; i < numFields; i++) {
2015                 utext_close(fields[i]);
2016             }
2017         }
2018
2019         uregex_close(re);
2020     }
2021
2022     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2023      *                   comes out as additional fields.  */
2024     {
2025         UChar    textToSplit[80];
2026         UText    *fields[10];
2027         int32_t  numFields;
2028         int32_t i;
2029
2030         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2031
2032         status = U_ZERO_ERROR;
2033         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2034
2035         uregex_setText(re, textToSplit, -1, &status);
2036         TEST_ASSERT_SUCCESS(status);
2037
2038         /* The TEST_ASSERT_SUCCESS call above should change too... */
2039         if(U_SUCCESS(status)) {
2040             memset(fields, 0, sizeof(fields));
2041             numFields = uregex_splitUText(re, fields, 10, &status);
2042             TEST_ASSERT_SUCCESS(status);
2043
2044             /* The TEST_ASSERT_SUCCESS call above should change too... */
2045             if(U_SUCCESS(status)) {
2046                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2047                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2048                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2049                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2050                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2051
2052                 TEST_ASSERT(numFields == 5);
2053                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2054                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2055                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2056                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2057                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2058                 TEST_ASSERT(fields[5] == NULL);
2059             }
2060             for(i = 0; i < numFields; i++) {
2061                 utext_close(fields[i]);
2062             }
2063         }
2064
2065         /*  Split with too few output strings available (2) */
2066         status = U_ZERO_ERROR;
2067         fields[0] = NULL;
2068         fields[1] = NULL;
2069         fields[2] = &patternText;
2070         numFields = uregex_splitUText(re, fields, 2, &status);
2071         TEST_ASSERT_SUCCESS(status);
2072
2073         /* The TEST_ASSERT_SUCCESS call above should change too... */
2074         if(U_SUCCESS(status)) {
2075             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2076             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2077             TEST_ASSERT(numFields == 2);
2078             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2079             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2080             TEST_ASSERT(fields[2] == &patternText);
2081         }
2082         for(i = 0; i < numFields; i++) {
2083             utext_close(fields[i]);
2084         }
2085
2086
2087         /*  Split with too few output strings available (3) */
2088         status = U_ZERO_ERROR;
2089         fields[0] = NULL;
2090         fields[1] = NULL;
2091         fields[2] = NULL;
2092         fields[3] = &patternText;
2093         numFields = uregex_splitUText(re, fields, 3, &status);
2094         TEST_ASSERT_SUCCESS(status);
2095
2096         /* The TEST_ASSERT_SUCCESS call above should change too... */
2097         if(U_SUCCESS(status)) {
2098             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2099             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2100             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2101             TEST_ASSERT(numFields == 3);
2102             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2103             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2104             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2105             TEST_ASSERT(fields[3] == &patternText);
2106         }
2107         for(i = 0; i < numFields; i++) {
2108             utext_close(fields[i]);
2109         }
2110
2111         /*  Split with just enough output strings available (5) */
2112         status = U_ZERO_ERROR;
2113         fields[0] = NULL;
2114         fields[1] = NULL;
2115         fields[2] = NULL;
2116         fields[3] = NULL;
2117         fields[4] = NULL;
2118         fields[5] = &patternText;
2119         numFields = uregex_splitUText(re, fields, 5, &status);
2120         TEST_ASSERT_SUCCESS(status);
2121
2122         /* The TEST_ASSERT_SUCCESS call above should change too... */
2123         if(U_SUCCESS(status)) {
2124             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2125             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2126             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2127             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2128             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2129
2130             TEST_ASSERT(numFields == 5);
2131             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2132             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2133             TEST_ASSERT_UTEXT(str_second, fields[2]);
2134             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2135             TEST_ASSERT_UTEXT(str_third, fields[4]);
2136             TEST_ASSERT(fields[5] == &patternText);
2137         }
2138         for(i = 0; i < numFields; i++) {
2139             utext_close(fields[i]);
2140         }
2141
2142         /* Split, end of text is a field delimiter.   */
2143         status = U_ZERO_ERROR;
2144         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2145         TEST_ASSERT_SUCCESS(status);
2146
2147         /* The TEST_ASSERT_SUCCESS call above should change too... */
2148         if(U_SUCCESS(status)) {
2149             memset(fields, 0, sizeof(fields));
2150             fields[9] = &patternText;
2151             numFields = uregex_splitUText(re, fields, 9, &status);
2152             TEST_ASSERT_SUCCESS(status);
2153
2154             /* The TEST_ASSERT_SUCCESS call above should change too... */
2155             if(U_SUCCESS(status)) {
2156                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2157                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2158                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2159                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2160                 const char str_empty[] = { 0x00 };
2161
2162                 TEST_ASSERT(numFields == 5);
2163                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2164                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2165                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2166                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2167                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2168                 TEST_ASSERT(fields[5] == NULL);
2169                 TEST_ASSERT(fields[8] == NULL);
2170                 TEST_ASSERT(fields[9] == &patternText);
2171             }
2172             for(i = 0; i < numFields; i++) {
2173                 utext_close(fields[i]);
2174             }
2175         }
2176
2177         uregex_close(re);
2178     }
2179     utext_close(&patternText);
2180 }
2181
2182
2183 static void TestRefreshInput(void) {
2184     /*
2185      *  RefreshInput changes out the input of a URegularExpression without
2186      *    changing anything else in the match state.  Used with Java JNI,
2187      *    when Java moves the underlying string storage.   This test
2188      *    runs a find() loop, moving the text after the first match.
2189      *    The right number of matches should still be found.
2190      */
2191     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2192     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2193     UErrorCode status = U_ZERO_ERROR;
2194     URegularExpression *re;
2195     UText ut1 = UTEXT_INITIALIZER;
2196     UText ut2 = UTEXT_INITIALIZER;
2197
2198     re = uregex_openC("[ABC]", 0, 0, &status);
2199     TEST_ASSERT_SUCCESS(status);
2200
2201     utext_openUChars(&ut1, testStr, -1, &status);
2202     TEST_ASSERT_SUCCESS(status);
2203     uregex_setUText(re, &ut1, &status);
2204     TEST_ASSERT_SUCCESS(status);
2205
2206     /* Find the first match "A" in the original string */
2207     TEST_ASSERT(uregex_findNext(re, &status));
2208     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2209
2210     /* Move the string, kill the original string.  */
2211     u_strcpy(movedStr, testStr);
2212     u_memset(testStr, 0, u_strlen(testStr));
2213     utext_openUChars(&ut2, movedStr, -1, &status);
2214     TEST_ASSERT_SUCCESS(status);
2215     uregex_refreshUText(re, &ut2, &status);
2216     TEST_ASSERT_SUCCESS(status);
2217
2218     /* Find the following two matches, now working in the moved string. */
2219     TEST_ASSERT(uregex_findNext(re, &status));
2220     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2221     TEST_ASSERT(uregex_findNext(re, &status));
2222     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2223     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2224
2225     uregex_close(re);
2226 }
2227
2228
2229 static void TestBug8421(void) {
2230     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2231      *             was failing.
2232      */
2233     URegularExpression *re;
2234     UErrorCode status = U_ZERO_ERROR;
2235     int32_t  limit = -1;
2236
2237     re = uregex_openC("abc", 0, 0, &status);
2238     TEST_ASSERT_SUCCESS(status);
2239
2240     limit = uregex_getTimeLimit(re, &status);
2241     TEST_ASSERT_SUCCESS(status);
2242     TEST_ASSERT(limit == 0);
2243
2244     uregex_setTimeLimit(re, 100, &status);
2245     TEST_ASSERT_SUCCESS(status);
2246     limit = uregex_getTimeLimit(re, &status);
2247     TEST_ASSERT_SUCCESS(status);
2248     TEST_ASSERT(limit == 100);
2249
2250     uregex_close(re);
2251 }
2252
2253 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2254     return FALSE;
2255 }
2256
2257 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2258     return FALSE;
2259 }
2260
2261 static void TestBug10815() {
2262   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2263    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2264    */
2265     URegularExpression *re;
2266     UErrorCode status = U_ZERO_ERROR;
2267     UChar    text[100];
2268
2269
2270     // findNext() with a find progress callback function.
2271
2272     re = uregex_openC(".z", 0, 0, &status);
2273     TEST_ASSERT_SUCCESS(status);
2274
2275     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2276     uregex_setText(re, text, -1, &status);
2277     TEST_ASSERT_SUCCESS(status);
2278
2279     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2280     TEST_ASSERT_SUCCESS(status);
2281
2282     uregex_findNext(re, &status);
2283     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2284
2285     uregex_close(re);
2286
2287     // findNext() with a match progress callback function.
2288
2289     status = U_ZERO_ERROR;
2290     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2291     TEST_ASSERT_SUCCESS(status);
2292
2293     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2294     // it will appear to be stuck in a (near) infinite loop.
2295     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2296     uregex_setText(re, text, -1, &status);
2297     TEST_ASSERT_SUCCESS(status);
2298
2299     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2300     TEST_ASSERT_SUCCESS(status);
2301
2302     uregex_findNext(re, &status);
2303     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2304
2305     uregex_close(re);
2306 }
2307
2308
2309 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */