Imported Upstream version 58.1
[platform/upstream/icu.git] / source / samples / strsrch / strsrch.cpp
1 /*************************************************************************
2  * Copyright (C) 2016 and later: Unicode, Inc. and others.
3  * License & terms of use: http://www.unicode.org/copyright.html#License
4  *
5  *************************************************************************
6  *************************************************************************
7  * COPYRIGHT:
8  * Copyright (C) 2002-2006 IBM, Inc.   All Rights Reserved.
9  *
10  *************************************************************************/
11
12 /** 
 * This program demos collation-based string searching (ICU usearch)
14  */
15
16 const char gHelpString[] =
17     "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
18     "-help            Display this message.\n"
19     "-locale name     ICU locale to use.  Default is en_US\n"
20     "-rules rule      Collation rules file (overrides locale)\n"
21     "-french          French accent ordering\n"
22     "-norm            Normalizing mode on\n"
23     "-shifted         Shifted mode\n"
24     "-lower           Lower case first\n"
25     "-upper           Upper case first\n"
26     "-case            Enable separate case level\n"
27     "-level n         Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
28         "-source string   Source string\n"
29         "-pattern string  Pattern string to look for in source\n"
30         "-overlap         Enable searching to be done on overlapping patterns\n"
31         "-canonical       Enable searching to be done matching canonical equivalent patterns"
32     "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
33         "The format \\uXXXX is supported for the rules and comparison strings\n"
34         ;
35
36 #include <stdio.h>
37 #include <string.h>
38 #include <stdlib.h>
39
40 #include <unicode/utypes.h>
41 #include <unicode/ucol.h>
42 #include <unicode/usearch.h>
43 #include <unicode/ustring.h>
44
45 /** 
46  * Command line option variables
47  *    These global variables are set according to the options specified
48  *    on the command line by the user.
49  */
50 char * opt_locale      = "en_US";
51 char * opt_rules       = 0;
52 UBool  opt_help        = FALSE;
53 UBool  opt_norm        = FALSE;
54 UBool  opt_french      = FALSE;
55 UBool  opt_shifted     = FALSE;
56 UBool  opt_lower       = FALSE;
57 UBool  opt_upper       = FALSE;
58 UBool  opt_case        = FALSE;
59 UBool  opt_overlap     = FALSE;
60 UBool  opt_canonical   = FALSE;
61 int    opt_level       = 0;
62 char * opt_source      = "International Components for Unicode";
63 char * opt_pattern     = "Unicode";
64 UCollator * collator   = 0;
65 UStringSearch * search = 0;
66 UChar rules[100];
67 UChar source[100];
68 UChar pattern[100];
69
70 /** 
71  * Definitions for the command line options
72  */
73 struct OptSpec {
74     const char *name;
75     enum {FLAG, NUM, STRING} type;
76     void *pVar;
77 };
78
79 OptSpec opts[] = {
80     {"-locale",      OptSpec::STRING, &opt_locale},
81     {"-rules",       OptSpec::STRING, &opt_rules},
82         {"-source",      OptSpec::STRING, &opt_source},
83     {"-pattern",     OptSpec::STRING, &opt_pattern},
84     {"-norm",        OptSpec::FLAG,   &opt_norm},
85     {"-french",      OptSpec::FLAG,   &opt_french},
86     {"-shifted",     OptSpec::FLAG,   &opt_shifted},
87     {"-lower",       OptSpec::FLAG,   &opt_lower},
88     {"-upper",       OptSpec::FLAG,   &opt_upper},
89     {"-case",        OptSpec::FLAG,   &opt_case},
90     {"-level",       OptSpec::NUM,    &opt_level},
91         {"-overlap",     OptSpec::FLAG,   &opt_overlap},
92         {"-canonical",   OptSpec::FLAG,   &opt_canonical},
93     {"-help",        OptSpec::FLAG,   &opt_help},
94     {"-?",           OptSpec::FLAG,   &opt_help},
95     {0, OptSpec::FLAG, 0}
96 };
97
98 /**  
99  * processOptions()  Function to read the command line options.
100  */
101 UBool processOptions(int argc, const char **argv, OptSpec opts[])
102 {
103     for (int argNum = 1; argNum < argc; argNum ++) {
104         const char *pArgName = argv[argNum];
105         OptSpec *pOpt;
106         for (pOpt = opts;  pOpt->name != 0; pOpt ++) {
107             if (strcmp(pOpt->name, pArgName) == 0) {
108                 switch (pOpt->type) {
109                 case OptSpec::FLAG:
110                     *(UBool *)(pOpt->pVar) = TRUE;
111                     break;
112                 case OptSpec::STRING:
113                     argNum ++;
114                     if (argNum >= argc) {
115                         fprintf(stderr, "value expected for \"%s\" option.\n", 
116                                                             pOpt->name);
117                         return FALSE;
118                     }
119                     *(const char **)(pOpt->pVar) = argv[argNum];
120                     break;
121                 case OptSpec::NUM:
122                     argNum ++;
123                     if (argNum >= argc) {
124                         fprintf(stderr, "value expected for \"%s\" option.\n", 
125                                                             pOpt->name);
126                         return FALSE;
127                     }
128                     char *endp;
129                     int i = strtol(argv[argNum], &endp, 0);
130                     if (endp == argv[argNum]) {
131                         fprintf(stderr, 
132                                                             "integer value expected for \"%s\" option.\n", 
133                                                                 pOpt->name);
134                         return FALSE;
135                     }
136                     *(int *)(pOpt->pVar) = i;
137                 }
138                 break;
139             }
140         }
141         if (pOpt->name == 0)
142         {
143             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
144             return FALSE;
145         }
146     }
147         return TRUE;
148 }
149
150 /**
151  * Creates a collator
152  */
153 UBool processCollator()
154 {
155         // Set up an ICU collator
156     UErrorCode status = U_ZERO_ERROR;
157
158     if (opt_rules != 0) {
159                 u_unescape(opt_rules, rules, 100);
160         collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, 
161                                           NULL, &status);
162     }
163     else {
164         collator = ucol_open(opt_locale, &status);
165     }
166         if (U_FAILURE(status)) {
167         fprintf(stderr, "Collator creation failed.: %d\n", status);
168         return FALSE;
169     }
170     if (status == U_USING_DEFAULT_WARNING) {
171         fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", 
172                             opt_locale);
173     }
174     if (status == U_USING_FALLBACK_WARNING) {
175         fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", 
176                             opt_locale);
177     }
178     if (opt_norm) {
179         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
180     }
181     if (opt_french) {
182         ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
183     }
184     if (opt_lower) {
185         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, 
186                                       &status);
187     }
188     if (opt_upper) {
189         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, 
190                                       &status);
191     }
192     if (opt_case) {
193         ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
194     }
195     if (opt_shifted) {
196         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, 
197                                       &status);
198     }
199     if (opt_level != 0) {
200         switch (opt_level) {
201         case 1:
202             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
203             break;
204         case 2:
205             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, 
206                                               &status);
207             break;
208         case 3:
209             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
210             break;
211         case 4:
212             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, 
213                                               &status);
214             break;
215         case 5:
216             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, 
217                                               &status);
218             break;
219         default:
220             fprintf(stderr, "-level param must be between 1 and 5\n");
221             return FALSE;
222         }
223     }
224     if (U_FAILURE(status)) {
225         fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
226         return FALSE;
227     }
228         return TRUE;
229 }
230
231 /**
232  * Creates a string search
233  */
234 UBool processStringSearch()
235 {
236         u_unescape(opt_source, source, 100);
237         u_unescape(opt_pattern, pattern, 100);
238         UErrorCode status = U_ZERO_ERROR;
239         search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL, 
240                                               &status);
241         if (U_FAILURE(status)) {
242                 return FALSE;
243         }
244         if (opt_overlap == TRUE) {
245                 usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
246         }
247         if (opt_canonical == TRUE) {
248                 usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON, 
249                                          &status);
250         }
251         if (U_FAILURE(status)) {
252                 fprintf(stderr, "Error setting search attributes\n");
253                 return FALSE;
254         }
255         return TRUE;
256 }
257
258 UBool findPattern()
259 {
260         UErrorCode status = U_ZERO_ERROR;
261         int32_t offset = usearch_next(search, &status);
262         if (offset == USEARCH_DONE) {
263                 fprintf(stdout, "Pattern not found in source\n");
264         }
265         while (offset != USEARCH_DONE) {
266                 fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
267                                 usearch_getMatchedLength(search));
268                 offset = usearch_next(search, &status);
269         }
270         if (U_FAILURE(status)) {
271                 fprintf(stderr, "Error in searching for pattern %d\n", status);
272                 return FALSE;
273         }
274         fprintf(stdout, "End of search\n");
275         return TRUE;
276 }
277
278 /** 
279  * Main   --  process command line, read in and pre-process the test file,
280  *            call other functions to do the actual tests.
281  */
282 int main(int argc, const char** argv) 
283 {
284     if (processOptions(argc, argv, opts) != TRUE || opt_help) {
285         printf(gHelpString);
286         return -1;
287     }
288
289     if (processCollator() != TRUE) {
290                 fprintf(stderr, "Error creating collator\n");
291                 return -1;
292         }
293
294         if (processStringSearch() != TRUE) {
295                 fprintf(stderr, "Error creating string search\n");
296                 return -1;
297         }
298
299         fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern, 
300                     opt_source);
301
302         findPattern();
303         ucol_close(collator);
304         usearch_close(search);
305         return 0;
306 }