Imported Upstream version 58.1
[platform/upstream/icu.git] / source / i18n / stsearch.cpp
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2001-2014 IBM and others. All rights reserved.
6 **********************************************************************
7 *   Date        Name        Description
8 *  03/22/2000   helena      Creation.
9 **********************************************************************
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/stsearch.h"
17 #include "usrchimp.h"
18 #include "cmemory.h"
19
20 U_NAMESPACE_BEGIN
21
22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
23
24 // public constructors and destructors -----------------------------------
25
26 StringSearch::StringSearch(const UnicodeString &pattern,
27                            const UnicodeString &text,
28                            const Locale        &locale,
29                                  BreakIterator *breakiter,
30                                  UErrorCode    &status) :
31                            SearchIterator(text, breakiter),
32                            m_pattern_(pattern)
33 {
34     if (U_FAILURE(status)) {
35         m_strsrch_ = NULL;
36         return;
37     }
38
39     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
40                               m_text_.getBuffer(), m_text_.length(),
41                               locale.getName(), (UBreakIterator *)breakiter,
42                               &status);
43     uprv_free(m_search_);
44     m_search_ = NULL;
45
46     if (U_SUCCESS(status)) {
47         // m_search_ has been created by the base SearchIterator class
48         m_search_        = m_strsrch_->search;
49     }
50 }
51
52 StringSearch::StringSearch(const UnicodeString     &pattern,
53                            const UnicodeString     &text,
54                                  RuleBasedCollator *coll,
55                                  BreakIterator     *breakiter,
56                                  UErrorCode        &status) :
57                            SearchIterator(text, breakiter),
58                            m_pattern_(pattern)
59 {
60     if (U_FAILURE(status)) {
61         m_strsrch_ = NULL;
62         return;
63     }
64     if (coll == NULL) {
65         status     = U_ILLEGAL_ARGUMENT_ERROR;
66         m_strsrch_ = NULL;
67         return;
68     }
69     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
70                                           m_pattern_.length(),
71                                           m_text_.getBuffer(),
72                                           m_text_.length(), coll->toUCollator(),
73                                           (UBreakIterator *)breakiter,
74                                           &status);
75     uprv_free(m_search_);
76     m_search_ = NULL;
77
78     if (U_SUCCESS(status)) {
79         // m_search_ has been created by the base SearchIterator class
80         m_search_ = m_strsrch_->search;
81     }
82 }
83
84 StringSearch::StringSearch(const UnicodeString     &pattern,
85                                  CharacterIterator &text,
86                            const Locale            &locale,
87                                  BreakIterator     *breakiter,
88                                  UErrorCode        &status) :
89                            SearchIterator(text, breakiter),
90                            m_pattern_(pattern)
91 {
92     if (U_FAILURE(status)) {
93         m_strsrch_ = NULL;
94         return;
95     }
96     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
97                               m_text_.getBuffer(), m_text_.length(),
98                               locale.getName(), (UBreakIterator *)breakiter,
99                               &status);
100     uprv_free(m_search_);
101     m_search_ = NULL;
102
103     if (U_SUCCESS(status)) {
104         // m_search_ has been created by the base SearchIterator class
105         m_search_ = m_strsrch_->search;
106     }
107 }
108
109 StringSearch::StringSearch(const UnicodeString     &pattern,
110                                  CharacterIterator &text,
111                                  RuleBasedCollator *coll,
112                                  BreakIterator     *breakiter,
113                                  UErrorCode        &status) :
114                            SearchIterator(text, breakiter),
115                            m_pattern_(pattern)
116 {
117     if (U_FAILURE(status)) {
118         m_strsrch_ = NULL;
119         return;
120     }
121     if (coll == NULL) {
122         status     = U_ILLEGAL_ARGUMENT_ERROR;
123         m_strsrch_ = NULL;
124         return;
125     }
126     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
127                                           m_pattern_.length(),
128                                           m_text_.getBuffer(),
129                                           m_text_.length(), coll->toUCollator(),
130                                           (UBreakIterator *)breakiter,
131                                           &status);
132     uprv_free(m_search_);
133     m_search_ = NULL;
134
135     if (U_SUCCESS(status)) {
136         // m_search_ has been created by the base SearchIterator class
137         m_search_ = m_strsrch_->search;
138     }
139 }
140
141 StringSearch::StringSearch(const StringSearch &that) :
142                        SearchIterator(that.m_text_, that.m_breakiterator_),
143                        m_pattern_(that.m_pattern_)
144 {
145     UErrorCode status = U_ZERO_ERROR;
146
147     // Free m_search_ from the superclass
148     uprv_free(m_search_);
149     m_search_ = NULL;
150
151     if (that.m_strsrch_ == NULL) {
152         // This was not a good copy
153         m_strsrch_ = NULL;
154     }
155     else {
156         // Make a deep copy
157         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
158                                               m_pattern_.length(),
159                                               m_text_.getBuffer(),
160                                               m_text_.length(),
161                                               that.m_strsrch_->collator,
162                                              (UBreakIterator *)that.m_breakiterator_,
163                                               &status);
164         if (U_SUCCESS(status)) {
165             // m_search_ has been created by the base SearchIterator class
166             m_search_        = m_strsrch_->search;
167         }
168     }
169 }
170
171 StringSearch::~StringSearch()
172 {
173     if (m_strsrch_ != NULL) {
174         usearch_close(m_strsrch_);
175         m_search_ = NULL;
176     }
177 }
178
179 StringSearch *
180 StringSearch::clone() const {
181     return new StringSearch(*this);
182 }
183
184 // operator overloading ---------------------------------------------
185 StringSearch & StringSearch::operator=(const StringSearch &that)
186 {
187     if ((*this) != that) {
188         UErrorCode status = U_ZERO_ERROR;
189         m_text_          = that.m_text_;
190         m_breakiterator_ = that.m_breakiterator_;
191         m_pattern_       = that.m_pattern_;
192         // all m_search_ in the parent class is linked up with m_strsrch_
193         usearch_close(m_strsrch_);
194         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
195                                               m_pattern_.length(),
196                                               m_text_.getBuffer(),
197                                               m_text_.length(),
198                                               that.m_strsrch_->collator,
199                                               NULL, &status);
200         // Check null pointer
201         if (m_strsrch_ != NULL) {
202             m_search_ = m_strsrch_->search;
203         }
204     }
205     return *this;
206 }
207
208 UBool StringSearch::operator==(const SearchIterator &that) const
209 {
210     if (this == &that) {
211         return TRUE;
212     }
213     if (SearchIterator::operator ==(that)) {
214         StringSearch &thatsrch = (StringSearch &)that;
215         return (this->m_pattern_ == thatsrch.m_pattern_ &&
216                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
217     }
218     return FALSE;
219 }
220
221 // public get and set methods ----------------------------------------
222
223 void StringSearch::setOffset(int32_t position, UErrorCode &status)
224 {
225     // status checked in usearch_setOffset
226     usearch_setOffset(m_strsrch_, position, &status);
227 }
228
229 int32_t StringSearch::getOffset(void) const
230 {
231     return usearch_getOffset(m_strsrch_);
232 }
233
234 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
235 {
236     if (U_SUCCESS(status)) {
237         m_text_ = text;
238         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
239     }
240 }
241
242 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
243 {
244     if (U_SUCCESS(status)) {
245         text.getText(m_text_);
246         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
247     }
248 }
249
250 RuleBasedCollator * StringSearch::getCollator() const
251 {
252     // Note the const_cast. It would be cleaner if this const method returned a const collator.
253     return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
254 }
255
256 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
257 {
258     if (U_SUCCESS(status)) {
259         usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
260     }
261 }
262
263 void StringSearch::setPattern(const UnicodeString &pattern,
264                                     UErrorCode    &status)
265 {
266     if (U_SUCCESS(status)) {
267         m_pattern_ = pattern;
268         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
269                            &status);
270     }
271 }
272
273 const UnicodeString & StringSearch::getPattern() const
274 {
275     return m_pattern_;
276 }
277
278 // public methods ----------------------------------------------------
279
280 void StringSearch::reset()
281 {
282     usearch_reset(m_strsrch_);
283 }
284
285 SearchIterator * StringSearch::safeClone(void) const
286 {
287     UErrorCode status = U_ZERO_ERROR;
288     StringSearch *result = new StringSearch(m_pattern_, m_text_,
289                                             getCollator(),
290                                             m_breakiterator_,
291                                             status);
292     /* test for NULL */
293     if (result == 0) {
294         status = U_MEMORY_ALLOCATION_ERROR;
295         return 0;
296     }
297     result->setOffset(getOffset(), status);
298     result->setMatchStart(m_strsrch_->search->matchedIndex);
299     result->setMatchLength(m_strsrch_->search->matchedLength);
300     if (U_FAILURE(status)) {
301         return NULL;
302     }
303     return result;
304 }
305
306 // protected method -------------------------------------------------
307
308 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
309 {
310     // values passed here are already in the pre-shift position
311     if (U_SUCCESS(status)) {
312         if (m_strsrch_->pattern.cesLength == 0) {
313             m_search_->matchedIndex =
314                                     m_search_->matchedIndex == USEARCH_DONE ?
315                                     getOffset() : m_search_->matchedIndex + 1;
316             m_search_->matchedLength = 0;
317             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
318                            &status);
319             if (m_search_->matchedIndex == m_search_->textLength) {
320                 m_search_->matchedIndex = USEARCH_DONE;
321             }
322         }
323         else {
324             // looking at usearch.cpp, this part is shifted out to
325             // StringSearch instead of SearchIterator because m_strsrch_ is
326             // not accessible in SearchIterator
327 #if 0
328             if (position + m_strsrch_->pattern.defaultShiftSize
329                 > m_search_->textLength) {
330                 setMatchNotFound();
331                 return USEARCH_DONE;
332             }
333 #endif
334             if (m_search_->matchedLength <= 0) {
335                 // the flipping direction issue has already been handled
336                 // in next()
337                 // for boundary check purposes. this will ensure that the
338                 // next match will not preceed the current offset
339                 // note search->matchedIndex will always be set to something
340                 // in the code
341                 m_search_->matchedIndex = position - 1;
342             }
343
344             ucol_setOffset(m_strsrch_->textIter, position, &status);
345             
346 #if 0
347             for (;;) {
348                 if (m_search_->isCanonicalMatch) {
349                     // can't use exact here since extra accents are allowed.
350                     usearch_handleNextCanonical(m_strsrch_, &status);
351                 }
352                 else {
353                     usearch_handleNextExact(m_strsrch_, &status);
354                 }
355                 if (U_FAILURE(status)) {
356                     return USEARCH_DONE;
357                 }
358                 if (m_breakiterator_ == NULL
359 #if !UCONFIG_NO_BREAK_ITERATION
360                     ||
361                     m_search_->matchedIndex == USEARCH_DONE ||
362                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
363                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
364                                                   m_search_->matchedLength))
365 #endif
366                 ) {
367                     if (m_search_->matchedIndex == USEARCH_DONE) {
368                         ucol_setOffset(m_strsrch_->textIter,
369                                        m_search_->textLength, &status);
370                     }
371                     else {
372                         ucol_setOffset(m_strsrch_->textIter,
373                                        m_search_->matchedIndex, &status);
374                     }
375                     return m_search_->matchedIndex;
376                 }
377             }
378 #else
379             // if m_strsrch_->breakIter is always the same as m_breakiterator_
380             // then we don't need to check the match boundaries here because
381             // usearch_handleNextXXX will already have done it.
382             if (m_search_->isCanonicalMatch) {
383                 // *could* actually use exact here 'cause no extra accents allowed...
384                 usearch_handleNextCanonical(m_strsrch_, &status);
385             } else {
386                 usearch_handleNextExact(m_strsrch_, &status);
387             }
388             
389             if (U_FAILURE(status)) {
390                 return USEARCH_DONE;
391             }
392             
393             if (m_search_->matchedIndex == USEARCH_DONE) {
394                 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
395             } else {
396                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
397             }
398             
399             return m_search_->matchedIndex;
400 #endif
401         }
402     }
403     return USEARCH_DONE;
404 }
405
406 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
407 {
408     // values passed here are already in the pre-shift position
409     if (U_SUCCESS(status)) {
410         if (m_strsrch_->pattern.cesLength == 0) {
411             m_search_->matchedIndex =
412                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
413                    m_search_->matchedIndex);
414             if (m_search_->matchedIndex == 0) {
415                 setMatchNotFound();
416             }
417             else {
418                 m_search_->matchedIndex --;
419                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
420                                &status);
421                 m_search_->matchedLength = 0;
422             }
423         }
424         else {
425             // looking at usearch.cpp, this part is shifted out to
426             // StringSearch instead of SearchIterator because m_strsrch_ is
427             // not accessible in SearchIterator
428 #if 0
429             if (!m_search_->isOverlap &&
430                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
431                 setMatchNotFound();
432                 return USEARCH_DONE;
433             }
434             
435             for (;;) {
436                 if (m_search_->isCanonicalMatch) {
437                     // can't use exact here since extra accents are allowed.
438                     usearch_handlePreviousCanonical(m_strsrch_, &status);
439                 }
440                 else {
441                     usearch_handlePreviousExact(m_strsrch_, &status);
442                 }
443                 if (U_FAILURE(status)) {
444                     return USEARCH_DONE;
445                 }
446                 if (m_breakiterator_ == NULL
447 #if !UCONFIG_NO_BREAK_ITERATION
448                     ||
449                     m_search_->matchedIndex == USEARCH_DONE ||
450                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
451                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
452                                                   m_search_->matchedLength))
453 #endif
454                 ) {
455                     return m_search_->matchedIndex;
456                 }
457             }
458 #else
459             ucol_setOffset(m_strsrch_->textIter, position, &status);
460             
461             if (m_search_->isCanonicalMatch) {
462                 // *could* use exact match here since extra accents *not* allowed!
463                 usearch_handlePreviousCanonical(m_strsrch_, &status);
464             } else {
465                 usearch_handlePreviousExact(m_strsrch_, &status);
466             }
467             
468             if (U_FAILURE(status)) {
469                 return USEARCH_DONE;
470             }
471             
472             return m_search_->matchedIndex;
473 #endif
474         }
475
476         return m_search_->matchedIndex;
477     }
478     return USEARCH_DONE;
479 }
480
481 U_NAMESPACE_END
482
483 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */