2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FBaseUtilRegularExpression.h
19 * @brief This is the header file for the %RegularExpression class.
21 * This header file contains the declarations of the %RegularExpression class.
23 #ifndef _FBASE_UTIL_REGULAR_EXPRESSION_H_
24 #define _FBASE_UTIL_REGULAR_EXPRESSION_H_
26 #include <FBaseColIList.h>
27 #include <FBaseString.h>
30 namespace Tizen { namespace Base { namespace Utility
34 * @enum RegularExpressionOptions
36 * Defines the options available for a regular expression.
40 enum RegularExpressionOptions
42 REGEX_CASELESS = 0x00000001, /**< The case-insensitive match option */
43 REGEX_MULTI_LINE = 0x00000002, /**< The multiple lines match option @n
44 Without this option, (^) matches only at the start of the string, while ($) matches only at
45 the end of the string, or before a terminating newline. */
46 REGEX_DOTALL = 0x00000004, /**< The dot matches newlines option @n
47 Without this option, a dot does not match when the current position is at a newline. */
48 REGEX_EXTENDED = 0x00000008, /**< The option to ignore whitespace in a pattern */
49 REGEX_DOLLAR_ENDONLY = 0x00000020, /**< The option to match the dollar symbol ($) only at the end @n
50 Without this option, a dollar symbol also matches immediately before a newline. */
51 REGEX_UNGREEDY = 0x00000200, /**< The option to invert the greediness of the (*) and (*?) quantifiers @n
52 If this option is set, the quantifiers are not greedy by default, but they become greedy if followed by a question mark. */
53 REGEX_UNICODE = 0x01000000, /**< The option to support Unicode characters @n
54 Without this option, only the ASCII characters are recognized. */
58 * @class RegularExpression
59 * @brief This class provides the functionality for a regular expression.
63 * The %RegularExpression class provides operations of a regular expression based on PCRE and the syntax based on
64 * the Perl regular expression.
65 * The various supported operations are Match(), Replace(), and Consume().
67 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/base/regular_expression.htm">Regular Expression</a>.
69 * The following example demonstrates how to use the %RegularExpression class.
75 * using namespace Tizen::Base;
76 * using namespace Tizen::Base::Collection;
77 * using namespace Tizen::Base::Utility;
80 * MyClass::RegularExpressionSample(void)
84 * String pattern(L"the quick brown fox");
85 * String text(L"What do you know about the quick brown fox?");
88 * RegularExpression regex;
89 * regex.Construct(pattern, REGEX_CASELESS);
91 * ret = regex.Match(text, false); // This returns true
96 class _OSP_EXPORT_ RegularExpression
97 : public Tizen::Base::Object
101 * The object is not fully constructed after this constructor is called. @n
102 * For full construction, the Construct() method must be called right after calling this constructor.
106 RegularExpression(void);
109 * This destructor overrides Tizen::Base::Object::~Object().
113 virtual ~RegularExpression(void);
116 * Initializes this instance of %RegularExpression with the specified parameters.
120 * @return An error code
121 * @param[in] pattern The pattern to use
122 * @param[in] options A bitwise OR combination of RegularExpressionOptions values (@c 0x0 sets no option)
123 * @exception E_SUCCESS The method is successful.
124 * @exception E_INVALID_STATE This instance has already been constructed.
125 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0.
127 * The following example demonstrates how to use the %Construct() method.
132 * String pattern(L"^CRUEL$");
133 * String text(L"Hello\ncruel\nworld");
135 * RegularExpression regex;
136 * regex.Construct(pattern, REGEX_CASELESS | REGEX_MULTI_LINE);
137 * ret = regex.Match(text, false); // This returns true
141 result Construct(const Tizen::Base::String& pattern, unsigned long options = 0x0);
144 * Checks whether the specified text matches completely or partially.
148 * @return @c true if the text matches successfully, @n
150 * @param[in] text The text to match
151 * @param[in] fullMatch Set to @c true to match exactly, @n
152 * else @c false to match any substring of the text
153 * @param[out] pMatchedString A list of the matched string instances @n
154 * The count of the matched items is acquired from IList::GetCount() and
155 * the maximum count of the items is @c 16.
156 * @exception E_SUCCESS The method is successful.
157 * @exception E_INVALID_STATE This instance has not been constructed as yet.
158 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
159 * @remarks The specific error code can be accessed using the GetLastResult() method.
160 * @remarks If the grouping subpatterns are used in a pattern,
161 * the @c pMatchedString list will contain the grouping data. @n
162 * For example, if the pattern has two grouping subpatterns,
163 * there will be three data sets in the @c pMatchedString list. @n
164 * The first data set will be a full grouping data and the second
165 * and the third data sets will contain individual grouping data.
167 * The following example demonstrates how to use the %Match() method.
173 * String pattern(L"(\\d\\d)-(\\d\\d)-(\\d\\d\\d\\d)");
174 * String text(L"04-12-1979");
177 * RegularExpression regex;
178 * regex.Construct(pattern);
184 * ret = regex.Match(text, true, &list); // The list will contain four string instances
186 * out = *(String *)list.GetAt(0); // L"04-12-1979"
187 * out = *(String *)list.GetAt(1); // L"04"
188 * out = *(String *)list.GetAt(2); // L"12"
189 * out = *(String *)list.GetAt(3); // L"1979"
191 * list.RemoveAll(true);
194 bool Match(const Tizen::Base::String& text, bool fullMatch, Tizen::Base::Collection::IList* pMatchedString = null) const;
197 * Matches the pattern from the starting point of the text and removes the matched string. @n
198 * If the pattern does not match the text at the starting point, it will return @c false.
202 * @return @c true if the text matches successfully, @n
204 * @param[in, out] text The text to consume
205 * @param[out] pMatchedString A list of matched string instances @n
206 * The count of the matched items is acquired from IList::GetCount() and
207 * the maximum count of the items is @c 16.
208 * @exception E_SUCCESS The method is successful.
209 * @exception E_INVALID_STATE This instance has not been constructed as yet.
210 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
211 * @remarks The specific error code can be accessed using the GetLastResult() method.
212 * @remarks If the grouping subpatterns are used in a pattern, the @c pMatchedString list will
213 * contain the grouping data. @n
214 * For example, if the pattern has two grouping subpatterns,
215 * there will be three data sets in the @c pMatchedString list. @n
216 * The first data set will be a full grouping data and the second
217 * and the third data sets will contain individual grouping data.
220 * The following example demonstrates how to use the %Consume() method.
226 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
227 * String text(L" abcd1234test");
231 * RegularExpression regex;
232 * regex.Construct(pattern);
233 * ret = regex.Consume(text, &list); // The list will contain four string instances
234 * // and the text instance will be changed to L"test"
235 * out = *(String *)list.GetAt(0); // L" abcd1234"
236 * out = *(String *)list.GetAt(1); // L" "
237 * out = *(String *)list.GetAt(2); // L"abcd"
238 * out = *(String *)list.GetAt(3); // L"1234"
240 * list.RemoveAll(true);
243 bool Consume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const;
246 * Matches the pattern in strings similar to the Consume() method but does not anchor the match at the beginning of the string. @n
247 * This operation can be used to find certain patterns in the text and extract the required information.
251 * @return @c true if the text matches successfully, @n
253 * @param[in, out] text The text to find and consume
254 * @param[out] pMatchedString A list of matched string instances @n
255 * The count of the matched items is acquired from IList::GetCount() and
256 * the maximum count of the items is @c 16.
257 * @exception E_SUCCESS The method is successful.
258 * @exception E_INVALID_STATE This instance has not been constructed as yet.
259 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
260 * @remarks The specific error code can be accessed using the GetLastResult() method.
261 * @remarks If the grouping subpatterns are used in a pattern,
262 * the @c pMatchedString list will contain the grouping data. @n
263 * For example, if the pattern has two grouping subpatterns,
264 * there will be three data sets in the @c pMatchedString list. @n
265 * The first data set will be a full grouping data and the second
266 * and the third data sets will contain individual grouping data.
268 * The following example demonstrates how to use the %FindAndConsume() method.
275 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
276 * String text(L"test abcd1234test");
280 * RegularExpression regex;
281 * regex.Construct(pattern);
282 * ret = regex.FindAndConsume(text, &list); // The list will contain four String instances
283 * // and text instance will be changed to L"test"
284 * out = *(String *)list.GetAt(0); // L" abcd1234"
285 * out = *(String *)list.GetAt(1); // L" "
286 * out = *(String *)list.GetAt(2); // L"abcd"
287 * out = *(String *)list.GetAt(3); // L"1234"
289 * list.RemoveAll(true);
292 bool FindAndConsume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const;
295 * Replaces either the first match of a pattern in the text with the @c rewrite string or
296 * all the occurrences of a pattern in the text.
300 * @return @c true if the text is replaced successfully, @n
302 * @param[in, out] text The text to replace when it is matched to a pattern
303 * @param[in] rewrite The text with which to replace
304 * @param[in] globalReplace Set to @c true to replace globally, @n
305 * else @c false to replace the first match of the pattern in the text
306 * @param[in] startPos The starting position of the text
307 * @exception E_SUCCESS The method is successful.
308 * @exception E_INVALID_STATE This instance has not been constructed as yet.
309 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0, or
310 * the size of @c pMatchedString exceeds limitations.
311 * @remarks The specific error code can be accessed using the GetLastResult() method.
313 * The following example demonstrates how to use the %Replace() method.
319 * String pattern(L"replace");
320 * String text(L"test replace method");
321 * String rewrite(L"REPLACE");
323 * RegularExpression regex;
324 * regex.Construct(pattern);
325 * ret = regex.Replace(text, rewrite, false); // text = L"test REPLACE method"
329 bool Replace(Tizen::Base::String& text, const Tizen::Base::String& rewrite, bool globalReplace, int startPos = 0) const;
332 * Extracts the first match of the pattern in the text. @n
333 * Similar to Replace() but @c rewrite is copied to @c out with substitutions.
337 * @return @c true if the text is extracted successfully, @n
339 * @param[in] text The text to match
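340 * @param[in] rewrite The rewrite template that is copied to @c out with the matched groups substituted
341 * @param[out] out The extracted text, that is, @c rewrite after the substitutions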
342 * @exception E_SUCCESS The method is successful.
343 * @exception E_INVALID_STATE This instance has not been constructed as yet.
344 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0, or
345 * the size of @c pMatchedString exceeds limitations.
346 * @remarks The specific error code can be accessed using the GetLastResult() method.
348 * The following example demonstrates how to use the %Extract() method.
355 * String pattern(L"(.*)@([^.]*)");
356 * String text(L"test@email.com");
357 * String rewrite(L"\\2!\\1");
359 * RegularExpression regex;
360 * regex.Construct(pattern);
361 * ret = regex.Extract(text, rewrite, out); // out = L"email!test"
365 bool Extract(const Tizen::Base::String& text, const Tizen::Base::String& rewrite, Tizen::Base::String& out) const;
368 * Compares the specified instance to the calling instance.
372 * @return @c true if the specified instance is equal to the current instance, @n
374 * @param[in] obj The object to compare with the current instance
375 * @remarks This method returns @c true if all the attributes in the instance are the same.
377 virtual bool Equals(const Tizen::Base::Object& obj) const;
380 * Gets the hash value of the current instance.
384 * @return The hash value of the current instance
386 virtual int GetHashCode(void) const;
389 * Gets the pattern used to compile the regular expression.
393 * @return The pattern used to compile the regular expression @n An empty string if this instance is not initialized
395 Tizen::Base::String GetPattern(void) const;
399 * Sets the value of the regular expression options.
403 * @param[in] options A bitwise OR combination of RegularExpressionOptions values
404 * @exception E_SUCCESS The method is successful.
405 * @exception E_INVALID_ARG The value of @c options is invalid.
407 result SetOptions(unsigned long options);
411 * Gets the value of the regular expression options.
415 * @return A bitwise OR combination of RegularExpressionOptions values
417 unsigned long GetOptions(void) const;
421 * The implementation of this copy constructor is intentionally blank and declared as private to prohibit copying of objects.
423 * @param [in] regularExpression The instance of the %RegularExpression class to copy from
424 * @remarks This constructor is hidden.
426 RegularExpression(const RegularExpression& regularExpression);
429 * The implementation of this copy assignment operator is intentionally blank and declared as private to prohibit copying of objects.
431 * @param [in] regularExpression An instance of %RegularExpression
432 * @remarks This operator is hidden.
434 RegularExpression& operator =(const RegularExpression& regularExpression);
// Grants the implementation class access to the private members of %RegularExpression.
437 friend class _RegularExpressionImpl;
// Pointer to the implementation object. The type is only named here through an elaborated
// type specifier; its definition is not visible in this header.
// NOTE(review): presumably allocated in Construct() and released in the destructor - confirm in the .cpp.
438 class _RegularExpressionImpl* __pRegularExpressionImpl;
440 }; // RegularExpression
442 }}} // Tizen::Base::Utility
444 #endif // _FBASE_UTIL_REGULAR_EXPRESSION_H_