2 // Open Service Platform
3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 // Licensed under the Apache License, Version 2.0 (the License);
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
19 * @file FBaseUtilRegularExpression.h
20 * @brief This is the header file for the %RegularExpression class.
22 * This header file contains the declarations of the %RegularExpression class.
24 #ifndef _FBASE_UTIL_REGULAR_EXPRESSION_H_
25 #define _FBASE_UTIL_REGULAR_EXPRESSION_H_
27 #include <FBaseColIList.h>
28 #include <FBaseString.h>
31 namespace Tizen { namespace Base { namespace Utility
/**
 * @enum	RegularExpressionOptions
 *
 * Defines the options available for a regular expression. @n
 * The values can be combined with the bitwise OR operator, for example
 * <tt>REGEX_CASELESS | REGEX_MULTI_LINE</tt>, and passed as the @c options argument of
 * RegularExpression::Construct() or RegularExpression::SetOptions().
 */
enum RegularExpressionOptions
{
	REGEX_CASELESS = 0x00000001,        /**< The case insensitive match option */
	REGEX_MULTI_LINE = 0x00000002,      /**< The multiple lines match option @n
	                                         Without this option, (^) matches only at the start of the string, while ($) matches only at
	                                         the end of the string, or before a terminating newline. */
	REGEX_DOTALL = 0x00000004,          /**< The dot matches newlines option @n
	                                         Without this option, a dot does not match when the current position is at a newline. */
	REGEX_EXTENDED = 0x00000008,        /**< The option to ignore whitespace in a pattern */
	REGEX_DOLLAR_ENDONLY = 0x00000020,  /**< The option to match the dollar symbol ($) only at the end @n
	                                         Without this option, a dollar symbol also matches immediately before a newline. */
	REGEX_UNGREEDY = 0x00000200,        /**< The option to reverse the (*) and (*?) symbols @n
	                                         If this option is set, the quantifiers are not greedy by default, however they are, if followed by a question mark. */
	REGEX_UNICODE = 0x01000000,         /**< The option to support the unicode characters @n
	                                         Without this option, only the ASCII characters are recognized. */
};
59 * @class RegularExpression
60 * @brief This class provides the functionality for a regular expression.
64 * The %RegularExpression class provides operations of a regular expression based on PCRE and the syntax based on
65 * the Perl regular expression.
66 * The various supported operations are Match(), Replace(), and Consume().
68 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/base/regular_expression.htm">Regular Expression</a>.
70 * The following example demonstrates how to use the %RegularExpression class.
76 * using namespace Tizen::Base;
77 * using namespace Tizen::Base::Collection;
78 * using namespace Tizen::Base::Utility;
81 * MyClass:RegularExpressionSample(void)
85 * String pattern(L"the quick brown fox");
86 * String text(L"What do you know about the quick brown fox?");
89 * RegularExpression regex;
90 * regex.Construct(pattern, REGEX_CASELESS);
92 * ret = regex.Match(text, false); // This returns true value
97 class _OSP_EXPORT_ RegularExpression
98 : public Tizen::Base::Object
102 * The object is not fully constructed after this constructor is called. @n
103 * For full construction, the Construct() method must be called right after calling this constructor.
107 RegularExpression(void);
110 * This destructor overrides Tizen::Base::Object::~Object().
114 virtual ~RegularExpression(void);
117 * Initializes this instance of %RegularExpression with the specified parameters.
121 * @return An error code
122 * @param[in] pattern The pattern to use
123 * @param[in] options The option for the regular expression
124 * @exception E_SUCCESS The method is successful.
125 * @exception E_INVALID_STATE This instance has already been constructed.
126 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0.
128 * The following example demonstrates how to use the %Construct() method.
133 * String pattern(L"^CRUEL$");
134 * String text(L"Hello\ncruel\nworld");
136 * RegularExpression regex;
137 * regex.Construct(pattern, REGEX_CASELESS | REGEX_MULTI_LINE);
138 * ret = regex.Match(text, false); // This returns true value
142 result Construct(const Tizen::Base::String& pattern, unsigned long options = 0x0);
145 * Checks whether the specified text matches completely or partially.
149 * @return @c true if the text matches successfully, @n
151 * @param[in] text The text to match
152 * @param[in] fullMatch Set to @c true to match exactly, @n
153 * else @c false to match any substring of the text
154 * @param[out] pMatchedString A list of the matched string instances @n
155 The count of the matched items is acquired from IList::GetCount() and
156 * the maximum count of the items is @c 16.
157 * @exception E_SUCCESS The method is successful.
158 * @exception E_INVALID_STATE This instance has not been constructed as yet.
159 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
160 * @remarks The specific error code can be accessed using the GetLastResult() method.
161 * @remarks If the grouping subpatterns are used in a pattern,
162 * the @c pMatchedString list will contain the grouping data. @n
163 * For example, if the pattern has two grouping subpatterns,
164 * there will be three data sets in the @c pMatchedString list. @n
165 * The first data set will be a full grouping data and the second
166 * and the third data sets will contain individual grouping data.
168 * The following example demonstrates how to use the %Match() method.
174 * String pattern(L"(\\d\\d)-(\\d\\d)-(\\d\\d\\d\\d)");
175 * String text(L"04-12-1979");
178 * RegularExpression regex;
179 * regex.Construct(pattern);
185 * ret = regex.Match(text, true, &list); // The list will contain four string instances
187 * out = *(String *)list.GetAt(0); // L"04-12-1979"
188 * out = *(String *)list.GetAt(1); // L"04"
189 * out = *(String *)list.GetAt(2); // L"12"
190 * out = *(String *)list.GetAt(3); // L"1979"
192 * list.RemoveAll(true);
195 bool Match(const Tizen::Base::String& text, bool fullMatch, Tizen::Base::Collection::IList* pMatchedString = null) const;
198 * Matches the pattern from the starting point of the text and removes the matched string. @n
199 * If the pattern does not match the text at the starting point, it will return @c false.
203 * @return @c true if the text matches successfully, @n
205 * @param[in, out] text The text to consume
206 * @param[out] pMatchedString A list of matched string instances @n
207 * The count of the matched items is acquired from IList::GetCount() and
208 * the maximum count of the items is @c 16.
209 * @exception E_SUCCESS The method is successful.
210 * @exception E_INVALID_STATE This instance has not been constructed as yet.
211 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
212 * @remarks The specific error code can be accessed using the GetLastResult() method.
213 * @remarks If the grouping subpatterns are used in a pattern, the @c pMatchedString list will
214 * contain the grouping data. @n
215 * For example, if the pattern has two grouping subpatterns,
216 * there will be three data sets in the @c pMatchedString list. @n
217 * The first data set will be a full grouping data and the second
218 * and the third data sets will contain individual grouping data.
221 * The following example demonstrates how to use the %Consume() method.
227 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
228 * String text(L" abcd1234test");
232 * RegularExpression regex;
233 * regex.Construct(pattern);
234 * ret = regex.Consume(text, &list); // The list will contain four string instances
235 * // and the text instance will be changed to L"test"
236 * out = *(String *)list.GetAt(0); // L" abcd1234"
237 * out = *(String *)list.GetAt(1); // L" "
238 * out = *(String *)list.GetAt(2); // L"abcd"
239 * out = *(String *)list.GetAt(3); // L"1234"
241 * list.RemoveAll(true);
244 bool Consume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const;
247 * Matches the pattern in strings similar to the Consume() method but does not anchor the match at the beginning of the string. @n
248 * This operation can be used to find certain patterns in the text and extract the required information.
252 * @return @c true if the text matches successfully, @n
254 * @param[in, out] text The text to find and consume
255 * @param[out] pMatchedString A list of matched string instances @n
256 * The count of the matched items is acquired from IList::GetCount() and
257 * the maximum count of the items is @c 16.
258 * @exception E_SUCCESS The method is successful.
259 * @exception E_INVALID_STATE This instance has not been constructed as yet.
260 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
261 * @remarks The specific error code can be accessed using the GetLastResult() method.
262 * @remarks If the grouping subpatterns are used in a pattern,
263 * the @c pMatchedString list will contain the grouping data. @n
264 * For example, if the pattern has two grouping subpatterns,
265 * there will be three data sets in the @c pMatchedString list. @n
266 * The first data set will be a full grouping data and the second
267 * and the third data sets will contain individual grouping data.
269 * The following example demonstrates how to use the %FindAndConsume() method.
276 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
277 * String text(L"test abcd1234test");
281 * RegularExpression regex;
282 * regex.Construct(pattern);
283 * ret = regex.FindAndConsume(text, &list); // The list will contain four String instances
284 * // and text instance will be changed to L"test"
285 * out = *(String *)list.GetAt(0); // L" abcd1234"
286 * out = *(String *)list.GetAt(1); // L" "
287 * out = *(String *)list.GetAt(2); // L"abcd"
288 * out = *(String *)list.GetAt(3); // L"1234"
290 * list.RemoveAll(true);
293 bool FindAndConsume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const;
296 * Replaces either the first match of a pattern in the text with the @c rewrite string or
297 * all the occurrences of a pattern in the text.
301 * @return @c true if the text is replaced successfully, @n
303 * @param[in, out] text The text to replace when it is matched to a pattern
304 * @param[in] rewrite The text with which to replace
305 * @param[in] globalReplace Set to @c true to replace globally, @n
306 * else @c false to replace the first match of the pattern in the text
307 * @param[in] startPos The starting position of the text
308 * @exception E_SUCCESS The method is successful.
309 * @exception E_INVALID_STATE This instance has not been constructed as yet.
310 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0, or
311 * the size of @c pMatchedString exceeds limitations.
312 * @remarks The specific error code can be accessed using the GetLastResult() method.
314 * The following example demonstrates how to use the %Replace() method.
320 * String pattern(L"replace");
321 * String text(L"test replace method");
322 * String rewrite(L"REPLACE");
324 * RegularExpression regex;
325 * regex.Construct(pattern);
326 * ret = regex.Replace(text, rewrite, false); // text = L"test REPLACE method"
330 bool Replace(Tizen::Base::String& text, const Tizen::Base::String& rewrite, bool globalReplace, int startPos = 0) const;
333 * Extracts the first match of the pattern in the text. @n
334 * Similar to Replace() but @c rewrite is copied to @c out with substitutions.
338 * @return @c true if the text is extracted successfully, @n
340 * @param[in] text The text to match
341 * @param[in] rewrite The text to replace
342 * @param[out] out The text to extract
343 * @exception E_SUCCESS The method is successful.
344 * @exception E_INVALID_STATE This instance has not been constructed as yet.
345 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0, or
346 * the size of @c pMatchedString exceeds limitations.
347 * @remarks The specific error code can be accessed using the GetLastResult() method.
349 * The following example demonstrates how to use the %Extract() method.
356 * String pattern(L"(.*)@([^.]*)");
357 * String text(L"test@email.com");
358 * String rewrite(L"\\2!\\1");
360 * RegularExpression regex;
361 * regex.Construct(pattern);
362 * ret = regex.Extract(text, rewrite, out); // out = L"email!test"
366 bool Extract(const Tizen::Base::String& text, const Tizen::Base::String& rewrite, Tizen::Base::String& out) const;
369 * Compares the specified instance to the calling instance.
373 * @return @c true if the specified instance is equal to the current instance, @n
375 * @param[in] obj The object to compare with the current instance
376 * @remarks This method returns @c true if all the attributes in the instance are the same.
378 virtual bool Equals(const Tizen::Base::Object& obj) const;
381 * Gets the hash value of the current instance.
385 * @return The hash value of the current instance
387 virtual int GetHashCode(void) const;
390 * Gets the pattern used to compile the regular expression.
394 * @return The pattern used to compile the regular expression @n An empty string if this instance is not initialized
396 Tizen::Base::String GetPattern(void) const;
400 * Sets the value of the regular expression options.
404 * @param[in] options The logical OR operator values of RegularExpressionOptions
405 * @exception E_SUCCESS The method is successful.
406 * @exception E_INVALID_ARG The value of @c options is invalid.
408 result SetOptions(unsigned long options);
412 * Gets the value of the regular expression options.
416 * @return The logical OR operator values of RegularExpressionOptions
418 unsigned long GetOptions(void) const;
422 * The implementation of this copy constructor is intentionally blank and declared as private to prohibit copying of objects.
424 * @param [in] regularExpression The instance of the %RegularExpression class to copy from
425 * @remarks This constructor is hidden.
427 RegularExpression(const RegularExpression& regularExpression);
430 * The implementation of this copy assignment operator is intentionally blank and declared as private to prohibit copying of objects.
432 * @param [in] regularExpression An instance of %RegularExpression
433 * @remarks This operator is hidden.
435 RegularExpression& operator =(const RegularExpression& regularExpression);
438 friend class _RegularExpressionImpl;
439 class _RegularExpressionImpl* __pRegularExpressionImpl;
441 }; // RegularExpression
443 }}} // Tizen::Base::Utility
445 #endif // _FBASE_UTIL_REGULAR_EXPRESSION_H_