2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FBaseUtilRegularExpression.h
19 * @brief This is the header file for the %RegularExpression class.
21 * This header file contains the declarations of the %RegularExpression class.
23 #ifndef _FBASE_UTIL_REGULAR_EXPRESSION_H_
24 #define _FBASE_UTIL_REGULAR_EXPRESSION_H_
26 #include <FBaseColIList.h>
27 #include <FBaseString.h>
30 namespace Tizen { namespace Base { namespace Utility
34 * @enum RegularExpressionOptions
36 * Defines the options available for a regular expression.
40 enum RegularExpressionOptions // Bit flags: each enumerator is a distinct bit, so values can be combined with the bitwise OR operator (|)
42 REGEX_CASELESS = 0x00000001, /**< The case-insensitive match option */
43 REGEX_MULTI_LINE = 0x00000002, /**< The multi-line match option @n
44 Without this option, (^) matches only at the start of the string, while ($) matches only at
45 the end of the string, or before a terminating newline */
46 REGEX_DOTALL = 0x00000004, /**< The option that lets a dot match newlines @n
47 Without this option, a dot does not match when the current position is at a newline */
48 REGEX_EXTENDED = 0x00000008, /**< The option to ignore whitespaces in a pattern */
49 REGEX_DOLLAR_ENDONLY = 0x00000020, /**< The option to match the dollar symbol ($) only at the end @n
50 Without this option, a dollar symbol also matches immediately before a newline */
51 REGEX_UNGREEDY = 0x00000200, /**< The option to reverse the meaning of the (*) and (*?) symbols @n
52 If this option is set, the quantifiers are not greedy by default, but they become greedy if followed by a question mark */
53 REGEX_UNICODE = 0x01000000, /**< The option to support Unicode characters @n
54 Without this option, only the ASCII characters are recognized */
58 * @class RegularExpression
59 * @brief This class provides the functionality for a regular expression.
63 * The %RegularExpression class provides the operations of a regular expression based on PCRE and the syntax based on
64 * the Perl regular expression.
65 * The various supported operations are Match(), Replace(), and Consume().
67 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/base/regular_expression.htm">Regular Expression</a>.
69 * The following example demonstrates how to use the %RegularExpression class.
75 * using namespace Tizen::Base;
76 * using namespace Tizen::Base::Collection;
77 * using namespace Tizen::Base::Utility;
80 * MyClass::RegularExpressionSample(void)
82 * String pattern(L"the quick brown fox");
83 * String text(L"What do you know about the quick brown fox?");
85 * RegularExpression regex;
86 * regex.Construct(pattern, REGEX_CASELESS);
88 * bool ret = regex.Match(text, false); // This returns true value
94 class _OSP_EXPORT_ RegularExpression
95 : public Tizen::Base::Object
99 * The object is not fully constructed after this constructor is called. @n
100 * For full construction, the Construct() method must be called right after calling this constructor.
104 RegularExpression(void); // Does not fully construct the object: Construct() must be called right after this constructor.
107 * This destructor overrides Tizen::Base::Object::~Object().
111 virtual ~RegularExpression(void); // Virtual destructor; overrides Tizen::Base::Object::~Object().
114 * Initializes this instance of %RegularExpression with the specified parameters.
118 * @return An error code
119 * @param[in] pattern The pattern to use
120 * @param[in] options The option for the regular expression
121 * @exception E_SUCCESS The method is successful.
122 * @exception E_INVALID_STATE This instance has already been constructed.
123 * @exception E_INVALID_ARG The length of the specified @c pattern is @c 0.
125 * The following example demonstrates how to use the %Construct() method.
128 * String pattern(L"^CRUEL$");
129 * String text(L"Hello\ncruel\nworld");
131 * RegularExpression regex;
132 * regex.Construct(pattern, REGEX_CASELESS | REGEX_MULTI_LINE);
133 * bool ret = regex.Match(text, false); // This returns true value
138 result Construct(const Tizen::Base::String& pattern, unsigned long options = 0x0); // options: RegularExpressionOptions values combined with bitwise OR; the default 0x0 sets none of them.
141 * Checks whether the specified text matches completely or partially.
145 * @return @c true if the text matches successfully, @n
147 * @param[in] text The text to match
148 * @param[in] fullMatch Set to @c true to match exactly, @n
149 * else @c false to match any substring of the text
150 * @param[out] pMatchedString The list of the matched string instances @n
151 * The count of the matched items is acquired from IList::GetCount() and
152 * the maximum count of the items is @c 16.
153 * @exception E_SUCCESS The method is successful.
154 * @exception E_INVALID_STATE This instance has not been constructed as yet.
155 * @exception E_INVALID_ARG The length of the specified @c text is @c 0.
157 * - The specific error code can be accessed using the GetLastResult() method.
158 * - If the grouping subpatterns are used in a pattern, the @c pMatchedString list contains the grouping data. @n
159 * For example, if the pattern has two grouping subpatterns,
160 * there are three data sets in the @c pMatchedString list.
161 * The first data set contains full grouping data while the second
162 * and third data sets contain individual grouping data.
163 * Because this method returns a new instance through an out-parameter @c pMatchedString,
164 * the caller needs to delete it after use. @n
165 * - Setting the element deleter of @c pMatchedString to SingleObjectDeleter is recommended.
167 * The following example demonstrates how to use the %Match() method.
171 * String pattern(L"(\\d\\d)-(\\d\\d)-(\\d\\d\\d\\d)");
172 * String text(L"04-12-1979");
174 * RegularExpression regex;
175 * regex.Construct(pattern);
177 * ArrayList list(SingleObjectDeleter);
181 * bool ret = regex.Match(text, true, &list); // The list will contain four string instances
183 * String out = *(dynamic_cast< String* >(list.GetAt(0))); // L"04-12-1979"
184 * out = *(dynamic_cast< String* >(list.GetAt(1))); // L"04"
185 * out = *(dynamic_cast< String* >(list.GetAt(2))); // L"12"
186 * out = *(dynamic_cast< String* >(list.GetAt(3))); // L"1979"
190 bool Match(const Tizen::Base::String& text, bool fullMatch, Tizen::Base::Collection::IList* pMatchedString = null) const; // pMatchedString is optional (defaults to null); when supplied, it receives the matched string instances.
193 * Matches the pattern from the starting point of the text and removes the matched string. @n
194 * If the pattern does not match the text at the starting point, it returns @c false.
198 * @return @c true if the text matches successfully, @n
200 * @param[in, out] text The text to consume
201 * @param[out] pMatchedString The list of matched string instances @n
202 * The count of the matched items is acquired from IList::GetCount() and
203 * the maximum count of the items is @c 16.
204 * @exception E_SUCCESS The method is successful.
205 * @exception E_INVALID_STATE This instance has not been constructed as yet.
206 * @exception E_INVALID_ARG The length of the specified @c text is @c 0.
208 * - The specific error code can be accessed using the GetLastResult() method.
209 * - If the grouping subpatterns are used in a pattern, the @c pMatchedString list
210 * contains grouping data. @n
211 * For example, if the pattern has two grouping subpatterns,
212 * there are three data sets in the @c pMatchedString list. @n
213 * The first data set contains full grouping data while the second
214 * and the third data sets contain individual grouping data.
216 * The following example demonstrates how to use the %Consume() method.
219 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
220 * String text(L" abcd1234test");
222 * ArrayList list(SingleObjectDeleter);
225 * RegularExpression regex;
226 * regex.Construct(pattern);
227 * bool ret = regex.Consume(text, &list); // The list will contain four string instances
228 * // and the text instance will be changed to L"test"
229 * String out = *(dynamic_cast< String* >(list.GetAt(0))); // L" abcd1234"
230 * out = *(dynamic_cast< String* >(list.GetAt(1))); // L" "
231 * out = *(dynamic_cast< String* >(list.GetAt(2))); // L"abcd"
232 * out = *(dynamic_cast< String* >(list.GetAt(3))); // L"1234"
236 bool Consume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const; // text is an in/out parameter: the string matched at its starting point is removed from it.
239 * Matches the pattern in strings similar to the Consume() method but does not anchor the match at the beginning of the string. @n
240 * This operation can be used to find certain patterns in the text and extract the required information.
244 * @return @c true if the text matches successfully, @n
246 * @param[in, out] text The text to find and consume
247 * @param[out] pMatchedString The list of matched string instances @n
248 * The count of the matched items is acquired from IList::GetCount() and
249 * the maximum count of the items is @c 16.
250 * @exception E_SUCCESS The method is successful.
251 * @exception E_INVALID_STATE This instance has not been constructed as yet.
252 * @exception E_INVALID_ARG The length of the specified @c text is @c 0.
254 * - The specific error code can be accessed using the GetLastResult() method.
255 * - If the grouping subpatterns are used in a pattern,
256 * the @c pMatchedString list contains grouping data. @n
257 * For example, if the pattern has two grouping subpatterns,
258 * there are three data sets in the @c pMatchedString list. @n
259 * The first data set contains full grouping data while the second
260 * and the third data sets contain individual grouping data.
262 * The following example demonstrates how to use the %FindAndConsume() method.
266 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
267 * String text(L"test abcd1234test");
269 * ArrayList list(SingleObjectDeleter);
272 * RegularExpression regex;
273 * regex.Construct(pattern);
274 * bool ret = regex.FindAndConsume(text, &list); // The list will contain four String instances
275 * // and text instance will be changed to L"test"
276 * String out = *(dynamic_cast< String* >(list.GetAt(0))); // L" abcd1234"
277 * out = *(dynamic_cast< String* >(list.GetAt(1))); // L" "
278 * out = *(dynamic_cast< String* >(list.GetAt(2))); // L"abcd"
279 * out = *(dynamic_cast< String* >(list.GetAt(3))); // L"1234"
283 bool FindAndConsume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const; // Like Consume(), but the match is not anchored at the beginning of text; text is an in/out parameter.
286 * Replaces either the first match or all the occurrences of the pattern in the text
287 * with the @c rewrite string.
291 * @return @c true if the text is replaced successfully, @n
293 * @param[in, out] text The text to replace when it is matched to a pattern
294 * @param[in] rewrite The text with which to replace
295 * @param[in] globalReplace Set to @c true to replace globally, @n
296 * else @c false to replace the first match of the pattern in the text
297 * @param[in] startPos The starting position of the text
298 * @exception E_SUCCESS The method is successful.
299 * @exception E_INVALID_STATE This instance has not been constructed as yet.
300 * @exception E_INVALID_ARG Either of the following conditions has occurred:
301 * - The length of the specified @c text is @c 0.
302 * - The number of matched substrings exceeds the limitations.
303 * @remarks The specific error code can be accessed using the GetLastResult() method.
305 * The following example demonstrates how to use the %Replace() method.
309 * String pattern(L"replace");
310 * String text(L"test replace method");
311 * String rewrite(L"REPLACE");
313 * RegularExpression regex;
314 * regex.Construct(pattern);
315 * bool ret = regex.Replace(text, rewrite, false); // text = L"test REPLACE method"
319 bool Replace(Tizen::Base::String& text, const Tizen::Base::String& rewrite, bool globalReplace, int startPos = 0) const; // text is modified in place; globalReplace selects all matches (true) or only the first match (false); startPos defaults to 0.
322 * Extracts the first match of the pattern in the text. @n
323 * Similar to Replace() but @c rewrite is copied to @c out with substitutions.
327 * @return @c true if the text is extracted successfully, @n
329 * @param[in] text The text to match
330 * @param[in] rewrite The rewrite text that is copied to @c out with substitutions
331 * @param[out] out The extracted text
332 * @exception E_SUCCESS The method is successful.
333 * @exception E_INVALID_STATE This instance has not been constructed as yet.
334 * @exception E_INVALID_ARG Either of the following conditions has occurred:
335 * - The length of the specified @c text is @c 0.
336 * - The number of matched substrings exceeds the limitations.
337 * @remarks The specific error code can be accessed using the GetLastResult() method.
339 * The following example demonstrates how to use the %Extract() method.
343 * String pattern(L"(.*)@([^.]*)");
344 * String text(L"test@email.com");
345 * String rewrite(L"\\2!\\1");
347 * RegularExpression regex;
348 * regex.Construct(pattern);
351 * bool ret = regex.Extract(text, rewrite, out); // out = L"email!test"
355 bool Extract(const Tizen::Base::String& text, const Tizen::Base::String& rewrite, Tizen::Base::String& out) const; // text is taken by const reference and is not modified; the result is written to out.
358 * Compares the specified instance to the calling instance.
362 * @return @c true if the specified instance is equal to the current instance, @n
364 * @param[in] obj The object to compare with the current instance
365 * @remarks This method returns @c true if all the attributes in the instance are the same.
367 virtual bool Equals(const Tizen::Base::Object& obj) const; // Virtual; returns true only if all the attributes of the two instances are the same.
370 * Gets the hash value of the current instance.
374 * @return The hash value of the current instance
376 virtual int GetHashCode(void) const; // Virtual; returns the hash value of the current instance.
379 * Gets the pattern used to compile the regular expression.
383 * @return The pattern used to compile the regular expression, @n
384 * else an empty string if this instance is not initialized
386 Tizen::Base::String GetPattern(void) const; // Returns the pattern by value; an empty string if this instance is not initialized.
389 * Sets the value of the regular expression options.
393 * @param[in] options The bitwise OR combination of the RegularExpressionOptions values
394 * @exception E_SUCCESS The method is successful.
395 * @exception E_INVALID_ARG The value of the specified @c options is invalid.
397 result SetOptions(unsigned long options); // options: RegularExpressionOptions values combined with bitwise OR; E_INVALID_ARG is reported for an invalid value.
400 * Gets the value of the regular expression options.
404 * @return The bitwise OR combination of the RegularExpressionOptions values
406 unsigned long GetOptions(void) const; // Returns the current options as RegularExpressionOptions values combined with bitwise OR.
410 * The implementation of this copy constructor is intentionally blank and declared as private to prohibit copying of objects.
412 * @param [in] regularExpression The instance of the %RegularExpression class to copy from
413 * @remarks This constructor is hidden.
415 RegularExpression(const RegularExpression& regularExpression); // Copy construction is prohibited: declared private with an intentionally blank implementation.
418 * The implementation of this copy assignment operator is intentionally blank and declared as private to prohibit copying of objects.
420 * @param [in] regularExpression An instance of %RegularExpression
421 * @remarks This operator is hidden.
423 RegularExpression& operator =(const RegularExpression& regularExpression); // Copy assignment is prohibited: declared private with an intentionally blank implementation.
426 friend class _RegularExpressionImpl; // Grants the implementation class access to the non-public members of this class.
427 class _RegularExpressionImpl* __pRegularExpressionImpl; // Pointer to the implementation object. NOTE(review): presumably owned by this instance (pimpl) -- confirm against the .cpp file.
429 }; // RegularExpression
431 }}} // Tizen::Base::Utility
433 #endif // _FBASE_UTIL_REGULAR_EXPRESSION_H_