2 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
4 // Licensed under the Apache License, Version 2.0 (the License);
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 * @file FBaseUtilRegularExpression.h
19 * @brief This is the header file for the %RegularExpression class.
21 * This header file contains the declarations of the %RegularExpression class.
23 #ifndef _FBASE_UTIL_REGULAR_EXPRESSION_H_
24 #define _FBASE_UTIL_REGULAR_EXPRESSION_H_
26 #include <FBaseColIList.h>
27 #include <FBaseString.h>
30 namespace Tizen { namespace Base { namespace Utility
34 * @enum RegularExpressionOptions
36 * Defines the options available for a regular expression.
40 enum RegularExpressionOptions
42 REGEX_CASELESS = 0x00000001, /**< The case-insensitive match option */
43 REGEX_MULTI_LINE = 0x00000002, /**< The multi-line match option @n
44 Without this option, (^) matches only at the start of the string, while ($) matches only at
45 the end of the string, or before a terminating newline. */
46 REGEX_DOTALL = 0x00000004, /**< The option to make the dot match newlines @n
47 Without this option, a dot does not match when the current position is at a newline. */
48 REGEX_EXTENDED = 0x00000008, /**< The option to ignore whitespace in the pattern */
49 REGEX_DOLLAR_ENDONLY = 0x00000020, /**< The option to match the dollar symbol ($) only at the end @n
50 Without this option, a dollar symbol also matches immediately before a newline. */
51 REGEX_UNGREEDY = 0x00000200, /**< The option to reverse the meaning of the (*) and (*?) symbols @n
52 If this option is set, the quantifiers are not greedy by default, but they become greedy if followed by a question mark. */
53 REGEX_UNICODE = 0x01000000, /**< The option to support Unicode characters @n
54 Without this option, only the ASCII characters are recognized. */
58 * @class RegularExpression
59 * @brief This class provides the functionality for a regular expression.
63 * The %RegularExpression class provides regular expression operations that are based on PCRE and use a syntax based on
64 * Perl regular expressions.
65 * The various supported operations are Match(), Replace(), and Consume().
67 * For more information on the class features, see <a href="../org.tizen.native.appprogramming/html/guide/base/regular_expression.htm">Regular Expression</a>.
69 * The following example demonstrates how to use the %RegularExpression class.
75 * using namespace Tizen::Base;
76 * using namespace Tizen::Base::Collection;
77 * using namespace Tizen::Base::Utility;
80 * MyClass::RegularExpressionSample(void)
82 * String pattern(L"the quick brown fox");
83 * String text(L"What do you know about the quick brown fox?");
85 * RegularExpression regex;
86 * regex.Construct(pattern, REGEX_CASELESS);
88 * bool ret = regex.Match(text, false); // This returns true
94 class _OSP_EXPORT_ RegularExpression
95 : public Tizen::Base::Object
99 * The object is not fully constructed after this constructor is called. @n
100 * For full construction, the Construct() method must be called right after calling this constructor.
104 RegularExpression(void);
107 * This destructor overrides Tizen::Base::Object::~Object().
111 virtual ~RegularExpression(void);
114 * Initializes this instance of %RegularExpression with the specified parameters.
118 * @return An error code
119 * @param[in] pattern The pattern to use
120 * @param[in] options The options for the regular expression, given as a bitwise OR combination of the RegularExpressionOptions values
121 * @exception E_SUCCESS The method is successful.
122 * @exception E_INVALID_STATE This instance has already been constructed.
123 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0.
125 * The following example demonstrates how to use the %Construct() method.
128 * String pattern(L"^CRUEL$");
129 * String text(L"Hello\ncruel\nworld");
131 * RegularExpression regex;
132 * regex.Construct(pattern, REGEX_CASELESS | REGEX_MULTI_LINE);
133 * bool ret = regex.Match(text, false); // This returns true
138 result Construct(const Tizen::Base::String& pattern, unsigned long options = 0x0);
141 * Checks whether the specified text matches completely or partially.
145 * @return @c true if the text matches successfully, @n
147 * @param[in] text The text to match
148 * @param[in] fullMatch Set to @c true to match exactly, @n
149 * else @c false to match any substring of the text
150 * @param[out] pMatchedString A list of the matched string instances @n
151 * The count of the matched items is acquired from IList::GetCount() and
152 * the maximum count of the items is @c 16.
153 * @exception E_SUCCESS The method is successful.
154 * @exception E_INVALID_STATE This instance has not been constructed as yet.
155 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
157 * - The specific error code can be accessed using the GetLastResult() method.
158 * - If the grouping subpatterns are used in a pattern,
159 * the @c pMatchedString list will contain the grouping data. @n
160 * For example, if the pattern has two grouping subpatterns,
161 * there will be three data sets in the @c pMatchedString list. @n
162 * The first data set will contain the whole matched text, and the second
163 * and the third data sets will contain the text captured by each individual group.
164 * - Because this method returns a new instance through an out-parameter @c pMatchedString,
165 * the caller needs to delete it after use. @n
166 * Setting the element deleter of @c pMatchedString to SingleObjectDeleter is recommended.
168 * The following example demonstrates how to use the %Match() method.
172 * String pattern(L"(\\d\\d)-(\\d\\d)-(\\d\\d\\d\\d)");
173 * String text(L"04-12-1979");
175 * RegularExpression regex;
176 * regex.Construct(pattern);
178 * ArrayList list(SingleObjectDeleter);
182 * bool ret = regex.Match(text, true, &list); // The list will contain four string instances
184 * String out = *(dynamic_cast< String* >(list.GetAt(0))); // L"04-12-1979"
185 * out = *(dynamic_cast< String* >(list.GetAt(1))); // L"04"
186 * out = *(dynamic_cast< String* >(list.GetAt(2))); // L"12"
187 * out = *(dynamic_cast< String* >(list.GetAt(3))); // L"1979"
191 bool Match(const Tizen::Base::String& text, bool fullMatch, Tizen::Base::Collection::IList* pMatchedString = null) const;
194 * Matches the pattern from the starting point of the text and removes the matched string. @n
195 * If the pattern does not match the text at the starting point, it will return @c false.
199 * @return @c true if the text matches successfully, @n
201 * @param[in, out] text The text to consume
202 * @param[out] pMatchedString A list of matched string instances @n
203 * The count of the matched items is acquired from IList::GetCount() and
204 * the maximum count of the items is @c 16.
205 * @exception E_SUCCESS The method is successful.
206 * @exception E_INVALID_STATE This instance has not been constructed as yet.
207 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
208 * @remarks The specific error code can be accessed using the GetLastResult() method.
209 * @remarks If the grouping subpatterns are used in a pattern, the @c pMatchedString list will
210 * contain the grouping data. @n
211 * For example, if the pattern has two grouping subpatterns,
212 * there will be three data sets in the @c pMatchedString list. @n
213 * The first data set will contain the whole matched text, and the second
214 * and the third data sets will contain the text captured by each individual group.
216 * The following example demonstrates how to use the %Consume() method.
219 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
220 * String text(L" abcd1234test");
222 * ArrayList list(SingleObjectDeleter);
225 * RegularExpression regex;
226 * regex.Construct(pattern);
227 * bool ret = regex.Consume(text, &list); // The list will contain four string instances
228 * // and the text instance will be changed to L"test"
229 * String out = *(dynamic_cast< String* >(list.GetAt(0))); // L" abcd1234"
230 * out = *(dynamic_cast< String* >(list.GetAt(1))); // L" "
231 * out = *(dynamic_cast< String* >(list.GetAt(2))); // L"abcd"
232 * out = *(dynamic_cast< String* >(list.GetAt(3))); // L"1234"
236 bool Consume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const;
239 * Matches the pattern in the text in the same way as the Consume() method, but does not anchor the match at the beginning of the text. @n
240 * This operation can be used to find certain patterns in the text and extract the required information.
244 * @return @c true if the text matches successfully, @n
246 * @param[in, out] text The text to find and consume
247 * @param[out] pMatchedString A list of matched string instances @n
248 * The count of the matched items is acquired from IList::GetCount() and
249 * the maximum count of the items is @c 16.
250 * @exception E_SUCCESS The method is successful.
251 * @exception E_INVALID_STATE This instance has not been constructed as yet.
252 * @exception E_INVALID_ARG The length of the specified @c text parameter is @c 0.
253 * @remarks The specific error code can be accessed using the GetLastResult() method.
254 * @remarks If the grouping subpatterns are used in a pattern,
255 * the @c pMatchedString list will contain the grouping data. @n
256 * For example, if the pattern has two grouping subpatterns,
257 * there will be three data sets in the @c pMatchedString list. @n
258 * The first data set will contain the whole matched text, and the second
259 * and the third data sets will contain the text captured by each individual group.
261 * The following example demonstrates how to use the %FindAndConsume() method.
265 * String pattern(L"(\\s+)([a-z]+)(\\d+)");
266 * String text(L"test abcd1234test");
268 * ArrayList list(SingleObjectDeleter);
271 * RegularExpression regex;
272 * regex.Construct(pattern);
273 * bool ret = regex.FindAndConsume(text, &list); // The list will contain four String instances
274 * // and the text instance will be changed to L"test"
275 * String out = *(dynamic_cast< String* >(list.GetAt(0))); // L" abcd1234"
276 * out = *(dynamic_cast< String* >(list.GetAt(1))); // L" "
277 * out = *(dynamic_cast< String* >(list.GetAt(2))); // L"abcd"
278 * out = *(dynamic_cast< String* >(list.GetAt(3))); // L"1234"
282 bool FindAndConsume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString = null) const;
285 * Replaces either the first match or all the occurrences of the pattern in the text
286 * with the @c rewrite string.
290 * @return @c true if the text is replaced successfully, @n
292 * @param[in, out] text The text in which the matches of the pattern are replaced
293 * @param[in] rewrite The text with which to replace
294 * @param[in] globalReplace Set to @c true to replace globally, @n
295 * else @c false to replace the first match of the pattern in the text
296 * @param[in] startPos The starting position of the text
297 * @exception E_SUCCESS The method is successful.
298 * @exception E_INVALID_STATE This instance has not been constructed as yet.
299 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0, or
300 * the size of @c pMatchedString exceeds limitations.
301 * @remarks The specific error code can be accessed using the GetLastResult() method.
303 * The following example demonstrates how to use the %Replace() method.
307 * String pattern(L"replace");
308 * String text(L"test replace method");
309 * String rewrite(L"REPLACE");
311 * RegularExpression regex;
312 * regex.Construct(pattern);
313 * bool ret = regex.Replace(text, rewrite, false); // text = L"test REPLACE method"
317 bool Replace(Tizen::Base::String& text, const Tizen::Base::String& rewrite, bool globalReplace, int startPos = 0) const;
320 * Extracts the first match of the pattern in the text. @n
321 * Similar to Replace() but @c rewrite is copied to @c out with substitutions.
325 * @return @c true if the text is extracted successfully, @n
327 * @param[in] text The text to match
328 * @param[in] rewrite The text that is copied to @c out with substitutions
329 * @param[out] out The extracted text
330 * @exception E_SUCCESS The method is successful.
331 * @exception E_INVALID_STATE This instance has not been constructed as yet.
332 * @exception E_INVALID_ARG The length of the specified @c pattern parameter is @c 0, or
333 * the size of @c pMatchedString exceeds limitations.
334 * @remarks The specific error code can be accessed using the GetLastResult() method.
336 * The following example demonstrates how to use the %Extract() method.
340 * String pattern(L"(.*)@([^.]*)");
341 * String text(L"test@email.com");
342 * String rewrite(L"\\2!\\1");
344 * RegularExpression regex;
345 * regex.Construct(pattern);
348 * bool ret = regex.Extract(text, rewrite, out); // out = L"email!test"
352 bool Extract(const Tizen::Base::String& text, const Tizen::Base::String& rewrite, Tizen::Base::String& out) const;
355 * Compares the specified instance to the calling instance.
359 * @return @c true if the specified instance is equal to the current instance, @n
361 * @param[in] obj The object to compare with the current instance
362 * @remarks This method returns @c true if all the attributes in the instance are the same.
364 virtual bool Equals(const Tizen::Base::Object& obj) const;
367 * Gets the hash value of the current instance.
371 * @return The hash value of the current instance
373 virtual int GetHashCode(void) const;
376 * Gets the pattern used to compile the regular expression.
380 * @return The pattern used to compile the regular expression @n An empty string if this instance is not initialized
382 Tizen::Base::String GetPattern(void) const;
385 * Sets the value of the regular expression options.
389 * @param[in] options The bitwise OR combination of the RegularExpressionOptions values
390 * @exception E_SUCCESS The method is successful.
391 * @exception E_INVALID_ARG The value of @c options is invalid.
393 result SetOptions(unsigned long options);
396 * Gets the value of the regular expression options.
400 * @return The bitwise OR combination of the RegularExpressionOptions values
402 unsigned long GetOptions(void) const;
406 * The implementation of this copy constructor is intentionally blank and declared as private to prohibit copying of objects.
408 * @param[in] regularExpression The instance of the %RegularExpression class to copy from
409 * @remarks This constructor is hidden.
411 RegularExpression(const RegularExpression& regularExpression);
414 * The implementation of this copy assignment operator is intentionally blank and declared as private to prohibit copying of objects.
416 * @param[in] regularExpression An instance of %RegularExpression
417 * @remarks This operator is hidden.
419 RegularExpression& operator =(const RegularExpression& regularExpression);
422 friend class _RegularExpressionImpl;
423 class _RegularExpressionImpl* __pRegularExpressionImpl;
425 }; // RegularExpression
427 }}} // Tizen::Base::Utility
429 #endif // _FBASE_UTIL_REGULAR_EXPRESSION_H_