2 // Open Service Platform
3 // Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 // Licensed under the Apache License, Version 2.0 (the License);
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
19 * @File : FBaseUtil_RegularExpressionImpl.cpp
20 * @brief : Implementation for _RegularExpressionImpl Class
24 //#include "FBaseSysInternal.h"
27 #include <unique_ptr.h>
28 #include <FBaseUtilRegularExpression.h>
29 #include <FBaseSysLog.h>
30 #include "FBaseUtil_RegularExpressionImpl.h"
33 #define REGEX_MAX_GROUP_COUNT 16
35 using namespace Tizen::Base;
36 using namespace Tizen::Base::Collection;
38 namespace Tizen { namespace Base { namespace Utility
// Bitmask of every option flag this implementation accepts. Construct() and
// SetOptions() return E_INVALID_ARG for any option bit outside this mask.
// NOTE(review): only part of the initializer is visible in this excerpt.
42 static int allOptions =
47 REGEX_DOLLAR_ENDONLY |
// Shared sentinel string: Compile() points RegExInfo::pError at it, and
// RegexFree() compares pPattern and pError against it by address.
52 static char emptyString[1] = {'\0'};
// Fields of RegExInfo: the state kept for one compiled expression. Compile()
// allocates and fills it, Match() uses it, RegexFree() releases its members.
56 void* pRe; // pcre object returned by pcre_compile2()
57 char* pPattern; // copy of the UTF-8 regular expression pattern made in Compile()
58 unsigned long options; // options used while compiling
59 int* pOffsets; // array that receives match offsets; allocated by RegexAllocateOffset()
60 int offsetCount; // number of int entries in the offsets array
62 // Valid in case of error
63 char* pError; // Error string if compilation fails
64 int errOffset; // Error offset in pattern in case compilation fails
// Releases the resources held by a RegExInfo: the compiled PCRE object, the
// pattern copy and the offset array.
// NOTE(review): several lines of this function are not visible in this excerpt,
// including the body of the pError branch.
68 RegexFree(RegExInfo* pRegex)
72 if (pRegex->pRe != null)
74 pcre_free(pRegex->pRe);
// The pattern copy is released unless it points at the emptyString sentinel.
77 if (pRegex->pPattern != emptyString)
79 free(pRegex->pPattern);
// NOTE(review): pError is written by pcre_compile2() in Compile(). PCRE documents
// that message as static text that must not be freed; confirm the hidden body
// of this branch does not pass it to free().
82 if (pRegex->pError != emptyString)
87 if (pRegex->pOffsets != null)
89 free(pRegex->pOffsets);
// Scans `pattern` (`len` bytes) and counts groups, stopping once the count reaches
// REGEX_MAX_GROUP_COUNT. The result sizes the offset array in Compile().
// The scan starts at the second character and inspects the preceding one for a
// backslash, presumably to ignore escaped parentheses (the test on *pos itself
// is not visible in this excerpt; confirm).
97 RegexGetGroupCountInPattern(const char* pattern, int len)
100 const char* pos = pattern + 1;
// NOTE(review): `--len` is evaluated before the first iteration. With len <= 0 it
// becomes negative (non-zero), so the length bound never ends the loop unless
// the unseen body breaks out; confirm callers always pass len >= 1.
101 while ((--len) && (grpCount < REGEX_MAX_GROUP_COUNT))
105 if (*(pos - 1) != '\\')
114 // return REGEX_MAX_GROUP_COUNT;
// (Re)allocates pRegex->pOffsets, the vector handed to pcre_exec() in Match().
// Any previously allocated array is freed first.
117 RegexAllocateOffset(RegExInfo* pRegex, int maxGroupCount)
121 if (pRegex->pOffsets)
123 free(pRegex->pOffsets);
// Always provision room for at least REGEX_MAX_GROUP_COUNT groups.
126 if (maxGroupCount < REGEX_MAX_GROUP_COUNT)
128 maxGroupCount = REGEX_MAX_GROUP_COUNT;
// Three ints per entry, for every group plus one entry for the whole match.
131 pRegex->offsetCount = (maxGroupCount + 1) * 3;
// calloc() may return null; Match() checks pOffsets for null before using it.
132 pRegex->pOffsets = (int*) calloc(pRegex->offsetCount, sizeof(int));
// Default constructor. Marks the object as needing compilation and the last
// match as partial. Other member initializers are not visible in this excerpt.
137 _RegularExpressionImpl::_RegularExpressionImpl()
141 , __isCompilationRequired(true)
142 , __isLastMatchPartial(true)
// Destructor.
// NOTE(review): the body is not visible in this excerpt; presumably it calls
// Cleanup() to release both compiled expressions. Confirm.
146 _RegularExpressionImpl::~_RegularExpressionImpl()
// Releases the full-match and partial-match compiled expressions through
// RegexFree() when they exist.
// NOTE(review): the visible part of RegexFree() frees only the members of
// RegExInfo. Whether the RegExInfo struct itself (calloc'd in Compile()) is freed
// and __pReFull / __pRePartial are reset to null happens on lines not visible
// here; confirm, otherwise the struct leaks or the pointers dangle.
152 _RegularExpressionImpl::Cleanup(void)
154 if (__pReFull != null)
156 RegexFree((RegExInfo*) __pReFull);
160 if (__pRePartial != null)
162 RegexFree((RegExInfo*) __pRePartial);
// Initializes this instance from `pattern` and `options`.
// Returns E_INVALID_ARG when `options` contains a bit outside allOptions;
// otherwise delegates to Init() and keeps its result in `r`.
168 _RegularExpressionImpl::Construct(const String& pattern, unsigned long options)
170 if (options & (~allOptions))
172 return E_INVALID_ARG;
175 result r = E_SUCCESS;
176 r = Init(pattern, options);
// Compiles the pattern twice: once for partial matching and once for full
// matching. The full variant is compiled only if the partial one succeeded.
// NOTE(review): `options` is `const int` here, while Construct() passes an
// unsigned long and RegExInfo::options is unsigned long; the narrowing is
// implicit. The lines storing __pattern / __options and the assignment of a
// failure code to `r` are not visible in this excerpt.
182 _RegularExpressionImpl::Init(const String& pattern, const int options)
184 result r = E_SUCCESS;
// Subjects are always handed to PCRE as UTF-8 (see Match()), so force UTF-8 mode.
190 __options |= PCRE_UTF8;
192 __pRePartial = Compile(__pattern, __options, false);
193 if (null != __pRePartial)
195 __pReFull = Compile(__pattern, __options, true);
// A failed Init leaves the flag set so that Match() retries the compilation.
199 __isCompilationRequired = (r == E_SUCCESS) ? false : true;
// Compiles `pattern` with PCRE and returns a heap-allocated RegExInfo (as void*)
// holding the compiled object, a copy of the pattern and the offset array.
// Returns null through SysTryReturn when the pattern string cannot be prepared.
// NOTE(review): many lines (the role of `forFullMatch`, null checks, the error
// path after pcre_compile2) are not visible in this excerpt.
205 _RegularExpressionImpl::Compile(const String& pattern, int options, bool forFullMatch)
207 RegExInfo* pRegex = null;
209 result r = E_SUCCESS;
210 String pat = pattern;
// Opens a non-capturing group around the user pattern; the matching suffix is
// presumably appended on a line not visible here (confirm).
213 r = pat.Insert("(?:", 0);
214 SysTryReturn(NID_BASE_UTIL, !IsFailed(r), null, r, "[%s] : Failed to insert string", GetErrorMessage(r));
// PCRE receives the pattern as UTF-8 bytes.
218 std::unique_ptr< ByteBuffer > pTmpBuf(StringUtil::StringToUtf8N(pat));
// Pattern length in bytes, excluding the final byte of the buffer.
223 lenPattern = pTmpBuf->GetLimit() - 1;
// calloc zero-fills the struct, so pRe, pPattern and pOffsets start out null.
224 pRegex = (RegExInfo*) calloc(1, sizeof(RegExInfo));
227 pRegex->pError = emptyString;
228 pRegex->options = options;
// Make sure the buffer is NUL-terminated before handing it to PCRE as a C string.
229 pTmpBuf->SetByte(lenPattern, '\0');
230 pRegex->pRe = pcre_compile2((const char*) pTmpBuf->GetPointer(), // Pattern to compile
231 options, // Compile options
232 &errorCode, // Error code returned by PCRE
233 (const char**) &pRegex->pError, // Pointer to string containing error message
234 &pRegex->errOffset, // Index in pattern, where error occurs
235 null // Passing null for pointer to character table so it will use default table.
// NOTE(review): this declaration shadows the function-level `r` declared above,
// so the mapped error is not visible through the outer variable.
241 result r = PcreCompileErrorToSystemError(errorCode);
// NOTE(review): the buffer holds exactly lenPattern bytes and strncpy() copies
// lenPattern bytes, so there is no room for a terminating NUL. If pPattern is
// ever read as a C string it needs lenPattern + 1 bytes; confirm.
248 pRegex->pPattern = (char*) calloc(lenPattern, sizeof(char));
249 if(pRegex->pPattern != null)
251 strncpy(pRegex->pPattern, (const char*) pTmpBuf->GetPointer(), lenPattern);
261 // Allocating memory for offset array, to be used during match operation
262 RegexAllocateOffset(pRegex, RegexGetGroupCountInPattern((const char*) pTmpBuf->GetPointer(), lenPattern));
271 return (void*) pRegex;
// Matches `text` against the compiled pattern from position 0, either as a full
// or a partial match depending on `fullMatch`. Returns true when the internal
// Match() reports a positive match count.
// When `pMatchedString` is non-null and a match was found, the matched groups
// are added to it via GetLastMatchedGroups().
275 _RegularExpressionImpl::Match(const Tizen::Base::String& text, bool fullMatch, IList* pMatchedString)
280 matchCount = Match(fullMatch, text, 0, 0);
281 if (pMatchedString && matchCount > 0)
// NOTE(review): the result of GetLastMatchedGroups() is discarded here.
283 GetLastMatchedGroups(text, *pMatchedString);
// NOTE(review): this appears to overwrite any error recorded by the internal
// Match() via SetLastResult(); confirm that callers are not expected to
// distinguish "no match" from "match failed".
286 SetLastResult(E_SUCCESS);
287 return(matchCount > 0);
// Core matcher: runs pcre_exec() on the UTF-8 form of `text`, starting at character
// index `startPos`, using the full-match or partial-match compiled expression.
// Match offsets are left in the selected RegExInfo::pOffsets, as character indexes
// into `text`. Errors are reported through SetLastResult().
// NOTE(review): many lines (returns, braces, some conditions) are not visible in
// this excerpt, so the comments below describe only what the visible lines show.
292 _RegularExpressionImpl::Match(bool isFull, const String& text, int startPos, int matchOptions)
294 result r = E_SUCCESS;
295 std::unique_ptr< ByteBuffer > pTmpBuf(null);
297 bool containsNonAsciiChars = false;
// NOTE(review): the ternary parses as ((isFull) == false) ? __pReFull : __pRePartial,
// so the second operand is true whenever the OTHER variant's handle is non-null.
// After a successful Init() that is always the case, which would force a re-Init
// on every call. A null check on the requested variant looks more likely to be
// the intent; verify.
299 if (__isCompilationRequired || ((isFull) == false ? __pReFull : __pRePartial))
301 if (__pRePartial && __pReFull)
// NOTE(review): the result of Init() is ignored; failure is detected below
// through the null checks on pRegex.
305 Init(__pattern, __options);
308 RegExInfo* pRegex = static_cast<RegExInfo*>((isFull) ? __pReFull : __pRePartial);
309 if ((pRegex == null))
311 SetLastResult(E_INVALID_STATE);
314 if ((pRegex->pRe == null) || (pRegex->pOffsets == null))
316 SetLastResult(E_INVALID_ARG);
// The condition guarding this line is not visible here.
322 matchOptions |= PCRE_ANCHORED;
// Builds a one-byte buffer containing only '\0' as the subject. The condition
// selecting this path is not visible; presumably it handles empty `text`.
327 pTmpBuf.reset(new (std::nothrow) ByteBuffer);
330 SetLastResult(E_OUT_OF_MEMORY);
333 r = pTmpBuf->Construct(1);
334 if(IsFailed(r) == true)
339 r = pTmpBuf->SetByte('\0');
340 if(IsFailed(r) == true)
// Regular path: the subject is the UTF-8 encoding of `text`, NUL-terminated.
349 pTmpBuf.reset(StringUtil::StringToUtf8N(text));
355 pTmpBuf->SetByte(pTmpBuf->GetLimit() - 1, '\0');
// A byte length different from the character count means multi-byte characters
// are present, so the start position must be translated to a byte offset.
357 if ((pTmpBuf->GetLimit() - 1) != text.GetLength())
359 containsNonAsciiChars = true;
360 startPos = StringToUtf8Index(text, startPos);
364 // Resetting offset array to invalid values (every int becomes -1)
365 memset(pRegex->pOffsets, -1, pRegex->offsetCount * sizeof(int));
366 int matchCount = pcre_exec((pcre*) (pRegex->pRe), // the regular expression object
368 (const char*) pTmpBuf->GetPointer(), // pointer to subject string
369 pTmpBuf->GetLimit() - 1, // length of subject string
370 startPos, // where to start in the subject string
371 matchOptions, // option bits
372 pRegex->pOffsets, // points to a vector of ints to be filled in with offsets
373 pRegex->offsetCount // the number of elements in the vector
375 if (matchCount == PCRE_ERROR_NOMATCH)
382 SetLastResult(PcreExecErrorToSystemError(matchCount));
386 // To handle cases where captured strings count is greater than offset count
// NOTE(review): PCRE uses only the first two thirds of the vector for offset
// pairs, i.e. offsetCount / 3 pairs; offsetCount / 2 may overstate the number of
// valid pairs. Confirm against the pcre_exec() documentation.
389 matchCount = pRegex->offsetCount / 2;
// Translate byte offsets reported by PCRE back to character indexes in `text`.
// NOTE(review): entries for unset groups are still -1 here; confirm that
// Utf8ToStringIndex() guards against a negative index on its hidden lines.
392 if (containsNonAsciiChars)
394 int offsetCnt = matchCount * 2;
395 for (int i = 0; i < offsetCnt; i += 2)
397 pRegex->pOffsets[i] = Utf8ToStringIndex((char*) pTmpBuf->GetPointer(), pRegex->pOffsets[i]);
398 pRegex->pOffsets[i + 1] = Utf8ToStringIndex((char*) pTmpBuf->GetPointer(), pRegex->pOffsets[i + 1]);
402 SetLastResult(E_SUCCESS);
// Remember which compiled variant holds the offsets of this match; the
// GetLastMatch*() helpers select the RegExInfo from this flag.
404 __isLastMatchPartial = !isFull;
// Tries a partial match anchored at the beginning of `text`. On success the
// matched groups are copied into `pMatchedString` and the text up to the end of
// the match is removed from `text`.
// NOTE(review): the conditions around these statements (including the null check
// on pMatchedString before it is dereferenced) are not visible in this excerpt.
409 _RegularExpressionImpl::Consume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString)
411 result r = E_SUCCESS;
415 matchCount = Match(false, text, 0, PCRE_ANCHORED);
420 r = GetLastMatchedGroups(text, *pMatchedString);
427 r = text.Remove(0, GetLastMatchEnd());
433 SetLastResult(E_SUCCESS);
// Like Consume(), but the match is not anchored: the first match anywhere in
// `text` is used. Everything from the start of `text` up to the end of that
// match, including any unmatched prefix, is removed.
// NOTE(review): the guarding conditions and return statements are not visible in
// this excerpt.
443 _RegularExpressionImpl::FindAndConsume(Tizen::Base::String& text, Tizen::Base::Collection::IList* pMatchedString)
445 result r = E_SUCCESS;
449 matchCount = Match(false, text, 0, 0);
454 r = GetLastMatchedGroups(text, *pMatchedString);
461 r = text.Remove(0, GetLastMatchEnd());
// Replaces matches of the pattern in `text`, from character index `startPos`
// onwards, with the expansion of `rewrite` (see Rewrite()). The result is built
// up in a local string `out`.
// NOTE(review): large parts of this function are not visible in this excerpt
// (how `globalReplace` ends the loop, how startPos advances, how `out` is copied
// back into `text`); the comments below cover only the visible statements.
477 _RegularExpressionImpl::Replace(Tizen::Base::String& text, const Tizen::Base::String& rewrite, bool globalReplace, int startPos)
479 result r = E_SUCCESS;
// Length captured before the loop; used for the tail copy at the end.
482 int lenText = text.GetLength();
483 bool isMatchEmpty = false;
// Text before startPos is carried over unchanged.
489 r = text.SubString(0, startPos, out);
496 while (startPos <= text.GetLength())
// Presumably taken after an empty match: retry at the same position but forbid
// another empty match; if that fails, copy one character so the scan advances.
505 matchCount = Match(false, text, startPos, PCRE_ANCHORED | PCRE_NOTEMPTY);
506 if ((matchCount <= 0))
508 if (startPos < text.GetLength())
510 r = out.Append(text[startPos]);
518 isMatchEmpty = false;
// Normal search for the next match at or after startPos.
524 matchCount = Match(false, text, startPos, 0);
531 matchStart = GetLastMatchStart();
532 matchEnd = GetLastMatchEnd();
// Sanity check on the offsets reported for the match.
534 if ((matchStart < startPos) || (matchEnd < matchStart))
// Copy the unmatched text between the previous position and the match start.
540 if (matchStart > startPos)
542 r = text.SubString(startPos, matchStart - startPos, out1);
// Expand the rewrite template for this match.
550 r = Rewrite(rewrite, text, out1);
559 isMatchEmpty = (matchStart == matchEnd);
// Copy whatever remains after the last processed position.
574 if (startPos < lenText)
577 r = text.SubString(startPos, lenText - startPos, out1);
587 SetLastResult(E_SUCCESS);
// Searches `text` for the first (unanchored, partial) match and writes the
// expansion of `rewrite` for that match into `out` via Rewrite().
// NOTE(review): the handling of "no match" and the return statements are not
// visible in this excerpt.
596 _RegularExpressionImpl::Extract(const Tizen::Base::String& text, const Tizen::Base::String& rewrite, Tizen::Base::String& out)
598 result r = E_SUCCESS;
603 matchCount = Match(false, text, 0, 0);
609 r = Rewrite(rewrite, text, out);
// Const accessor for the pattern.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// __pattern. Confirm.
623 _RegularExpressionImpl::GetPattern(void) const
// Changes the options used by this expression.
// Returns E_INVALID_ARG when `options` contains a bit outside allOptions.
// Sets __isCompilationRequired so that the next Match() re-runs Init().
629 _RegularExpressionImpl::SetOptions(unsigned long options)
631 if (options & (~allOptions))
633 return E_INVALID_ARG;
// NOTE(review): `&&` is a logical AND, so this condition is true only when
// `options` is 0 (PCRE_NOTEMPTY is a non-zero constant). With any option bit set
// the recompile flag is NOT raised. A bitwise test such as
// `options & PCRE_NOTEMPTY` was presumably intended; verify which options
// really require recompilation.
638 if (!(options && PCRE_NOTEMPTY))
640 __isCompilationRequired = true;
// Const accessor for the current options.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// __options. Confirm whether the PCRE_UTF8 bit that Init() adds is masked out.
646 _RegularExpressionImpl::GetOptions(void) const
// Adds one heap-allocated String per group of the last match to
// `matchedGrpStrList`: entry 0 for the whole match, then one per capture group.
// The substrings are cut from `text` using the offsets stored by Match().
// Returns E_INVALID_STATE when the group count cannot be determined (the exact
// condition is on a line not visible in this excerpt).
653 _RegularExpressionImpl::GetLastMatchedGroups(const String& text, IList& matchedGrpStrList)
655 result r = E_SUCCESS;
656 RegExInfo* pRegex = static_cast<RegExInfo*>((__isLastMatchPartial) ? __pRePartial : __pReFull);
659 int grpCount = GetLastGroupCount();
662 return E_INVALID_STATE;
// Convert "number of capture groups" into "number of offset ints to visit":
// one (start, end) pair for the whole match plus one pair per group.
// NOTE(review): grpCount comes from PCRE_INFO_CAPTURECOUNT, while the offset
// array is sized from RegexGetGroupCountInPattern(), whose loop stops counting at
// REGEX_MAX_GROUP_COUNT. For patterns with many groups this index range could
// exceed offsetCount unless it is clamped on lines not visible here; confirm.
665 grpCount = (grpCount + 1) * 2;
666 for (int i = 0; i < grpCount; i += 2)
// NOTE(review): nothrow new may return null; no null check is visible before
// *pTmpStr is used below. Confirm.
668 String* pTmpStr = new (std::nothrow) String();
// Only groups that took part in the match with a non-empty range are copied.
669 if (pRegex->pOffsets[i] >= 0 && (pRegex->pOffsets[i] < pRegex->pOffsets[i + 1]))
671 r = text.SubString(pRegex->pOffsets[i], pRegex->pOffsets[i + 1] - pRegex->pOffsets[i], *pTmpStr);
680 r = matchedGrpStrList.Add(*pTmpStr);
// Clears the list and deallocates its elements (argument `true`); presumably
// this is the failure path. Confirm.
690 matchedGrpStrList.RemoveAll(true);
// Asks PCRE (PCRE_INFO_CAPTURECOUNT) how many capture groups the expression used
// for the last match contains.
// Sets the last result to E_INVALID_STATE or E_INVALID_ARG on the visible error
// paths; the conditions and return values are partly not visible in this excerpt.
695 _RegularExpressionImpl::GetLastGroupCount(void)
698 RegExInfo* pRegex = static_cast<RegExInfo*>((__isLastMatchPartial) ? __pRePartial : __pReFull);
702 SetLastResult(E_INVALID_STATE);
705 if (pRegex->pRe == null)
707 SetLastResult(E_INVALID_ARG);
711 int retValue = pcre_fullinfo((pcre*) (pRegex->pRe), // the regular expression object
712 null, // points extra data, or null
713 PCRE_INFO_CAPTURECOUNT, // what information is required
714 (void*) &grpCount // where to put the information
// NOTE(review): `grpCount < -1` lets -1 through as a valid count. If -1 is the
// initial "unknown" value of grpCount (its declaration is not visible here),
// `grpCount < 0` may have been intended. Confirm.
718 if ((0 != retValue) || (grpCount < -1))
// Converts a character index in `text` into the corresponding byte offset in its
// UTF-8 encoding. It encodes the prefix text[0, index) and takes the encoded
// length minus the final byte of the buffer.
// NOTE(review): `text` is taken by value, so the whole string is copied on every
// call. The handling of failures of SubString()/StringToUtf8N() is not visible in
// this excerpt.
726 _RegularExpressionImpl::StringToUtf8Index(const Tizen::Base::String text, int index)
735 result r = text.SubString(0, index, tmpStr);
738 std::unique_ptr< ByteBuffer > pTmpBuf(StringUtil::StringToUtf8N(tmpStr));
741 newIndex = pTmpBuf->GetLimit() - 1;
745 // SysLogExceptionxception("[%s] Regex: StringToUtf8Index failed", GetErrorMessage(r));
// Converts a byte offset in the UTF-8 buffer `pUtf8String` into a character index.
// It temporarily terminates the buffer at `index`, decodes that prefix into a
// String and takes its length.
// NOTE(review): the buffer is modified in place and restored afterwards, so it
// must be writable. Any guard against a negative or out-of-range `index` is on
// lines not visible in this excerpt; confirm, since Match() may pass -1.
749 _RegularExpressionImpl::Utf8ToStringIndex(char* pUtf8String, int index)
// Save the byte that is about to be overwritten with the temporary terminator.
760 int byteAtIndex = pUtf8String[index];
761 pUtf8String[index] = '\0';
763 result r = StringUtil::Utf8ToString(pUtf8String, tmpStr);
766 newIndex = tmpStr.GetLength();
770 // SysLogExceptionxception("[%s] Regex: Utf8ToStringIndex failed", GetErrorMessage(r));
// Restore the original byte.
773 pUtf8String[index] = byteAtIndex;
// Scans the UTF-8 bytes of the `rewrite` template byte by byte and inspects digit
// characters. Rewrite() uses the return value as the highest group index it has
// to substitute.
// NOTE(review): most of the logic (what precedes the digit test, presumably a
// backslash check, and how the result is computed) is not visible in this
// excerpt. No null check on pTmpBuf is visible before it is dereferenced.
780 _RegularExpressionImpl::CheckRewritePattern(const Tizen::Base::String& rewrite)
783 result r = E_SUCCESS;
785 std::unique_ptr< ByteBuffer > pTmpBuf(StringUtil::StringToUtf8N(rewrite));
788 int limit = pTmpBuf->GetLimit();
// Each successful GetByte() call is relied on to advance the buffer position.
789 while (limit > pTmpBuf->GetPosition())
791 if (IsFailed(pTmpBuf->GetByte(c)))
798 if (IsFailed(pTmpBuf->GetByte(c)))
804 if ((c >= '0') && (c <= '9'))
// Expands the `rewrite` template for the last match on `text` and delivers the
// result through `out`. Group references in the template are replaced by the
// corresponding matched group strings.
// NOTE(review): several conditions, the initial value of `tmp1` and the final
// append to `out` are not visible in this excerpt.
826 _RegularExpressionImpl::Rewrite(const String& rewrite, const String& text, String& out)
828 result r = E_UNKNOWN;
831 ArrayList matchedGrpStrList;
// Highest group index referenced by the template.
833 maxCount = CheckRewritePattern(rewrite);
// Shortcut path that substitutes only "\0" (the whole match); the condition
// selecting it is not visible here.
848 String matchedStr = GetLastMatchedString(text);
850 return out.Replace("\\0", matchedStr);
// General path: work on a copy of the template and substitute group by group.
853 String out1(rewrite);
854 r = matchedGrpStrList.Construct(REGEX_MAX_GROUP_COUNT);
860 r = GetLastMatchedGroups(text, matchedGrpStrList);
// The template must not reference a group that the match did not produce.
861 if (maxCount >= matchedGrpStrList.GetCount())
868 for (index = 0; index <= maxCount; index++)
870 String* pTemp = static_cast<String*>(matchedGrpStrList.GetAt(index));
// NOTE(review): substitution is a plain string replace in ascending index order.
// If tmp1 is "\" followed by the index, replacing "\1" also rewrites the first
// two characters of "\10".."\19". Confirm whether two-digit references are
// meant to be supported.
874 r = tmp1.Append(index);
877 r = out1.Replace(tmp1, *pTemp);
// Clears the list and deallocates the group strings it holds.
894 matchedGrpStrList.RemoveAll(true);
// Returns the start offset of the last match: entry 0 of the offset array of the
// expression variant used by the last Match() call.
// NOTE(review): a null check on pRegEx / pOffsets is presumably on lines not
// visible in this excerpt. Confirm.
900 _RegularExpressionImpl::GetLastMatchStart(void)
902 RegExInfo* pRegEx = static_cast<RegExInfo*>((__isLastMatchPartial) ? __pRePartial : __pReFull);
905 return pRegEx->pOffsets[0];
// Returns the end offset of the last match: entry 1 of the offset array of the
// expression variant used by the last Match() call.
// NOTE(review): a null check on pRegEx / pOffsets is presumably on lines not
// visible in this excerpt. Confirm.
912 _RegularExpressionImpl::GetLastMatchEnd(void)
914 RegExInfo* pRegEx = static_cast<RegExInfo*>((__isLastMatchPartial) ? __pRePartial : __pReFull);
917 return pRegEx->pOffsets[1];
// Extracts the substring of `text` covered by the whole last match. The substring
// is taken only when a compiled expression exists and both offsets of the match
// are set (greater than -1).
// NOTE(review): pOffsets itself is not checked for null on the visible lines.
924 _RegularExpressionImpl::GetLastMatchedString(const String& text)
927 result r = E_SUCCESS;
928 RegExInfo* pRegEx = static_cast<RegExInfo*>((__isLastMatchPartial) ? __pRePartial : __pReFull);
929 if (pRegEx && pRegEx->pOffsets[0] > -1 && pRegEx->pOffsets[1] > -1)
931 r = text.SubString(pRegEx->pOffsets[0], pRegEx->pOffsets[1] - pRegEx->pOffsets[0], out);
// Maps an error code reported by pcre_compile2() to a system result code. The
// visible outcomes are E_OUT_OF_MEMORY and E_INVALID_ARG.
// NOTE(review): which PCRE codes select which result is not visible in this
// excerpt.
939 _RegularExpressionImpl::PcreCompileErrorToSystemError(int compileErr)
949 return E_OUT_OF_MEMORY;
952 return E_INVALID_ARG;
// Maps a negative return code of pcre_exec() to a system result code.
// NOTE(review): the return statements for most case labels are not visible in
// this excerpt. The visible ones show that the group ending at
// PCRE_ERROR_BADNEWLINE yields E_INVALID_ARG and that PCRE_ERROR_NOMEMORY yields
// E_OUT_OF_MEMORY.
957 _RegularExpressionImpl::PcreExecErrorToSystemError(int err)
961 case PCRE_ERROR_NOMATCH:
964 // case PCRE_ERROR_NULL:
965 case PCRE_ERROR_NOSUBSTRING:
967 case PCRE_ERROR_MATCHLIMIT:
969 case PCRE_ERROR_CALLOUT:
971 case PCRE_ERROR_PARTIAL:
973 case PCRE_ERROR_BADPARTIAL:
975 case PCRE_ERROR_INTERNAL:
977 case PCRE_ERROR_BADCOUNT:
979 case PCRE_ERROR_DFA_UITEM:
981 case PCRE_ERROR_DFA_UCOND:
983 case PCRE_ERROR_DFA_UMLIMIT:
985 case PCRE_ERROR_DFA_WSSIZE:
987 case PCRE_ERROR_DFA_RECURSE:
989 case PCRE_ERROR_RECURSIONLIMIT:
991 case PCRE_ERROR_NULLWSLIMIT:
994 case PCRE_ERROR_NULL:
996 case PCRE_ERROR_BADOPTION:
998 case PCRE_ERROR_BADMAGIC:
1000 case PCRE_ERROR_UNKNOWN_OPCODE:
1002 // case PCRE_ERROR_UNKNOWN_NODE:
1003 case PCRE_ERROR_BADUTF8:
1005 case PCRE_ERROR_BADUTF8_OFFSET:
1007 case PCRE_ERROR_BADNEWLINE:
1008 // case PCRE_ERROR_BADOFFSET:
1009 // case PCRE_ERROR_SHORTUTF8:
// Malformed input or misuse of the API is reported as an invalid argument.
1010 return E_INVALID_ARG;
1012 case PCRE_ERROR_NOMEMORY:
1013 return E_OUT_OF_MEMORY;