From: Pawel Andruszkiewicz <p.andruszkie@samsung.com>
Date: Fri, 31 Jul 2015 08:51:00 +0000 (+0200)
Subject: [MessagingEmail] Remove malformed UTF-8 characters.
X-Git-Tag: submit/tizen/20150803.122237^2^2^2^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0c484320ff758419b8a9a68abc9a8f6047916dc4;p=platform%2Fcore%2Fapi%2Fwebapi-plugins.git

[MessagingEmail] Remove malformed UTF-8 characters.

[Verification] TCT pass rate: 100%

Change-Id: I5aff8ec8baf16902473967e3d077f42e581043dc
Signed-off-by: Pawel Andruszkiewicz <p.andruszkie@samsung.com>
---

diff --git a/src/messaging/message_conversation.cc b/src/messaging/message_conversation.cc
index 5e9fbf1a..b7db3208 100755
--- a/src/messaging/message_conversation.cc
+++ b/src/messaging/message_conversation.cc
@@ -187,7 +187,7 @@ PlatformResult MessageConversation::convertMsgConversationToObject(
 
     msg_get_str_value(msg_thread, MSG_THREAD_MSG_DATA_STR, msgData, MAX_THREAD_DATA_LEN);
 
-    conversation->m_preview = msgData;
+    conversation->setPreview(msgData);
 
     err = msg_get_conversation_view_list(handle, conversation->m_conversation_id,
                                          &convViewList);
@@ -265,7 +265,7 @@ PlatformResult MessageConversation::convertMsgConversationToObject(
     char strTemp[MAX_SUBJECT_LEN] = {0};
     msg_get_str_value(msgInfo, MSG_MESSAGE_SUBJECT_STR, strTemp, MAX_SUBJECT_LEN);
 
-    conversation->m_conversation_subject = strTemp;
+    conversation->setSubject(strTemp);
 
     *result = conversation;
     return PlatformResult(ErrorCode::NO_ERROR);
@@ -335,12 +335,12 @@ PlatformResult MessageConversation::convertEmailConversationToObject(
 
         if (resultMail->preview_text[0] != '\0')
         {
-            conversation->m_preview = resultMail->preview_text;
+            conversation->setPreview(resultMail->preview_text);
         }
 
         if (resultMail->subject[0] != '\0')
         {
-            conversation->m_conversation_subject = resultMail->subject;
+            conversation->setSubject(resultMail->subject);
         }
 
         conversation->m_is_read = (bool)resultMail->flags_seen_field;
@@ -413,12 +413,12 @@ void MessageConversation::setUnreadMessages(int unread_messages)
 
 void MessageConversation::setPreview(std::string preview)
 {
-    m_preview = preview;
+    m_preview = SanitizeUtf8String(preview);  // keep only the part g_utf8_validate() accepts
 }
 
 void MessageConversation::setSubject(std::string conversation_subject)
 {
-    m_conversation_subject = conversation_subject;
+    m_conversation_subject = SanitizeUtf8String(conversation_subject);  // valid UTF-8 prefix only
 }
 
 void MessageConversation::setIsRead(bool is_read)
@@ -541,6 +541,19 @@ bool MessageConversation::isMatchingAttributeRange(const std::string& attribute_
     return false;
 }
 
+// Returns a copy of |input| cut off at the first byte g_utf8_validate() rejects.
+std::string MessageConversation::SanitizeUtf8String(const std::string& input) {
+  LoggerD("Entered");
+  std::string sanitized = input;
+  const gchar* invalid_at = nullptr;
+
+  // |invalid_at| points into sanitized's buffer, so the offset is the valid prefix length
+  while (!g_utf8_validate(sanitized.c_str(), -1, &invalid_at)) {
+    sanitized.resize(invalid_at - sanitized.c_str());
+  }
+  return sanitized;
+}
+
 }    //messaging
 }    //extension
 
diff --git a/src/messaging/message_conversation.h b/src/messaging/message_conversation.h
index f4934bb2..5a2dc423 100755
--- a/src/messaging/message_conversation.h
+++ b/src/messaging/message_conversation.h
@@ -97,6 +97,8 @@ public:
             tizen::AnyPtr end_value) const;
 
 private:
+    std::string SanitizeUtf8String(const std::string& input);
+
     int m_conversation_id;
     MessageType m_conversation_type;
     time_t m_timestamp;
diff --git a/src/messaging/messaging_util.cc b/src/messaging/messaging_util.cc
index 7122a153..258b9639 100755
--- a/src/messaging/messaging_util.cc
+++ b/src/messaging/messaging_util.cc
@@ -310,6 +310,110 @@ std::string GetFilename(const std::string& file_path) {
   return basename.substr(0, basename.find_last_of("."));
 }
 
+// Converts |input| from |from_charset| to UTF-8, letting iconv drop ("//IGNORE")
+// characters it cannot convert. Returns the converted text, or "" when iconv cannot
+// be opened or the conversion fails.
+std::string PerformConversion(const std::string& input, const gchar* from_charset) {
+  LoggerD("Entered");
+
+  GIConv cd = g_iconv_open("UTF-8//IGNORE", from_charset);
+  if ((GIConv)-1 == cd) {
+    LoggerE("Failed to open iconv.");
+    return "";
+  }
+
+  // copied from glib/gconvert.c, g_convert does not handle "//IGNORE" properly
+  static const gsize kNulTerminatorLength = 4;
+  const gchar* str = input.c_str();
+  gssize len = input.size();
+
+  gchar* p = const_cast<gchar*>(str);  // g_iconv() takes a non-const gchar**
+  gsize inbytes_remaining = len;
+  gsize outbuf_size = len + kNulTerminatorLength;
+  // the terminator bytes are reserved up front and never offered to g_iconv()
+  gsize outbytes_remaining = outbuf_size - kNulTerminatorLength;
+  gboolean have_error = FALSE;
+  gboolean done = FALSE;
+  gboolean reset = FALSE;
+  gchar* dest = static_cast<gchar*>(g_malloc(outbuf_size));
+  gchar* outp = dest;
+
+  while (!done && !have_error) {
+    gsize err = 0;
+
+    if (reset) {
+      err = g_iconv(cd, nullptr, &inbytes_remaining, &outp, &outbytes_remaining);
+    } else {
+      err = g_iconv(cd, &p, &inbytes_remaining, &outp, &outbytes_remaining);
+    }
+
+    if (static_cast<gsize>(-1) == err) {
+      switch (errno) {
+        case EINVAL:
+          LoggerD("EINVAL");
+          // Incomplete text, not flagged here; the post-loop check on |p| covers it
+          done = TRUE;
+          break;
+
+        case E2BIG:
+          {
+            LoggerD("E2BIG");
+            // output buffer is full: double it and resume at the same offset
+            gsize used = outp - dest;
+            outbuf_size *= 2;
+            dest = static_cast<gchar*>(g_realloc(dest, outbuf_size));
+            outp = dest + used;
+            outbytes_remaining = outbuf_size - used - kNulTerminatorLength;
+          }
+          break;
+
+        case EILSEQ:
+          if (0 == inbytes_remaining) {
+            LoggerD("EILSEQ reported, but whole input buffer was processed, assuming it's OK");
+          } else {
+            LoggerE("EILSEQ");
+            have_error = TRUE;
+          }
+          break;
+
+        default:
+          LoggerE("Conversion error: %d", errno);
+          have_error = TRUE;
+          break;
+      }
+    } else {
+      if (!reset) {
+        // call g_iconv with NULL inbuf to cleanup shift state
+        reset = TRUE;
+        inbytes_remaining = 0;
+      } else {
+        done = TRUE;
+      }
+    }
+  }
+
+  memset(outp, 0, kNulTerminatorLength);
+
+  // only blame a trailing partial sequence when nothing else has failed already
+  if (!have_error && (p - str) != len) {
+    LoggerE("Partial character sequence at end of input");
+    have_error = TRUE;
+  }
+
+  g_iconv_close(cd);
+
+  std::string result;
+  if (!have_error) {
+    result = dest;  // copies up to the first NUL byte
+  } else {
+    LoggerE("Conversion error");
+  }
+
+  g_free(dest);
+
+  return result;
+}
+
 }  // namespace
 
 std::string MessagingUtil::ConvertToUtf8(const std::string& file_path, const std::string& contents) {
@@ -345,57 +449,22 @@ std::string MessagingUtil::ConvertToUtf8(const std::string& file_path, const std
 
   std::string output;
 
-  // if charset is unknown or it's UTF-8, conversion is not needed
-  if ((0 != g_ascii_strcasecmp(from_charset, UNKNOWN_CHARSET_PLAIN_TEXT_FILE)) &&
-      (0 != g_ascii_strcasecmp(from_charset, "UTF-8"))) {
+  // if charset is unknown, conversion is not needed
+  if ((0 != g_ascii_strcasecmp(from_charset, UNKNOWN_CHARSET_PLAIN_TEXT_FILE))) {
+    // we're performing UTF-8 to UTF-8 conversion to remove malformed data
     LoggerD("performing conversion");
 
-    GError* error = nullptr;
-    const gchar* to_charset = "UTF-8//IGNORE";  // convert to UTF-8, ignore unknown characters
-
-    gchar* result = g_convert(contents.c_str(),  // the string to convert
-                              -1,  // string is null terminated
-                              to_charset,  // target encoding
-                              from_charset,  // source encoding
-                              nullptr,  // ignore bytes read
-                              nullptr,  // ignore bytes written
-                              &error);  // store error
-    if ((nullptr == result || nullptr != error) &&
-        0 == g_ascii_strcasecmp(from_charset, "CP949")) {
-      if (nullptr != error) {
-        g_error_free(error);
-      }
-
-      if (nullptr != result) {
-        g_free(result);
-      }
+    output = PerformConversion(contents, from_charset);
 
+    if ("" == output && 0 == g_ascii_strcasecmp(from_charset, "CP949")) {
       LoggerD("change: CP949 ===> EUC-KR, try again");
-      result = g_convert(contents.c_str(),  // the string to convert
-                         -1,  // string is null terminated
-                         to_charset,  // target encoding
-                         "EUC-KR",  // source encoding
-                         nullptr,  // ignore bytes read
-                         nullptr,  // ignore bytes written
-                         &error);  // store error
+      output = PerformConversion(contents, "EUC-KR");
     }
 
-    if (nullptr == result || nullptr != error) {
-      LoggerE("g_convert() failed!");
-      if (nullptr != error) {
-        LoggerE("error_code: [%d], msg: [%s]", error->code, error->message);
-        g_error_free(error);
-      }
-
-      if (nullptr != result) {
-        g_free(result);
-      }
-
+    if ("" == output) {
+      LoggerE("Conversion failed");
       // conversion failed, use original contents
       output = contents;
-    } else {
-      output = result;
-      g_free(result);
     }
   } else {
     // no conversion