From: Pawel Andruszkiewicz Date: Tue, 21 Jul 2015 13:27:00 +0000 (+0200) Subject: [Messaging] Convert contents of emails to UTF8. X-Git-Tag: submit/tizen_tv/20150803.021740^2^2~43 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8806d6fb214acb83406437df6728567372d2db31;p=platform%2Fcore%2Fapi%2Fwebapi-plugins.git [Messaging] Convert contents of emails to UTF8. [Verification] Automatic TCT pass rate: 100% Change-Id: Id62820c09178ce0404d36b804228128ca52cf406 Signed-off-by: Pawel Andruszkiewicz --- diff --git a/src/messaging/message_body.cc b/src/messaging/message_body.cc index d9f05d24..9c843f72 100755 --- a/src/messaging/message_body.cc +++ b/src/messaging/message_body.cc @@ -115,7 +115,7 @@ PlatformResult MessageBody::updateBody(email_mail_data_t& mail) setLoaded(mail.body_download_status); if (mail.file_path_plain) { - LoggerD("Plain body"); + SLoggerD("Plain body: %s", mail.file_path_plain); std::string result = ""; PlatformResult ret = MessagingUtil::loadFileContentToString(mail.file_path_plain, &result); @@ -128,6 +128,7 @@ PlatformResult MessageBody::updateBody(email_mail_data_t& mail) } if (mail.file_path_html) { + SLoggerD("HTML body: %s", mail.file_path_html); std::string result = ""; PlatformResult ret = MessagingUtil::loadFileContentToString(mail.file_path_html, &result); diff --git a/src/messaging/messaging_util.cc b/src/messaging/messaging_util.cc index 99014e00..7122a153 100755 --- a/src/messaging/messaging_util.cc +++ b/src/messaging/messaging_util.cc @@ -16,6 +16,8 @@ #include "messaging_util.h" +#include + #include #include #include @@ -290,7 +292,7 @@ PlatformResult MessagingUtil::loadFileContentToString(const std::string& file_pa outString.assign((std::istreambuf_iterator(input_file)), std::istreambuf_iterator()); input_file.close(); - *result = outString; + *result = ConvertToUtf8(file_path, outString); } else { std::stringstream ss_error_msg; ss_error_msg << "Failed to open file: " << file_path; @@ -299,6 +301,112 @@ PlatformResult MessagingUtil::loadFileContentToString(const std::string& file_pa return PlatformResult(ErrorCode::NO_ERROR); } +namespace { + +std::string GetFilename(const std::string& file_path) { + LoggerD("Entered"); + const auto start = file_path.find_last_of("/\\"); + const auto basename = file_path.substr(std::string::npos == start ? 0 : start + 1); + return basename.substr(0, basename.find_last_of(".")); +} + +} // namespace + +std::string MessagingUtil::ConvertToUtf8(const std::string& file_path, const std::string& contents) { + LoggerD("Entered"); + + // in case of messages, encoding of the file contents is stored as its filename + // is case of draft messages, it is not... + std::string encoding = GetFilename(file_path); + + LoggerD("encoding: %s", encoding.c_str()); + + // implementation taken from apps/home/email.git, + // file Project-Files/common/src/email-utils.c + + gchar* from_charset = g_ascii_strup(encoding.c_str(), -1); + + if (0 == g_ascii_strcasecmp(from_charset, "KS_C_5601-1987")) { + // "ks_c_5601-1987" is not an encoding name. It's just a charset. + // There's no code page on IANA for "ks_c_5601-1987". + // So we should convert this to encoding name "EUC-KR" + // CP949 is super set of EUC-KR, we use CP949 first + LoggerD("change: KS_C_5601-1987 ===> CP949"); + g_free(from_charset); + from_charset = g_strdup("CP949"); + } else if (0 == g_ascii_strcasecmp(from_charset, "ISO-2022-JP")) { + // iso-2022-jp-2 is a superset of iso-2022-jp. In some email, + // iso-2022-jp is not converted to utf8 correctly. So in this case, + // we use iso-2022-jp-2 instead. + LoggerD("change: ISO-2022-JP ===> ISO-2022-JP-2"); + g_free(from_charset); + from_charset = g_strdup("ISO-2022-JP-2"); + } + + std::string output; + + // if charset is unknown or it's UTF-8, conversion is not needed + if ((0 != g_ascii_strcasecmp(from_charset, UNKNOWN_CHARSET_PLAIN_TEXT_FILE)) && + (0 != g_ascii_strcasecmp(from_charset, "UTF-8"))) { + LoggerD("performing conversion"); + + GError* error = nullptr; + const gchar* to_charset = "UTF-8//IGNORE"; // convert to UTF-8, ignore unknown characters + + gchar* result = g_convert(contents.c_str(), // the string to convert + -1, // string is null terminated + to_charset, // target encoding + from_charset, // source encoding + nullptr, // ignore bytes read + nullptr, // ignore bytes written + &error); // store error + if ((nullptr == result || nullptr != error) && + 0 == g_ascii_strcasecmp(from_charset, "CP949")) { + if (nullptr != error) { + g_error_free(error); + } + + if (nullptr != result) { + g_free(result); + } + + LoggerD("change: CP949 ===> EUC-KR, try again"); + result = g_convert(contents.c_str(), // the string to convert + -1, // string is null terminated + to_charset, // target encoding + "EUC-KR", // source encoding + nullptr, // ignore bytes read + nullptr, // ignore bytes written + &error); // store error + } + + if (nullptr == result || nullptr != error) { + LoggerE("g_convert() failed!"); + if (nullptr != error) { + LoggerE("error_code: [%d], msg: [%s]", error->code, error->message); + g_error_free(error); + } + + if (nullptr != result) { + g_free(result); + } + + // conversion failed, use original contents + output = contents; + } else { + output = result; + g_free(result); + } + } else { + // no conversion + output = contents; + } + + g_free(from_charset); + + return output; +} + std::string MessagingUtil::messageStatusToString(MessageStatus status) { LoggerD("Converting MessageStatus %d to string.", (int)status); switch(status) { diff --git a/src/messaging/messaging_util.h b/src/messaging/messaging_util.h index c1fe5a5b..e5901e40 100755 --- a/src/messaging/messaging_util.h +++ b/src/messaging/messaging_util.h @@ -185,6 +185,8 @@ private: tizen::AbstractFilterPtr* result); static common::PlatformResult jsonFilterToCompositeFilter(const picojson::object& json, tizen::AbstractFilterPtr* result); + + static std::string ConvertToUtf8(const std::string& file_path, const std::string& contents); }; enum PostPriority {