From: Tomas Mlcoch
Date: Mon, 25 Mar 2013 14:06:59 +0000 (+0100)
Subject: Convert all strings to UTF-8 while dumping XML. (related RhBug: 920795)
X-Git-Tag: upstream/0.2.1~243
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6baf36aeee3ce8018103667f1f1a0fa9260a7713;p=services%2Fcreaterepo_c.git

Convert all strings to UTF-8 while dumping XML. (related RhBug: 920795)
---

diff --git a/src/xml_dump.c b/src/xml_dump.c
index 16148d1..a230ea0 100644
--- a/src/xml_dump.c
+++ b/src/xml_dump.c
@@ -43,6 +43,91 @@ cr_xml_dump_cleanup()
 
 
 void
+cr_latin1_to_utf8(const unsigned char *in, unsigned char *out)
+{
+    // http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c/4059934#4059934
+    // Converts the NUL-terminated latin1 (iso-8859-1) string "in" to UTF-8.
+    // "out" must have room for 2 * strlen(in) + 1 bytes (each input byte
+    // yields at most two output bytes); the result is NUL-terminated.
+    // The function keeps no state, it only reads "in" and writes "out".
+    while (*in) {
+        if (*in < 0x80) {
+            // Plain ASCII is encoded identically in UTF-8.
+            *out++ = *in++;
+        } else if (*in < 0xa0) {
+            // Found a latin1 (iso-8859-1) C1 control code (0x80-0x9f).
+            // The string is probably misencoded cp-1252 and not a real latin1.
+            // Just skip this character.
+            in++;
+        } else {
+            // 0xa0-0xbf -> 0xc2 0xXX, 0xc0-0xff -> 0xc3 0xXX.
+            *out++ = 0xc2 + (*in > 0xbf);
+            *out++ = (*in++ & 0x3f) + 0x80;
+        }
+    }
+    *out = '\0';
+}
+
+/* Wrapper over libxml2 xmlNewTextChild that accepts any content:
+ * NULL is written as an empty string and content that is not valid UTF-8
+ * is assumed to be iso-8859-1 and converted to UTF-8 first. */
+xmlNodePtr
+cr_xmlNewTextChild(xmlNodePtr parent,
+                   xmlNsPtr ns,
+                   const xmlChar *name,
+                   const xmlChar *orig_content)
+{
+    xmlChar *converted = NULL;  // Set only when a conversion was needed
+    const xmlChar *content;
+    xmlNodePtr child;
+
+    if (!orig_content) {
+        content = BAD_CAST "";
+    } else if (xmlCheckUTF8(orig_content)) {
+        content = orig_content;
+    } else {
+        // Worst case: every latin1 byte becomes two UTF-8 bytes.
+        size_t len = strlen((const char *) orig_content);
+        converted = g_malloc(len * 2 + 1);
+        cr_latin1_to_utf8(orig_content, converted);
+        content = converted;
+    }
+
+    child = xmlNewTextChild(parent, ns, name, content);
+    g_free(converted);  // No-op when nothing was converted
+
+    return child;
+}
+
+/* Wrapper over libxml2 xmlNewProp with the same content handling as
+ * cr_xmlNewTextChild: NULL becomes "" and non UTF-8 content is converted
+ * from iso-8859-1 to UTF-8. */
+xmlAttrPtr
+cr_xmlNewProp(xmlNodePtr node, const xmlChar *name, const xmlChar *orig_content)
+{
+    xmlChar *converted = NULL;  // Set only when a conversion was needed
+    const xmlChar *content;
+    xmlAttrPtr attr;
+
+    if (!orig_content) {
+        content = BAD_CAST "";
+    } else if (xmlCheckUTF8(orig_content)) {
+        content = orig_content;
+    } else {
+        // Worst case: every latin1 byte becomes two UTF-8 bytes.
+        size_t len = strlen((const char *) orig_content);
+        converted = g_malloc(len * 2 + 1);
+        cr_latin1_to_utf8(orig_content, converted);
+        content = converted;
+    }
+
+    attr = xmlNewProp(node, name, content);
+    g_free(converted);  // No-op when nothing was converted
+
+    return attr;
+}
+
+void
 cr_xml_dump_files(xmlNodePtr node, cr_Package *package, int primary)
 {
     if (!node || !package->files) {
@@ -85,15 +170,15 @@ cr_xml_dump_files(xmlNodePtr node, cr_Package *package, int primary)
         // ************************************
 
         xmlNodePtr file_node;
-        file_node = xmlNewTextChild(node,
-                                    NULL,
-                                    BAD_CAST "file",
-                                    BAD_CAST fullname);
+        file_node = cr_xmlNewTextChild(node,
+                                       NULL,
+                                       BAD_CAST "file",
+                                       BAD_CAST fullname);
         g_free(fullname);
 
         // Write type (skip type if type value is empty of "file")
         if (entry->type && entry->type[0] != '\0' && strcmp(entry->type, "file")) {
-            xmlNewProp(file_node, BAD_CAST "type", BAD_CAST entry->type);
+            cr_xmlNewProp(file_node, BAD_CAST "type", BAD_CAST entry->type);
         }
     }
 }
diff --git 
a/src/xml_dump.h b/src/xml_dump.h index 4f65a19..128d181 100644 --- a/src/xml_dump.h +++ b/src/xml_dump.h @@ -105,6 +105,13 @@ char *cr_xml_dump_other(cr_Package *package); */ struct cr_XmlStruct cr_xml_dump(cr_Package *package); +/** Prepare string to xml dump. + * If string is not utf8 it is converted (source encoding is supposed to be + * iso-8859-1). + * TODO + */ +void cr_latin1_to_utf8(const unsigned char *in, unsigned char *out); + /** @} */ #ifdef __cplusplus diff --git a/src/xml_dump_filelists.c b/src/xml_dump_filelists.c index ef89aa7..9150e87 100644 --- a/src/xml_dump_filelists.c +++ b/src/xml_dump_filelists.c @@ -40,16 +40,13 @@ cr_xml_dump_filelists_items(xmlNodePtr root, cr_Package *package) ************************************/ // Add pkgid attribute - xmlNewProp(root, BAD_CAST "pkgid", - BAD_CAST ((package->pkgId) ? package->pkgId : "")); + cr_xmlNewProp(root, BAD_CAST "pkgid", BAD_CAST package->pkgId); // Add name attribute - xmlNewProp(root, BAD_CAST "name", - BAD_CAST ((package->name) ? package->name : "")); + cr_xmlNewProp(root, BAD_CAST "name", BAD_CAST package->name); // Add arch attribute - xmlNewProp(root, BAD_CAST "arch", - BAD_CAST ((package->arch) ? package->arch : "")); + cr_xmlNewProp(root, BAD_CAST "arch", BAD_CAST package->arch); /*********************************** @@ -62,16 +59,13 @@ cr_xml_dump_filelists_items(xmlNodePtr root, cr_Package *package) version = xmlNewChild(root, NULL, BAD_CAST "version", NULL); // Write version attribute epoch - xmlNewProp(version, BAD_CAST "epoch", - BAD_CAST ((package->epoch) ? package->epoch : "")); + cr_xmlNewProp(version, BAD_CAST "epoch", BAD_CAST package->epoch); // Write version attribute ver - xmlNewProp(version, BAD_CAST "ver", - BAD_CAST ((package->version) ? package->version : "")); + cr_xmlNewProp(version, BAD_CAST "ver", BAD_CAST package->version); // Write version attribute rel - xmlNewProp(version, BAD_CAST "rel", - BAD_CAST ((package->release) ? 
package->release : "")); + cr_xmlNewProp(version, BAD_CAST "rel", BAD_CAST package->release); // Files dump diff --git a/src/xml_dump_internal.h b/src/xml_dump_internal.h index c0dd40c..4a0c7d1 100644 --- a/src/xml_dump_internal.h +++ b/src/xml_dump_internal.h @@ -25,6 +25,7 @@ extern "C" { #endif #include "package.h" +#include /** * Dump files from the package and append them to the node as childrens. @@ -35,6 +36,21 @@ extern "C" { */ void cr_xml_dump_files(xmlNodePtr node, cr_Package *package, int primary); +/** + * Createrepo wrapper over libxml xmlNewTextChild. + * It allows content to be NULL and non UTF-8 (if content is no UTF8 + * then iso-8859-1 is assumed). + */ +xmlNodePtr cr_xmlNewTextChild(xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); + +/** TODO */ +xmlAttrPtr cr_xmlNewProp(xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); + #ifdef __cplusplus } #endif diff --git a/src/xml_dump_other.c b/src/xml_dump_other.c index a73bb81..74f41dc 100644 --- a/src/xml_dump_other.c +++ b/src/xml_dump_other.c @@ -23,9 +23,11 @@ #include #include #include +#include #include "logging.h" #include "package.h" #include "xml_dump.h" +#include "xml_dump_internal.h" #define FORMAT_XML 1 #define FORMAT_LEVEL 0 @@ -55,15 +57,13 @@ cr_xml_dump_other_changelog(xmlNodePtr root, cr_Package *package) xmlNodePtr changelog; // Add changelog element - changelog = xmlNewTextChild(root, - NULL, - BAD_CAST "changelog", - BAD_CAST ((entry->changelog) ? entry->changelog : "")); + changelog = cr_xmlNewTextChild(root, + NULL, + BAD_CAST "changelog", + BAD_CAST entry->changelog); // Write param author - xmlNewProp(changelog, - BAD_CAST "author", - BAD_CAST ((entry->author) ? 
entry->author : "")); + cr_xmlNewProp(changelog, BAD_CAST "author", BAD_CAST entry->author); // Write param date char date_str[DATE_MAX_LEN]; @@ -81,16 +81,13 @@ cr_xml_dump_other_items(xmlNodePtr root, cr_Package *package) ************************************/ // Add pkgid attribute - xmlNewProp(root, BAD_CAST "pkgid", - BAD_CAST ((package->pkgId) ? package->pkgId : "")); + cr_xmlNewProp(root, BAD_CAST "pkgid", BAD_CAST package->pkgId); // Add name attribute - xmlNewProp(root, BAD_CAST "name", - BAD_CAST ((package->name) ? package->name : "")); + cr_xmlNewProp(root, BAD_CAST "name", BAD_CAST package->name); // Add arch attribute - xmlNewProp(root, BAD_CAST "arch", - BAD_CAST ((package->arch) ? package->arch : "")); + cr_xmlNewProp(root, BAD_CAST "arch", BAD_CAST package->arch); /*********************************** @@ -103,16 +100,13 @@ cr_xml_dump_other_items(xmlNodePtr root, cr_Package *package) version = xmlNewChild(root, NULL, BAD_CAST "version", NULL); // Write version attribute epoch - xmlNewProp(version, BAD_CAST "epoch", - BAD_CAST ((package->epoch) ? package->epoch : "")); + cr_xmlNewProp(version, BAD_CAST "epoch", BAD_CAST package->epoch); // Write version attribute ver - xmlNewProp(version, BAD_CAST "ver", - BAD_CAST ((package->version) ? package->version : "")); + cr_xmlNewProp(version, BAD_CAST "ver", BAD_CAST package->version); // Write version attribute rel - xmlNewProp(version, BAD_CAST "rel", - BAD_CAST ((package->release) ? 
package->release : "")); + cr_xmlNewProp(version, BAD_CAST "rel", BAD_CAST package->release); // Changelog dump @@ -142,6 +136,8 @@ cr_xml_dump_other(cr_Package *package) return NULL; } // Seems to be little bit faster than xmlDocDumpFormatMemory +// xmlSaveCtxtPtr savebuf = xmlSaveToBuffer(buf, NULL, XML_SAVE_FORMAT|XML_SAVE_NO_DECL); +// xmlSaveTree(savebuf, root); xmlNodeDump(buf, NULL, root, FORMAT_LEVEL, FORMAT_XML); assert(buf->content); result = g_strndup((char *) buf->content, (buf->use+1)); diff --git a/src/xml_dump_primary.c b/src/xml_dump_primary.c index b141526..50390e0 100644 --- a/src/xml_dump_primary.c +++ b/src/xml_dump_primary.c @@ -93,27 +93,27 @@ cr_xml_dump_primary_dump_pco(xmlNodePtr root, cr_Package *package, int pcotype) /*********************************** - Element: entry + Element: entry ************************************/ xmlNodePtr entry_node; entry_node = xmlNewChild(pcor_node, NULL, BAD_CAST "rpm:entry", NULL); - xmlNewProp(entry_node, BAD_CAST "name", BAD_CAST entry->name); + cr_xmlNewProp(entry_node, BAD_CAST "name", BAD_CAST entry->name); if (entry->flags && entry->flags[0] != '\0') { - xmlNewProp(entry_node, BAD_CAST "flags", BAD_CAST entry->flags); + cr_xmlNewProp(entry_node, BAD_CAST "flags", BAD_CAST entry->flags); if (entry->epoch && entry->epoch[0] != '\0') { - xmlNewProp(entry_node, BAD_CAST "epoch", BAD_CAST entry->epoch); + cr_xmlNewProp(entry_node, BAD_CAST "epoch", BAD_CAST entry->epoch); } if (entry->version && entry->version[0] != '\0') { - xmlNewProp(entry_node, BAD_CAST "ver", BAD_CAST entry->version); + cr_xmlNewProp(entry_node, BAD_CAST "ver", BAD_CAST entry->version); } if (entry->release && entry->release[0] != '\0') { - xmlNewProp(entry_node, BAD_CAST "rel", BAD_CAST entry->release); + cr_xmlNewProp(entry_node, BAD_CAST "rel", BAD_CAST entry->release); } } @@ -141,16 +141,14 @@ cr_xml_dump_primary_base_items(xmlNodePtr root, cr_Package *package) Element: name ************************************/ - 
xmlNewTextChild(root, NULL, BAD_CAST "name", - BAD_CAST ((package->name) ? package->name : "")); + cr_xmlNewTextChild(root, NULL, BAD_CAST "name", BAD_CAST package->name); /*********************************** Element: arch ************************************/ - xmlNewTextChild(root, NULL, BAD_CAST "arch", - BAD_CAST ((package->arch) ? package->arch : "")); + cr_xmlNewTextChild(root, NULL, BAD_CAST "arch", BAD_CAST package->arch); /*********************************** @@ -163,16 +161,13 @@ cr_xml_dump_primary_base_items(xmlNodePtr root, cr_Package *package) version = xmlNewChild(root, NULL, BAD_CAST "version", NULL); // Write version attribute epoch - xmlNewProp(version, BAD_CAST "epoch", - BAD_CAST ((package->epoch) ? package->epoch : "")); + cr_xmlNewProp(version, BAD_CAST "epoch", BAD_CAST package->epoch); // Write version attribute ver - xmlNewProp(version, BAD_CAST "ver", - BAD_CAST ((package->version) ? package->version : "")); + cr_xmlNewProp(version, BAD_CAST "ver", BAD_CAST package->version); // Write version attribute rel - xmlNewProp(version, BAD_CAST "rel", - BAD_CAST ((package->release) ? package->release : "")); + cr_xmlNewProp(version, BAD_CAST "rel", BAD_CAST package->release); /*********************************** @@ -181,12 +176,13 @@ cr_xml_dump_primary_base_items(xmlNodePtr root, cr_Package *package) xmlNodePtr checksum; - checksum = xmlNewTextChild(root, NULL, BAD_CAST "checksum", - BAD_CAST ((package->pkgId) ? package->pkgId : "")); + checksum = cr_xmlNewTextChild(root, + NULL, + BAD_CAST "checksum", + BAD_CAST package->pkgId); // Write checksum attribute checksum_type - xmlNewProp(checksum, BAD_CAST "type", - BAD_CAST ((package->checksum_type) ? 
package->checksum_type : "")); + cr_xmlNewProp(checksum, BAD_CAST "type", BAD_CAST package->checksum_type); // Write checksum attribute pkgid xmlNewProp(checksum, BAD_CAST "pkgid", BAD_CAST "YES"); @@ -196,32 +192,31 @@ cr_xml_dump_primary_base_items(xmlNodePtr root, cr_Package *package) Element: summary ************************************/ - xmlNewTextChild(root, NULL, BAD_CAST "summary", - BAD_CAST ((package->summary) ? package->summary : "")); + cr_xmlNewTextChild(root, NULL, BAD_CAST "summary", + BAD_CAST package->summary); /*********************************** Element: description ************************************/ - xmlNewTextChild(root, NULL, BAD_CAST "description", - BAD_CAST ((package->description) ? package->description : "")); + cr_xmlNewTextChild(root, NULL, BAD_CAST "description", + BAD_CAST package->description); /*********************************** Element: packager ************************************/ - xmlNewTextChild(root, NULL, BAD_CAST "packager", - BAD_CAST ((package->rpm_packager) ? package->rpm_packager : "")); + cr_xmlNewTextChild(root, NULL, BAD_CAST "packager", + BAD_CAST package->rpm_packager); /*********************************** Element: url ************************************/ - xmlNewTextChild(root, NULL, BAD_CAST "url", - BAD_CAST ((package->url) ? package->url : "")); + cr_xmlNewTextChild(root, NULL, BAD_CAST "url", BAD_CAST package->url); /*********************************** @@ -279,14 +274,13 @@ cr_xml_dump_primary_base_items(xmlNodePtr root, cr_Package *package) // Write location attribute base if (package->location_base && package->location_base[0] != '\0') { - xmlNewProp(location, - BAD_CAST "xml:base", - BAD_CAST package->location_base); + cr_xmlNewProp(location, + BAD_CAST "xml:base", + BAD_CAST package->location_base); } // Write location attribute href - xmlNewProp(location, BAD_CAST "href", - BAD_CAST ((package->location_href) ? 
package->location_href : "")); + cr_xmlNewProp(location, BAD_CAST "href", BAD_CAST package->location_href); /*********************************** @@ -302,40 +296,40 @@ cr_xml_dump_primary_base_items(xmlNodePtr root, cr_Package *package) Element: license ************************************/ - xmlNewTextChild(format, NULL, BAD_CAST "rpm:license", - BAD_CAST ((package->rpm_license) ? package->rpm_license : "")); + cr_xmlNewTextChild(format, NULL, BAD_CAST "rpm:license", + BAD_CAST package->rpm_license); /*********************************** Element: vendor ************************************/ - xmlNewTextChild(format, NULL, BAD_CAST "rpm:vendor", - BAD_CAST ((package->rpm_vendor) ? package->rpm_vendor : "")); + cr_xmlNewTextChild(format, NULL, BAD_CAST "rpm:vendor", + BAD_CAST package->rpm_vendor); /*********************************** Element: group ************************************/ - xmlNewTextChild(format, NULL, BAD_CAST "rpm:group", - BAD_CAST ((package->rpm_group) ? package->rpm_group : "")); + cr_xmlNewTextChild(format, NULL, BAD_CAST "rpm:group", + BAD_CAST package->rpm_group); /*********************************** Element: buildhost ************************************/ - xmlNewTextChild(format, NULL, BAD_CAST "rpm:buildhost", - BAD_CAST ((package->rpm_buildhost) ? package->rpm_buildhost : "")); + cr_xmlNewTextChild(format, NULL, BAD_CAST "rpm:buildhost", + BAD_CAST package->rpm_buildhost); /*********************************** Element: sourcerpm ************************************/ - xmlNewTextChild(format, NULL, BAD_CAST "rpm:sourcerpm", - BAD_CAST ((package->rpm_sourcerpm) ? package->rpm_sourcerpm : "")); + cr_xmlNewTextChild(format, NULL, BAD_CAST "rpm:sourcerpm", + BAD_CAST package->rpm_sourcerpm); /***********************************