source/test/letest/xmlreader.cpp

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4  *******************************************************************************
   5  *
   6  *   Copyright (C) 1999-2014, International Business Machines
   7  *   Corporation and others.  All Rights Reserved.
   8  *
   9  *******************************************************************************
  10  */
  11
  12 #include "unicode/utypes.h"
  13 #include "unicode/uclean.h"
  14 #include "unicode/uchar.h"
  15 #include "unicode/unistr.h"
  16 #include "unicode/uscript.h"
  17 #include "unicode/putil.h"
  18 #include "unicode/ctest.h"
  19
  20 #include "layout/LETypes.h"
  21 #include "layout/LEScripts.h"
  22
  23 #include "letsutil.h"
  24 #include "letest.h"
  25
  26 #include "xmlreader.h"
  27
  28 #include "xmlparser.h"
  29
  30 #include <stdlib.h>
  31 #include <stdio.h>
  32 #include <string.h>
  33
  34 //U_NAMESPACE_USE
  35
// U+002C COMMA: the code unit the array readers below search for when splitting a number list.
#define CH_COMMA 0x002C
  37
  38 static le_uint32 *getHexArray(const UnicodeString &numbers, int32_t &arraySize)
  39 {
  40     int32_t offset = -1;
  41
  42     arraySize = 1;
  43     while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
  44         arraySize += 1;
  45     }
  46
  47     le_uint32 *array = NEW_ARRAY(le_uint32, arraySize);
  48     char number[16];
  49     le_int32 count = 0;
  50     le_int32 start = 0, end = 0;
  51     le_int32 len = 0;
  52
  53     // trim leading whitespace
  54     while(u_isUWhiteSpace(numbers[start])) {
  55         start += 1;
  56     }
  57
  58     while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
  59         len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
  60         number[len] = '\0';
  61         start = end + 1;
  62
  63         sscanf(number, "%x", &array[count++]);
  64
  65         // trim whitespace following the comma
  66         while(u_isUWhiteSpace(numbers[start])) {
  67             start += 1;
  68         }
  69     }
  70
  71     // trim trailing whitespace
  72     end = numbers.length();
  73     while(u_isUWhiteSpace(numbers[end - 1])) {
  74         end -= 1;
  75     }
  76
  77     len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
  78     number[len] = '\0';
  79     sscanf(number, "%x", &array[count]);
  80
  81     return array;
  82 }
  83
  84 static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
  85 {
  86     int32_t offset = -1;
  87
  88     arraySize = 1;
  89     while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
  90         arraySize += 1;
  91     }
  92
  93     float *array = NEW_ARRAY(float, arraySize);
  94     char number[32];
  95     le_int32 count = 0;
  96     le_int32 start = 0, end = 0;
  97     le_int32 len = 0;
  98
  99     // trim leading whitespace
 100     while(u_isUWhiteSpace(numbers[start])) {
 101         start += 1;
 102     }
 103
 104     while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
 105         len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
 106         number[len] = '\0';
 107         start = end + 1;
 108
 109         sscanf(number, "%f", &array[count++]);
 110
 111         // trim whiteapce following the comma
 112         while(u_isUWhiteSpace(numbers[start])) {
 113             start += 1;
 114         }
 115     }
 116
 117     while(u_isUWhiteSpace(numbers[start])) {
 118         start += 1;
 119     }
 120
 121     // trim trailing whitespace
 122     end = numbers.length();
 123     while(u_isUWhiteSpace(numbers[end - 1])) {
 124         end -= 1;
 125     }
 126
 127     len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
 128     number[len] = '\0';
 129     sscanf(number, "%f", &array[count]);
 130
 131     return array;
 132 }
 133
 134 U_CDECL_BEGIN
 135 void readTestFile(const char *testFilePath, TestCaseCallback callback)
 136 {
 137 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
 138     UErrorCode status = U_ZERO_ERROR;
 139     UXMLParser  *parser = UXMLParser::createParser(status);
 140     UXMLElement *root   = parser->parseFile(testFilePath, status);
 141
 142     if (root == NULL) {
 143         log_err("Could not open the test data file: %s\n", testFilePath);
 144         delete parser;
 145         return;
 146     }
 147
 148     UnicodeString test_case        = UNICODE_STRING_SIMPLE("test-case");
 149     UnicodeString test_text        = UNICODE_STRING_SIMPLE("test-text");
 150     UnicodeString test_font        = UNICODE_STRING_SIMPLE("test-font");
 151     UnicodeString result_glyphs    = UNICODE_STRING_SIMPLE("result-glyphs");
 152     UnicodeString result_indices   = UNICODE_STRING_SIMPLE("result-indices");
 153     UnicodeString result_positions = UNICODE_STRING_SIMPLE("result-positions");
 154
 155     // test-case attributes
 156     UnicodeString id_attr     = UNICODE_STRING_SIMPLE("id");
 157     UnicodeString script_attr = UNICODE_STRING_SIMPLE("script");
 158     UnicodeString lang_attr   = UNICODE_STRING_SIMPLE("lang");
 159
 160     // test-font attributes
 161     UnicodeString name_attr   = UNICODE_STRING_SIMPLE("name");
 162     UnicodeString ver_attr    = UNICODE_STRING_SIMPLE("version");
 163     UnicodeString cksum_attr  = UNICODE_STRING_SIMPLE("checksum");
 164
 165     const UXMLElement *testCase;
 166     int32_t tc = 0;
 167
 168     while((testCase = root->nextChildElement(tc)) != NULL) {
 169         if (testCase->getTagName().compare(test_case) == 0) {
 170             char *id = getCString(testCase->getAttribute(id_attr));
 171             char *script    = getCString(testCase->getAttribute(script_attr));
 172             char *lang      = getCString(testCase->getAttribute(lang_attr));
 173             char *fontName  = NULL;
 174                         char *fontVer   = NULL;
 175                         char *fontCksum = NULL;
 176             const UXMLElement *element;
 177             int32_t ec = 0;
 178             int32_t charCount = 0;
 179             // int32_t typoFlags = 3; // kerning + ligatures...
 180             UScriptCode scriptCode;
 181             le_int32 languageCode = -1;
 182             UnicodeString text, glyphs, indices, positions;
 183             int32_t glyphCount = 0, indexCount = 0, positionCount = 0;
 184             TestResult expected = {0, NULL, NULL, NULL};
 185
 186             uscript_getCode(script, &scriptCode, 1, &status);
 187             if (LE_FAILURE(status)) {
 188                 log_err("invalid script name: %s.\n", script);
 189                 goto free_c_strings;
 190             }
 191
 192             if (lang != NULL) {
 193                 languageCode = getLanguageCode(lang);
 194
 195                 if (languageCode < 0) {
 196                     log_err("invalid language name: %s.\n", lang);
 197                     goto free_c_strings;
 198                 }
 199             }
 200
 201             while((element = testCase->nextChildElement(ec)) != NULL) {
 202                 UnicodeString tag = element->getTagName();
 203
 204                 // TODO: make sure that each element is only used once.
 205                 if (tag.compare(test_font) == 0) {
 206                     fontName  = getCString(element->getAttribute(name_attr));
 207                     fontVer   = getCString(element->getAttribute(ver_attr));
 208                     fontCksum = getCString(element->getAttribute(cksum_attr));
 209
 210                 } else if (tag.compare(test_text) == 0) {
 211                     text = element->getText(TRUE);
 212                     charCount = text.length();
 213                 } else if (tag.compare(result_glyphs) == 0) {
 214                     glyphs = element->getText(TRUE);
 215                 } else if (tag.compare(result_indices) == 0) {
 216                     indices = element->getText(TRUE);
 217                 } else if (tag.compare(result_positions) == 0) {
 218                     positions = element->getText(TRUE);
 219                 } else {
 220                     // an unknown tag...
 221                     char *cTag = getCString(&tag);
 222
 223                     log_info("Test %s: unknown element with tag \"%s\"\n", id, cTag);
 224                     freeCString(cTag);
 225                 }
 226             }
 227
 228             expected.glyphs    = (LEGlyphID *) getHexArray(glyphs, glyphCount);
 229             expected.indices   = (le_int32 *)  getHexArray(indices, indexCount);
 230             expected.positions = getFloatArray(positions, positionCount);
 231
 232             expected.glyphCount = glyphCount;
 233
 234             if (glyphCount < charCount || indexCount != glyphCount || positionCount < glyphCount * 2 + 2) {
 235                 log_err("Test %s: inconsistent input data: charCount = %d, glyphCount = %d, indexCount = %d, positionCount = %d\n",
 236                     id, charCount, glyphCount, indexCount, positionCount);
 237                 goto free_expected;
 238             };
 239
 240                         (*callback)(id, fontName, fontVer, fontCksum, scriptCode, languageCode, text.getBuffer(), charCount, &expected);
 241
 242 free_expected:
 243             DELETE_ARRAY(expected.positions);
 244             DELETE_ARRAY(expected.indices);
 245             DELETE_ARRAY(expected.glyphs);
 246
 247 free_c_strings:
 248                         freeCString(fontCksum);
 249                         freeCString(fontVer);
 250                         freeCString(fontName);
 251             freeCString(lang);
 252             freeCString(script);
 253             freeCString(id);
 254         }
 255     }
 256
 257     delete root;
 258     delete parser;
 259 #endif
 260 }
 261 U_CDECL_END