2 **************************************************************************
3 * Copyright (C) 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html#License
5 *************************************************************************
6 *************************************************************************
7 * Copyright (C) 2002-2014, International Business Machines
8 * Corporation and others. All Rights Reserved.
9 *************************************************************************
10 * file name: utfperf.cpp
12 * tab size: 8 (not used)
15 * created on: 2005Nov17
16 * created by: Raymond Yang
18 * Ported from utfperf.c created by Markus W. Scherer
19 * Performance test program for Unicode converters
24 #include "unicode/uperf.h"
25 #include "cmemory.h" // for UPRV_LENGTHOF
28 /* definitions and text buffers */
// Capacity of utf8[]; through OUTPUT_CAPACITY it also sizes output[] and intermediate[].
30 #define INPUT_CAPACITY (1024*1024)
// Used below as the element count of pivot[].
31 #define INTERMEDIATE_CAPACITY 4096
// NOTE(review): not referenced anywhere in the visible part of this listing.
32 #define INTERMEDIATE_SMALL_CAPACITY 20
// Upper bound the constructor of UtfPerformanceTest enforces on the --pivot option.
33 #define PIVOT_CAPACITY 1024
// Size of output[] and intermediate[], and the upper bound enforced on the --chunk option.
34 #define OUTPUT_CAPACITY INPUT_CAPACITY
// UTF-8 form of the input text, written by u_strToUTF8() in the UtfPerformanceTest constructor.
36 static char utf8[INPUT_CAPACITY];
// UTF-16 pivot buffer handed to ucnv_convertEx() by FromUTF8.
// It is dimensioned with INTERMEDIATE_CAPACITY (4096) although the usable pivot
// length is limited to PIVOT_CAPACITY (1024), so the array is larger than any accepted length.
37 static UChar pivot[INTERMEDIATE_CAPACITY];
// Destination of the back-conversion to UTF-16 in Roundtrip.
39 static UChar output[OUTPUT_CAPACITY];
// Destination of every conversion into the tested charset; only the first
// chunkLength bytes are used as the target window.
40 static char intermediate[OUTPUT_CAPACITY];
// utf8Length: length reported by u_strToUTF8() for utf8[].
// encodedLength: bytes written to intermediate[], accumulated after each conversion call.
// outputLength: number of UChars Roundtrip wrote to output[].
// countInputCodePoints: u_countChar32() of the input; reported as operations per iteration.
42 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
// Incremented by fromUCallback(), zeroed at the start of each call(), printed by main().
44 static int32_t fromUCallbackCount;
46 // Command-line options specific to utfperf.
47 // Options do not have abbreviations: Force readable command lines.
48 // (Using U+0001 for abbreviation characters.)
// NOTE(review): the listing skips lines 49-55. CHARSET, CHUNK_LENGTH, PIVOT_LENGTH and
// UTFPERF_OPTIONS_COUNT, which index and size this table, are presumably declared there;
// confirm that their order matches the three entries below.
// main() stores default values in these entries before the table is passed to UPerfTest.
56 static UOption options[UTFPERF_OPTIONS_COUNT]={
// Read as options[CHARSET] by the UtfPerformanceTest constructor.
57 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG),
// Read as options[CHUNK_LENGTH] and parsed with atoi().
58 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG),
// Read as options[PIVOT_LENGTH] and parsed with atoi().
59 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG)
// Help text for the utfperf-specific options; passed to the UPerfTest base constructor.
// The "[4096]" shown for --chunk equals the default that main() stores in options[CHUNK_LENGTH].
// NOTE(review): the listing skips lines 64 and 67-69, so further text (for example the
// defaults for --charset and --pivot) may follow each entry; confirm against the full file.
62 static const char *const utfperf_usage =
63 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
65 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
66 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
// Test driver: parses the utfperf options, loads the input text and prepares the
// shared UTF-8 copy and code point count used by the individual test functions.
// NOTE(review): closing braces, access specifiers and some member declarations
// (for example `charset`) are not part of this listing.
70 class UtfPerformanceTest : public UPerfTest{
// Forwards argc/argv together with the utfperf option table and usage text to UPerfTest.
// On failure `status` is left holding the error; nothing further is set up.
72 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
73 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
74 if (U_SUCCESS(status)) {
// Name of the converter under test; later passed to ucnv_open() by Command.
75 charset = options[CHARSET].value;
// atoi() yields 0 for a non-numeric argument, which the `< 1` test below rejects.
77 chunkLength = atoi(options[CHUNK_LENGTH].value);
// Upper bound is the size of intermediate[], so intermediate+chunkLength stays inside the array.
78 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
79 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
80 status = U_ILLEGAL_ARGUMENT_ERROR;
83 pivotLength = atoi(options[PIVOT_LENGTH].value);
// PIVOT_CAPACITY (1024) is below the size of pivot[] (4096), so pivot+pivotLength stays inside the array.
84 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
85 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
86 status = U_ILLEGAL_ARGUMENT_ERROR;
// NOTE(review): `inputLength` is presumably a local declared on a skipped line, and
// `buffer`/`bufferLen` are presumably UPerfTest members filled by this call; confirm.
90 UPerfTest::getBuffer(inputLength, status);
// Code point count of the input; returned by Command::getOperationsPerIteration().
91 countInputCodePoints = u_countChar32(buffer, bufferLen);
// UTF-8 copy of the input for the FromUTF8 test; capacity is the full utf8[] array.
92 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
// Maps a test index to a test name and, when requested, a test function object
// (defined out of line further down).
96 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
// Read-only accessors for the UTF-16 input text and its length.
98 const UChar *getBuffer() const { return buffer; }
99 int32_t getBufferLen() const { return bufferLen; }
// Validated --chunk and --pivot values; read directly by the test function classes.
102 int32_t chunkLength, pivotLength;
106 // Custom callback for counting callback calls.
// Installed on the tested converter by Command via ucnv_setFromUCallBack().
// Increments fromUCallbackCount when `reason` is at most UCNV_IRREGULAR, then always
// delegates to ICU's substitution callback with the unchanged arguments.
// NOTE(review): presumably the `<= UCNV_IRREGULAR` test selects the real conversion-error
// reasons and skips reset/close/clone notifications; verify against the
// UConverterCallbackReason enum in ucnv_err.h.
// NOTE(review): the listing skips lines 111-112; the `length` and `codePoint` names
// used in the delegation below are presumably parameters declared there.
107 static void U_CALLCONV
108 fromUCallback(const void *context,
109 UConverterFromUnicodeArgs *fromUArgs,
110 const UChar *codeUnits,
113 UConverterCallbackReason reason,
114 UErrorCode *pErrorCode) {
115 if (reason <= UCNV_IRREGULAR) {
116 ++fromUCallbackCount;
118 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
122 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
// Opens the converter named by testcase.charset, installs the counting callback and
// keeps the outcome in `errorCode`, which the get() factories of the subclasses inspect.
// NOTE(review): declarations of `cnv`, `input` and `inputLength` are not in this listing.
123 class Command : public UPerfFunction {
// Captures the UTF-16 input and its length from the test case, then opens the converter.
125 Command(const UtfPerformanceTest &testcase)
126 : testcase(testcase),
127 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
128 errorCode(U_ZERO_ERROR) {
129 cnv=ucnv_open(testcase.charset, &errorCode);
130 if (U_FAILURE(errorCode)) {
131 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
// Route from-Unicode conversion errors through fromUCallback(); no context and no
// interest in the previously installed callback (all three are NULL).
133 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
// NOTE(review): the listing skips lines 134-136 and 138-140. This test presumably sits
// in the destructor and guards closing `cnv`; confirm against the full file.
137 if(U_SUCCESS(errorCode)) {
141 // virtual void call(UErrorCode* pErrorCode) { ... }
// The unit of work reported to the framework is one input code point.
142 virtual long getOperationsPerIteration(){
143 return countInputCodePoints;
// Source of charset, chunkLength and pivotLength for the subclasses.
146 const UtfPerformanceTest &testcase;
// Result of the constructor's setup; U_ZERO_ERROR unless opening or configuring failed.
149 UErrorCode errorCode;
153 // Test roundtrip UTF-16->encoding->UTF-16.
// Converts the input in chunks into intermediate[], converts each chunk back into
// output[] and finally checks that the output length equals the input length.
// NOTE(review): the loop header, several assignments and the closing braces are not in this listing.
154 class Roundtrip : public Command {
156 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
// Factory used by runIndexedTest(): builds the object and checks the constructor's error code.
// NOTE(review): the branches following the test are not visible; confirm that `t` is
// released when construction failed.
158 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
159 Roundtrip * t = new Roundtrip(testcase);
160 if (U_SUCCESS(t->errorCode)){
// One timed iteration. Errors are reported through *pErrorCode.
167 virtual void call(UErrorCode* pErrorCode){
168 const UChar *pIn, *pInLimit;
169 UChar *pOut, *pOutLimit;
170 char *pInter, *pInterLimit;
// Restart callback counting for this iteration.
175 fromUCallbackCount=0;
178 pInLimit=input+inputLength;
181 pOutLimit=output+OUTPUT_CAPACITY;
// Only the first chunkLength bytes of intermediate[] serve as the target window.
183 pInterLimit=intermediate+testcase.chunkLength;
185 encodedLength=outputLength=0;
189 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
191 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
// Add the number of bytes this call produced in intermediate[].
192 encodedLength+=(int32_t)(pInter-intermediate);
// A full chunk is expected, not fatal: the error is cleared so that conversion can go on.
194 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
195 /* make sure that we convert once more to really flush */
196 *pErrorCode=U_ZERO_ERROR;
197 } else if(U_FAILURE(*pErrorCode)) {
// No error and all input consumed.
199 } else if(pIn==pInLimit) {
203 /* convert the block [intermediate..pInter[ back to UTF-16 */
// NOTE(review): `p` and `flush` are presumably locals declared or assigned on skipped lines.
205 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
206 if(U_FAILURE(*pErrorCode)) {
209 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
// Sanity check: the round trip must yield as many UChars as it consumed.
212 outputLength=pOut-output;
213 if(inputLength!=outputLength) {
214 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
215 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
220 // Test one-way conversion UTF-16->encoding.
// Converts the input in chunks into intermediate[] and accumulates the encoded length;
// nothing is converted back.
// NOTE(review): the loop header, several assignments and the closing braces are not in this listing.
221 class FromUnicode : public Command {
223 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
// Factory used by runIndexedTest(): builds the object and checks the constructor's error code.
// NOTE(review): the branches following the test are not visible; confirm that `t` is
// released when construction failed.
225 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
226 FromUnicode * t = new FromUnicode(testcase);
227 if (U_SUCCESS(t->errorCode)){
// One timed iteration. Errors are reported through *pErrorCode.
234 virtual void call(UErrorCode* pErrorCode){
235 const UChar *pIn, *pInLimit;
236 char *pInter, *pInterLimit;
// Start each iteration from a clean from-Unicode converter state and a zeroed callback count.
238 ucnv_resetFromUnicode(cnv);
239 fromUCallbackCount=0;
242 pInLimit=input+inputLength;
// Only the first chunkLength bytes of intermediate[] serve as the target window.
244 pInterLimit=intermediate+testcase.chunkLength;
// NOTE(review): no reset of encodedLength is visible before it is accumulated below;
// presumably it happens on a skipped line. Confirm.
250 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
251 encodedLength+=(int32_t)(pInter-intermediate);
// A full chunk is expected, not fatal: the error is cleared so that conversion can go on.
253 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
254 /* make sure that we convert once more to really flush */
255 *pErrorCode=U_ZERO_ERROR;
256 } else if(U_FAILURE(*pErrorCode)) {
265 // Test one-way conversion UTF-8->encoding.
// Feeds the shared UTF-8 copy of the input through ucnv_convertEx(), using a second
// converter for UTF-8 and the global pivot[] buffer, writing chunks to intermediate[].
// NOTE(review): initializer lines, the loop header, closing braces and the declarations
// of `utf8Cnv` and `input8` are not in this listing.
266 class FromUTF8 : public Command {
268 FromUTF8(const UtfPerformanceTest &testcase)
// Source text is the global utf8[] buffer with the length computed by the test driver.
271 input8(utf8), input8Length(utf8Length) {
// Second converter for the UTF-8 side; shares errorCode with the base-class setup.
// NOTE(review): no ucnv_close(utf8Cnv) is visible in this listing; confirm that a
// destructor on the skipped lines releases it.
272 utf8Cnv=ucnv_open("UTF-8", &errorCode);
// Factory used by runIndexedTest(): builds the object and checks the constructor's error code.
// NOTE(review): the branches following the test are not visible; confirm that `t` is
// released when construction failed.
275 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
276 FromUTF8 * t = new FromUTF8(testcase);
277 if (U_SUCCESS(t->errorCode)){
// One timed iteration. Errors are reported through *pErrorCode.
287 virtual void call(UErrorCode* pErrorCode){
288 const char *pIn, *pInLimit;
289 char *pInter, *pInterLimit;
290 UChar *pivotSource, *pivotTarget, *pivotLimit;
// Reset both converters explicitly and zero the callback count before converting.
292 ucnv_resetToUnicode(utf8Cnv);
293 ucnv_resetFromUnicode(cnv);
294 fromUCallbackCount=0;
297 pInLimit=input8+input8Length;
// Only the first chunkLength bytes of intermediate[] serve as the target window.
299 pInterLimit=intermediate+testcase.chunkLength;
// Empty pivot window of pivotLength UChars at the start of pivot[].
301 pivotSource=pivotTarget=pivot;
302 pivotLimit=pivot+testcase.pivotLength;
// Target converter first, source converter second. The two booleans are, per the
// ucnv_convertEx() documentation, `reset` (FALSE) and `flush` (TRUE).
// NOTE(review): the source/sourceLimit argument line (310) is not in this listing.
308 ucnv_convertEx(cnv, utf8Cnv,
309 &pInter, pInterLimit,
311 pivot, &pivotSource, &pivotTarget, pivotLimit,
312 FALSE, TRUE, pErrorCode);
// Add the number of bytes this call produced in intermediate[].
313 encodedLength+=(int32_t)(pInter-intermediate);
// A full chunk is expected, not fatal: the error is cleared so that conversion can go on.
315 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
316 /* make sure that we convert once more to really flush */
317 *pErrorCode=U_ZERO_ERROR;
318 } else if(U_FAILURE(*pErrorCode)) {
// Length of the UTF-8 input, copied from utf8Length at construction time.
328 int32_t input8Length;
// Test registry: sets `name` for the given index and, when `exec` is true, returns the
// object created by the matching get() factory. An unknown index sets `name` to "".
// `par` is not used in the visible lines.
// NOTE(review): the switch header and the return taken after `break` are not in this listing.
331 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
333 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break;
334 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break;
335 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break;
336 default: name = ""; break;
// Program entry: installs option defaults, builds the test driver, runs the tests and
// reports how often the from-Unicode callback fired.
// NOTE(review): braces, return statements and the rest of the "FAILED" message are not in this listing.
341 int main(int argc, const char *argv[])
343 // Default values for command-line options.
// These must be set before the UtfPerformanceTest constructor reads the option values.
344 options[CHARSET].value = "UTF-8";
345 options[CHUNK_LENGTH].value = "4096";
346 options[PIVOT_LENGTH].value = "1024";
348 UErrorCode status = U_ZERO_ERROR;
349 UtfPerformanceTest test(argc, argv, status);
// NOTE(review): this setup error is printed to stdout, whereas the other error
// messages in this file go to stderr.
351 if (U_FAILURE(status)){
352 printf("The error is %s\n", u_errorName(status));
357 if (test.run() == FALSE){
358 fprintf(stderr, "FAILED: Tests could not be run please check the "
// fromUCallbackCount is zeroed at the start of every call(), so this is the count of the
// most recent iteration only.
363 if (fromUCallbackCount > 0) {
364 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);