1 /*************************************************************************
3 * Copyright (C) 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html#License
6 **************************************************************************
7 **************************************************************************
9 * Copyright (C) 2000-2016, International Business Machines
10 * Corporation and others. All Rights Reserved.
12 ***************************************************************************
13 * file name: convsamp.c
14 * encoding: ASCII (7-bit)
16 * created on: 2000may30
17 * created by: Steven R. Loomis
19 * Sample code for the ICU conversion routines.
21 * Note: Nothing special is needed to build this sample. Link with
22 * the icu UC and icu I18N libraries.
24 * I use 'assert' for error checking, you probably will want
25 * something more flexible. '***BEGIN SAMPLE***' and
26 * '***END SAMPLE***' mark pieces suitable for stand alone
30 * Each test can define its own BUFFERSIZE
34 #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
37 #include <ctype.h> /* for isspace, etc. */
40 #include <stdlib.h> /* malloc */
43 #include "unicode/utypes.h" /* Basic ICU data types */
44 #include "unicode/ucnv.h" /* C Converter API */
45 #include "unicode/ustring.h" /* some more string fcns*/
46 #include "unicode/uchar.h" /* char names */
47 #include "unicode/uloc.h"
48 #include "unicode/unistr.h"
52 /* Some utility functions */
54 static const UChar kNone[] = { 0x0000 };
56 #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
58 /* Print a UChar if possible, in seven characters: a quoted character, a u_charName() lookup for c > 0x007F, or a switch on the low 7 bits. */
59 void prettyPrintUChar(UChar c)
63 printf(" '%c' ", (char)(0x00FF&c)); // only the low 8 bits of c are printed
64 } else if ( c > 0x007F ) {
66 UErrorCode status = U_ZERO_ERROR;
69 o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status); // 1000 is passed as the capacity of buf; o receives the return value
70 if(U_SUCCESS(status) && (o>0) ) { // taken only when the lookup succeeded and returned a positive length
77 switch((char)(c & 0x007F)) { // dispatch on the low 7 bits of c
95 void printUChars(const char *name = "?", // label printed at the start of the first row
96 const UChar *uch = kNone, // UChars to dump; defaults to kNone, an array holding a single 0x0000
101 if( (len == -1) && (uch) ) { // a len of -1 with a non-null uch is special-cased here
105 printf("%5s: ", name);
106 for( i = 0; i <len; i++) {
111 printf("%5s: ", "uni"); // "uni" row: each UChar as four hex digits
112 for( i = 0; i <len; i++) {
113 printf("\\u%04X ", (int)uch[i]);
117 printf("%5s:", "ch"); // "ch" row: each UChar rendered by prettyPrintUChar
118 for( i = 0; i <len; i++) {
119 prettyPrintUChar(uch[i]);
124 void printBytes(const char *name = "?", // label printed at the start of the first row
125 const char *uch = "", // bytes to dump; defaults to an empty string
130 if( (len == -1) && (uch) ) { // a len of -1 with a non-null uch is special-cased here
134 printf("%5s: ", name);
135 for( i = 0; i <len; i++) {
140 printf("%5s: ", "uni"); // this row is labelled "uni" although it shows each byte as two hex digits
141 for( i = 0; i <len; i++) {
142 printf("\\x%02X ", 0x00FF & (int)uch[i]); // masked so a negative char still prints as one byte
146 printf("%5s:", "ch"); // "ch" row: bytes that pass isgraph are shown as quoted characters
147 for( i = 0; i <len; i++) {
148 if(isgraph(0x00FF & (int)uch[i])) { // masked to 0..255 so isgraph never receives a negative value
149 printf(" '%c' ", (char)uch[i]);
157 void printUChar(UChar32 ch32) // print one code point supplied as a UChar32
160 printf("ch: U+%06X\n", ch32); // the code point as at least six hex digits
163 UChar ch = (UChar)ch32; // narrowed to a single UChar before being handed to printUChars
164 printUChars("C", &ch, 1); // dump it as a one-element array labelled "C"
168 /*******************************************************************
169 Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
170 followed by an exclamation mark (!) into the KOI8-R Russian code page.
172 This example first creates a UChar String out of the Unicode chars.
174 targetSize must be set to the amount of space available in the target
175 buffer. After fromUChars is called,
176 len will contain the number of bytes in target[] which were
177 used in the resulting codepage. In this case, there is a 1:1 mapping
178 between the input and output characters. The exclamation mark has the
179 same value in both KOI8-R and Unicode.
182 uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
183 ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
186 uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
190 Converting FROM unicode
192 You must call ucnv_close to clean up the memory used by the
195 'len' returns the number of OUTPUT bytes resulting from the
199 UErrorCode convsample_02()
201 printf("\n\n==============================================\n"
202 "Sample 02: C: simple Unicode -> koi8-r conversion\n");
205 // **************************** START SAMPLE *******************
207 UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
208 0x0430, 0x0021, 0x0000 }; // six Cyrillic letters, '!' (0x0021), then a terminating 0x0000
210 UErrorCode status = U_ZERO_ERROR;
214 // set up the converter
216 conv = ucnv_open("koi8-r", &status);
218 assert(U_SUCCESS(status));
221 len = ucnv_fromUChars(conv, target, 100, source, -1, &status); // 100 is the destination capacity; -1 means source is NUL-terminated
222 assert(U_SUCCESS(status));
224 // close the converter
227 // ***************************** END SAMPLE ********************
230 printUChars("src", source); // dump the UTF-16 input
232 printBytes("targ", target, len); // dump the len bytes produced by the conversion
238 UErrorCode convsample_03()
240 printf("\n\n==============================================\n"
241 "Sample 03: C: print out all converters\n");
246 // **************************** START SAMPLE *******************
247 count = ucnv_countAvailable(); // how many converter names can be enumerated
248 printf("Available converters: %d\n", count);
252 printf("%s ", ucnv_getAvailableName(i)); // name at index i, each followed by a space
255 // ***************************** END SAMPLE ********************
264 #define BUFFERSIZE 17 /* make it interesting :) */
267 Converting from a codepage to Unicode in bulk..
268 What is the best way to determine the buffer size?
270 The 'buffersize' is in bytes of input.
271 For a given converter, dividing this by the minimum char size
272 gives you the maximum number of Unicode characters that could be
273 expected for a given number of input bytes.
274 see: ucnv_getMinCharSize()
276 For example, a single byte codepage like 'Latin-3' has a
277 minimum char size of 1. (It takes at least 1 byte to represent
278 each Unicode char.) So the unicode buffer has the same number of
279 UChars as the input buffer has bytes.
281 In a strictly double byte codepage such as cp1362 (Windows
282 Korean), the minimum char size is 2. So, only half as many Unicode
283 chars as bytes are needed.
285 This work to calculate the buffer size is an optimization. Any
286 size of input and output buffer can be used, as long as the
287 program handles the following cases: If the input buffer is empty,
288 the source pointer will be equal to sourceLimit. If the output
289 buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
292 UErrorCode convsample_05()
294 printf("\n\n==============================================\n"
295 "Sample 05: C: count the number of letters in a UTF-8 document\n");
299 char inBuf[BUFFERSIZE]; // raw input chunk read from the file
301 const char *sourceLimit;
306 int32_t uBufSize = 0;
308 UErrorCode status = U_ZERO_ERROR;
309 uint32_t letters=0, total=0;
311 f = fopen("data01.txt", "r"); // NOTE(review): text mode here, while convsample_40 opens its input with "rb" - confirm this is intended
314 fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
315 return U_FILE_ACCESS_ERROR;
318 // **************************** START SAMPLE *******************
319 conv = ucnv_open("utf-8", &status);
320 assert(U_SUCCESS(status));
322 uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv)); // the most UChars one full input buffer can produce
323 printf("input bytes %d / min chars %d = %d UChars\n",
324 BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
325 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); // NOTE(review): no NULL check is visible after this allocation
328 // grab another buffer's worth
330 ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
332 // Convert bytes to unicode
334 sourceLimit = inBuf + count; // one past the last byte actually read
339 targetLimit = uBuf + uBufSize;
341 ucnv_toUnicode(conv, &target, targetLimit,
342 &source, sourceLimit, NULL,
343 feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
344 /* is true (when no more data will come) */
347 if(status == U_BUFFER_OVERFLOW_ERROR)
349 // simply ran out of space - we'll reset the target ptr the next
350 // time through the loop.
351 status = U_ZERO_ERROR;
355 // Check other errors here.
356 assert(U_SUCCESS(status));
357 // Break out of the loop (by force)
360 // Process the Unicode
361 // Todo: handle UTF-16/surrogates
363 for(p = uBuf; p<target; p++) // visit every UChar produced by this conversion pass
369 } while (source < sourceLimit); // while simply out of space
372 printf("%d letters out of %d total UChars.\n", letters, total); // NOTE(review): both arguments are uint32_t but are formatted with %d
374 // ***************************** END SAMPLE ********************
385 #define BUFFERSIZE 1024
392 UErrorCode convsample_06()
394 printf("\n\n==============================================\n"
395 "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
399 char inBuf[BUFFERSIZE]; // raw input chunk read from the file
401 const char *sourceLimit;
402 int32_t uBufSize = 0;
404 UErrorCode status = U_ZERO_ERROR;
405 uint32_t letters=0, total=0;
408 UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
415 f = fopen("data06.txt", "r"); // NOTE(review): text mode here, while convsample_40 opens its input with "rb" - confirm this is intended
418 fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
419 return U_FILE_ACCESS_ERROR;
422 info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount); // one CharFreqInfo slot per code point below charCount
425 fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount); // NOTE(review): %d receives a size_t-typed expression
428 /* reset frequencies */
429 for(p=0;p<charCount;p++)
431 info[p].codepoint = p; // slot index doubles as the code point it counts
432 info[p].frequency = 0;
435 // **************************** START SAMPLE *******************
436 conv = ucnv_open("utf-8", &status);
437 assert(U_SUCCESS(status));
439 uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
440 printf("input bytes %d / min chars %d = %d UChars\n",
441 BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
443 // grab another buffer's worth
445 ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
447 // Convert bytes to unicode
449 sourceLimit = inBuf + count; // one past the last byte actually read
451 while(source < sourceLimit) // one ucnv_getNextUChar call per pass until this chunk is consumed
453 p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
454 if(U_FAILURE(status))
456 fprintf(stderr, "%s @ %d\n", u_errorName(status), total); // report the error name and the running total
457 status = U_ZERO_ERROR; // clear the error so conversion can continue
466 if((u_tolower(l) == 'i') && (u_tolower(p) == 'e')) // presumably l is the previous character: an "ie" pair, compared case-insensitively
469 if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127)) // 'g' then U+0127 (LATIN SMALL LETTER H WITH STROKE); presumably l is the previous character
474 fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
478 return U_UNSUPPORTED_ERROR;
488 printf("%d letters out of %d total UChars.\n", letters, total); // NOTE(review): both arguments are uint32_t but are formatted with %d
489 printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
491 // now, we could sort it..
493 // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
495 for(p=0;p<charCount;p++)
497 if(info[p].frequency) // only code points that occurred at least once are listed
499 printf("% 5d U+%06X ", info[p].frequency, p);
502 prettyPrintUChar((UChar)p); // p stays below charCount (0x10000), so it fits in one UChar
508 // ***************************** END SAMPLE ********************
517 /******************************************************
518 You must call ucnv_close to clean up the memory used by the
521 'len' returns the number of OUTPUT UChars resulting from the
525 UErrorCode convsample_12()
527 printf("\n\n==============================================\n"
528 "Sample 12: C: simple sjis -> unicode conversion\n");
531 // **************************** START SAMPLE *******************
533 char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 }; // ASCII "cat" (63 61 74), then 94 4C, 82 6E, 82 6A, then a NUL terminator
535 UErrorCode status = U_ZERO_ERROR;
539 // set up the converter
540 conv = ucnv_open("shift_jis", &status);
541 assert(U_SUCCESS(status));
543 // convert to Unicode
544 // Note: we can use strlen, we know it's an 8 bit null terminated codepage
546 len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status); // 100 is the destination capacity; len receives the output length in UChars
548 // close the converter
551 // ***************************** END SAMPLE ********************
554 printBytes("src", source, strlen(source) ); // dump the Shift-JIS input bytes
556 printUChars("targ", target, len); // dump the converted UChars
561 /******************************************************************
562 C: Convert from codepage to Unicode one at a time.
565 UErrorCode convsample_13()
567 printf("\n\n==============================================\n"
568 "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
571 const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e }; // eight input bytes; the array has no NUL terminator
572 // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
573 const char *source, *sourceLimit;
575 UErrorCode status = U_ZERO_ERROR;
576 UConverter *conv = NULL;
580 srcCount = sizeof(sourceChars); // input size in bytes
582 conv = ucnv_open("Big5", &status);
585 source = sourceChars;
586 sourceLimit = sourceChars + sizeof(sourceChars); // one past the last input byte
588 // **************************** START SAMPLE *******************
591 printBytes("src",source,sourceLimit-source);
593 while(source < sourceLimit) // one ucnv_getNextUChar call per pass until the input is consumed
596 target = ucnv_getNextUChar (conv,
601 // printBytes("src",source,sourceLimit-source);
608 // ************************** END SAMPLE *************************
610 printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
619 UBool convsample_20_didSubstitute(const char *source) // source: NUL-terminated bytes that are converted from UTF-8
623 UConverter *conv = NULL;
624 UErrorCode status = U_ZERO_ERROR;
628 FromUFLAGContext * context = NULL;
630 printf("\n\n==============================================\n"
631 "Sample 20: C: Test for substitution using callbacks\n");
633 /* print out the original source */
634 printBytes("src", source);
637 /* First, convert from UTF8 to unicode */
638 conv = ucnv_open("utf-8", &status);
641 len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
644 printUChars("uch", uchars, len);
647 /* Now, close the converter */
650 /* Now, convert to windows-1252 */
651 conv = ucnv_open("windows-1252", &status);
654 /* Converter starts out with the SUBSTITUTE callback set. */
656 /* initialize our callback */
657 context = flagCB_fromU_openContext();
659 /* Set our special callback */
660 ucnv_setFromUCallBack(conv,
663 &(context->subCallback), // presumably the oldAction out-parameter: receives the previously installed callback
664 &(context->subContext), // presumably the oldContext out-parameter: receives that callback's context
669 len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
672 flagVal = context->flag; /* it's about to go away when we close the cnv */
676 /* print out the converted bytes */
677 printBytes("bytes", bytes, len2);
679 return flagVal; /* true if callback was called */
682 UErrorCode convsample_20()
684 const char *sample1 = "abc\xdf\xbf"; // "abc" plus UTF-8 bytes DF BF (U+07FF), which windows-1252 cannot encode
685 const char *sample2 = "abc_def"; // ASCII only
688 if(convsample_20_didSubstitute(sample1))
690 printf("DID substitute.\n******\n");
694 printf("Did NOT substitute.\n*****\n");
697 if(convsample_20_didSubstitute(sample2))
699 printf("DID substitute.\n******\n");
703 printf("Did NOT substitute.\n*****\n");
709 // 21 - C, callback, with clone and debug
713 UBool convsample_21_didSubstitute(const char *source) // source: NUL-terminated bytes that are converted from UTF-8
717 UConverter *conv = NULL, *cloneCnv = NULL;
718 UErrorCode status = U_ZERO_ERROR;
721 UBool flagVal = FALSE;
722 UConverterFromUCallback junkCB; // receives a callback pointer that is never used afterwards
724 FromUFLAGContext *flagCtx = NULL,
725 *cloneFlagCtx = NULL;
727 debugCBContext *debugCtx1 = NULL,
729 *cloneDebugCtx = NULL;
731 printf("\n\n==============================================\n"
732 "Sample 21: C: Test for substitution w/ callbacks & clones \n");
734 /* print out the original source */
735 printBytes("src", source);
738 /* First, convert from UTF8 to unicode */
739 conv = ucnv_open("utf-8", &status);
742 len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
745 printUChars("uch", uchars, len);
748 /* Now, close the converter */
751 /* Now, convert to windows-1252 */
752 conv = ucnv_open("windows-1252", &status);
755 /* Converter starts out with the SUBSTITUTE callback set. */
757 /* initialize our callback */
758 /* from the 'bottom' innermost, out
759 * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
762 printf("flagCB_fromU = %p\n", &flagCB_fromU);
763 printf("debugCB_fromU = %p\n", &debugCB_fromU);
766 debugCtx1 = debugCB_openContext();
767 flagCtx = flagCB_fromU_openContext();
768 debugCtx2 = debugCB_openContext();
770 debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */
771 debugCtx1->subContext = flagCtx;
773 flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */
774 flagCtx->subContext = debugCtx2;
776 debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE; /* debug2 -> substitute */
777 debugCtx2->subContext = NULL;
779 /* Set our special callback */
781 ucnv_setFromUCallBack(conv,
784 &(debugCtx2->subCallback), // presumably the oldAction out-parameter: receives the previously installed callback
785 &(debugCtx2->subContext), // presumably the oldContext out-parameter: receives that callback's context
791 printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
792 conv, debugCtx1, debugCtx1->subCallback,
793 debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
796 cloneCnv = ucnv_safeClone(conv, NULL, NULL, &status); // NULL buffer and size arguments: presumably ICU allocates the clone - confirm against the API docs
801 printf("Cloned converter from %p -> %p. Closing %p.\n", conv, cloneCnv, conv);
807 printf("%p closed.\n", conv);
811 /* Now, we have to extract the context */
812 cloneDebugCtx = NULL;
815 ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx); // fetch the clone's callback and context; only the context is used below
816 if(cloneDebugCtx != NULL) {
817 cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext; // the debug context's subContext is read as the flag context
820 printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
821 cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );
823 len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status); // the conversion runs on the clone, not on conv
826 if(cloneFlagCtx != NULL) {
827 flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */
829 printf("** Warning, couldn't get the subcallback \n");
832 ucnv_close(cloneCnv);
834 /* print out the converted bytes */
835 printBytes("bytes", bytes, len2);
837 return flagVal; /* true if callback was called */
840 UErrorCode convsample_21()
842 const char *sample1 = "abc\xdf\xbf"; // "abc" plus UTF-8 bytes DF BF (U+07FF), which windows-1252 cannot encode
843 const char *sample2 = "abc_def"; // ASCII only
845 if(convsample_21_didSubstitute(sample1))
847 printf("DID substitute.\n******\n");
851 printf("Did NOT substitute.\n*****\n");
854 if(convsample_21_didSubstitute(sample2))
856 printf("DID substitute.\n******\n");
860 printf("Did NOT substitute.\n*****\n");
867 // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
869 #define BUFFERSIZE 17 /* make it interesting :) */
871 UErrorCode convsample_40()
873 printf("\n\n==============================================\n"
874 "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
879 char inBuf[BUFFERSIZE]; // raw input chunk read from data02.bin
881 const char *sourceLimit;
885 int32_t uBufSize = 0;
886 UConverter *conv = NULL;
887 UErrorCode status = U_ZERO_ERROR;
888 uint32_t inbytes=0, total=0;
890 f = fopen("data02.bin", "rb");
893 fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
894 return U_FILE_ACCESS_ERROR;
897 out = fopen("data40.utf16", "wb");
900 fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
902 return U_FILE_ACCESS_ERROR;
905 // **************************** START SAMPLE *******************
906 conv = ucnv_openCCSID(37, UCNV_IBM, &status); // opened by IBM CCSID number 37 rather than by converter name
907 assert(U_SUCCESS(status));
909 uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv)); // the most UChars one full input buffer can produce
910 printf("input bytes %d / min chars %d = %d UChars\n",
911 BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
912 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); // NOTE(review): no NULL check is visible after this allocation
915 // grab another buffer's worth
917 ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
921 // Convert bytes to unicode
923 sourceLimit = inBuf + count; // one past the last byte actually read
928 targetLimit = uBuf + uBufSize;
930 ucnv_toUnicode( conv, &target, targetLimit,
931 &source, sourceLimit, NULL,
932 feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
933 /* is true (when no more data will come) */
936 if(status == U_BUFFER_OVERFLOW_ERROR)
938 // simply ran out of space - we'll reset the target ptr the next
939 // time through the loop.
940 status = U_ZERO_ERROR;
944 // Check other errors here.
945 assert(U_SUCCESS(status));
946 // Break out of the loop (by force)
949 // Process the Unicode
950 // Todo: handle UTF-16/surrogates
951 assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == // NOTE(review): the fwrite lives inside assert(), so it is not executed when NDEBUG is defined
952 (size_t)(target-uBuf));
953 total += (target-uBuf); // UChars written by this pass
954 } while (source < sourceLimit); // while simply out of space
957 printf("%d bytes in, %d UChars out.\n", inbytes, total); // NOTE(review): both arguments are uint32_t but are formatted with %d
959 // ***************************** END SAMPLE ********************
972 // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
974 #define BUFFERSIZE 24 /* make it interesting :) */
976 UErrorCode convsample_46()
978 printf("\n\n==============================================\n"
979 "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
984 UChar inBuf[BUFFERSIZE]; // input chunk of UChars read from data40.utf16
986 const UChar *sourceLimit;
992 UConverter *conv = NULL;
993 UErrorCode status = U_ZERO_ERROR;
994 uint32_t inchars=0, total=0;
996 f = fopen("data40.utf16", "rb");
999 fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
1000 return U_FILE_ACCESS_ERROR;
1003 out = fopen("data46.out", "wb");
1006 fprintf(stderr, "Couldn't create file 'data46.out'.\n");
1008 return U_FILE_ACCESS_ERROR;
1011 // **************************** START SAMPLE *******************
1012 conv = ucnv_open( "iso-8859-2", &status);
1013 assert(U_SUCCESS(status));
1015 bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv)); // input UChar count times the converter's maximum bytes per character
1016 printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1017 BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
1018 buf = (char*)malloc(bufSize * sizeof(char)); // NOTE(review): no NULL check is visible after this allocation
1021 // grab another buffer's worth
1023 ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) ) // count is a number of UChars here, not bytes
1027 // Convert Unicode to bytes
1029 sourceLimit = inBuf + count; // one past the last UChar actually read
1034 targetLimit = buf + bufSize;
1036 ucnv_fromUnicode( conv, &target, targetLimit,
1037 &source, sourceLimit, NULL,
1038 feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
1039 /* is true (when no more data will come) */
1042 if(status == U_BUFFER_OVERFLOW_ERROR)
1044 // simply ran out of space - we'll reset the target ptr the next
1045 // time through the loop.
1046 status = U_ZERO_ERROR;
1050 // Check other errors here.
1051 assert(U_SUCCESS(status));
1052 // Break out of the loop (by force)
1055 // Write out the converted bytes
1056 assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) == // NOTE(review): the fwrite lives inside assert(), so it is not executed when NDEBUG is defined
1057 (size_t)(target-buf));
1058 total += (target-buf); // bytes written by this pass
1059 } while (source < sourceLimit); // while simply out of space
1062 printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total); // NOTE(review): the second argument is size_t-typed but is formatted with %d
1064 // ***************************** END SAMPLE ********************
1071 return U_ZERO_ERROR;
1075 #define BUFFERSIZE 219
1077 void convsample_50() {
1078 printf("\n\n==============================================\n"
1079 "Sample 50: C: ucnv_detectUnicodeSignature\n");
1081 //! [ucnv_detectUnicodeSignature]
1082 UErrorCode err = U_ZERO_ERROR;
1083 UBool discardSignature = TRUE; /* set to TRUE to throw away the initial U+FEFF */
1084 char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' }; // bytes EF BB BF followed by "ABC" (41 42 43); no NUL terminator
1085 int32_t signatureLength = 0;
1086 const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err); // result is checked against NULL below; signatureLength is an out-parameter
1087 UConverter *conv = NULL;
1089 UChar *target = output, *out;
1090 const char *source = input;
1091 if(encoding!=NULL && U_SUCCESS(err)){ // convert only when an encoding was reported without error
1092 // should signature be discarded ?
1093 conv = ucnv_open(encoding, &err);
1094 // do the conversion
1095 ucnv_toUnicode(conv,
1096 &target, output + UPRV_LENGTHOF(output),
1097 &source, input + sizeof(input),
1100 if (discardSignature){
1101 ++out; // ignore initial U+FEFF
1103 while(out != target) { // print every UChar from out up to the conversion's end pointer
1104 printf("%04x ", *out++);
1108 //! [ucnv_detectUnicodeSignature]
1119 printf("Default Converter=%s\n", ucnv_getDefaultName() );
1121 convsample_02(); // C , u->koi8r, conv
1122 convsample_03(); // C, iterate
1124 convsample_05(); // C, utf8->u, toUnicode
1125 convsample_06(); // C freq counter thingy
1127 convsample_12(); // C, sjis->u, conv
1128 convsample_13(); // C, big5->u, getNextU
1130 convsample_20(); // C, callback
1131 convsample_21(); // C, callback debug
1133 convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
1135 convsample_46(); // C, UTF16 -> latin2 [data40.utf16 -> data46.out]
1137 convsample_50(); // C, detect unicode signature
1139 printf("End of converter samples.\n");