1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 12/02/98 stephen Creation.
17 * 03/13/99 stephen Modified for new C API.
18 *******************************************************************************
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unum.h"
28 #include "unicode/udat.h"
29 #include "unicode/uset.h"
38 /* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A  /* '*' : parse the field but do not store it */
#define FLAG_PAREN      0x0028  /* '(' : introduces a "(XXXX)" pad character */

/*
 * True when s is one of the u_scanf flag characters.
 * The whole expansion is parenthesized so the macro can be combined safely
 * with other operators (e.g. !ISFLAG(c) or ISFLAG(c) && x).
 * Note: s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ISFLAG(s)       ((s) == FLAG_ASTERISK || (s) == FLAG_PAREN)
45 /* special characters for u_scanf */
46 #define SPEC_DOLLARSIGN 0x0024
#define DIGIT_ZERO      0x0030
#define DIGIT_ONE       0x0031
#define DIGIT_TWO       0x0032
#define DIGIT_THREE     0x0033
#define DIGIT_FOUR      0x0034
#define DIGIT_FIVE      0x0035
#define DIGIT_SIX       0x0036
#define DIGIT_SEVEN     0x0037
#define DIGIT_EIGHT     0x0038
#define DIGIT_NINE      0x0039

/*
 * True when s is an ASCII decimal digit (U+0030..U+0039).
 * DIGIT_ZERO..DIGIT_NINE are contiguous, so a range test covers every digit.
 * The expansion is fully parenthesized so it composes with other operators.
 * Note: s is evaluated twice; do not pass an expression with side effects.
 */
#define ISDIGIT(s)      ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
71 /* u_scanf modifiers */
#define MOD_H           0x0068  /* 'h' : short */
#define MOD_LOWERL      0x006C  /* 'l' : long; doubled ("ll") means long long */
#define MOD_L           0x004C  /* 'L' : long double */

/*
 * True when s is one of the u_scanf size-modifier characters.
 * The expansion is fully parenthesized so it composes with other operators.
 * Note: s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ISMOD(s)        ((s) == MOD_H || (s) == MOD_LOWERL || (s) == MOD_L)
81 * Struct encapsulating a single uscanf format specification.
83 typedef struct u_scanf_spec_info {
84 int32_t fWidth; /* Width */
86 UChar fSpec; /* Format specification */
88 UChar fPadChar; /* Padding character */
90 UBool fSkipArg; /* TRUE if arg should be skipped */
91 UBool fIsLongDouble; /* L flag */
92 UBool fIsShort; /* h flag */
93 UBool fIsLong; /* l flag */
94 UBool fIsLongLong; /* ll flag */
95 UBool fIsString; /* TRUE if this is a NULL-terminated string. */
100 * Struct encapsulating a single u_scanf format specification.
102 typedef struct u_scanf_spec {
103 u_scanf_spec_info fInfo; /* Information on this spec */
104 int32_t fArgPos; /* Position of data in arg list */
108 * Parse a single u_scanf format specifier in Unicode.
109 * @param fmt A pointer to a '%' character in a u_scanf format specification.
110 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
112 * @return The number of characters contained in this specifier.
115 u_scanf_parse_spec (const UChar *fmt,
118 const UChar *s = fmt;
120 u_scanf_spec_info *info = &(spec->fInfo);
122 /* initialize spec to default values */
126 info->fSpec = 0x0000;
127 info->fPadChar = 0x0020;
128 info->fSkipArg = FALSE;
129 info->fIsLongDouble = FALSE;
130 info->fIsShort = FALSE;
131 info->fIsLong = FALSE;
132 info->fIsLongLong = FALSE;
133 info->fIsString = TRUE;
136 /* skip over the initial '%' */
139 /* Check for positional argument */
142 /* Save the current position */
145 /* handle positional parameters */
147 spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
151 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
155 /* if there is no '$', don't read anything */
156 if(*s != SPEC_DOLLARSIGN) {
165 /* Get any format flags */
171 info->fSkipArg = TRUE;
174 /* pad character specified */
177 /* first four characters are hex values for pad char */
178 info->fPadChar = (UChar)ufmt_digitvalue(*s++);
179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
181 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
183 /* final character is ignored */
192 info->fWidth = (int) (*s++ - DIGIT_ZERO);
196 info->fWidth += (int) (*s++ - DIGIT_ZERO);
200 /* Get any modifiers */
206 info->fIsShort = TRUE;
209 /* long or long long */
211 if(*s == MOD_LOWERL) {
212 info->fIsLongLong = TRUE;
213 /* skip over the next 'l' */
217 info->fIsLong = TRUE;
222 info->fIsLongDouble = TRUE;
227 /* finally, get the specifier letter */
230 /* return # of characters in this specifier */
231 return (int32_t)(s - fmt);
234 #define UP_PERCENT 0x0025
237 /* ANSI style formatting */
238 /* Use US-ASCII characters only for formatting */
241 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
243 #define UFMT_STRING {ufmt_string, u_scanf_string_handler}
245 #define UFMT_CHAR {ufmt_string, u_scanf_char_handler}
247 #define UFMT_INT {ufmt_int, u_scanf_integer_handler}
249 #define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler}
251 #define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler}
253 #define UFMT_HEX {ufmt_int, u_scanf_hex_handler}
255 #define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler}
257 #define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler}
259 #define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler}
261 #define UFMT_COUNT {ufmt_count, u_scanf_count_handler}
263 #define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler}
265 /* non-ANSI extensions */
266 /* Use US-ASCII characters only for formatting */
269 #define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler}
271 #define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler}
273 #define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler}
274 /* C K is old format */
275 #define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler}
276 /* S U is old format */
277 #define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler}
280 #define UFMT_EMPTY {ufmt_empty, NULL}
283 * A u_scanf handler function.
284 * A u_scanf handler is responsible for handling a single u_scanf
285 * format specification, for example 'd' or 's'.
 * @param stream The UFILE from which to read input.
287 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
288 * information on the format specification.
289 * @param args A pointer to the argument data
290 * @param fmt A pointer to the first character in the format string
291 * following the spec.
292 * @param fmtConsumed On output, set to the number of characters consumed
293 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
 * @param argConverted The number of arguments converted and assigned, or -1 if an error occurred.
296 * @return The number of code points consumed during reading.
298 typedef int32_t (*u_scanf_handler) (UFILE *stream,
299 u_scanf_spec_info *info,
302 int32_t *fmtConsumed,
303 int32_t *argConverted);
305 typedef struct u_scanf_info {
307 u_scanf_handler handler;
310 #define USCANF_NUM_FMT_HANDLERS 108
311 #define USCANF_SYMBOL_BUFFER_SIZE 8
313 /* We do not use handlers for 0-0x1f */
314 #define USCANF_BASE_FMT_HANDLERS 0x20
318 u_scanf_skip_leading_ws(UFILE *input,
325 /* skip all leading ws in the input */
326 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
331 /* put the final character back on the input */
338 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
340 u_scanf_skip_leading_positive_sign(UFILE *input,
341 UNumberFormat *format,
347 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
349 UErrorCode localStatus = U_ZERO_ERROR;
351 if (U_SUCCESS(*status)) {
352 symbolLen = unum_getSymbol(format,
353 UNUM_PLUS_SIGN_SYMBOL,
355 UPRV_LENGTHOF(plusSymbol),
358 if (U_SUCCESS(localStatus)) {
            /* skip the leading characters that match the locale's plus-sign symbol */
360 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
365 /* put the final character back on the input */
376 u_scanf_simple_percent_handler(UFILE *input,
377 u_scanf_spec_info *info,
380 int32_t *fmtConsumed,
381 int32_t *argConverted)
383 /* make sure the next character in the input is a percent */
385 if(u_fgetc(input) != 0x0025) {
392 u_scanf_count_handler(UFILE *input,
393 u_scanf_spec_info *info,
396 int32_t *fmtConsumed,
397 int32_t *argConverted)
399 /* in the special case of count, the u_scanf_spec_info's width */
400 /* will contain the # of items converted thus far */
401 if (!info->fSkipArg) {
403 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
404 else if (info->fIsLongLong)
405 *(int64_t*)(args[0].ptrValue) = info->fWidth;
407 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
411 /* we converted 0 args */
416 u_scanf_double_handler(UFILE *input,
417 u_scanf_spec_info *info,
420 int32_t *fmtConsumed,
421 int32_t *argConverted)
425 UNumberFormat *format;
426 int32_t parsePos = 0;
428 UErrorCode status = U_ZERO_ERROR;
431 /* skip all ws in the input */
432 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
434 /* fill the input's internal buffer */
435 ufile_fill_uchar_buffer(input);
437 /* determine the size of the input's buffer */
438 len = (int32_t)(input->str.fLimit - input->str.fPos);
440 /* truncate to the width, if specified */
441 if(info->fWidth != -1)
442 len = ufmt_min(len, info->fWidth);
444 /* get the formatter */
445 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
451 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
452 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
454 /* parse the number */
455 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
457 if (!info->fSkipArg) {
459 *(double*)(args[0].ptrValue) = num;
460 else if (info->fIsLongDouble)
461 *(long double*)(args[0].ptrValue) = num;
463 *(float*)(args[0].ptrValue) = (float)num;
466 /* mask off any necessary bits */
467 /* if(! info->fIsLong_double)
470 /* update the input's position to reflect consumed data */
471 input->str.fPos += parsePos;
473 /* we converted 1 arg */
474 *argConverted = !info->fSkipArg;
475 return parsePos + skipped;
478 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
481 u_scanf_scientific_handler(UFILE *input,
482 u_scanf_spec_info *info,
485 int32_t *fmtConsumed,
486 int32_t *argConverted)
490 UNumberFormat *format;
491 int32_t parsePos = 0;
493 UErrorCode status = U_ZERO_ERROR;
494 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
495 int32_t srcLen, expLen;
496 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
499 /* skip all ws in the input */
500 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
502 /* fill the input's internal buffer */
503 ufile_fill_uchar_buffer(input);
505 /* determine the size of the input's buffer */
506 len = (int32_t)(input->str.fLimit - input->str.fPos);
508 /* truncate to the width, if specified */
509 if(info->fWidth != -1)
510 len = ufmt_min(len, info->fWidth);
512 /* get the formatter */
513 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
519 /* set the appropriate flags on the formatter */
521 srcLen = unum_getSymbol(format,
522 UNUM_EXPONENTIAL_SYMBOL,
527 /* Upper/lower case the e */
528 if (info->fSpec == (UChar)0x65 /* e */) {
529 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
531 input->str.fBundle.fLocale,
535 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
537 input->str.fBundle.fLocale,
541 unum_setSymbol(format,
542 UNUM_EXPONENTIAL_SYMBOL,
550 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
551 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
553 /* parse the number */
554 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
556 if (!info->fSkipArg) {
558 *(double*)(args[0].ptrValue) = num;
559 else if (info->fIsLongDouble)
560 *(long double*)(args[0].ptrValue) = num;
562 *(float*)(args[0].ptrValue) = (float)num;
565 /* mask off any necessary bits */
566 /* if(! info->fIsLong_double)
569 /* update the input's position to reflect consumed data */
570 input->str.fPos += parsePos;
572 /* we converted 1 arg */
573 *argConverted = !info->fSkipArg;
574 return parsePos + skipped;
578 u_scanf_scidbl_handler(UFILE *input,
579 u_scanf_spec_info *info,
582 int32_t *fmtConsumed,
583 int32_t *argConverted)
587 UNumberFormat *scientificFormat, *genericFormat;
588 /*int32_t scientificResult, genericResult;*/
589 double scientificResult, genericResult;
590 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
592 UErrorCode scientificStatus = U_ZERO_ERROR;
593 UErrorCode genericStatus = U_ZERO_ERROR;
596 /* since we can't determine by scanning the characters whether */
597 /* a number was formatted in the 'f' or 'g' styles, parse the */
598 /* string with both formatters, and assume whichever one */
599 /* parsed the most is the correct formatter to use */
602 /* skip all ws in the input */
603 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
605 /* fill the input's internal buffer */
606 ufile_fill_uchar_buffer(input);
608 /* determine the size of the input's buffer */
609 len = (int32_t)(input->str.fLimit - input->str.fPos);
611 /* truncate to the width, if specified */
612 if(info->fWidth != -1)
613 len = ufmt_min(len, info->fWidth);
615 /* get the formatters */
616 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
617 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
620 if(scientificFormat == 0 || genericFormat == 0)
623 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
624 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
626 /* parse the number using each format*/
628 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
629 &scientificParsePos, &scientificStatus);
631 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
632 &genericParsePos, &genericStatus);
634 /* determine which parse made it farther */
635 if(scientificParsePos > genericParsePos) {
636 /* stash the result in num */
637 num = scientificResult;
638 /* update the input's position to reflect consumed data */
639 parsePos += scientificParsePos;
642 /* stash the result in num */
644 /* update the input's position to reflect consumed data */
645 parsePos += genericParsePos;
647 input->str.fPos += parsePos;
649 if (!info->fSkipArg) {
651 *(double*)(args[0].ptrValue) = num;
652 else if (info->fIsLongDouble)
653 *(long double*)(args[0].ptrValue) = num;
655 *(float*)(args[0].ptrValue) = (float)num;
658 /* mask off any necessary bits */
659 /* if(! info->fIsLong_double)
662 /* we converted 1 arg */
663 *argConverted = !info->fSkipArg;
664 return parsePos + skipped;
668 u_scanf_integer_handler(UFILE *input,
669 u_scanf_spec_info *info,
672 int32_t *fmtConsumed,
673 int32_t *argConverted)
676 void *num = (void*) (args[0].ptrValue);
677 UNumberFormat *format;
678 int32_t parsePos = 0;
680 UErrorCode status = U_ZERO_ERROR;
684 /* skip all ws in the input */
685 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
687 /* fill the input's internal buffer */
688 ufile_fill_uchar_buffer(input);
690 /* determine the size of the input's buffer */
691 len = (int32_t)(input->str.fLimit - input->str.fPos);
693 /* truncate to the width, if specified */
694 if(info->fWidth != -1)
695 len = ufmt_min(len, info->fWidth);
697 /* get the formatter */
698 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
704 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
705 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
707 /* parse the number */
708 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
710 /* mask off any necessary bits */
711 if (!info->fSkipArg) {
713 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
714 else if (info->fIsLongLong)
715 *(int64_t*)num = result;
717 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
720 /* update the input's position to reflect consumed data */
721 input->str.fPos += parsePos;
723 /* we converted 1 arg */
724 *argConverted = !info->fSkipArg;
725 return parsePos + skipped;
729 u_scanf_uinteger_handler(UFILE *input,
730 u_scanf_spec_info *info,
733 int32_t *fmtConsumed,
734 int32_t *argConverted)
736 /* TODO Fix this when Numberformat handles uint64_t */
737 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
741 u_scanf_percent_handler(UFILE *input,
742 u_scanf_spec_info *info,
745 int32_t *fmtConsumed,
746 int32_t *argConverted)
750 UNumberFormat *format;
751 int32_t parsePos = 0;
752 UErrorCode status = U_ZERO_ERROR;
755 /* skip all ws in the input */
756 u_scanf_skip_leading_ws(input, info->fPadChar);
758 /* fill the input's internal buffer */
759 ufile_fill_uchar_buffer(input);
761 /* determine the size of the input's buffer */
762 len = (int32_t)(input->str.fLimit - input->str.fPos);
764 /* truncate to the width, if specified */
765 if(info->fWidth != -1)
766 len = ufmt_min(len, info->fWidth);
768 /* get the formatter */
769 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
775 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
776 u_scanf_skip_leading_positive_sign(input, format, &status);
778 /* parse the number */
779 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
781 if (!info->fSkipArg) {
782 *(double*)(args[0].ptrValue) = num;
785 /* mask off any necessary bits */
786 /* if(! info->fIsLong_double)
789 /* update the input's position to reflect consumed data */
790 input->str.fPos += parsePos;
792 /* we converted 1 arg */
793 *argConverted = !info->fSkipArg;
798 u_scanf_string_handler(UFILE *input,
799 u_scanf_spec_info *info,
802 int32_t *fmtConsumed,
803 int32_t *argConverted)
807 char *arg = (char*)(args[0].ptrValue);
810 UErrorCode status = U_ZERO_ERROR;
814 UBool isNotEOF = FALSE;
816 /* skip all ws in the input */
817 if (info->fIsString) {
818 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
821 /* get the string one character at a time, truncating to the width */
824 /* open the default converter */
825 conv = u_getDefaultConverter(&status);
827 if(U_FAILURE(status))
830 while( (info->fWidth == -1 || count < info->fWidth)
831 && (isNotEOF = ufile_getch(input, &c))
832 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
835 if (!info->fSkipArg) {
836 /* put the character from the input onto the target */
838 /* Since we do this one character at a time, do it this way. */
839 if (info->fWidth > 0) {
840 limit = alias + info->fWidth - count;
843 limit = alias + ucnv_getMaxCharSize(conv);
846 /* convert the character to the default codepage */
847 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
848 NULL, TRUE, &status);
850 if(U_FAILURE(status)) {
852 u_releaseDefaultConverter(conv);
857 /* increment the count */
861 /* put the final character we read back on the input */
862 if (!info->fSkipArg) {
863 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
866 /* add the terminator */
867 if (info->fIsString) {
873 u_releaseDefaultConverter(conv);
875 /* we converted 1 arg */
876 *argConverted = !info->fSkipArg;
877 return count + skipped;
881 u_scanf_char_handler(UFILE *input,
882 u_scanf_spec_info *info,
885 int32_t *fmtConsumed,
886 int32_t *argConverted)
888 if (info->fWidth < 0) {
891 info->fIsString = FALSE;
892 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
896 u_scanf_ustring_handler(UFILE *input,
897 u_scanf_spec_info *info,
900 int32_t *fmtConsumed,
901 int32_t *argConverted)
903 UChar *arg = (UChar*)(args[0].ptrValue);
908 UBool isNotEOF = FALSE;
910 /* skip all ws in the input */
911 if (info->fIsString) {
912 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
915 /* get the string one character at a time, truncating to the width */
918 while( (info->fWidth == -1 || count < info->fWidth)
919 && (isNotEOF = ufile_getch(input, &c))
920 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
923 /* put the character from the input onto the target */
924 if (!info->fSkipArg) {
928 /* increment the count */
932 /* put the final character we read back on the input */
933 if (!info->fSkipArg) {
934 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
938 /* add the terminator */
939 if (info->fIsString) {
944 /* we converted 1 arg */
945 *argConverted = !info->fSkipArg;
946 return count + skipped;
950 u_scanf_uchar_handler(UFILE *input,
951 u_scanf_spec_info *info,
954 int32_t *fmtConsumed,
955 int32_t *argConverted)
957 if (info->fWidth < 0) {
960 info->fIsString = FALSE;
961 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
965 u_scanf_spellout_handler(UFILE *input,
966 u_scanf_spec_info *info,
969 int32_t *fmtConsumed,
970 int32_t *argConverted)
974 UNumberFormat *format;
975 int32_t parsePos = 0;
977 UErrorCode status = U_ZERO_ERROR;
980 /* skip all ws in the input */
981 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
983 /* fill the input's internal buffer */
984 ufile_fill_uchar_buffer(input);
986 /* determine the size of the input's buffer */
987 len = (int32_t)(input->str.fLimit - input->str.fPos);
989 /* truncate to the width, if specified */
990 if(info->fWidth != -1)
991 len = ufmt_min(len, info->fWidth);
993 /* get the formatter */
994 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
1000 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1001 /* This is not applicable to RBNF. */
1002 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1004 /* parse the number */
1005 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
1007 if (!info->fSkipArg) {
1008 *(double*)(args[0].ptrValue) = num;
1011 /* mask off any necessary bits */
1012 /* if(! info->fIsLong_double)
1015 /* update the input's position to reflect consumed data */
1016 input->str.fPos += parsePos;
1018 /* we converted 1 arg */
1019 *argConverted = !info->fSkipArg;
1020 return parsePos + skipped;
1024 u_scanf_hex_handler(UFILE *input,
1025 u_scanf_spec_info *info,
1028 int32_t *fmtConsumed,
1029 int32_t *argConverted)
1033 void *num = (void*) (args[0].ptrValue);
1036 /* skip all ws in the input */
1037 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1039 /* fill the input's internal buffer */
1040 ufile_fill_uchar_buffer(input);
1042 /* determine the size of the input's buffer */
1043 len = (int32_t)(input->str.fLimit - input->str.fPos);
1045 /* truncate to the width, if specified */
1046 if(info->fWidth != -1)
1047 len = ufmt_min(len, info->fWidth);
1049 /* check for alternate form */
1050 if( *(input->str.fPos) == 0x0030 &&
1051 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1053 /* skip the '0' and 'x' or 'X' if present */
1054 input->str.fPos += 2;
1058 /* parse the number */
1059 result = ufmt_uto64(input->str.fPos, &len, 16);
1061 /* update the input's position to reflect consumed data */
1062 input->str.fPos += len;
1064 /* mask off any necessary bits */
1065 if (!info->fSkipArg) {
1067 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1068 else if (info->fIsLongLong)
1069 *(int64_t*)num = result;
1071 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1074 /* we converted 1 arg */
1075 *argConverted = !info->fSkipArg;
1076 return len + skipped;
1080 u_scanf_octal_handler(UFILE *input,
1081 u_scanf_spec_info *info,
1084 int32_t *fmtConsumed,
1085 int32_t *argConverted)
1089 void *num = (void*) (args[0].ptrValue);
1092 /* skip all ws in the input */
1093 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1095 /* fill the input's internal buffer */
1096 ufile_fill_uchar_buffer(input);
1098 /* determine the size of the input's buffer */
1099 len = (int32_t)(input->str.fLimit - input->str.fPos);
1101 /* truncate to the width, if specified */
1102 if(info->fWidth != -1)
1103 len = ufmt_min(len, info->fWidth);
1105 /* parse the number */
1106 result = ufmt_uto64(input->str.fPos, &len, 8);
1108 /* update the input's position to reflect consumed data */
1109 input->str.fPos += len;
1111 /* mask off any necessary bits */
1112 if (!info->fSkipArg) {
1114 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1115 else if (info->fIsLongLong)
1116 *(int64_t*)num = result;
1118 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1121 /* we converted 1 arg */
1122 *argConverted = !info->fSkipArg;
1123 return len + skipped;
1127 u_scanf_pointer_handler(UFILE *input,
1128 u_scanf_spec_info *info,
1131 int32_t *fmtConsumed,
1132 int32_t *argConverted)
1137 void **p = (void**)(args[0].ptrValue);
1140 /* skip all ws in the input */
1141 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1143 /* fill the input's internal buffer */
1144 ufile_fill_uchar_buffer(input);
1146 /* determine the size of the input's buffer */
1147 len = (int32_t)(input->str.fLimit - input->str.fPos);
1149 /* truncate to the width, if specified */
1150 if(info->fWidth != -1) {
1151 len = ufmt_min(len, info->fWidth);
1154 /* Make sure that we don't consume too much */
1155 if (len > (int32_t)(sizeof(void*)*2)) {
1156 len = (int32_t)(sizeof(void*)*2);
1159 /* parse the pointer - assign to temporary value */
1160 result = ufmt_utop(input->str.fPos, &len);
1162 if (!info->fSkipArg) {
1166 /* update the input's position to reflect consumed data */
1167 input->str.fPos += len;
1169 /* we converted 1 arg */
1170 *argConverted = !info->fSkipArg;
1171 return len + skipped;
1175 u_scanf_scanset_handler(UFILE *input,
1176 u_scanf_spec_info *info,
1179 int32_t *fmtConsumed,
1180 int32_t *argConverted)
1183 UErrorCode status = U_ZERO_ERROR;
1184 int32_t chLeft = INT32_MAX;
1186 UChar *alias = (UChar*) (args[0].ptrValue);
1187 UBool isNotEOF = FALSE;
1188 UBool readCharacter = FALSE;
1190 /* Create an empty set */
1191 scanset = uset_open(0, -1);
1193 /* Back up one to get the [ */
1196 /* truncate to the width, if specified and alias the target */
1197 if(info->fWidth >= 0) {
1198 chLeft = info->fWidth;
1201 /* parse the scanset from the fmt string */
1202 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1204 /* verify that the parse was successful */
1205 if (U_SUCCESS(status)) {
1208 /* grab characters one at a time and make sure they are in the scanset */
1210 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1211 readCharacter = TRUE;
1212 if (!info->fSkipArg) {
1214 UBool isError = FALSE;
1216 U16_APPEND(alias, idx, chLeft, c, isError);
1222 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1225 /* if the character's not in the scanset, break out */
1230 /* put the final character we read back on the input */
1231 if(isNotEOF && chLeft > 0) {
1232 u_fungetc(c, input);
1236 uset_close(scanset);
1238 /* if we didn't match at least 1 character, fail */
1241 /* otherwise, add the terminator */
1242 else if (!info->fSkipArg) {
1246 /* we converted 1 arg */
1247 *argConverted = !info->fSkipArg;
1248 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1251 /* Use US-ASCII characters only for formatting. Most codepages have
1252 characters 20-7F from Unicode. Using any other codepage specific
1253 characters will make it very difficult to format the string on
1254 non-Unicode machines */
1255 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1257 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1258 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
1259 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1260 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1263 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1264 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1265 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1266 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1269 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
1270 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
1271 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1272 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
1274 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1276 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1279 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
1280 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1281 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
1283 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
1285 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
1286 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1289 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
1290 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
1291 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
1292 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
1295 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
1296 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
1297 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1298 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
1302 u_scanf_parse(UFILE *f,
1303 const UChar *patternSpecification,
1307 int32_t count, converted, argConsumed, cpConsumed;
1308 uint16_t handlerNum;
1312 ufmt_type_info info;
1313 u_scanf_handler handler;
1315 /* alias the pattern */
1316 alias = patternSpecification;
1318 /* haven't converted anything yet */
1323 /* iterate through the pattern */
1326 /* match any characters up to the next '%' */
1327 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1331 /* if we aren't at a '%', or if we're at end of string, break*/
1332 if(*alias != UP_PERCENT || *alias == 0x0000)
1335 /* parse the specifier */
1336 count = u_scanf_parse_spec(alias, &spec);
1338 /* update the pointer in pattern */
1341 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1342 if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1343 /* skip the argument, if necessary */
1344 /* query the info function for argument information */
1345 info = g_u_scanf_infos[ handlerNum ].info;
1346 if (info != ufmt_count && u_feof(f)) {
1349 else if(spec.fInfo.fSkipArg) {
1350 args.ptrValue = NULL;
1355 /* set the spec's width to the # of items converted */
1356 spec.fInfo.fWidth = cpConsumed;
1366 args.ptrValue = va_arg(ap, void*);
1370 /* else args is ignored */
1371 args.ptrValue = NULL;
1376 /* call the handler function */
1377 handler = g_u_scanf_infos[ handlerNum ].handler;
1380 /* reset count to 1 so that += for alias works. */
1383 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1385 /* if the handler encountered an error condition, break */
1386 if(argConsumed < 0) {
1391 /* add to the # of items converted */
1392 converted += argConsumed;
1394 /* update the pointer in pattern */
1397 /* else do nothing */
1399 /* else do nothing */
1401 /* just ignore unknown tags */
1404 /* return # of items converted */
1408 #endif /* #if !UCONFIG_NO_FORMATTING */