static uint utf16_to_ucs4(const ushort *wbuf);\r
static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);\r
static int is_unicode(int codepage);\r
+static int mbtowc_flags(int codepage);\r
static int must_use_null_useddefaultchar(int codepage);\r
static void check_utf_bom(rec_iconv_t *cd, ushort *wbuf, int *wbufsize);\r
static char *strrstr(const char *str, const char *token);\r
\r
static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);\r
static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);\r
+static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);\r
static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);\r
static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);\r
\r
cv.mblen = eucjp_mblen;\r
}\r
else if (IsValidCodePage(cv.codepage)\r
- && GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0\r
- && (cpinfoex.MaxCharSize == 1 || cpinfoex.MaxCharSize == 2))\r
+ && GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0)\r
{\r
cv.mbtowc = kernel_mbtowc;\r
cv.wctomb = kernel_wctomb;\r
if (cpinfoex.MaxCharSize == 1)\r
cv.mblen = sbcs_mblen;\r
- else\r
+ else if (cpinfoex.MaxCharSize == 2)\r
cv.mblen = dbcs_mblen;\r
+ else\r
+ cv.mblen = mbcs_mblen;\r
}\r
else\r
{\r
codepage == 65000 || codepage == 65001);\r
}\r
\r
+/*\r
+ * Check if codepage is one of those for which the dwFlags parameter\r
+ * to MultiByteToWideChar() must be zero. Return zero or\r
+ * MB_ERR_INVALID_CHARS. The docs in the Platform SDK for Windows\r
+ * Server 2003 R2 claim that codepage 65001 is also one of these, but\r
+ * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave\r
+ * out 65001 (UTF-8), and that indeed seems to be correct on XP: it\r
+ * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting\r
+ * from UTF-8.\r
+ */\r
+static int\r
+mbtowc_flags(int codepage) /* result is passed as the dwFlags argument of MultiByteToWideChar() */\r
+{\r
+ return (codepage == 50220 || codepage == 50221 || /* any code page matched in this list yields 0 */\r
+ codepage == 50222 || codepage == 50225 ||\r
+ codepage == 50227 || codepage == 50229 ||\r
+ codepage == 52936 || codepage == 54936 ||\r
+ (codepage >= 57002 && codepage <= 57011) || /* inclusive range 57002..57011 */\r
+ codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; /* 65001 is not listed, so it gets MB_ERR_INVALID_CHARS like every other unlisted code page */\r
+}\r
+\r
+/*\r
+ * Check if codepage is one of those for which the lpUsedDefaultChar\r
+ * parameter to WideCharToMultiByte() must be NULL. The docs in the\r
+ * Platform SDK for Windows Server 2003 R2 claim that this is the\r
+ * list below, while the MSDN docs for MSVS2008 claim that it is only\r
+ * for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform\r
+ * SDK seems to be correct, at least for XP.\r
+ */\r
static int\r
must_use_null_useddefaultchar(int codepage)\r
{\r
}\r
\r
static int\r
+mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) /* byte length of the character starting at buf; installed as cv.mblen when MaxCharSize is neither 1 nor 2 */\r
+{\r
+ int len = 0;\r
+\r
+ if (cv->codepage == 54936) { /* the only code page this function can measure */\r
+ if (buf[0] <= 0x7F) len = 1; /* one-byte form; NOTE(review): buf[0] is read before any bufsize check - presumably callers guarantee bufsize >= 1, confirm */\r
+ else if (buf[0] >= 0x81 && buf[0] <= 0xFE && /* two-byte form: first byte in 0x81..0xFE, ... */\r
+ bufsize >= 2 &&\r
+ ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||\r
+ (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2; /* ... second byte in 0x40..0x7E or 0x80..0xFE */\r
+ else if (buf[0] >= 0x81 && buf[0] <= 0xFE && /* four-byte form: same first-byte range, second byte in 0x30..0x39 */\r
+ bufsize >= 4 &&\r
+ buf[1] >= 0x30 && buf[1] <= 0x39) len = 4; /* NOTE(review): buf[2] and buf[3] are not range-checked - confirm this is intended */\r
+ else\r
+ return_error(EINVAL); /* reached for too-short input as well as for bytes matching no form above; return_error presumably sets errno and returns -1 - confirm against its definition */\r
+ return len;\r
+ }\r
+ else\r
+ return_error(EINVAL); /* every code page other than 54936 is rejected */\r
+}\r
+\r
+static int\r
utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize)\r
{\r
int len = 0;\r
len = cv->mblen(cv, buf, bufsize);\r
if (len == -1)\r
return -1;\r
- *wbufsize = MultiByteToWideChar(cv->codepage, MB_ERR_INVALID_CHARS,\r
+ *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),\r
(const char *)buf, len, (wchar_t *)wbuf, *wbufsize);\r
if (*wbufsize == 0)\r
return_error(EILSEQ);\r