+++ /dev/null
-/*****************************************************************************
-
- (c) Cambridge Silicon Radio Limited 2010
- All rights reserved and confidential information of CSR
-
- Refer to LICENSE.txt included with this source for details
- on the license terms.
-
-*****************************************************************************/
-#include <linux/module.h>
-#include <linux/slab.h>
-#include "csr_unicode.h"
-#include "csr_macro.h"
-
-#define UNI_SUR_HIGH_START ((u32) 0xD800)
-#define UNI_SUR_HIGH_END ((u32) 0xDBFF)
-#define UNI_SUR_LOW_START ((u32) 0xDC00)
-#define UNI_SUR_LOW_END ((u32) 0xDFFF)
-#define UNI_REPLACEMENT_CHAR ((u32) 0xFFFD)
-#define UNI_HALF_SHIFT ((u8) 10) /* used for shifting by 10 bits */
-#define UNI_HALF_BASE ((u32) 0x00010000)
-#define UNI_BYTEMASK ((u32) 0xBF)
-#define UNI_BYTEMARK ((u32) 0x80)
-
-#define CAPITAL(x) ((x >= 'a') && (x <= 'z') ? ((x) & 0x00DF) : (x))
-
-/*
-* Index into the table with the first byte to get the number of trailing bytes in a utf-8 character.
-* -1 if the byte has an invalid value.
-*
-* Legal sequences are:
-*
-* byte 1st 2nd 3rd 4th
-*
-* 00-7F
-* C2-DF 80-BF
-* E0 A0-BF 80-BF
-* E1-EC 80-BF 80-BF
-* ED 80-9F 80-BF
-* EE-EF 80-BF 80-BF
-* F0 90-BF 80-BF 80-BF
-* F1-F3 80-BF 80-BF 80-BF
-* F4 80-8F 80-BF 80-BF
-*/
-static const s8 trailingBytesForUtf8[256] =
-{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 - 0x1F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x3F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x5F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x7F */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 - 0x9F */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xA0 - 0xBF */
- -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 - 0xDF */
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xE0 - 0xFF */
-};
-
-/* Values to be substracted from a u32 when converting from UTF8 to UTF16 */
-static const u32 offsetsFromUtf8[4] =
-{
- 0x00000000, 0x00003080, 0x000E2080, 0x03C82080
-};
-
-/********************************************************************************
-*
-* Name: CsrUint32ToUtf16String
-*
-* Description: The function converts an 32 bit number to an UTF-16 string
-* that is allocated and 0-terminated.
-*
-* Input: 32 bit number.
-*
-* Output: A string of UTF-16 characters.
-*
-*********************************************************************************/
-u16 *CsrUint32ToUtf16String(u32 number)
-{
- u16 count, noOfDigits;
- u16 *output;
- u32 tempNumber;
-
- /* calculate the number of digits in the output */
- tempNumber = number;
- noOfDigits = 1;
- while (tempNumber >= 10)
- {
- tempNumber = tempNumber / 10;
- noOfDigits++;
- }
-
- output = kmalloc(sizeof(u16) * (noOfDigits + 1), GFP_KERNEL); /*add space for 0-termination*/
-
- tempNumber = number;
- for (count = noOfDigits; count > 0; count--)
- {
- output[count - 1] = (u16) ((tempNumber % 10) + '0');
- tempNumber = tempNumber / 10;
- }
- output[noOfDigits] = '\0';
-
- return output;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16StringToUint32
-*
-* Description: The function converts an UTF-16 string that is
-* 0-terminated into a 32 bit number.
-*
-* Input: A string of UTF-16 characters containig a number.
-*
-* Output: 32 bit number.
-*
-*********************************************************************************/
-u32 CsrUtf16StringToUint32(const u16 *unicodeString)
-{
- u16 numLen, count;
- u32 newNumber = 0;
-
- numLen = (u16) CsrUtf16StrLen(unicodeString);
-
- if ((numLen > 10) || (numLen == 0) || (unicodeString == NULL)) /*CSRMAX number is 4.294.967.295 */
- {
- return 0;
- }
-
- for (count = 0; count < numLen; count++)
- {
- u16 input = unicodeString[count];
- if ((input < 0x30) || (input > 0x39) || ((newNumber == 0x19999999) && (input > 0x35)) || (newNumber > 0x19999999)) /* chars are present or number is too large now causing number to get to large when *10 */
- {
- return 0;
- }
-
- newNumber = (newNumber * 10) + (input - 0x30);
- }
- return newNumber;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16MemCpy
-*
-* Description: The function copies count number of 16 bit data elements
-* from srv to dest.
-*
-* Input: A pointer to an unicoded string.
-*
-* Output: A pointer to an unicoded string.
-*
-*********************************************************************************/
-u16 *CsrUtf16MemCpy(u16 *dest, const u16 *src, u32 count)
-{
- return memcpy((u8 *) dest, (u8 *) src, count * sizeof(u16));
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16ConcatenateTexts
-*
-* Description: The function merge the contents of 4 unicoded input pointers
-* into a new string.
-*
-* Input: 4 unicoded input strings (UTF-16).
-*
-* Output: A new unicoded string (UTF-16) containing the combined strings.
-*
-*********************************************************************************/
-u16 *CsrUtf16ConcatenateTexts(const u16 *inputText1, const u16 *inputText2,
- const u16 *inputText3, const u16 *inputText4)
-{
- u16 *outputText;
- u32 textLen, textLen1, textLen2, textLen3, textLen4;
-
- textLen1 = CsrUtf16StrLen(inputText1);
- textLen2 = CsrUtf16StrLen(inputText2);
- textLen3 = CsrUtf16StrLen(inputText3);
- textLen4 = CsrUtf16StrLen(inputText4);
-
- textLen = textLen1 + textLen2 + textLen3 + textLen4;
-
- if (textLen == 0) /*stop here is all lengths are 0*/
- {
- return NULL;
- }
-
- outputText = kmalloc((textLen + 1) * sizeof(u16), GFP_KERNEL); /* add space for 0-termination*/
-
-
- if (inputText1 != NULL)
- {
- CsrUtf16MemCpy(outputText, inputText1, textLen1);
- }
-
- if (inputText2 != NULL)
- {
- CsrUtf16MemCpy(&(outputText[textLen1]), inputText2, textLen2);
- }
-
- if (inputText3 != NULL)
- {
- CsrUtf16MemCpy(&(outputText[textLen1 + textLen2]), inputText3, textLen3);
- }
-
- if (inputText4 != NULL)
- {
- CsrUtf16MemCpy(&(outputText[textLen1 + textLen2 + textLen3]), inputText4, textLen4);
- }
-
- outputText[textLen] = '\0';
-
- return outputText;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16StrLen
-*
-* Description: The function returns the number of 16 bit elements present
-* in the 0-terminated string.
-*
-* Input: 0-terminated string of 16 bit unicoded characters.
-*
-* Output: The number of 16 bit elements in the string.
-*
-*********************************************************************************/
-u32 CsrUtf16StrLen(const u16 *unicodeString)
-{
- u32 length;
-
- length = 0;
- if (unicodeString != NULL)
- {
- while (*unicodeString)
- {
- length++;
- unicodeString++;
- }
- }
- return length;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16String2Utf8
-*
-* Description: The function decodes an UTF-16 string into an UTF8 byte
-* oriented string.
-*
-* Input: 0-terminated UTF-16 string characters.
-*
-* Output: 0-terminated string of byte oriented UTF8 coded characters.
-*
-*********************************************************************************/
-u8 *CsrUtf16String2Utf8(const u16 *source)
-{
- u8 *dest, *destStart = NULL;
- u32 i;
- u32 ch;
- u32 length;
- u32 sourceLength;
- u8 bytes;
- u8 appendNull = FALSE;
-
- u8 firstByteMark[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};
-
- if (!source)
- {
- return NULL;
- }
-
- length = 0;
- sourceLength = CsrUtf16StrLen(source) + 1;
-
- for (i = 0; i < sourceLength; i++)
- {
- ch = source[i];
- if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */
- {
- if (i + 1 < sourceLength) /* The low surrogate is in the source */
- {
- u32 ch2 = source[++i];
- if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate */
- {
- length += 4;
- }
- else /* It is not a low surrogate, instead put a Unicode
- 'REPLACEMENT CHARACTER' (U+FFFD) */
- {
- length += 3;
- i--; /* Substract 1 again as the conversion must continue after the ill-formed code unit */
- }
- }
- else /* The low surrogate does not exist, instead put a Unicode
- 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */
- {
- length += 4;
- }
- }
- else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put
- a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
- {
- length += 3;
- }
- else /* Figure out how many bytes that are required */
- {
- if (ch < 0x0080)
- {
- length++;
- }
- else if (ch < 0x0800)
- {
- length += 2;
- }
- else
- {
- length += 3;
- }
- }
- }
-
- dest = kmalloc(length, GFP_KERNEL);
- destStart = dest;
-
- for (i = 0; i < sourceLength; i++)
- {
- ch = source[i];
- if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */
- {
- if (i + 1 < sourceLength) /* The low surrogate is in the source */
- {
- u32 ch2 = source[++i];
- if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate, convert to UTF-32 */
- {
- ch = ((ch - UNI_SUR_HIGH_START) << UNI_HALF_SHIFT) + (ch2 - UNI_SUR_LOW_START) + UNI_HALF_BASE;
- }
- else /* It is not a low surrogate, instead put a Unicode
- 'REPLACEMENT CHARACTER' (U+FFFD) */
- {
- ch = UNI_REPLACEMENT_CHAR;
- i--; /* Substract 1 again as the conversion must continue after the ill-formed code unit */
- }
- }
- else /* The low surrogate does not exist, instead put a Unicode
- 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */
- {
- ch = UNI_REPLACEMENT_CHAR;
- appendNull = TRUE;
- }
- }
- else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put
- a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
- {
- ch = UNI_REPLACEMENT_CHAR;
- }
-
- /* Figure out how many bytes that are required */
- if (ch < (u32) 0x80)
- {
- bytes = 1;
- }
- else if (ch < (u32) 0x800)
- {
- bytes = 2;
- }
- else if (ch < (u32) 0x10000)
- {
- bytes = 3;
- }
- else if (ch < (u32) 0x110000)
- {
- bytes = 4;
- }
- else
- {
- bytes = 3;
- ch = UNI_REPLACEMENT_CHAR;
- }
-
- dest += bytes;
-
- switch (bytes) /* Convert character to UTF-8. Note: everything falls through. */
- {
- case 4:
- {
- *--dest = (u8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
- ch >>= 6;
- }
- /* FALLTHROUGH */
- case 3:
- {
- *--dest = (u8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
- ch >>= 6;
- }
- /* FALLTHROUGH */
- case 2:
- {
- *--dest = (u8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK);
- ch >>= 6;
- }
- /* FALLTHROUGH */
- case 1:
- {
- *--dest = (u8) (ch | firstByteMark[bytes]);
- }
- /* FALLTHROUGH */
- default:
- {
- break;
- }
- }
-
- dest += bytes;
- }
-
- if (appendNull) /* Append the \0 character */
- {
- *dest = '\0';
- }
-
- return destStart;
-}
-
-/*****************************************************************************
-
- NAME
- isLegalUtf8
-
- DESCRIPTION
- Returns TRUE if the given UFT-8 code unit is legal as defined by the
- Unicode standard (see Chapter 3: Conformance, Section 3.9: Unicode
- Encoding Forms, UTF-8).
-
- This function assumes that the length parameter is unconditionally
- correct and that the first byte is already validated by looking it up
- in the trailingBytesForUtf8 array, which also reveals the number of
- trailing bytes.
-
- Legal code units are composed of one of the following byte sequences:
-
- 1st 2nd 3rd 4th
- --------------------------------
- 00-7F
- C2-DF 80-BF
- E0 A0-BF 80-BF
- E1-EC 80-BF 80-BF
- ED 80-9F 80-BF
- EE-EF 80-BF 80-BF
- F0 90-BF 80-BF 80-BF
- F1-F3 80-BF 80-BF 80-BF
- F4 80-8F 80-BF 80-BF
-
- Please note that this function only checks whether the 2nd, 3rd and
- 4th bytes fall into the valid ranges.
-
- PARAMETERS
- codeUnit - pointer to the first byte of the byte sequence composing
- the code unit to test.
- length - the number of bytes in the code unit. Valid range is 1 to 4.
-
- RETURNS
- TRUE if the given code unit is legal.
-
-*****************************************************************************/
-static u8 isLegalUtf8(const u8 *codeUnit, u32 length)
-{
- const u8 *srcPtr = codeUnit + length;
- u8 byte;
-
- switch (length) /* Everything falls through except case 1 */
- {
- case 4:
- {
- byte = *--srcPtr;
- if ((byte < 0x80) || (byte > 0xBF))
- {
- return FALSE;
- }
- }
- /* Fallthrough */
- case 3:
- {
- byte = *--srcPtr;
- if ((byte < 0x80) || (byte > 0xBF))
- {
- return FALSE;
- }
- }
- /* Fallthrough */
- case 2:
- {
- byte = *--srcPtr;
- if (byte > 0xBF)
- {
- return FALSE;
- }
-
- switch (*codeUnit) /* No fallthrough */
- {
- case 0xE0:
- {
- if (byte < 0xA0)
- {
- return FALSE;
- }
- break;
- }
- case 0xED:
- {
- if ((byte < 0x80) || (byte > 0x9F))
- {
- return FALSE;
- }
- break;
- }
- case 0xF0:
- {
- if (byte < 0x90)
- {
- return FALSE;
- }
- break;
- }
- case 0xF4:
- {
- if ((byte < 0x80) || (byte > 0x8F))
- {
- return FALSE;
- }
- break;
- }
- default:
- {
- if (byte < 0x80)
- {
- return FALSE;
- }
- break;
- }
- }
- }
- /* Fallthrough */
- case 1:
- default:
- /* The 1st byte and length are assumed correct */
- break;
- }
-
- return TRUE;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf82Utf16String
-*
-* Description: The function decodes an UTF8 byte oriented string into a
-* UTF-16string.
-*
-* Input: 0-terminated string of byte oriented UTF8 coded characters.
-*
-* Output: 0-terminated string of UTF-16 characters.
-*
-*********************************************************************************/
-u16 *CsrUtf82Utf16String(const u8 *utf8String)
-{
- size_t i, length = 0;
- size_t sourceLength;
- u16 *dest = NULL;
- u16 *destStart = NULL;
- s8 extraBytes2Read;
-
- if (!utf8String)
- {
- return NULL;
- }
- sourceLength = strlen((char *)utf8String);
-
- for (i = 0; i < sourceLength; i++)
- {
- extraBytes2Read = trailingBytesForUtf8[utf8String[i]];
-
- if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
- {
- length += 1;
- }
- else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT
- CHARACTER' (U+FFFD), and the null terminated character */
- {
- length += 2;
- break;
- }
- else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT
- CHARACTER' (U+FFFD) */
- {
- length += 1;
- }
- else
- {
- if (utf8String[i] > 0xEF) /* Needs a high and a low surrogate */
- {
- length += 2;
- }
- else
- {
- length += 1;
- }
- i += extraBytes2Read;
- }
- }
-
- /* Create space for the null terminated character */
- dest = kmalloc((1 + length) * sizeof(u16), GFP_KERNEL);
- destStart = dest;
-
- for (i = 0; i < sourceLength; i++)
- {
- extraBytes2Read = trailingBytesForUtf8[utf8String[i]];
-
- if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */
- {
- *dest++ = UNI_REPLACEMENT_CHAR;
- }
- else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT
- CHARACTER' (U+FFFD), and the null terminated character */
- {
- *dest++ = UNI_REPLACEMENT_CHAR;
- *dest++ = '\0';
- break;
- }
- else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT
- CHARACTER' (U+FFFD) */
- {
- *dest++ = UNI_REPLACEMENT_CHAR;
- }
- else /* It is legal, convert the character to an u32 */
- {
- u32 ch = 0;
-
- switch (extraBytes2Read) /* Everything falls through */
- {
- case 3:
- {
- ch += utf8String[i];
- ch <<= 6;
- i++;
- }
- /* FALLTHROUGH */
- case 2:
- {
- ch += utf8String[i];
- ch <<= 6;
- i++;
- }
- /* FALLTHROUGH */
- case 1:
- {
- ch += utf8String[i];
- ch <<= 6;
- i++;
- }
- /* FALLTHROUGH */
- case 0:
- {
- ch += utf8String[i];
- }
- /* FALLTHROUGH */
- default:
- {
- break;
- }
- }
-
- ch -= offsetsFromUtf8[extraBytes2Read];
-
- if (ch <= 0xFFFF) /* Character can be encoded in one u16 */
- {
- *dest++ = (u16) ch;
- }
- else /* The character needs two u16 */
- {
- ch -= UNI_HALF_BASE;
- *dest++ = (u16) ((ch >> UNI_HALF_SHIFT) | UNI_SUR_HIGH_START);
- *dest++ = (u16) ((ch & 0x03FF) | UNI_SUR_LOW_START);
- }
- }
- }
-
- destStart[length] = 0x00;
-
- return destStart;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16StrCpy
-*
-* Description: The function copies the contents from one UTF-16 string
-* to another UTF-16 string.
-*
-* Input: 0-terminated UTF-16 string.
-*
-* Output: 0-terminated UTF-16 string.
-*
-*********************************************************************************/
-u16 *CsrUtf16StrCpy(u16 *target, const u16 *source)
-{
- if (source) /* if source is not NULL*/
- {
- memcpy(target, source, (CsrUtf16StrLen(source) + 1) * sizeof(u16));
- return target;
- }
- else
- {
- return NULL;
- }
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16StringDuplicate
-*
-* Description: The function allocates a new pointer and copies the input to
-* the new pointer.
-*
-* Input: 0-terminated UTF-16 string.
-*
-* Output: Allocated variable0-terminated UTF-16 string.
-*
-*********************************************************************************/
-u16 *CsrUtf16StringDuplicate(const u16 *source)
-{
- u16 *target = NULL;
- u32 length;
-
- if (source) /* if source is not NULL*/
- {
- length = (CsrUtf16StrLen(source) + 1) * sizeof(u16);
- target = kmalloc(length, GFP_KERNEL);
- memcpy(target, source, length);
- }
- return target;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16StrICmp
-*
-* Description: The function compares two UTF-16 strings.
-*
-* Input: Two 0-terminated UTF-16 string.
-*
-* Output: 0: if the strings are identical.
-*
-*********************************************************************************/
-u16 CsrUtf16StrICmp(const u16 *string1, const u16 *string2)
-{
- while (*string1 || *string2)
- {
- if (CAPITAL(*string1) != CAPITAL(*string2))
- {
- return *string1 - *string2;
- }
- string1++;
- string2++;
- }
-
- return 0;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16StrNICmp
-*
-* Description: The function compares upto count number of elements in the
-* two UTF-16 string.
-*
-* Input: Two 0-terminated UTF-16 string and a maximum
-* number of elements to check.
-*
-* Output: 0: if the strings are identical.
-*
-*********************************************************************************/
-u16 CsrUtf16StrNICmp(const u16 *string1, const u16 *string2, u32 count)
-{
- while ((*string1 || *string2) && count--)
- {
- if (CAPITAL(*string1) != CAPITAL(*string2))
- {
- return *string1 - *string2;
- }
- string1++;
- string2++;
- }
-
- return 0;
-}
-
-/********************************************************************************
-*
-* Name: CsrUtf16String2XML
-*
-* Description: The function converts an unicoded string (UTF-16) into an unicoded XML
-* string where some special characters are encoded according to
-* the XML spec.
-*
-* Input: A unicoded string (UTF-16) which is freed.
-*
-* Output: A new unicoded string (UTF-16) containing the converted output.
-*
-*********************************************************************************/
-u16 *CsrUtf16String2XML(u16 *str)
-{
- u16 *scanString;
- u16 *outputString = NULL;
- u16 *resultString = str;
- u32 stringLength = 0;
- u8 encodeChars = FALSE;
-
- scanString = str;
- if (scanString)
- {
- while (*scanString)
- {
- if (*scanString == L'&')
- {
- stringLength += 5;
- encodeChars = TRUE;
- }
- else if ((*scanString == L'<') || (*scanString == L'>'))
- {
- stringLength += 4;
- encodeChars = TRUE;
- }
- else
- {
- stringLength++;
- }
-
- scanString++;
- }
-
- stringLength++;
-
- if (encodeChars)
- {
- resultString = outputString = kmalloc(stringLength * sizeof(u16), GFP_KERNEL);
-
- scanString = str;
-
- while (*scanString)
- {
- if (*scanString == L'&')
- {
- *outputString++ = '&';
- *outputString++ = 'a';
- *outputString++ = 'm';
- *outputString++ = 'p';
- *outputString++ = ';';
- }
- else if (*scanString == L'<')
- {
- *outputString++ = '&';
- *outputString++ = 'l';
- *outputString++ = 't';
- *outputString++ = ';';
- }
- else if (*scanString == L'>')
- {
- *outputString++ = '&';
- *outputString++ = 'g';
- *outputString++ = 't';
- *outputString++ = ';';
- }
- else
- {
- *outputString++ = *scanString;
- }
-
- scanString++;
- }
-
- *outputString++ = 0;
-
- kfree(str);
- }
- }
-
- return resultString;
-}
-
-/********************************************************************************
-*
-* Name: CsrXML2Utf16String
-*
-* Description: The function converts an unicoded XML string into an unicoded
-* string (UTF-16) where some special XML characters are decoded according to
-* the XML spec.
-*
-* Input: A unicoded XML string which is freed.
-*
-* Output: A new unicoded pointer containing the decoded output.
-*
-*********************************************************************************/
-u16 *CsrXML2Utf16String(u16 *str)
-{
- u16 *scanString;
- u16 *outputString = NULL;
- u16 *resultString = str;
- u32 stringLength = 0;
- u8 encodeChars = FALSE;
-
- scanString = str;
- if (scanString)
- {
- while (*scanString)
- {
- if (*scanString == (u16) L'&')
- {
- scanString++;
-
- if (!CsrUtf16StrNICmp(scanString, (u16 *) L"AMP;", 4))
- {
- scanString += 3;
- encodeChars = TRUE;
- }
- else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"LT;", 3))
- {
- scanString += 2;
- encodeChars = TRUE;
- }
- else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"GT;", 3))
- {
- scanString += 2;
- encodeChars = TRUE;
- }
- if (!CsrUtf16StrNICmp(scanString, (u16 *) L"APOS;", 5))
- {
- scanString += 4;
- encodeChars = TRUE;
- }
- if (!CsrUtf16StrNICmp(scanString, (u16 *) L"QUOT;", 5))
- {
- scanString += 4;
- encodeChars = TRUE;
- }
- else
- {
- scanString--;
- }
- }
-
- stringLength++;
- scanString++;
- }
-
- stringLength++;
-
- if (encodeChars)
- {
- resultString = outputString = kmalloc(stringLength * sizeof(u16), GFP_KERNEL);
-
- scanString = str;
-
- while (*scanString)
- {
- if (*scanString == L'&')
- {
- scanString++;
-
- if (!CsrUtf16StrNICmp(scanString, (u16 *) L"AMP;", 4))
- {
- *outputString++ = L'&';
- scanString += 3;
- }
- else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"LT;", 3))
- {
- *outputString++ = L'<';
- scanString += 2;
- }
- else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"GT;", 3))
- {
- *outputString++ = L'>';
- scanString += 2;
- }
- else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"APOS;", 5))
- {
- *outputString++ = L'\'';
- scanString += 4;
- }
- else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"QUOT;", 5))
- {
- *outputString++ = L'\"';
- scanString += 4;
- }
- else
- {
- *outputString++ = L'&';
- scanString--;
- }
- }
- else
- {
- *outputString++ = *scanString;
- }
-
- scanString++;
- }
-
- *outputString++ = 0;
-
- kfree(str);
- }
- }
-
- return resultString;
-}
-
-u32 CsrUtf8StringLengthInBytes(const u8 *string)
-{
- size_t length = 0;
- if (string)
- {
- length = strlen((const char *)string);
- }
- return (u32) length;
-}
-
-u8 *CsrUtf8StrTruncate(u8 *target, size_t count)
-{
- size_t lastByte = count - 1;
-
- target[count] = '\0';
-
- if (count && (target[lastByte] & 0x80))
- {
- /* the last byte contains non-ascii char */
- if (target[lastByte] & 0x40)
- {
- /* multi-byte char starting just before truncation */
- target[lastByte] = '\0';
- }
- else if ((target[lastByte - 1] & 0xE0) == 0xE0)
- {
- /* 3-byte char starting 2 bytes before truncation */
- target[lastByte - 1] = '\0';
- }
- else if ((target[lastByte - 2] & 0xF0) == 0xF0)
- {
- /* 4-byte char starting 3 bytes before truncation */
- target[lastByte - 2] = '\0';
- }
- }
-
- return target;
-}