fs/cifs/cifs_unicode.h

   1 /*
   2  * cifs_unicode:  Unicode kernel case support
   3  *
   4  * Function:
   5  *     Convert a unicode character to upper or lower case using
   6  *     compressed tables.
   7  *
   8  *   Copyright (c) International Business Machines  Corp., 2000,2009
   9  *
  10  *   This program is free software;  you can redistribute it and/or modify
  11  *   it under the terms of the GNU General Public License as published by
  12  *   the Free Software Foundation; either version 2 of the License, or
  13  *   (at your option) any later version.
  14  *
  15  *   This program is distributed in the hope that it will be useful,
  16  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  18  *   the GNU General Public License for more details.
  19  *
  20  *   You should have received a copy of the GNU General Public License
  21  *   along with this program;  if not, write to the Free Software
  22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23  *
  24  *
  25  * Notes:
  26  *     These APIs are based on the C library functions.  The semantics
  27  *     should match the C functions but with expanded size operands.
  28  *
  29  *     The upper/lower functions are based on a table created by mkupr.
  30  *     This is a compressed table of upper and lower case conversion.
  31  *
  32  */
  33 #ifndef _CIFS_UNICODE_H
  34 #define _CIFS_UNICODE_H
  35
  36 #include <asm/byteorder.h>
  37 #include <linux/types.h>
  38 #include <linux/nls.h>
  39
   40 #define  UNIUPR_NOLOWER         /* Defined here so that the lower case tables and the helpers guarded by it are left out */
  41
  42 /*
  43  * Windows maps these to the user defined 16 bit Unicode range since they are
  44  * reserved symbols (along with \ and /), otherwise illegal to store
  45  * in filenames in NTFS
  46  */
  47 #define UNI_ASTERISK    (__u16) ('*' + 0xF000)
  48 #define UNI_QUESTION    (__u16) ('?' + 0xF000)
  49 #define UNI_COLON       (__u16) (':' + 0xF000)
  50 #define UNI_GRTRTHAN    (__u16) ('>' + 0xF000)
  51 #define UNI_LESSTHAN    (__u16) ('<' + 0xF000)
  52 #define UNI_PIPE        (__u16) ('|' + 0xF000)
  53 #define UNI_SLASH       (__u16) ('\\' + 0xF000)
  54
  55 /*
  56  * Macs use an older "SFM" mapping of the symbols above. Fortunately it does
  57  * not conflict (although almost does) with the mapping above.
  58  */
  59
   60 #define SFM_ASTERISK    ((__u16) 0xF021)     /* SFM counterpart of UNI_ASTERISK */
   61 #define SFM_QUESTION    ((__u16) 0xF025)     /* SFM counterpart of UNI_QUESTION */
   62 #define SFM_COLON       ((__u16) 0xF022)     /* SFM counterpart of UNI_COLON */
   63 #define SFM_GRTRTHAN    ((__u16) 0xF024)     /* SFM counterpart of UNI_GRTRTHAN */
   64 #define SFM_LESSTHAN    ((__u16) 0xF023)     /* SFM counterpart of UNI_LESSTHAN */
   65 #define SFM_PIPE        ((__u16) 0xF027)     /* SFM counterpart of UNI_PIPE */
   66 #define SFM_SLASH       ((__u16) 0xF026)     /* SFM counterpart of UNI_SLASH (there: the backslash) */
  67
  68 /*
  69  * Mapping mechanism to use when one of the seven reserved characters is
  70  * encountered.  We can only map using one of the mechanisms at a time
  71  * since otherwise readdir could return directory entries which we would
  72  * not be able to open
  73  *
  74  * NO_MAP_UNI_RSVD  = do not perform any remapping of the character
  75  * SFM_MAP_UNI_RSVD = map reserved characters using SFM scheme (MAC compatible)
  76  * SFU_MAP_UNI_RSVD = map reserved characters ala SFU ("mapchars" option)
  77  *
  78  */
   79 #define NO_MAP_UNI_RSVD         0    /* reserved characters are not remapped */
   80 #define SFM_MAP_UNI_RSVD        1    /* remap using the SFM scheme (SFM_* values) */
   81 #define SFU_MAP_UNI_RSVD        2    /* remap using the SFU scheme; presumably the UNI_* values - confirm in the conversion code */
  82
  83 /* Just define what we want from uniupr.h.  We don't want to define the tables
  84  * in each source file.
  85  */
   86 #ifndef UNICASERANGE_DEFINED
   87 struct UniCaseRange {           /* one entry of a case conversion range list */
   88         wchar_t start;          /* first character of the range; 0 ends a range list */
   89         wchar_t end;            /* last character of the range (inclusive) */
   90         signed char *table;     /* signed deltas, indexed by (character - start) */
   91 };
   92 #endif                          /* UNICASERANGE_DEFINED */
  93
   94 #ifndef UNIUPR_NOUPPER
   95 extern signed char CifsUniUpperTable[512];      /* signed delta to upper case, indexed directly by characters below 512 */
   96 extern const struct UniCaseRange CifsUniUpperRange[];   /* range list consulted for all other characters */
   97 #endif                          /* UNIUPR_NOUPPER */
  98
   99 #ifndef UNIUPR_NOLOWER
  100 extern signed char CifsUniLowerTable[512];      /* signed delta to lower case, indexed directly by characters below 512 */
  101 extern const struct UniCaseRange CifsUniLowerRange[];   /* range list consulted for all other characters */
  102 #endif                          /* UNIUPR_NOLOWER */
 103
  104 #ifdef __KERNEL__       /* conversion routines below are declared for kernel builds only */
  105 int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
  106                     const struct nls_table *cp, int map_type);  /* map_type: presumably one of the *_MAP_UNI_RSVD values - confirm */
  107 int cifs_utf16_bytes(const __le16 *from, int maxbytes,
  108                      const struct nls_table *codepage);
  109 int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
  110 char *cifs_strndup_from_utf16(const char *src, const int maxlen,
  111                               const bool is_unicode,
  112                               const struct nls_table *codepage);
  113 extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
  114                               const struct nls_table *cp, int mapChars);
  115 #ifdef CONFIG_CIFS_SMB2 /* only declared when SMB2 support is configured */
  116 extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
  117                                      int *utf16_len, const struct nls_table *cp,
  118                                      int remap);
  119 #endif /* CONFIG_CIFS_SMB2 */
  120 #endif /* __KERNEL__ */
 121
  122 wchar_t cifs_toupper(wchar_t in);       /* declared outside the __KERNEL__ guard; the definition is not in this header */
 123
 124 /*
 125  * UniStrcat:  Concatenate the second string to the first
 126  *
 127  * Returns:
 128  *     Address of the first string
 129  */
 130 static inline wchar_t *
 131 UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
 132 {
 133         wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */
 134
 135         while (*ucs1++) ;       /* To end of first string */
 136         ucs1--;                 /* Return to the null */
 137         while ((*ucs1++ = *ucs2++)) ;   /* copy string 2 over */
 138         return anchor;
 139 }
 140
 141 /*
 142  * UniStrchr:  Find a character in a string
 143  *
 144  * Returns:
 145  *     Address of first occurrence of character in string
 146  *     or NULL if the character is not in the string
 147  */
 148 static inline wchar_t *
 149 UniStrchr(const wchar_t *ucs, wchar_t uc)
 150 {
 151         while ((*ucs != uc) && *ucs)
 152                 ucs++;
 153
 154         if (*ucs == uc)
 155                 return (wchar_t *) ucs;
 156         return NULL;
 157 }
 158
 159 /*
 160  * UniStrcmp:  Compare two strings
 161  *
 162  * Returns:
 163  *     < 0:  First string is less than second
 164  *     = 0:  Strings are equal
 165  *     > 0:  First string is greater than second
 166  */
 167 static inline int
 168 UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
 169 {
 170         while ((*ucs1 == *ucs2) && *ucs1) {
 171                 ucs1++;
 172                 ucs2++;
 173         }
 174         return (int) *ucs1 - (int) *ucs2;
 175 }
 176
 177 /*
 178  * UniStrcpy:  Copy a string
 179  */
 180 static inline wchar_t *
 181 UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
 182 {
 183         wchar_t *anchor = ucs1; /* save the start of result string */
 184
 185         while ((*ucs1++ = *ucs2++)) ;
 186         return anchor;
 187 }
 188
 189 /*
 190  * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 191  */
 192 static inline size_t
 193 UniStrlen(const wchar_t *ucs1)
 194 {
 195         int i = 0;
 196
 197         while (*ucs1++)
 198                 i++;
 199         return i;
 200 }
 201
 202 /*
 203  * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 204  *              string (length limited)
 205  */
 206 static inline size_t
 207 UniStrnlen(const wchar_t *ucs1, int maxlen)
 208 {
 209         int i = 0;
 210
 211         while (*ucs1++) {
 212                 i++;
 213                 if (i >= maxlen)
 214                         break;
 215         }
 216         return i;
 217 }
 218
 219 /*
 220  * UniStrncat:  Concatenate length limited string
 221  */
 222 static inline wchar_t *
 223 UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 224 {
 225         wchar_t *anchor = ucs1; /* save pointer to string 1 */
 226
 227         while (*ucs1++) ;
 228         ucs1--;                 /* point to null terminator of s1 */
 229         while (n-- && (*ucs1 = *ucs2)) {        /* copy s2 after s1 */
 230                 ucs1++;
 231                 ucs2++;
 232         }
 233         *ucs1 = 0;              /* Null terminate the result */
 234         return (anchor);
 235 }
 236
 237 /*
 238  * UniStrncmp:  Compare length limited string
 239  */
 240 static inline int
 241 UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 242 {
 243         if (!n)
 244                 return 0;       /* Null strings are equal */
 245         while ((*ucs1 == *ucs2) && *ucs1 && --n) {
 246                 ucs1++;
 247                 ucs2++;
 248         }
 249         return (int) *ucs1 - (int) *ucs2;
 250 }
 251
 252 /*
 253  * UniStrncmp_le:  Compare length limited string - native to little-endian
 254  */
 255 static inline int
 256 UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 257 {
 258         if (!n)
 259                 return 0;       /* Null strings are equal */
 260         while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
 261                 ucs1++;
 262                 ucs2++;
 263         }
 264         return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
 265 }
 266
 267 /*
 268  * UniStrncpy:  Copy length limited string with pad
 269  */
 270 static inline wchar_t *
 271 UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 272 {
 273         wchar_t *anchor = ucs1;
 274
 275         while (n-- && *ucs2)    /* Copy the strings */
 276                 *ucs1++ = *ucs2++;
 277
 278         n++;
 279         while (n--)             /* Pad with nulls */
 280                 *ucs1++ = 0;
 281         return anchor;
 282 }
 283
 284 /*
 285  * UniStrncpy_le:  Copy length limited string with pad to little-endian
 286  */
 287 static inline wchar_t *
 288 UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 289 {
 290         wchar_t *anchor = ucs1;
 291
 292         while (n-- && *ucs2)    /* Copy the strings */
 293                 *ucs1++ = __le16_to_cpu(*ucs2++);
 294
 295         n++;
 296         while (n--)             /* Pad with nulls */
 297                 *ucs1++ = 0;
 298         return anchor;
 299 }
 300
 301 /*
 302  * UniStrstr:  Find a string in a string
 303  *
 304  * Returns:
 305  *     Address of first match found
 306  *     NULL if no matching string is found
 307  */
 308 static inline wchar_t *
 309 UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
 310 {
 311         const wchar_t *anchor1 = ucs1;
 312         const wchar_t *anchor2 = ucs2;
 313
 314         while (*ucs1) {
 315                 if (*ucs1 == *ucs2) {
 316                         /* Partial match found */
 317                         ucs1++;
 318                         ucs2++;
 319                 } else {
 320                         if (!*ucs2)     /* Match found */
 321                                 return (wchar_t *) anchor1;
 322                         ucs1 = ++anchor1;       /* No match */
 323                         ucs2 = anchor2;
 324                 }
 325         }
 326
 327         if (!*ucs2)             /* Both end together */
 328                 return (wchar_t *) anchor1;     /* Match found */
 329         return NULL;            /* No match */
 330 }
 331
 332 #ifndef UNIUPR_NOUPPER
 333 /*
 334  * UniToupper:  Convert a unicode character to upper case
 335  */
 336 static inline wchar_t
 337 UniToupper(register wchar_t uc)
 338 {
 339         register const struct UniCaseRange *rp;
 340
 341         if (uc < sizeof(CifsUniUpperTable)) {
 342                 /* Latin characters */
 343                 return uc + CifsUniUpperTable[uc];      /* Use base tables */
 344         } else {
 345                 rp = CifsUniUpperRange; /* Use range tables */
 346                 while (rp->start) {
 347                         if (uc < rp->start)     /* Before start of range */
 348                                 return uc;      /* Uppercase = input */
 349                         if (uc <= rp->end)      /* In range */
 350                                 return uc + rp->table[uc - rp->start];
 351                         rp++;   /* Try next range */
 352                 }
 353         }
 354         return uc;              /* Past last range */
 355 }
 356
 357 /*
 358  * UniStrupr:  Upper case a unicode string
 359  */
 360 static inline __le16 *
 361 UniStrupr(register __le16 *upin)
 362 {
 363         register __le16 *up;
 364
 365         up = upin;
 366         while (*up) {           /* For all characters */
 367                 *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
 368                 up++;
 369         }
 370         return upin;            /* Return input pointer */
 371 }
 372 #endif                          /* UNIUPR_NOUPPER */
 373
 374 #ifndef UNIUPR_NOLOWER
 375 /*
 376  * UniTolower:  Convert a unicode character to lower case
 377  */
 378 static inline wchar_t
 379 UniTolower(register wchar_t uc)
 380 {
 381         register const struct UniCaseRange *rp;
 382
 383         if (uc < sizeof(CifsUniLowerTable)) {
 384                 /* Latin characters */
 385                 return uc + CifsUniLowerTable[uc];      /* Use base tables */
 386         } else {
 387                 rp = CifsUniLowerRange; /* Use range tables */
 388                 while (rp->start) {
 389                         if (uc < rp->start)     /* Before start of range */
 390                                 return uc;      /* Uppercase = input */
 391                         if (uc <= rp->end)      /* In range */
 392                                 return uc + rp->table[uc - rp->start];
 393                         rp++;   /* Try next range */
 394                 }
 395         }
 396         return uc;              /* Past last range */
 397 }
 398
 399 /*
 400  * UniStrlwr:  Lower case a unicode string
 401  */
 402 static inline wchar_t *
 403 UniStrlwr(register wchar_t *upin)
 404 {
 405         register wchar_t *up;
 406
 407         up = upin;
 408         while (*up) {           /* For all characters */
 409                 *up = UniTolower(*up);
 410                 up++;
 411         }
 412         return upin;            /* Return input pointer */
 413 }
 414
 415 #endif
 416
 417 #endif /* _CIFS_UNICODE_H */