src/util.c

   1 /*
   2  *
   3  *  oFono - Open Source Telephony
   4  *
   5  *  Copyright (C) 2008-2011  Intel Corporation. All rights reserved.
   6  *  Copyright (C) 2009-2010  Nokia Corporation and/or its subsidiary(-ies).
   7  *
   8  *  This program is free software; you can redistribute it and/or modify
   9  *  it under the terms of the GNU General Public License version 2 as
  10  *  published by the Free Software Foundation.
  11  *
  12  *  This program is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with this program; if not, write to the Free Software
  19  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  20  *
  21  */
  22
  23 #ifdef HAVE_CONFIG_H
  24 #include <config.h>
  25 #endif
  26
  27 #include <stdio.h>
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <stdlib.h>
  31
  32 #include <glib.h>
  33
  34 #include "util.h"
  35
  36 /*
  37         Name:                   GSM 03.38 to Unicode
  38         Unicode version:        3.0
  39         Table version:          1.1
  40         Table format:           Format A
  41         Date:                   2000 May 30
  42         Authors:                Ken Whistler
  43                                 Kent Karlsson
  44                                 Markus Kuhn
  45
  46         Copyright (c) 2000 Unicode, Inc.  All Rights reserved.
  47
  48         This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
  49         No claims are made as to fitness for any particular purpose.  No
  50         warranties of any kind are expressed or implied.  The recipient
  51         agrees to determine applicability of information provided.  If this
  52         file has been provided on optical media by Unicode, Inc., the sole
  53         remedy for any claim will be exchange of defective media within 90
  54         days of receipt.
  55
  56         Unicode, Inc. hereby grants the right to freely use the information
  57         supplied in this file in the creation of products supporting the
  58         Unicode Standard, and to make copies of this file in any form for
  59         internal or external distribution as long as this notice remains
  60         attached.
  61 */
  62
   63 #define GUND                    0xFFFF  /* "no mapping" sentinel; codepoint_lookup() returns it when bsearch() finds no entry */
   64
      /*
       * Number of UTF-8 bytes needed to encode code point c.  Everything from
       * 0x800 upwards is counted as 3 bytes, so the result is only meaningful
       * for values up to 0xFFFF.  Note that c may be evaluated twice.
       */
   65 #define UTF8_LENGTH(c) \
   66         ((c) < 0x80 ? 1 : ((c) < 0x800 ? 2 : 3))
   67
      /*
       * Number of entries in a struct codepoint array.  t must be the array
       * itself; applied to a pointer the sizeof would measure the pointer.
       */
   68 #define TABLE_SIZE(t) \
   69         (sizeof((t)) / sizeof(struct codepoint))
  70
   71 struct codepoint {              /* one entry of a sorted key -> value mapping table */
   72         unsigned short from;    /* key the tables are sorted and searched by */
   73         unsigned short to;      /* value handed back by codepoint_lookup() on a match */
   74 };
  75
   76 struct conversion_table {
   77         /* Unicode -> GSM locking shift table, searched by Unicode code point */
   78         const struct codepoint *locking_u;
   79         unsigned int locking_len_u;
   80
   81         /* Unicode -> GSM single shift table, searched by Unicode code point */
   82         const struct codepoint *single_u;
   83         unsigned int single_len_u;
   84
   85         /* GSM -> Unicode locking shift table, indexed by GSM code, fixed size */
   86         const unsigned short *locking_g;
   87
   88         /* GSM -> Unicode single shift table, searched by the escaped GSM code */
   89         const struct codepoint *single_g;
   90         unsigned int single_len_g;
   91 };
  92
   93 /*
       * GSM to Unicode extension table, for GSM sequences starting with 0x1B.
       * `from` is the GSM code that follows the escape, `to` the Unicode code
       * point.  Entries must stay sorted by `from`: codepoint_lookup() runs
       * bsearch() over this table.
       */
   94 static const struct codepoint def_ext_gsm[] = {
   95         { 0x0A, 0x000C },               /* See NOTE 3 in 23.038 */
   96         { 0x14, 0x005E },
   97         { 0x1B, 0x0020 },               /* See NOTE 1 in 23.038 */
   98         { 0x28, 0x007B },
   99         { 0x29, 0x007D },
  100         { 0x2F, 0x005C },
  101         { 0x3C, 0x005B },
  102         { 0x3D, 0x007E },
  103         { 0x3E, 0x005D },
  104         { 0x40, 0x007C },
  105         { 0x65, 0x20AC }
  106 };
 107
  108 static const struct codepoint def_ext_unicode[] = {
              /*
               * Unicode to GSM extension table (default single shift set).
               * `to` packs the escape 0x1B in the high byte and the escaped
               * GSM code in the low byte.  Entries must stay sorted by `from`
               * because codepoint_lookup() runs bsearch() over this table.
               */
  109         { 0x000C, 0x1B0A },
  110         { 0x005B, 0x1B3C },
  111         { 0x005C, 0x1B2F },
  112         { 0x005D, 0x1B3E },
  113         { 0x005E, 0x1B14 },
  114         { 0x007B, 0x1B28 },
  115         { 0x007C, 0x1B40 },
  116         { 0x007D, 0x1B29 },
  117         { 0x007E, 0x1B3D },
  118         { 0x20AC, 0x1B65 }
  119 };
 120
  121 /*
       * Turkish single shift table, GSM to Unicode.
       * Appendix A.2.1. in 3GPP TS23.038, V.8.2.0
       * Entries must stay sorted by `from` (the escaped GSM code) because
       * codepoint_lookup() runs bsearch() over this table.
       */
  122 static const struct codepoint tur_ext_gsm[] = {
  123         { 0x0A, 0x000C },               /* See NOTE 3 */
  124         { 0x14, 0x005E },
  125         { 0x1B, 0x0020 },               /* See NOTE 1 */
  126         { 0x28, 0x007B },
  127         { 0x29, 0x007D },
  128         { 0x2F, 0x005C },
  129         { 0x3C, 0x005B },
  130         { 0x3D, 0x007E },
  131         { 0x3E, 0x005D },
  132         { 0x40, 0x007C },
  133         { 0x47, 0x011E },
  134         { 0x49, 0x0130 },
  135         { 0x53, 0x015E },
  136         { 0x63, 0x00E7 },
  137         { 0x65, 0x20AC },
  138         { 0x67, 0x011F },
  139         { 0x69, 0x0131 },
  140         { 0x73, 0x015F }
  141 };
 142
  143 static const struct codepoint tur_ext_unicode[] = {
              /*
               * Turkish single shift table, Unicode to GSM.  `to` packs the
               * escape 0x1B in the high byte and the escaped GSM code in the
               * low byte.  Entries must stay sorted by `from` because
               * codepoint_lookup() runs bsearch() over this table.
               */
  144         { 0x000C, 0x1B0A },
  145         { 0x005B, 0x1B3C },
  146         { 0x005C, 0x1B2F },
  147         { 0x005D, 0x1B3E },
  148         { 0x005E, 0x1B14 },
  149         { 0x007B, 0x1B28 },
  150         { 0x007C, 0x1B40 },
  151         { 0x007D, 0x1B29 },
  152         { 0x007E, 0x1B3D },
  153         { 0x00E7, 0x1B63 },
  154         { 0x011E, 0x1B47 },
  155         { 0x011F, 0x1B67 },
  156         { 0x0130, 0x1B49 },
  157         { 0x0131, 0x1B69 },
  158         { 0x015E, 0x1B53 },
  159         { 0x015F, 0x1B73 },
  160         { 0x20AC, 0x1B65 }
  161 };
 162
  163 /*
       * Spanish single shift table, GSM to Unicode.
       * Appendix A.2.2. in 3GPP TS23.038 V.8.2.0
       * Entries must stay sorted by `from` (the escaped GSM code) because
       * codepoint_lookup() runs bsearch() over this table.
       */
  164 static const struct codepoint spa_ext_gsm[] = {
  165         { 0x09, 0x00E7 },
  166         { 0x0A, 0x000C },               /* See NOTE 3 */
  167         { 0x14, 0x005E },
  168         { 0x1B, 0x0020 },               /* See NOTE 1 */
  169         { 0x28, 0x007B },
  170         { 0x29, 0x007D },
  171         { 0x2F, 0x005C },
  172         { 0x3C, 0x005B },
  173         { 0x3D, 0x007E },
  174         { 0x3E, 0x005D },
  175         { 0x40, 0x007C },
  176         { 0x41, 0x00C1 },
  177         { 0x49, 0x00CD },
  178         { 0x4F, 0x00D3 },
  179         { 0x55, 0x00DA },
  180         { 0x61, 0x00E1 },
  181         { 0x65, 0x20AC },
  182         { 0x69, 0x00ED },
  183         { 0x6F, 0x00F3 },
  184         { 0x75, 0x00FA }
  185 };
 186
  187 static const struct codepoint spa_ext_unicode[] = {
              /*
               * Spanish single shift table, Unicode to GSM.  `to` packs the
               * escape 0x1B in the high byte and the escaped GSM code in the
               * low byte.  Entries must stay sorted by `from` because
               * codepoint_lookup() runs bsearch() over this table.
               */
  188         { 0x000C, 0x1B0A },
  189         { 0x005B, 0x1B3C },
  190         { 0x005C, 0x1B2F },
  191         { 0x005D, 0x1B3E },
  192         { 0x005E, 0x1B14 },
  193         { 0x007B, 0x1B28 },
  194         { 0x007C, 0x1B40 },
  195         { 0x007D, 0x1B29 },
  196         { 0x007E, 0x1B3D },
  197         { 0x00C1, 0x1B41 },
  198         { 0x00CD, 0x1B49 },
  199         { 0x00D3, 0x1B4F },
  200         { 0x00DA, 0x1B55 },
  201         { 0x00E1, 0x1B61 },
  202         { 0x00E7, 0x1B09 },
  203         { 0x00ED, 0x1B69 },
  204         { 0x00F3, 0x1B6F },
  205         { 0x00FA, 0x1B75 },
  206         { 0x20AC, 0x1B65 }
  207 };
 208
  209 /*
       * Portuguese single shift table, GSM to Unicode.
       * Appendix A.2.3. in 3GPP TS23.038 V.8.2.0
       * Entries must stay sorted by `from` (the escaped GSM code) because
       * codepoint_lookup() runs bsearch() over this table.
       */
  210 static const struct codepoint por_ext_gsm[] = {
  211         { 0x05, 0x00EA },
  212         { 0x09, 0x00E7 },
  213         { 0x0A, 0x000C },               /* See NOTE 3 */
  214         { 0x0B, 0x00D4 },
  215         { 0x0C, 0x00F4 },
  216         { 0x0E, 0x00C1 },
  217         { 0x0F, 0x00E1 },
  218         { 0x12, 0x03A6 },
  219         { 0x13, 0x0393 },
  220         { 0x14, 0x005E },
  221         { 0x15, 0x03A9 },
  222         { 0x16, 0x03A0 },
  223         { 0x17, 0x03A8 },
  224         { 0x18, 0x03A3 },
  225         { 0x19, 0x0398 },
  226         { 0x1B, 0x0020 },               /* See NOTE 1 */
  227         { 0x1F, 0x00CA },
  228         { 0x28, 0x007B },
  229         { 0x29, 0x007D },
  230         { 0x2F, 0x005C },
  231         { 0x3C, 0x005B },
  232         { 0x3D, 0x007E },
  233         { 0x3E, 0x005D },
  234         { 0x40, 0x007C },
  235         { 0x41, 0x00C0 },
  236         { 0x49, 0x00CD },
  237         { 0x4F, 0x00D3 },
  238         { 0x55, 0x00DA },
  239         { 0x5B, 0x00C3 },
  240         { 0x5C, 0x00D5 },
  241         { 0x61, 0x00C2 },
  242         { 0x65, 0x20AC },
  243         { 0x69, 0x00ED },
  244         { 0x6F, 0x00F3 },
  245         { 0x75, 0x00FA },
  246         { 0x7B, 0x00E3 },
  247         { 0x7C, 0x00F5 },
  248         { 0x7F, 0x00E2 }
  249 };
 250
  251 static const struct codepoint por_ext_unicode[] = {
              /*
               * Portuguese single shift table, Unicode to GSM.  `to` packs
               * the escape 0x1B in the high byte and the escaped GSM code in
               * the low byte.  Entries must stay sorted by `from` because
               * codepoint_lookup() runs bsearch() over this table.
               */
  252         { 0x000C, 0x1B0A },
  253         { 0x005B, 0x1B3C },
  254         { 0x005C, 0x1B2F },
  255         { 0x005D, 0x1B3E },
  256         { 0x005E, 0x1B14 },
  257         { 0x007B, 0x1B28 },
  258         { 0x007C, 0x1B40 },
  259         { 0x007D, 0x1B29 },
  260         { 0x007E, 0x1B3D },
  261         { 0x00C0, 0x1B41 },
  262         { 0x00C1, 0x1B0E },
  263         { 0x00C2, 0x1B61 },
  264         { 0x00C3, 0x1B5B },
  265         { 0x00CA, 0x1B1F },
  266         { 0x00CD, 0x1B49 },
  267         { 0x00D3, 0x1B4F },
  268         { 0x00D4, 0x1B0B },
  269         { 0x00D5, 0x1B5C },
  270         { 0x00DA, 0x1B55 },
  271         { 0x00E1, 0x1B0F },
  272         { 0x00E2, 0x1B7F },
  273         { 0x00E3, 0x1B7B },
  274         { 0x00E7, 0x1B09 },
  275         { 0x00EA, 0x1B05 },
  276         { 0x00ED, 0x1B69 },
  277         { 0x00F3, 0x1B6F },
  278         { 0x00F4, 0x1B0C },
  279         { 0x00F5, 0x1B7C },
  280         { 0x00FA, 0x1B75 },
  281         { 0x0393, 0x1B13 },
  282         { 0x0398, 0x1B19 },
  283         { 0x03A0, 0x1B16 },
  284         { 0x03A3, 0x1B18 },
  285         { 0x03A6, 0x1B12 },
  286         { 0x03A8, 0x1B17 },
  287         { 0x03A9, 0x1B15 },
  288         { 0x20AC, 0x1B65 }
  289 };
 290
  291 /*
       * Used for conversion of GSM to Unicode (default locking shift table).
       * Indexed directly by the GSM code: 128 entries, one per value in the
       * range 0x00-0x7F, so callers must never index it with a larger value.
       */
  292 static const unsigned short def_gsm[] = {
  293         0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC,
  294         0x00F2, 0x00C7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5,
  295         0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8,
  296         0x03A3, 0x0398, 0x039E, 0x00A0, 0x00C6, 0x00E6, 0x00DF, 0x00C9,
  297         0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027,
  298         0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
  299         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
  300         0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
  301         0x00A1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
  302         0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  303         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
  304         0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7,
  305         0x00BF, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
  306         0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
  307         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
  308         0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0
  309 };
 310
  311 /*
       * Used for conversion of Unicode to GSM (default locking shift table).
       * `from` is the Unicode code point, `to` the single GSM code.  Entries
       * must stay sorted by `from` because codepoint_lookup() runs bsearch()
       * over this table.
       */
  312 static const struct codepoint def_unicode[] = {
  313         { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
  314         { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
  315         { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
  316         { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
  317         { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
  318         { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
  319         { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
  320         { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
  321         { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
  322         { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
  323         { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
  324         { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
  325         { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
  326         { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
  327         { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
  328         { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 },
  329         { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 },
  330         { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A },
  331         { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E },
  332         { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 },
  333         { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 },
  334         { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A },
  335         { 0x00A0, 0x20 }, { 0x00A1, 0x40 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 },
  336         { 0x00A5, 0x03 }, { 0x00A7, 0x5F }, { 0x00BF, 0x60 }, { 0x00C4, 0x5B },
  337         { 0x00C5, 0x0E }, { 0x00C6, 0x1C }, { 0x00C7, 0x09 }, { 0x00C9, 0x1F },
  338         { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00D8, 0x0B }, { 0x00DC, 0x5E },
  339         { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
  340         { 0x00E6, 0x1D }, { 0x00E8, 0x04 }, { 0x00E9, 0x05 }, { 0x00EC, 0x07 },
  341         { 0x00F1, 0x7D }, { 0x00F2, 0x08 }, { 0x00F6, 0x7C }, { 0x00F8, 0x0C },
  342         { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x0393, 0x13 }, { 0x0394, 0x10 },
  343         { 0x0398, 0x19 }, { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 },
  344         { 0x03A3, 0x18 }, { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }
  345 };
 346
  347 /*
       * Turkish locking shift table, GSM to Unicode.
       * Appendix A.3.1 in 3GPP TS23.038
       * Indexed directly by the GSM code: 128 entries, one per value in the
       * range 0x00-0x7F.
       */
  348 static const unsigned short tur_gsm[] = {
  349         0x0040, 0x00A3, 0x0024, 0x00A5, 0x20AC, 0x00E9, 0x00F9, 0x0131,
  350         0x00F2, 0x00C7, 0x000A, 0x011E, 0x011F, 0x000D, 0x00C5, 0x00E5,
  351         0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8,
  352         0x03A3, 0x0398, 0x039E, 0x00A0, 0x015E, 0x015F, 0x00DF, 0x00C9,
  353         0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027,
  354         0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
  355         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
  356         0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
  357         0x0130, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
  358         0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  359         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
  360         0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7,
  361         0x00E7, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
  362         0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
  363         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
  364         0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0
  365 };
 366
  367 static const struct codepoint tur_unicode[] = {
              /*
               * Turkish locking shift table, Unicode to GSM.  `from` is the
               * Unicode code point, `to` the single GSM code.  Entries must
               * stay sorted by `from` because codepoint_lookup() runs
               * bsearch() over this table.
               */
  368         { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
  369         { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
  370         { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
  371         { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
  372         { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
  373         { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
  374         { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
  375         { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
  376         { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
  377         { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
  378         { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
  379         { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
  380         { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
  381         { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
  382         { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
  383         { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x0061, 0x61 }, { 0x0062, 0x62 },
  384         { 0x0063, 0x63 }, { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 },
  385         { 0x0067, 0x67 }, { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A },
  386         { 0x006B, 0x6B }, { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E },
  387         { 0x006F, 0x6F }, { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 },
  388         { 0x0073, 0x73 }, { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 },
  389         { 0x0077, 0x77 }, { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A },
  390         { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 }, { 0x00A5, 0x03 },
  391         { 0x00A7, 0x5F }, { 0x00C4, 0x5B }, { 0x00C5, 0x0E }, { 0x00C7, 0x09 },
  392         { 0x00C9, 0x1F }, { 0x00D1, 0x5D }, { 0x00D6, 0x5C }, { 0x00DC, 0x5E },
  393         { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
  394         { 0x00E7, 0x60 }, { 0x00E9, 0x05 }, { 0x00F1, 0x7D }, { 0x00F2, 0x08 },
  395         { 0x00F6, 0x7C }, { 0x00F9, 0x06 }, { 0x00FC, 0x7E }, { 0x011E, 0x0B },
  396         { 0x011F, 0x0C }, { 0x0130, 0x40 }, { 0x0131, 0x07 }, { 0x015E, 0x1C },
  397         { 0x015F, 0x1D }, { 0x0393, 0x13 }, { 0x0394, 0x10 }, { 0x0398, 0x19 },
  398         { 0x039B, 0x14 }, { 0x039E, 0x1A }, { 0x03A0, 0x16 }, { 0x03A3, 0x18 },
  399         { 0x03A6, 0x12 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 }, { 0x20AC, 0x04 }
  400 };
 401
  402 /*
       * Portuguese locking shift table, GSM to Unicode.
       * Appendix A.3.2 in 3GPP TS23.038
       * Indexed directly by the GSM code: 128 entries, one per value in the
       * range 0x00-0x7F.
       */
  403 static const unsigned short por_gsm[] = {
  404         0x0040, 0x00A3, 0x0024, 0x00A5, 0x00EA, 0x00E9, 0x00FA, 0x00ED,
  405         0x00F3, 0x00E7, 0x000A, 0x00D4, 0x00F4, 0x000D, 0x00C1, 0x00E1,
  406         0x0394, 0x005F, 0x00AA, 0x00C7, 0x00C0, 0x221E, 0x005E, 0x005C,
  407         0x20ac, 0x00D3, 0x007C, 0x00A0, 0x00C2, 0x00E2, 0x00CA, 0x00C9,
  408         0x0020, 0x0021, 0x0022, 0x0023, 0x00BA, 0x0025, 0x0026, 0x0027,
  409         0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
  410         0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
  411         0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
  412         0x00CD, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
  413         0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  414         0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
  415         0x0058, 0x0059, 0x005A, 0x00C3, 0x00D5, 0x00DA, 0x00DC, 0x00A7,
  416         0x007E, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
  417         0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
  418         0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
  419         0x0078, 0x0079, 0x007A, 0x00E3, 0x00F5, 0x0060, 0x00FC, 0x00E0
  420 };
 421
  422 static const struct codepoint por_unicode[] = {
              /*
               * Portuguese locking shift table, Unicode to GSM.  `from` is
               * the Unicode code point, `to` the single GSM code.  Entries
               * must stay sorted by `from` because codepoint_lookup() runs
               * bsearch() over this table.
               */
  423         { 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
  424         { 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
  425         { 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
  426         { 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
  427         { 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
  428         { 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
  429         { 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
  430         { 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
  431         { 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
  432         { 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
  433         { 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
  434         { 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004C, 0x4C }, { 0x004D, 0x4D },
  435         { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 }, { 0x0051, 0x51 },
  436         { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 }, { 0x0055, 0x55 },
  437         { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 }, { 0x0059, 0x59 },
  438         { 0x005A, 0x5A }, { 0x005C, 0x17 }, { 0x005E, 0x16 }, { 0x005F, 0x11 },
  439         { 0x0060, 0x7D }, { 0x0061, 0x61 }, { 0x0062, 0x62 }, { 0x0063, 0x63 },
  440         { 0x0064, 0x64 }, { 0x0065, 0x65 }, { 0x0066, 0x66 }, { 0x0067, 0x67 },
  441         { 0x0068, 0x68 }, { 0x0069, 0x69 }, { 0x006A, 0x6A }, { 0x006B, 0x6B },
  442         { 0x006C, 0x6C }, { 0x006D, 0x6D }, { 0x006E, 0x6E }, { 0x006F, 0x6F },
  443         { 0x0070, 0x70 }, { 0x0071, 0x71 }, { 0x0072, 0x72 }, { 0x0073, 0x73 },
  444         { 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, { 0x0077, 0x77 },
  445         { 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, { 0x007C, 0x1A },
  446         { 0x007E, 0x60 }, { 0x00A0, 0x20 }, { 0x00A3, 0x01 }, { 0x00A5, 0x03 },
  447         { 0x00A7, 0x5F }, { 0x00AA, 0x12 }, { 0x00BA, 0x24 }, { 0x00C0, 0x14 },
  448         { 0x00C1, 0x0E }, { 0x00C2, 0x1C }, { 0x00C3, 0x5B }, { 0x00C7, 0x13 },
  449         { 0x00C9, 0x1F }, { 0x00CA, 0x1E }, { 0x00CD, 0x40 }, { 0x00D3, 0x19 },
  450         { 0x00D4, 0x0B }, { 0x00D5, 0x5C }, { 0x00DA, 0x5D }, { 0x00DC, 0x5E },
  451         { 0x00E0, 0x7F }, { 0x00E1, 0x0F }, { 0x00E2, 0x1D }, { 0x00E3, 0x7B },
  452         { 0x00E7, 0x09 }, { 0x00E9, 0x05 }, { 0x00EA, 0x04 }, { 0x00ED, 0x07 },
  453         { 0x00F3, 0x08 }, { 0x00F4, 0x0C }, { 0x00F5, 0x7C }, { 0x00FA, 0x06 },
  454         { 0x00FC, 0x7E }, { 0x0394, 0x10 }, { 0x20AC, 0x18 }, { 0x221E, 0x15 }
  455 };
 456
 457 static int compare_codepoints(const void *a, const void *b)
 458 {
 459         const struct codepoint *ca = (const struct codepoint *) a;
 460         const struct codepoint *cb = (const struct codepoint *) b;
 461
 462         return (ca->from > cb->from) - (ca->from < cb->from);
 463 }
 464
 465 static unsigned short codepoint_lookup(struct codepoint *key,
 466                                         const struct codepoint *table,
 467                                         unsigned int len)
 468 {
 469         struct codepoint *result = NULL;
 470
 471         result = bsearch(key, table, len, sizeof(struct codepoint),
 472                                 compare_codepoints);
 473
 474         return result ? result->to : GUND;
 475 }
 476
 477 static unsigned short gsm_locking_shift_lookup(struct conversion_table *t,
 478                                                 unsigned char k)
 479 {
 480         return t->locking_g[k];
 481 }
 482
 483 static unsigned short gsm_single_shift_lookup(struct conversion_table *t,
 484                                                 unsigned char k)
 485 {
 486         struct codepoint key = { k, 0 };
 487         return codepoint_lookup(&key, t->single_g, t->single_len_g);
 488 }
 489
 490 static unsigned short unicode_locking_shift_lookup(struct conversion_table *t,
 491                                                         unsigned short k)
 492 {
 493         struct codepoint key = { k, 0 };
 494         return codepoint_lookup(&key, t->locking_u, t->locking_len_u);
 495 }
 496
 497 static unsigned short unicode_single_shift_lookup(struct conversion_table *t,
 498                                                         unsigned short k)
 499 {
 500         struct codepoint key = { k, 0 };
 501         return codepoint_lookup(&key, t->single_u, t->single_len_u);
 502 }
 503
 504 static gboolean populate_locking_shift(struct conversion_table *t,
 505                                         enum gsm_dialect lang)
 506 {
 507         switch (lang) {
 508         case GSM_DIALECT_DEFAULT:
 509         case GSM_DIALECT_SPANISH:
 510                 t->locking_g = def_gsm;
 511                 t->locking_u = def_unicode;
 512                 t->locking_len_u = TABLE_SIZE(def_unicode);
 513                 return TRUE;
 514
 515         case GSM_DIALECT_TURKISH:
 516                 t->locking_g = tur_gsm;
 517                 t->locking_u = tur_unicode;
 518                 t->locking_len_u = TABLE_SIZE(tur_unicode);
 519                 return TRUE;
 520
 521         case GSM_DIALECT_PORTUGUESE:
 522                 t->locking_g = por_gsm;
 523                 t->locking_u = por_unicode;
 524                 t->locking_len_u = TABLE_SIZE(por_unicode);
 525                 return TRUE;
 526         }
 527
 528         return FALSE;
 529 }
 530
 531 static gboolean populate_single_shift(struct conversion_table *t,
 532                                         enum gsm_dialect lang)
 533 {
 534         switch (lang) {
 535         case GSM_DIALECT_DEFAULT:
 536                 t->single_g = def_ext_gsm;
 537                 t->single_len_g = TABLE_SIZE(def_ext_gsm);
 538                 t->single_u = def_ext_unicode;
 539                 t->single_len_u = TABLE_SIZE(def_ext_unicode);
 540                 return TRUE;
 541
 542         case GSM_DIALECT_TURKISH:
 543                 t->single_g = tur_ext_gsm;
 544                 t->single_len_g = TABLE_SIZE(tur_ext_gsm);
 545                 t->single_u = tur_ext_unicode;
 546                 t->single_len_u = TABLE_SIZE(tur_ext_unicode);
 547                 return TRUE;
 548
 549         case GSM_DIALECT_SPANISH:
 550                 t->single_g = spa_ext_gsm;
 551                 t->single_len_g = TABLE_SIZE(spa_ext_gsm);
 552                 t->single_u = spa_ext_unicode;
 553                 t->single_len_u = TABLE_SIZE(spa_ext_unicode);
 554                 return TRUE;
 555
 556         case GSM_DIALECT_PORTUGUESE:
 557                 t->single_g = por_ext_gsm;
 558                 t->single_len_g = TABLE_SIZE(por_ext_gsm);
 559                 t->single_u = por_ext_unicode;
 560                 t->single_len_u = TABLE_SIZE(por_ext_unicode);
 561                 return TRUE;
 562         }
 563
 564         return FALSE;
 565 }
 566
 567 static gboolean conversion_table_init(struct conversion_table *t,
 568                                         enum gsm_dialect locking,
 569                                         enum gsm_dialect single)
 570 {
 571         memset(t, 0, sizeof(struct conversion_table));
 572
 573         return populate_locking_shift(t, locking) &&
 574                         populate_single_shift(t, single);
 575 }
 576
 577 /*!
 578  * Converts text coded using GSM codec into UTF8 encoded text, using
 579  * the given language identifiers for single shift and locking shift
 580  * tables.  If len is less than 0, and terminator character is given,
 581  * the length is computed automatically.
 582  *
 583  * Returns newly-allocated UTF8 encoded string or NULL if the conversion
 584  * could not be performed.  Returns the number of bytes read from the
 585  * GSM encoded string in items_read (if not NULL), not including the
 586  * terminator character. Returns the number of bytes written into the UTF8
 587  * encoded string in items_written (if not NULL) not including the terminal
 588  * '\0' character.  The caller is responsible for freeing the returned value.
 589  */
 590 char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len,
 591                                         long *items_read, long *items_written,
 592                                         unsigned char terminator,
 593                                         enum gsm_dialect locking_lang,
 594                                         enum gsm_dialect single_lang)
 595 {
 596         char *res = NULL;
 597         char *out;
 598         long i = 0;
 599         long res_length;
 600
 601         struct conversion_table t;
 602
 603         if (conversion_table_init(&t, locking_lang, single_lang) == FALSE)
 604                 return NULL;
 605
 606         if (len < 0 && !terminator)
 607                 goto error;
 608
 609         if (len < 0) {
 610                 i = 0;
 611
 612                 while (text[i] != terminator)
 613                         i++;
 614
 615                 len = i;
 616         }
 617
 618         for (i = 0, res_length = 0; i < len; i++) {
 619                 unsigned short c;
 620
 621                 if (text[i] > 0x7f)
 622                         goto error;
 623
 624                 if (text[i] == 0x1b) {
 625                         ++i;
 626                         if (i >= len)
 627                                 goto error;
 628
 629                         c = gsm_single_shift_lookup(&t, text[i]);
 630
 631                         /*
 632                          * According to the comment in the table from
 633                          * 3GPP 23.038, Section 6.2.1.1:
 634                          * "In the event that an MS receives a code where
 635                          * a symbol is not represented in the above table
 636                          * then the MS shall display either the character
 637                          * shown in the main GSM 7 bit default  alphabet
 638                          * table in subclause 6.2.1., or the character from
 639                          * the National Language Locking Shift Table in the
 640                          * case where the locking shift mechanism as defined
 641                          * in subclause 6.2.1.2.3 is used."
 642                          */
 643                         if (c == GUND)
 644                                 c = gsm_locking_shift_lookup(&t, text[i]);
 645                 } else
 646                         c = gsm_locking_shift_lookup(&t, text[i]);
 647
 648                 res_length += UTF8_LENGTH(c);
 649         }
 650
 651         res = g_try_malloc(res_length + 1);
 652         if (res == NULL)
 653                 goto error;
 654
 655         out = res;
 656
 657         i = 0;
 658         while (out < res + res_length) {
 659                 unsigned short c;
 660
 661                 if (text[i] == 0x1b) {
 662                         c = gsm_single_shift_lookup(&t, text[++i]);
 663
 664                         if (c == GUND)
 665                                 c = gsm_locking_shift_lookup(&t, text[i]);
 666                 } else
 667                         c = gsm_locking_shift_lookup(&t, text[i]);
 668
 669                 out += g_unichar_to_utf8(c, out);
 670
 671                 ++i;
 672         }
 673
 674         *out = '\0';
 675
 676         if (items_written)
 677                 *items_written = out - res;
 678
 679 error:
 680         if (items_read)
 681                 *items_read = i;
 682
 683         return res;
 684 }
 685
 686 char *convert_gsm_to_utf8(const unsigned char *text, long len,
 687                                 long *items_read, long *items_written,
 688                                 unsigned char terminator)
 689 {
 690         return convert_gsm_to_utf8_with_lang(text, len, items_read,
 691                                                 items_written,
 692                                                 terminator,
 693                                                 GSM_DIALECT_DEFAULT,
 694                                                 GSM_DIALECT_DEFAULT);
 695 }
 696
 697 /*!
 698  * Converts UTF-8 encoded text to GSM alphabet.  The result is unpacked,
 699  * with the 7th bit always 0.  If terminator is not 0, a terminator character
 700  * is appended to the result.  This should be in the range 0x80-0xf0
 701  *
 702  * Returns the encoded data or NULL if the data could not be encoded.  The
 703  * data must be freed by the caller.  If items_read is not NULL, it contains
 704  * the actual number of bytes read.  If items_written is not NULL, contains
 705  * the number of bytes written.
 706  */
 707 unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len,
 708                                         long *items_read, long *items_written,
 709                                         unsigned char terminator,
 710                                         enum gsm_dialect locking_lang,
 711                                         enum gsm_dialect single_lang)
 712 {
 713         struct conversion_table t;
 714         long nchars = 0;
 715         const char *in;
 716         unsigned char *out;
 717         unsigned char *res = NULL;
 718         long res_len;
 719         long i;
 720
 721         if (conversion_table_init(&t, locking_lang, single_lang) == FALSE)
 722                 return NULL;
 723
 724         in = text;
 725         res_len = 0;
 726
 727         while ((len < 0 || text + len - in > 0) && *in) {
 728                 long max = len < 0 ? 6 : text + len - in;
 729                 gunichar c = g_utf8_get_char_validated(in, max);
 730                 unsigned short converted = GUND;
 731
 732                 if (c & 0x80000000)
 733                         goto err_out;
 734
 735                 if (c > 0xffff)
 736                         goto err_out;
 737
 738                 converted = unicode_locking_shift_lookup(&t, c);
 739
 740                 if (converted == GUND)
 741                         converted = unicode_single_shift_lookup(&t, c);
 742
 743                 if (converted == GUND)
 744                         goto err_out;
 745
 746                 if (converted & 0x1b00)
 747                         res_len += 2;
 748                 else
 749                         res_len += 1;
 750
 751                 in = g_utf8_next_char(in);
 752                 nchars += 1;
 753         }
 754
 755         res = g_try_malloc(res_len + (terminator ? 1 : 0));
 756         if (res == NULL)
 757                 goto err_out;
 758
 759         in = text;
 760         out = res;
 761         for (i = 0; i < nchars; i++) {
 762                 unsigned short converted;
 763
 764                 gunichar c = g_utf8_get_char(in);
 765
 766                 converted = unicode_locking_shift_lookup(&t, c);
 767
 768                 if (converted == GUND)
 769                         converted = unicode_single_shift_lookup(&t, c);
 770
 771                 if (converted & 0x1b00) {
 772                         *out = 0x1b;
 773                         ++out;
 774                 }
 775
 776                 *out = converted;
 777                 ++out;
 778
 779                 in = g_utf8_next_char(in);
 780         }
 781
 782         if (terminator)
 783                 *out = terminator;
 784
 785         if (items_written)
 786                 *items_written = out - res;
 787
 788 err_out:
 789         if (items_read)
 790                 *items_read = in - text;
 791
 792         return res;
 793 }
 794
 795 unsigned char *convert_utf8_to_gsm(const char *text, long len,
 796                                         long *items_read, long *items_written,
 797                                         unsigned char terminator)
 798 {
 799         return convert_utf8_to_gsm_with_lang(text, len, items_read,
 800                                                 items_written,
 801                                                 terminator,
 802                                                 GSM_DIALECT_DEFAULT,
 803                                                 GSM_DIALECT_DEFAULT);
 804 }
 805
 806 /*!
 807  * Converts UTF-8 encoded text to GSM alphabet. It finds an encoding
 808  * that uses the minimum set of GSM dialects based on the hint given.
 809  *
 810  * It first attempts to use the default dialect's single shift and
 811  * locking shift tables. It then tries with only the single shift
 812  * table of the hinted dialect, and finally with both the single shift
 813  * and locking shift tables of the hinted dialect.
 814  *
 815  * Returns the encoded data or NULL if no suitable encoding could be
 816  * found. The data must be freed by the caller. If items_read is not
 817  * NULL, it contains the actual number of bytes read. If items_written
 818  * is not NULL, it contains the number of bytes written. If
 819  * used_locking and used_single are not NULL, they will contain the
 820  * dialects used for the locking shift and single shift tables.
 821  */
 822 unsigned char *convert_utf8_to_gsm_best_lang(const char *utf8, long len,
 823                                         long *items_read, long *items_written,
 824                                         unsigned char terminator,
 825                                         enum gsm_dialect hint,
 826                                         enum gsm_dialect *used_locking,
 827                                         enum gsm_dialect *used_single)
 828 {
 829         enum gsm_dialect locking = GSM_DIALECT_DEFAULT;
 830         enum gsm_dialect single = GSM_DIALECT_DEFAULT;
 831         unsigned char *encoded;
 832
 833         encoded = convert_utf8_to_gsm_with_lang(utf8, len, items_read,
 834                                                 items_written, terminator,
 835                                                 locking, single);
 836         if (encoded != NULL)
 837                 goto out;
 838
 839         if (hint == GSM_DIALECT_DEFAULT)
 840                 return NULL;
 841
 842         single = hint;
 843         encoded = convert_utf8_to_gsm_with_lang(utf8, len, items_read,
 844                                                 items_written, terminator,
 845                                                 locking, single);
 846         if (encoded != NULL)
 847                 goto out;
 848
 849         /* Spanish dialect uses the default locking shift table */
 850         if (hint == GSM_DIALECT_SPANISH)
 851                 return NULL;
 852
 853         locking = hint;
 854         encoded = convert_utf8_to_gsm_with_lang(utf8, len, items_read,
 855                                                 items_written, terminator,
 856                                                 locking, single);
 857
 858         if (encoded == NULL)
 859                 return NULL;
 860
 861 out:
 862         if (used_locking != NULL)
 863                 *used_locking = locking;
 864
 865         if (used_single != NULL)
 866                 *used_single = single;
 867
 868         return encoded;
 869 }
 870
 871 /*!
 872  * Decodes the hex encoded data and converts to a byte array.  If terminator
 873  * is not 0, the terminator character is appended to the end of the result.
 874  * This might be useful for converting GSM encoded data if the CSCS is set
 875  * to HEX.
 876  *
 877  * Please note that this since GSM does allow embedded null characeters, use
 878  * of the terminator or the items_writen is encouraged to find the real size
 879  * of the result.
 880  */
 881 unsigned char *decode_hex_own_buf(const char *in, long len, long *items_written,
 882                                         unsigned char terminator,
 883                                         unsigned char *buf)
 884 {
 885         long i, j;
 886         char c;
 887         unsigned char b;
 888
 889         if (len < 0)
 890                 len = strlen(in);
 891
 892         len &= ~0x1;
 893
 894         for (i = 0, j = 0; i < len; i++, j++) {
 895                 c = toupper(in[i]);
 896
 897                 if (c >= '0' && c <= '9')
 898                         b = c - '0';
 899                 else if (c >= 'A' && c <= 'F')
 900                         b = 10 + c - 'A';
 901                 else
 902                         return NULL;
 903
 904                 i += 1;
 905
 906                 c = toupper(in[i]);
 907
 908                 if (c >= '0' && c <= '9')
 909                         b = b * 16 + c - '0';
 910                 else if (c >= 'A' && c <= 'F')
 911                         b = b * 16 + 10 + c - 'A';
 912                 else
 913                         return NULL;
 914
 915                 buf[j] = b;
 916         }
 917
 918         if (terminator)
 919                 buf[j] = terminator;
 920
 921         if (items_written)
 922                 *items_written = j;
 923
 924         return buf;
 925 }
 926
 927 unsigned char *decode_hex(const char *in, long len, long *items_written,
 928                                 unsigned char terminator)
 929 {
 930         long i;
 931         char c;
 932         unsigned char *buf;
 933
 934         if (len < 0)
 935                 len = strlen(in);
 936
 937         len &= ~0x1;
 938
 939         for (i = 0; i < len; i++) {
 940                 c = toupper(in[i]);
 941
 942                 if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'))
 943                         continue;
 944
 945                 return NULL;
 946         }
 947
 948         buf = g_new(unsigned char, (len >> 1) + (terminator ? 1 : 0));
 949
 950         return decode_hex_own_buf(in, len, items_written, terminator, buf);
 951 }
 952
 953 /*!
 954  * Encodes the data using hexadecimal characters.  len can be negative,
 955  * in that case the terminator is used to find the last character.  This is
 956  * useful for handling GSM-encoded strings which allow ASCII NULL character
 957  * in the stream.
 958  */
 959 char *encode_hex_own_buf(const unsigned char *in, long len,
 960                                 unsigned char terminator, char *buf)
 961 {
 962         long i, j;
 963         char c;
 964
 965         if (len < 0) {
 966                 i = 0;
 967
 968                 while (in[i] != terminator)
 969                         i++;
 970
 971                 len = i;
 972         }
 973
 974         for (i = 0, j = 0; i < len; i++, j++) {
 975                 c = (in[i] >> 4) & 0xf;
 976
 977                 if (c <= 9)
 978                         buf[j] = '0' + c;
 979                 else
 980                         buf[j] = 'A' + c - 10;
 981
 982                 j += 1;
 983
 984                 c = (in[i]) & 0xf;
 985
 986                 if (c <= 9)
 987                         buf[j] = '0' + c;
 988                 else
 989                         buf[j] = 'A' + c - 10;
 990         }
 991
 992         buf[j] = '\0';
 993
 994         return buf;
 995 }
 996
 997 char *encode_hex(const unsigned char *in, long len, unsigned char terminator)
 998 {
 999         char *buf;
1000         int i;
1001
1002         if (len < 0) {
1003                 i = 0;
1004
1005                 while (in[i] != terminator)
1006                         i++;
1007
1008                 len = i;
1009         }
1010
1011         buf = g_new(char, len * 2 + 1);
1012
1013         return encode_hex_own_buf(in, len, terminator, buf);
1014 }
1015
1016 unsigned char *unpack_7bit_own_buf(const unsigned char *in, long len,
1017                                         int byte_offset, gboolean ussd,
1018                                         long max_to_unpack, long *items_written,
1019                                         unsigned char terminator,
1020                                         unsigned char *buf)
1021 {
1022         unsigned char rest = 0;
1023         unsigned char *out = buf;
1024         int bits = 7 - (byte_offset % 7);
1025         long i;
1026
1027         if (len <= 0)
1028                 return NULL;
1029
1030         /* In the case of CB, unpack as much as possible */
1031         if (ussd == TRUE)
1032                 max_to_unpack = len * 8 / 7;
1033
1034         for (i = 0; (i < len) && ((out-buf) < max_to_unpack); i++) {
1035                 /* Grab what we have in the current octet */
1036                 *out = (in[i] & ((1 << bits) - 1)) << (7 - bits);
1037
1038                 /* Append what we have from the previous octet, if any */
1039                 *out |= rest;
1040
1041                 /* Figure out the remainder */
1042                 rest = (in[i] >> bits) & ((1 << (8-bits)) - 1);
1043
1044                 /*
1045                  * We have the entire character, here we don't increate
1046                  * out if this is we started at an offset.  Instead
1047                  * we effectively populate variable rest
1048                  */
1049                 if (i != 0 || bits == 7)
1050                         out++;
1051
1052                 if ((out-buf) == max_to_unpack)
1053                         break;
1054
1055                 /*
1056                  * We expected only 1 bit from this octet, means there's 7
1057                  * left, take care of them here
1058                  */
1059                 if (bits == 1) {
1060                         *out = rest;
1061                         out++;
1062                         bits = 7;
1063                         rest = 0;
1064                 } else {
1065                         bits = bits - 1;
1066                 }
1067         }
1068
1069         /*
1070          * According to 23.038 6.1.2.3.1, last paragraph:
1071          * "If the total number of characters to be sent equals (8n-1)
1072          * where n=1,2,3 etc. then there are 7 spare bits at the end
1073          * of the message. To avoid the situation where the receiving
1074          * entity confuses 7 binary zero pad bits as the @ character,
1075          * the carriage return or <CR> character shall be used for
1076          * padding in this situation, just as for Cell Broadcast."
1077          *
1078          * "The receiving entity shall remove the final <CR> character where
1079          * the message ends on an octet boundary with <CR> as the last
1080          * character.
1081          */
1082         if (ussd && (((out - buf) % 8) == 0) && (*(out - 1) == '\r'))
1083                 out = out - 1;
1084
1085         if (terminator)
1086                 *out = terminator;
1087
1088         if (items_written)
1089                 *items_written = out - buf;
1090
1091         return buf;
1092 }
1093
1094 unsigned char *unpack_7bit(const unsigned char *in, long len, int byte_offset,
1095                                 gboolean ussd, long max_to_unpack,
1096                                 long *items_written, unsigned char terminator)
1097 {
1098         unsigned char *buf = g_new(unsigned char,
1099                                         len * 8 / 7 + (terminator ? 1 : 0));
1100
1101         return unpack_7bit_own_buf(in, len, byte_offset, ussd, max_to_unpack,
1102                                 items_written, terminator, buf);
1103 }
1104
1105 unsigned char *pack_7bit_own_buf(const unsigned char *in, long len,
1106                                         int byte_offset, gboolean ussd,
1107                                         long *items_written,
1108                                         unsigned char terminator,
1109                                         unsigned char *buf)
1110 {
1111         int bits = 7 - (byte_offset % 7);
1112         unsigned char *out = buf;
1113         long i;
1114         long total_bits;
1115
1116         if (len == 0)
1117                 return NULL;
1118
1119         if (len < 0) {
1120                 i = 0;
1121
1122                 while (in[i] != terminator)
1123                         i++;
1124
1125                 len = i;
1126         }
1127
1128         total_bits = len * 7;
1129
1130         if (bits != 7) {
1131                 total_bits += bits;
1132                 bits = bits - 1;
1133                 *out = 0;
1134         }
1135
1136         for (i = 0; i < len; i++) {
1137                 if (bits != 7) {
1138                         *out |= (in[i] & ((1 << (7 - bits)) - 1)) <<
1139                                         (bits + 1);
1140                         out++;
1141                 }
1142
1143                 /* This is a no op when bits == 0, lets keep valgrind happy */
1144                 if (bits != 0)
1145                         *out = in[i] >> (7 - bits);
1146
1147                 if (bits == 0)
1148                         bits = 7;
1149                 else
1150                         bits = bits - 1;
1151         }
1152
1153         /*
1154          * If <CR> is intended to be the last character and the message
1155          * (including the wanted <CR>) ends on an octet boundary, then
1156          * another <CR> must be added together with a padding bit 0. The
1157          * receiving entity will perform the carriage return function twice,
1158          * but this will not result in misoperation as the definition of
1159          * <CR> in clause 6.1.1 is identical to the definition of <CR><CR>.
1160          */
1161         if (ussd && ((total_bits % 8) == 1))
1162                 *out |= '\r' << 1;
1163
1164         if (bits != 7)
1165                 out++;
1166
1167         if (ussd && ((total_bits % 8) == 0) && (in[len - 1] == '\r')) {
1168                 *out = '\r';
1169                 out++;
1170         }
1171
1172         if (items_written)
1173                 *items_written = out - buf;
1174
1175         return buf;
1176 }
1177
1178 unsigned char *pack_7bit(const unsigned char *in, long len, int byte_offset,
1179                                 gboolean ussd, long *items_written,
1180                                 unsigned char terminator)
1181 {
1182         int bits = 7 - (byte_offset % 7);
1183         long i;
1184         long total_bits;
1185         unsigned char *buf;
1186
1187         if (len == 0 || items_written == NULL)
1188                 return NULL;
1189
1190         if (len < 0) {
1191                 i = 0;
1192
1193                 while (in[i] != terminator)
1194                         i++;
1195
1196                 len = i;
1197         }
1198
1199         total_bits = len * 7;
1200
1201         if (bits != 7)
1202                 total_bits += bits;
1203
1204         /* Round up number of bytes, must append <cr> if true */
1205         if (ussd && ((total_bits % 8) == 0) && (in[len - 1] == '\r'))
1206                 buf = g_new(unsigned char, (total_bits + 14) / 8);
1207         else
1208                 buf = g_new(unsigned char, (total_bits + 7) / 8);
1209
1210         return pack_7bit_own_buf(in, len, byte_offset, ussd, items_written,
1211                                         terminator, buf);
1212 }
1213
1214 char *sim_string_to_utf8(const unsigned char *buffer, int length)
1215 {
1216         struct conversion_table t;
1217         int i;
1218         int j;
1219         int num_chars;
1220         unsigned short ucs2_offset;
1221         int res_len;
1222         int offset;
1223         char *utf8 = NULL;
1224         char *out;
1225
1226         if (conversion_table_init(&t, GSM_DIALECT_DEFAULT,
1227                                         GSM_DIALECT_DEFAULT) == FALSE)
1228                 return NULL;
1229
1230         if (length < 1)
1231                 return NULL;
1232
1233         if (buffer[0] < 0x80) {
1234                 /*
1235                  * We have to find the real length, since on SIM file system
1236                  * alpha fields are 0xff padded
1237                  */
1238                 for (i = 0; i < length; i++)
1239                         if (buffer[i] == 0xff)
1240                                 break;
1241
1242                 return convert_gsm_to_utf8(buffer, i, NULL, NULL, 0);
1243         }
1244
1245         switch (buffer[0]) {
1246         case 0x80:
1247                 if (((length - 1) % 2) == 1) {
1248                         if (buffer[length - 1] != 0xff)
1249                                 return NULL;
1250
1251                         length = length - 1;
1252                 }
1253
1254                 for (i = 1; i < length; i += 2)
1255                         if (buffer[i] == 0xff && buffer[i + 1] == 0xff)
1256                                 break;
1257
1258                 return g_convert((char *) buffer + 1, i - 1,
1259                                         "UTF-8//TRANSLIT", "UCS-2BE",
1260                                         NULL, NULL, NULL);
1261         case 0x81:
1262                 if (length < 3 || (buffer[1] > (length - 3)))
1263                         return NULL;
1264
1265                 num_chars = buffer[1];
1266                 ucs2_offset = buffer[2] << 7;
1267                 offset = 3;
1268                 break;
1269
1270         case 0x82:
1271                 if (length < 4 || buffer[1] > length - 4)
1272                         return NULL;
1273
1274                 num_chars = buffer[1];
1275                 ucs2_offset = (buffer[2] << 8) | buffer[3];
1276                 offset = 4;
1277                 break;
1278
1279         case 0xff: /* Special case of empty string */
1280                 num_chars = 0;
1281                 ucs2_offset = 0;
1282                 offset = 0;
1283                 break;
1284
1285         default:
1286                 return NULL;
1287         }
1288
1289         res_len = 0;
1290         i = offset;
1291         j = 0;
1292
1293         while ((i < length) && (j < num_chars)) {
1294                 unsigned short c;
1295
1296                 if (buffer[i] & 0x80) {
1297                         c = (buffer[i++] & 0x7f) + ucs2_offset;
1298
1299                         if (c >= 0xd800 && c < 0xe000)
1300                                 return NULL;
1301
1302                         res_len += UTF8_LENGTH(c);
1303                         j += 1;
1304                         continue;
1305                 }
1306
1307                 if (buffer[i] == 0x1b) {
1308                         ++i;
1309                         if (i >= length)
1310                                 return NULL;
1311
1312                         c = gsm_single_shift_lookup(&t, buffer[i++]);
1313
1314                         if (c == 0)
1315                                 return NULL;
1316
1317                         j += 2;
1318                 } else {
1319                         c = gsm_locking_shift_lookup(&t, buffer[i++]);
1320                         j += 1;
1321                 }
1322
1323                 res_len += UTF8_LENGTH(c);
1324         }
1325
1326         if (j != num_chars)
1327                 return NULL;
1328
1329         /* Check that the string is padded out to the length by 0xff */
1330         for (; i < length; i++)
1331                 if (buffer[i] != 0xff)
1332                         return NULL;
1333
1334         utf8 = g_try_malloc(res_len + 1);
1335         if (utf8 == NULL)
1336                 return NULL;
1337
1338         i = offset;
1339         out = utf8;
1340
1341         while (out < utf8 + res_len) {
1342                 unsigned short c;
1343
1344                 if (buffer[i] & 0x80)
1345                         c = (buffer[i++] & 0x7f) + ucs2_offset;
1346                 else if (buffer[i] == 0x1b) {
1347                         ++i;
1348                         c = gsm_single_shift_lookup(&t, buffer[i++]);
1349                 } else
1350                         c = gsm_locking_shift_lookup(&t, buffer[i++]);
1351
1352                 out += g_unichar_to_utf8(c, out);
1353         }
1354
1355         *out = '\0';
1356
1357         return utf8;
1358 }
1359
1360 unsigned char *utf8_to_sim_string(const char *utf, int max_length,
1361                                         int *out_length)
1362 {
1363         unsigned char *result;
1364         unsigned char *ucs2;
1365         long gsm_bytes;
1366         gsize converted;
1367
1368         result = convert_utf8_to_gsm(utf, -1, NULL, &gsm_bytes, 0);
1369         if (result) {
1370                 if (gsm_bytes > max_length) {
1371                         gsm_bytes = max_length;
1372                         while (gsm_bytes && result[gsm_bytes - 1] == 0x1b)
1373                                 gsm_bytes -= 1;
1374                 }
1375
1376                 *out_length = gsm_bytes;
1377                 return result;
1378         }
1379
1380         /* NOTE: UCS2 formats with an offset are never used */
1381
1382         ucs2 = (guint8 *) g_convert(utf, -1, "UCS-2BE//TRANSLIT", "UTF-8",
1383                                         NULL, &converted, NULL);
1384         if (ucs2 == NULL)
1385                 return NULL;
1386
1387         if (max_length != -1 && (int) converted + 1 > max_length)
1388                 converted = (max_length - 1) & ~1;
1389
1390         result = g_try_malloc(converted + 1);
1391         if (result == NULL) {
1392                 g_free(ucs2);
1393                 return NULL;
1394         }
1395
1396         *out_length = converted + 1;
1397
1398         result[0] = 0x80;
1399         memcpy(&result[1], ucs2, converted);
1400         g_free(ucs2);
1401
1402         return result;
1403 }
1404
1405 /*!
1406  * Converts UCS2 encoded text to GSM alphabet. The result is unpacked,
1407  * with the 7th bit always 0. If terminator is not 0, a terminator character
1408  * is appended to the result.
1409  *
1410  * Returns the encoded data or NULL if the data could not be encoded. The
1411  * data must be freed by the caller. If items_read is not NULL, it contains
1412  * the actual number of bytes read. If items_written is not NULL, contains
1413  * the number of bytes written.
1414  */
1415 unsigned char *convert_ucs2_to_gsm_with_lang(const unsigned char *text,
1416                                         long len, long *items_read,
1417                                         long *items_written,
1418                                         unsigned char terminator,
1419                                         enum gsm_dialect locking_lang,
1420                                         enum gsm_dialect single_lang)
1421 {
1422         struct conversion_table t;
1423         long nchars = 0;
1424         const unsigned char *in;
1425         unsigned char *out;
1426         unsigned char *res = NULL;
1427         long res_len;
1428         long i;
1429
1430         if (conversion_table_init(&t, locking_lang, single_lang) == FALSE)
1431                 return NULL;
1432
1433         if (len < 1 || len % 2)
1434                 return NULL;
1435
1436         in = text;
1437         res_len = 0;
1438
1439         for (i = 0; i < len; i += 2) {
1440                 gunichar c = (in[i] << 8) | in[i + 1];
1441                 unsigned short converted = GUND;
1442
1443                 if (c > 0xffff)
1444                         goto err_out;
1445
1446                 converted = unicode_locking_shift_lookup(&t, c);
1447
1448                 if (converted == GUND)
1449                         converted = unicode_single_shift_lookup(&t, c);
1450
1451                 if (converted == GUND)
1452                         goto err_out;
1453
1454                 if (converted & 0x1b00)
1455                         res_len += 2;
1456                 else
1457                         res_len += 1;
1458
1459                 nchars += 1;
1460         }
1461
1462         res = g_try_malloc(res_len + (terminator ? 1 : 0));
1463         if (res == NULL)
1464                 goto err_out;
1465
1466         in = text;
1467         out = res;
1468
1469         for (i = 0; i < len; i += 2) {
1470                 gunichar c = (in[i] << 8) | in[i + 1];
1471                 unsigned short converted = GUND;
1472
1473                 converted = unicode_locking_shift_lookup(&t, c);
1474
1475                 if (converted == GUND)
1476                         converted = unicode_single_shift_lookup(&t, c);
1477
1478                 if (converted & 0x1b00) {
1479                         *out = 0x1b;
1480                         ++out;
1481                 }
1482
1483                 *out = converted;
1484                 ++out;
1485         }
1486
1487         if (terminator)
1488                 *out = terminator;
1489
1490         if (items_written)
1491                 *items_written = out - res;
1492
1493 err_out:
1494         if (items_read)
1495                 *items_read = i;
1496
1497         return res;
1498 }
1499
1500 unsigned char *convert_ucs2_to_gsm(const unsigned char *text, long len,
1501                                 long *items_read, long *items_written,
1502                                 unsigned char terminator)
1503 {
1504         return convert_ucs2_to_gsm_with_lang(text, len, items_read,
1505                                                 items_written,
1506                                                 terminator,
1507                                                 GSM_DIALECT_DEFAULT,
1508                                                 GSM_DIALECT_DEFAULT);
1509 }