glib/gbase64.c

   1 /* gbase64.c - Base64 encoding/decoding
   2  *
   3  *  Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
   4  *  Copyright (C) 2000-2003 Ximian Inc.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  *
  19  * This is based on code in camel, written by:
  20  *    Michael Zucchi <notzed@ximian.com>
  21  *    Jeffrey Stedfast <fejj@ximian.com>
  22  */
  23
  24 #include "config.h"
  25
  26 #include <string.h>
  27
  28 #include "gbase64.h"
  29 #include "gtestutils.h"
  30 #include "glibintl.h"
  31
  32
  33 /**
  34  * SECTION:base64
  35  * @title: Base64 Encoding
  36  * @short_description: encodes and decodes data in Base64 format
  37  *
  38  * Base64 is an encoding that allows a sequence of arbitrary bytes to be
  39  * encoded as a sequence of printable ASCII characters. For the definition
  40  * of Base64, see
  41  * [RFC 1421](http://www.ietf.org/rfc/rfc1421.txt)
  42  * or
  43  * [RFC 2045](http://www.ietf.org/rfc/rfc2045.txt).
  44  * Base64 is most commonly used as a MIME transfer encoding
  45  * for email.
  46  *
  47  * GLib supports incremental encoding using g_base64_encode_step() and
  48  * g_base64_encode_close(). Incremental decoding can be done with
  49  * g_base64_decode_step(). To encode or decode data in one go, use
  50  * g_base64_encode() or g_base64_decode(). To avoid memory allocation when
  51  * decoding, you can use g_base64_decode_inplace().
  52  *
  53  * Support for Base64 encoding has been added in GLib 2.12.
  54  */
  55
  56 static const char base64_alphabet[] =
  57         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  58
  59 /**
  60  * g_base64_encode_step:
  61  * @in: (array length=len) (element-type guint8): the binary data to encode
  62  * @len: the length of @in
  63  * @break_lines: whether to break long lines
  64  * @out: (out) (array) (element-type guint8): pointer to destination buffer
  65  * @state: (inout): Saved state between steps, initialize to 0
  66  * @save: (inout): Saved state between steps, initialize to 0
  67  *
  68  * Incrementally encode a sequence of binary data into its Base-64 stringified
  69  * representation. By calling this function multiple times you can convert
  70  * data in chunks to avoid having to have the full encoded data in memory.
  71  *
  72  * When all of the data has been converted you must call
  73  * g_base64_encode_close() to flush the saved state.
  74  *
  75  * The output buffer must be large enough to fit all the data that will
  76  * be written to it. Due to the way base64 encodes you will need
  77  * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
  78  * non-zero state). If you enable line-breaking you will need at least:
  79  * ((@len / 3 + 1) * 4 + 4) / 72 + 1 bytes of extra space.
  80  *
  81  * @break_lines is typically used when putting base64-encoded data in emails.
  82  * It breaks the lines at 72 columns instead of putting all of the text on
  83  * the same line. This avoids problems with long lines in the email system.
  84  * Note however that it breaks the lines with `LF` characters, not
  85  * `CR LF` sequences, so the result cannot be passed directly to SMTP
  86  * or certain other protocols.
  87  *
  88  * Returns: The number of bytes of output that was written
  89  *
  90  * Since: 2.12
  91  */
  92 gsize
  93 g_base64_encode_step (const guchar *in,
  94                       gsize         len,
  95                       gboolean      break_lines,
  96                       gchar        *out,
  97                       gint         *state,
  98                       gint         *save)
  99 {
 100   char *outptr;
 101   const guchar *inptr;
 102
 103   g_return_val_if_fail (in != NULL, 0);
 104   g_return_val_if_fail (out != NULL, 0);
 105   g_return_val_if_fail (state != NULL, 0);
 106   g_return_val_if_fail (save != NULL, 0);
 107
 108   if (len <= 0)
 109     return 0;
 110
 111   inptr = in;
 112   outptr = out;
 113
 114   if (len + ((char *) save) [0] > 2)
 115     {
 116       const guchar *inend = in+len-2;
 117       int c1, c2, c3;
 118       int already;
 119
 120       already = *state;
 121
 122       switch (((char *) save) [0])
 123         {
 124         case 1:
 125           c1 = ((unsigned char *) save) [1];
 126           goto skip1;
 127         case 2:
 128           c1 = ((unsigned char *) save) [1];
 129           c2 = ((unsigned char *) save) [2];
 130           goto skip2;
 131         }
 132
 133       /*
 134        * yes, we jump into the loop, no i'm not going to change it,
 135        * it's beautiful!
 136        */
 137       while (inptr < inend)
 138         {
 139           c1 = *inptr++;
 140         skip1:
 141           c2 = *inptr++;
 142         skip2:
 143           c3 = *inptr++;
 144           *outptr++ = base64_alphabet [ c1 >> 2 ];
 145           *outptr++ = base64_alphabet [ c2 >> 4 |
 146                                         ((c1&0x3) << 4) ];
 147           *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
 148                                         (c3 >> 6) ];
 149           *outptr++ = base64_alphabet [ c3 & 0x3f ];
 150           /* this is a bit ugly ... */
 151           if (break_lines && (++already) >= 19)
 152             {
 153               *outptr++ = '\n';
 154               already = 0;
 155             }
 156         }
 157
 158       ((char *)save)[0] = 0;
 159       len = 2 - (inptr - inend);
 160       *state = already;
 161     }
 162
 163   if (len>0)
 164     {
 165       char *saveout;
 166
 167       /* points to the slot for the next char to save */
 168       saveout = & (((char *)save)[1]) + ((char *)save)[0];
 169
 170       /* len can only be 0 1 or 2 */
 171       switch(len)
 172         {
 173         case 2: *saveout++ = *inptr++;
 174         case 1: *saveout++ = *inptr++;
 175         }
 176       ((char *)save)[0] += len;
 177     }
 178
 179   return outptr - out;
 180 }
 181
 182 /**
 183  * g_base64_encode_close:
 184  * @break_lines: whether to break long lines
 185  * @out: (out) (array) (element-type guint8): pointer to destination buffer
 186  * @state: (inout): Saved state from g_base64_encode_step()
 187  * @save: (inout): Saved state from g_base64_encode_step()
 188  *
 189  * Flush the status from a sequence of calls to g_base64_encode_step().
 190  *
 191  * The output buffer must be large enough to fit all the data that will
 192  * be written to it. It will need up to 4 bytes, or up to 5 bytes if
 193  * line-breaking is enabled.
 194  *
 195  * The @out array will not be automatically nul-terminated.
 196  *
 197  * Returns: The number of bytes of output that was written
 198  *
 199  * Since: 2.12
 200  */
 201 gsize
 202 g_base64_encode_close (gboolean  break_lines,
 203                        gchar    *out,
 204                        gint     *state,
 205                        gint     *save)
 206 {
 207   int c1, c2;
 208   char *outptr = out;
 209
 210   g_return_val_if_fail (out != NULL, 0);
 211   g_return_val_if_fail (state != NULL, 0);
 212   g_return_val_if_fail (save != NULL, 0);
 213
 214   c1 = ((unsigned char *) save) [1];
 215   c2 = ((unsigned char *) save) [2];
 216
 217   switch (((char *) save) [0])
 218     {
 219     case 2:
 220       outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
 221       g_assert (outptr [2] != 0);
 222       goto skip;
 223     case 1:
 224       outptr[2] = '=';
 225     skip:
 226       outptr [0] = base64_alphabet [ c1 >> 2 ];
 227       outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
 228       outptr [3] = '=';
 229       outptr += 4;
 230       break;
 231     }
 232   if (break_lines)
 233     *outptr++ = '\n';
 234
 235   *save = 0;
 236   *state = 0;
 237
 238   return outptr - out;
 239 }
 240
 241 /**
 242  * g_base64_encode:
 243  * @data: (array length=len) (element-type guint8): the binary data to encode
 244  * @len: the length of @data
 245  *
 246  * Encode a sequence of binary data into its Base-64 stringified
 247  * representation.
 248  *
 249  * Returns: (transfer full): a newly allocated, zero-terminated Base-64
 250  *               encoded string representing @data. The returned string must
 251  *               be freed with g_free().
 252  *
 253  * Since: 2.12
 254  */
 255 gchar *
 256 g_base64_encode (const guchar *data,
 257                  gsize         len)
 258 {
 259   gchar *out;
 260   gint state = 0, outlen;
 261   gint save = 0;
 262
 263   g_return_val_if_fail (data != NULL || len == 0, NULL);
 264
 265   /* We can use a smaller limit here, since we know the saved state is 0,
 266      +1 is needed for trailing \0, also check for unlikely integer overflow */
 267   if (len >= ((G_MAXSIZE - 1) / 4 - 1) * 3)
 268     g_error("%s: input too large for Base64 encoding (%"G_GSIZE_FORMAT" chars)",
 269         G_STRLOC, len);
 270
 271   out = g_malloc ((len / 3 + 1) * 4 + 1);
 272
 273   outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
 274   outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
 275   out[outlen] = '\0';
 276
 277   return (gchar *) out;
 278 }
 279
 280 static const unsigned char mime_base64_rank[256] = {
 281   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 282   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 283   255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
 284    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
 285   255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
 286    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
 287   255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
 288    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
 289   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 290   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 291   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 292   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 293   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 294   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 295   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 296   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 297 };
 298
 299 /**
 300  * g_base64_decode_step:
 301  * @in: (array length=len) (element-type guint8): binary input data
 302  * @len: max length of @in data to decode
 303  * @out: (out) (array) (element-type guint8): output buffer
 304  * @state: (inout): Saved state between steps, initialize to 0
 305  * @save: (inout): Saved state between steps, initialize to 0
 306  *
 307  * Incrementally decode a sequence of binary data from its Base-64 stringified
 308  * representation. By calling this function multiple times you can convert
 309  * data in chunks to avoid having to have the full encoded data in memory.
 310  *
 311  * The output buffer must be large enough to fit all the data that will
 312  * be written to it. Since base64 encodes 3 bytes in 4 chars you need
 313  * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
 314  * state).
 315  *
 316  * Returns: The number of bytes of output that was written
 317  *
 318  * Since: 2.12
 319  **/
 320 gsize
 321 g_base64_decode_step (const gchar  *in,
 322                       gsize         len,
 323                       guchar       *out,
 324                       gint         *state,
 325                       guint        *save)
 326 {
 327   const guchar *inptr;
 328   guchar *outptr;
 329   const guchar *inend;
 330   guchar c, rank;
 331   guchar last[2];
 332   unsigned int v;
 333   int i;
 334
 335   g_return_val_if_fail (in != NULL, 0);
 336   g_return_val_if_fail (out != NULL, 0);
 337   g_return_val_if_fail (state != NULL, 0);
 338   g_return_val_if_fail (save != NULL, 0);
 339
 340   if (len <= 0)
 341     return 0;
 342
 343   inend = (const guchar *)in+len;
 344   outptr = out;
 345
 346   /* convert 4 base64 bytes to 3 normal bytes */
 347   v=*save;
 348   i=*state;
 349
 350   last[0] = last[1] = 0;
 351
 352   /* we use the sign in the state to determine if we got a padding character
 353      in the previous sequence */
 354   if (i < 0)
 355     {
 356       i = -i;
 357       last[0] = '=';
 358     }
 359
 360   inptr = (const guchar *)in;
 361   while (inptr < inend)
 362     {
 363       c = *inptr++;
 364       rank = mime_base64_rank [c];
 365       if (rank != 0xff)
 366         {
 367           last[1] = last[0];
 368           last[0] = c;
 369           v = (v<<6) | rank;
 370           i++;
 371           if (i==4)
 372             {
 373               *outptr++ = v>>16;
 374               if (last[1] != '=')
 375                 *outptr++ = v>>8;
 376               if (last[0] != '=')
 377                 *outptr++ = v;
 378               i=0;
 379             }
 380         }
 381     }
 382
 383   *save = v;
 384   *state = last[0] == '=' ? -i : i;
 385
 386   return outptr - out;
 387 }
 388
 389 /**
 390  * g_base64_decode:
 391  * @text: zero-terminated string with base64 text to decode
 392  * @out_len: (out): The length of the decoded data is written here
 393  *
 394  * Decode a sequence of Base-64 encoded text into binary data.  Note
 395  * that the returned binary data is not necessarily zero-terminated,
 396  * so it should not be used as a character string.
 397  *
 398  * Returns: (transfer full) (array length=out_len) (element-type guint8):
 399  *               newly allocated buffer containing the binary data
 400  *               that @text represents. The returned buffer must
 401  *               be freed with g_free().
 402  *
 403  * Since: 2.12
 404  */
 405 guchar *
 406 g_base64_decode (const gchar *text,
 407                  gsize       *out_len)
 408 {
 409   guchar *ret;
 410   gsize input_length;
 411   gint state = 0;
 412   guint save = 0;
 413
 414   g_return_val_if_fail (text != NULL, NULL);
 415   g_return_val_if_fail (out_len != NULL, NULL);
 416
 417   input_length = strlen (text);
 418
 419   /* We can use a smaller limit here, since we know the saved state is 0,
 420      +1 used to avoid calling g_malloc0(0), and hence returning NULL */
 421   ret = g_malloc0 ((input_length / 4) * 3 + 1);
 422
 423   *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
 424
 425   return ret;
 426 }
 427
 428 /**
 429  * g_base64_decode_inplace:
 430  * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
 431  *        string with base64 text to decode
 432  * @out_len: (inout): The length of the decoded data is written here
 433  *
 434  * Decode a sequence of Base-64 encoded text into binary data
 435  * by overwriting the input data.
 436  *
 437  * Returns: (transfer none): The binary data that @text responds. This pointer
 438  *               is the same as the input @text.
 439  *
 440  * Since: 2.20
 441  */
 442 guchar *
 443 g_base64_decode_inplace (gchar *text,
 444                          gsize *out_len)
 445 {
 446   gint input_length, state = 0;
 447   guint save = 0;
 448
 449   g_return_val_if_fail (text != NULL, NULL);
 450   g_return_val_if_fail (out_len != NULL, NULL);
 451
 452   input_length = strlen (text);
 453
 454   g_return_val_if_fail (input_length > 1, NULL);
 455
 456   *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
 457
 458   return (guchar *) text;
 459 }