glib/gbase64.c

   1 /* gbase64.c - Base64 encoding/decoding
   2  *
   3  *  Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
   4  *  Copyright (C) 2000-2003 Ximian Inc.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  *
  19  * This is based on code in camel, written by:
  20  *    Michael Zucchi <notzed@ximian.com>
  21  *    Jeffrey Stedfast <fejj@ximian.com>
  22  */
  23
  24 #include "config.h"
  25
  26 #include <string.h>
  27
  28 #include "gbase64.h"
  29 #include "gtestutils.h"
  30 #include "glibintl.h"
  31
  32
  33 /**
  34  * SECTION:base64
  35  * @title: Base64 Encoding
  36  * @short_description: encodes and decodes data in Base64 format
  37  *
  38  * Base64 is an encoding that allows a sequence of arbitrary bytes to be
  39  * encoded as a sequence of printable ASCII characters. For the definition
  40  * of Base64, see <ulink url="http://www.ietf.org/rfc/rfc1421.txt">RFC
  41  * 1421</ulink> or <ulink url="http://www.ietf.org/rfc/rfc2045.txt">RFC
  42  * 2045</ulink>. Base64 is most commonly used as a MIME transfer encoding
  43  * for email.
  44  *
  45  * GLib supports incremental encoding using g_base64_encode_step() and
  46  * g_base64_encode_close(). Incremental decoding can be done with
  47  * g_base64_decode_step(). To encode or decode data in one go, use
  48  * g_base64_encode() or g_base64_decode(). To avoid memory allocation when
  49  * decoding, you can use g_base64_decode_inplace().
  50  *
  51  * Support for Base64 encoding has been added in GLib 2.12.
  52  */
  53
  54 static const char base64_alphabet[] =
  55         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  56
  57 /**
  58  * g_base64_encode_step:
  59  * @in: (array length=len) (element-type guint8): the binary data to encode
  60  * @len: the length of @in
  61  * @break_lines: whether to break long lines
  62  * @out: (out) (array) (element-type guint8): pointer to destination buffer
  63  * @state: (inout): Saved state between steps, initialize to 0
  64  * @save: (inout): Saved state between steps, initialize to 0
  65  *
  66  * Incrementally encode a sequence of binary data into its Base-64 stringified
  67  * representation. By calling this function multiple times you can convert
  68  * data in chunks to avoid having to have the full encoded data in memory.
  69  *
  70  * When all of the data has been converted you must call
  71  * g_base64_encode_close() to flush the saved state.
  72  *
  73  * The output buffer must be large enough to fit all the data that will
  74  * be written to it. Due to the way base64 encodes you will need
  75  * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
  76  * non-zero state). If you enable line-breaking you will need at least:
  77  * ((@len / 3 + 1) * 4 + 4) / 72 + 1 bytes of extra space.
  78  *
  79  * @break_lines is typically used when putting base64-encoded data in emails.
  80  * It breaks the lines at 72 columns instead of putting all of the text on
  81  * the same line. This avoids problems with long lines in the email system.
  82  * Note however that it breaks the lines with <literal>LF</literal>
  83  * characters, not <literal>CR LF</literal> sequences, so the result cannot
  84  * be passed directly to SMTP or certain other protocols.
  85  *
  86  * Return value: The number of bytes of output that was written
  87  *
  88  * Since: 2.12
  89  */
  90 gsize
  91 g_base64_encode_step (const guchar *in,
  92                       gsize         len,
  93                       gboolean      break_lines,
  94                       gchar        *out,
  95                       gint         *state,
  96                       gint         *save)
  97 {
  98   char *outptr;
  99   const guchar *inptr;
 100
 101   g_return_val_if_fail (in != NULL, 0);
 102   g_return_val_if_fail (out != NULL, 0);
 103   g_return_val_if_fail (state != NULL, 0);
 104   g_return_val_if_fail (save != NULL, 0);
 105
 106   if (len <= 0)
 107     return 0;
 108
 109   inptr = in;
 110   outptr = out;
 111
 112   if (len + ((char *) save) [0] > 2)
 113     {
 114       const guchar *inend = in+len-2;
 115       int c1, c2, c3;
 116       int already;
 117
 118       already = *state;
 119
 120       switch (((char *) save) [0])
 121         {
 122         case 1:
 123           c1 = ((unsigned char *) save) [1];
 124           goto skip1;
 125         case 2:
 126           c1 = ((unsigned char *) save) [1];
 127           c2 = ((unsigned char *) save) [2];
 128           goto skip2;
 129         }
 130
 131       /*
 132        * yes, we jump into the loop, no i'm not going to change it,
 133        * it's beautiful!
 134        */
 135       while (inptr < inend)
 136         {
 137           c1 = *inptr++;
 138         skip1:
 139           c2 = *inptr++;
 140         skip2:
 141           c3 = *inptr++;
 142           *outptr++ = base64_alphabet [ c1 >> 2 ];
 143           *outptr++ = base64_alphabet [ c2 >> 4 |
 144                                         ((c1&0x3) << 4) ];
 145           *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
 146                                         (c3 >> 6) ];
 147           *outptr++ = base64_alphabet [ c3 & 0x3f ];
 148           /* this is a bit ugly ... */
 149           if (break_lines && (++already) >= 19)
 150             {
 151               *outptr++ = '\n';
 152               already = 0;
 153             }
 154         }
 155
 156       ((char *)save)[0] = 0;
 157       len = 2 - (inptr - inend);
 158       *state = already;
 159     }
 160
 161   if (len>0)
 162     {
 163       char *saveout;
 164
 165       /* points to the slot for the next char to save */
 166       saveout = & (((char *)save)[1]) + ((char *)save)[0];
 167
 168       /* len can only be 0 1 or 2 */
 169       switch(len)
 170         {
 171         case 2: *saveout++ = *inptr++;
 172         case 1: *saveout++ = *inptr++;
 173         }
 174       ((char *)save)[0] += len;
 175     }
 176
 177   return outptr - out;
 178 }
 179
 180 /**
 181  * g_base64_encode_close:
 182  * @break_lines: whether to break long lines
 183  * @out: (out) (array) (element-type guint8): pointer to destination buffer
 184  * @state: (inout): Saved state from g_base64_encode_step()
 185  * @save: (inout): Saved state from g_base64_encode_step()
 186  *
 187  * Flush the status from a sequence of calls to g_base64_encode_step().
 188  *
 189  * The output buffer must be large enough to fit all the data that will
 190  * be written to it. It will need up to 4 bytes, or up to 5 bytes if
 191  * line-breaking is enabled.
 192  *
 193  * Return value: The number of bytes of output that was written
 194  *
 195  * Since: 2.12
 196  */
 197 gsize
 198 g_base64_encode_close (gboolean  break_lines,
 199                        gchar    *out,
 200                        gint     *state,
 201                        gint     *save)
 202 {
 203   int c1, c2;
 204   char *outptr = out;
 205
 206   g_return_val_if_fail (out != NULL, 0);
 207   g_return_val_if_fail (state != NULL, 0);
 208   g_return_val_if_fail (save != NULL, 0);
 209
 210   c1 = ((unsigned char *) save) [1];
 211   c2 = ((unsigned char *) save) [2];
 212
 213   switch (((char *) save) [0])
 214     {
 215     case 2:
 216       outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
 217       g_assert (outptr [2] != 0);
 218       goto skip;
 219     case 1:
 220       outptr[2] = '=';
 221     skip:
 222       outptr [0] = base64_alphabet [ c1 >> 2 ];
 223       outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
 224       outptr [3] = '=';
 225       outptr += 4;
 226       break;
 227     }
 228   if (break_lines)
 229     *outptr++ = '\n';
 230
 231   *save = 0;
 232   *state = 0;
 233
 234   return outptr - out;
 235 }
 236
 237 /**
 238  * g_base64_encode:
 239  * @data: (array length=len) (element-type guint8): the binary data to encode
 240  * @len: the length of @data
 241  *
 242  * Encode a sequence of binary data into its Base-64 stringified
 243  * representation.
 244  *
 245  * Return value: (transfer full): a newly allocated, zero-terminated Base-64
 246  *               encoded string representing @data. The returned string must
 247  *               be freed with g_free().
 248  *
 249  * Since: 2.12
 250  */
 251 gchar *
 252 g_base64_encode (const guchar *data,
 253                  gsize         len)
 254 {
 255   gchar *out;
 256   gint state = 0, outlen;
 257   gint save = 0;
 258
 259   g_return_val_if_fail (data != NULL || len == 0, NULL);
 260
 261   /* We can use a smaller limit here, since we know the saved state is 0,
 262      +1 is needed for trailing \0, also check for unlikely integer overflow */
 263   if (len >= ((G_MAXSIZE - 1) / 4 - 1) * 3)
 264     g_error("%s: input too large for Base64 encoding (%"G_GSIZE_FORMAT" chars)",
 265         G_STRLOC, len);
 266
 267   out = g_malloc ((len / 3 + 1) * 4 + 1);
 268
 269   outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
 270   outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
 271   out[outlen] = '\0';
 272
 273   return (gchar *) out;
 274 }
 275
 276 static const unsigned char mime_base64_rank[256] = {
 277   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 278   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 279   255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
 280    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
 281   255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
 282    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
 283   255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
 284    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
 285   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 286   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 287   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 288   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 289   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 290   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 291   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 292   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 293 };
 294
 295 /**
 296  * g_base64_decode_step:
 297  * @in: (array length=len) (element-type guint8): binary input data
 298  * @len: max length of @in data to decode
 299  * @out: (out) (array) (element-type guint8): output buffer
 300  * @state: (inout): Saved state between steps, initialize to 0
 301  * @save: (inout): Saved state between steps, initialize to 0
 302  *
 303  * Incrementally decode a sequence of binary data from its Base-64 stringified
 304  * representation. By calling this function multiple times you can convert
 305  * data in chunks to avoid having to have the full encoded data in memory.
 306  *
 307  * The output buffer must be large enough to fit all the data that will
 308  * be written to it. Since base64 encodes 3 bytes in 4 chars you need
 309  * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
 310  * state).
 311  *
 312  * Return value: The number of bytes of output that was written
 313  *
 314  * Since: 2.12
 315  **/
 316 gsize
 317 g_base64_decode_step (const gchar  *in,
 318                       gsize         len,
 319                       guchar       *out,
 320                       gint         *state,
 321                       guint        *save)
 322 {
 323   const guchar *inptr;
 324   guchar *outptr;
 325   const guchar *inend;
 326   guchar c, rank;
 327   guchar last[2];
 328   unsigned int v;
 329   int i;
 330
 331   g_return_val_if_fail (in != NULL, 0);
 332   g_return_val_if_fail (out != NULL, 0);
 333   g_return_val_if_fail (state != NULL, 0);
 334   g_return_val_if_fail (save != NULL, 0);
 335
 336   if (len <= 0)
 337     return 0;
 338
 339   inend = (const guchar *)in+len;
 340   outptr = out;
 341
 342   /* convert 4 base64 bytes to 3 normal bytes */
 343   v=*save;
 344   i=*state;
 345
 346   last[0] = last[1] = 0;
 347
 348   /* we use the sign in the state to determine if we got a padding character
 349      in the previous sequence */
 350   if (i < 0)
 351     {
 352       i = -i;
 353       last[0] = '=';
 354     }
 355
 356   inptr = (const guchar *)in;
 357   while (inptr < inend)
 358     {
 359       c = *inptr++;
 360       rank = mime_base64_rank [c];
 361       if (rank != 0xff)
 362         {
 363           last[1] = last[0];
 364           last[0] = c;
 365           v = (v<<6) | rank;
 366           i++;
 367           if (i==4)
 368             {
 369               *outptr++ = v>>16;
 370               if (last[1] != '=')
 371                 *outptr++ = v>>8;
 372               if (last[0] != '=')
 373                 *outptr++ = v;
 374               i=0;
 375             }
 376         }
 377     }
 378
 379   *save = v;
 380   *state = last[0] == '=' ? -i : i;
 381
 382   return outptr - out;
 383 }
 384
 385 /**
 386  * g_base64_decode:
 387  * @text: zero-terminated string with base64 text to decode
 388  * @out_len: (out): The length of the decoded data is written here
 389  *
 390  * Decode a sequence of Base-64 encoded text into binary data.  Note
 391  * that the returned binary data is not necessarily zero-terminated,
 392  * so it should not be used as a character string.
 393  *
 394  * Return value: (transfer full) (array length=out_len) (element-type guint8):
 395  *               newly allocated buffer containing the binary data
 396  *               that @text represents. The returned buffer must
 397  *               be freed with g_free().
 398  *
 399  * Since: 2.12
 400  */
 401 guchar *
 402 g_base64_decode (const gchar *text,
 403                  gsize       *out_len)
 404 {
 405   guchar *ret;
 406   gsize input_length;
 407   gint state = 0;
 408   guint save = 0;
 409
 410   g_return_val_if_fail (text != NULL, NULL);
 411   g_return_val_if_fail (out_len != NULL, NULL);
 412
 413   input_length = strlen (text);
 414
 415   /* We can use a smaller limit here, since we know the saved state is 0,
 416      +1 used to avoid calling g_malloc0(0), and hence returning NULL */
 417   ret = g_malloc0 ((input_length / 4) * 3 + 1);
 418
 419   *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
 420
 421   return ret;
 422 }
 423
 424 /**
 425  * g_base64_decode_inplace:
 426  * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
 427  *        string with base64 text to decode
 428  * @out_len: (inout): The length of the decoded data is written here
 429  *
 430  * Decode a sequence of Base-64 encoded text into binary data
 431  * by overwriting the input data.
 432  *
 433  * Return value: (transfer none): The binary data that @text responds. This pointer
 434  *               is the same as the input @text.
 435  *
 436  * Since: 2.20
 437  */
 438 guchar *
 439 g_base64_decode_inplace (gchar *text,
 440                          gsize *out_len)
 441 {
 442   gint input_length, state = 0;
 443   guint save = 0;
 444
 445   g_return_val_if_fail (text != NULL, NULL);
 446   g_return_val_if_fail (out_len != NULL, NULL);
 447
 448   input_length = strlen (text);
 449
 450   g_return_val_if_fail (input_length > 1, NULL);
 451
 452   *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
 453
 454   return (guchar *) text;
 455 }