glib/gbase64.c

   1 /* gbase64.c - Base64 encoding/decoding
   2  *
   3  *  Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
   4  *  Copyright (C) 2000-2003 Ximian Inc.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, write to the
  18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19  * Boston, MA 02111-1307, USA.
  20  *
  21  * This is based on code in camel, written by:
  22  *    Michael Zucchi <notzed@ximian.com>
  23  *    Jeffrey Stedfast <fejj@ximian.com>
  24  */
  25
  26 #include "config.h"
  27
  28 #include <string.h>
  29
  30 #include "gbase64.h"
  31 #include "gtestutils.h"
  32 #include "glibintl.h"
  33
  34
  35 /**
  36  * SECTION:base64
  37  * @title: Base64 Encoding
  38  * @short_description: encodes and decodes data in Base64 format
  39  *
  40  * Base64 is an encoding that allows a sequence of arbitrary bytes to be
  41  * encoded as a sequence of printable ASCII characters. For the definition
  42  * of Base64, see <ulink url="http://www.ietf.org/rfc/rfc1421.txt">RFC
  43  * 1421</ulink> or <ulink url="http://www.ietf.org/rfc/rfc2045.txt">RFC
  44  * 2045</ulink>. Base64 is most commonly used as a MIME transfer encoding
  45  * for email.
  46  *
  47  * GLib supports incremental encoding using g_base64_encode_step() and
  48  * g_base64_encode_close(). Incremental decoding can be done with
  49  * g_base64_decode_step(). To encode or decode data in one go, use
  50  * g_base64_encode() or g_base64_decode(). To avoid memory allocation when
  51  * decoding, you can use g_base64_decode_inplace().
  52  *
  53  * Support for Base64 encoding has been added in GLib 2.12.
  54  */
  55
  56 static const char base64_alphabet[] =
  57         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  58
  59 /**
  60  * g_base64_encode_step:
  61  * @in: (array length=len) (element-type guint8): the binary data to encode
  62  * @len: the length of @in
  63  * @break_lines: whether to break long lines
  64  * @out: (out) (array) (element-type guint8): pointer to destination buffer
  65  * @state: (inout): Saved state between steps, initialize to 0
  66  * @save: (inout): Saved state between steps, initialize to 0
  67  *
  68  * Incrementally encode a sequence of binary data into its Base-64 stringified
  69  * representation. By calling this function multiple times you can convert
  70  * data in chunks to avoid having to have the full encoded data in memory.
  71  *
  72  * When all of the data has been converted you must call
  73  * g_base64_encode_close() to flush the saved state.
  74  *
  75  * The output buffer must be large enough to fit all the data that will
  76  * be written to it. Due to the way base64 encodes you will need
  77  * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
  78  * non-zero state). If you enable line-breaking you will need at least:
  79  * ((@len / 3 + 1) * 4 + 4) / 72 + 1 bytes of extra space.
  80  *
  81  * @break_lines is typically used when putting base64-encoded data in emails.
  82  * It breaks the lines at 72 columns instead of putting all of the text on
  83  * the same line. This avoids problems with long lines in the email system.
  84  *
  85  * Return value: The number of bytes of output that was written
  86  *
  87  * Since: 2.12
  88  */
  89 gsize
  90 g_base64_encode_step (const guchar *in,
  91                       gsize         len,
  92                       gboolean      break_lines,
  93                       gchar        *out,
  94                       gint         *state,
  95                       gint         *save)
  96 {
  97   char *outptr;
  98   const guchar *inptr;
  99
 100   g_return_val_if_fail (in != NULL, 0);
 101   g_return_val_if_fail (out != NULL, 0);
 102   g_return_val_if_fail (state != NULL, 0);
 103   g_return_val_if_fail (save != NULL, 0);
 104
 105   if (len <= 0)
 106     return 0;
 107
 108   inptr = in;
 109   outptr = out;
 110
 111   if (len + ((char *) save) [0] > 2)
 112     {
 113       const guchar *inend = in+len-2;
 114       int c1, c2, c3;
 115       int already;
 116
 117       already = *state;
 118
 119       switch (((char *) save) [0])
 120         {
 121         case 1:
 122           c1 = ((unsigned char *) save) [1];
 123           goto skip1;
 124         case 2:
 125           c1 = ((unsigned char *) save) [1];
 126           c2 = ((unsigned char *) save) [2];
 127           goto skip2;
 128         }
 129
 130       /*
 131        * yes, we jump into the loop, no i'm not going to change it,
 132        * it's beautiful!
 133        */
 134       while (inptr < inend)
 135         {
 136           c1 = *inptr++;
 137         skip1:
 138           c2 = *inptr++;
 139         skip2:
 140           c3 = *inptr++;
 141           *outptr++ = base64_alphabet [ c1 >> 2 ];
 142           *outptr++ = base64_alphabet [ c2 >> 4 |
 143                                         ((c1&0x3) << 4) ];
 144           *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
 145                                         (c3 >> 6) ];
 146           *outptr++ = base64_alphabet [ c3 & 0x3f ];
 147           /* this is a bit ugly ... */
 148           if (break_lines && (++already) >= 19)
 149             {
 150               *outptr++ = '\n';
 151               already = 0;
 152             }
 153         }
 154
 155       ((char *)save)[0] = 0;
 156       len = 2 - (inptr - inend);
 157       *state = already;
 158     }
 159
 160   if (len>0)
 161     {
 162       char *saveout;
 163
 164       /* points to the slot for the next char to save */
 165       saveout = & (((char *)save)[1]) + ((char *)save)[0];
 166
 167       /* len can only be 0 1 or 2 */
 168       switch(len)
 169         {
 170         case 2: *saveout++ = *inptr++;
 171         case 1: *saveout++ = *inptr++;
 172         }
 173       ((char *)save)[0] += len;
 174     }
 175
 176   return outptr - out;
 177 }
 178
 179 /**
 180  * g_base64_encode_close:
 181  * @break_lines: whether to break long lines
 182  * @out: (out) (array) (element-type guint8): pointer to destination buffer
 183  * @state: (inout): Saved state from g_base64_encode_step()
 184  * @save: (inout): Saved state from g_base64_encode_step()
 185  *
 186  * Flush the status from a sequence of calls to g_base64_encode_step().
 187  *
 188  * The output buffer must be large enough to fit all the data that will
 189  * be written to it. It will need up to 4 bytes, or up to 5 bytes if
 190  * line-breaking is enabled.
 191  *
 192  * Return value: The number of bytes of output that was written
 193  *
 194  * Since: 2.12
 195  */
 196 gsize
 197 g_base64_encode_close (gboolean  break_lines,
 198                        gchar    *out,
 199                        gint     *state,
 200                        gint     *save)
 201 {
 202   int c1, c2;
 203   char *outptr = out;
 204
 205   g_return_val_if_fail (out != NULL, 0);
 206   g_return_val_if_fail (state != NULL, 0);
 207   g_return_val_if_fail (save != NULL, 0);
 208
 209   c1 = ((unsigned char *) save) [1];
 210   c2 = ((unsigned char *) save) [2];
 211
 212   switch (((char *) save) [0])
 213     {
 214     case 2:
 215       outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
 216       g_assert (outptr [2] != 0);
 217       goto skip;
 218     case 1:
 219       outptr[2] = '=';
 220     skip:
 221       outptr [0] = base64_alphabet [ c1 >> 2 ];
 222       outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
 223       outptr [3] = '=';
 224       outptr += 4;
 225       break;
 226     }
 227   if (break_lines)
 228     *outptr++ = '\n';
 229
 230   *save = 0;
 231   *state = 0;
 232
 233   return outptr - out;
 234 }
 235
 236 /**
 237  * g_base64_encode:
 238  * @data: (array length=len) (element-type guint8): the binary data to encode
 239  * @len: the length of @data
 240  *
 241  * Encode a sequence of binary data into its Base-64 stringified
 242  * representation.
 243  *
 244  * Return value: (transfer full): a newly allocated, zero-terminated Base-64
 245  *               encoded string representing @data. The returned string must
 246  *               be freed with g_free().
 247  *
 248  * Since: 2.12
 249  */
 250 gchar *
 251 g_base64_encode (const guchar *data,
 252                  gsize         len)
 253 {
 254   gchar *out;
 255   gint state = 0, outlen;
 256   gint save = 0;
 257
 258   g_return_val_if_fail (data != NULL || len == 0, NULL);
 259
 260   /* We can use a smaller limit here, since we know the saved state is 0,
 261      +1 is needed for trailing \0, also check for unlikely integer overflow */
 262   if (len >= ((G_MAXSIZE - 1) / 4 - 1) * 3)
 263     g_error("%s: input too large for Base64 encoding (%"G_GSIZE_FORMAT" chars)",
 264         G_STRLOC, len);
 265
 266   out = g_malloc ((len / 3 + 1) * 4 + 1);
 267
 268   outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
 269   outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
 270   out[outlen] = '\0';
 271
 272   return (gchar *) out;
 273 }
 274
 275 static const unsigned char mime_base64_rank[256] = {
 276   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 277   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 278   255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
 279    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
 280   255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
 281    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
 282   255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
 283    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
 284   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 285   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 286   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 287   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 288   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 289   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 290   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 291   255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
 292 };
 293
 294 /**
 295  * g_base64_decode_step:
 296  * @in: (array length=len) (element-type guint8): binary input data
 297  * @len: max length of @in data to decode
 298  * @out: (out) (array) (element-type guint8): output buffer
 299  * @state: (inout): Saved state between steps, initialize to 0
 300  * @save: (inout): Saved state between steps, initialize to 0
 301  *
 302  * Incrementally decode a sequence of binary data from its Base-64 stringified
 303  * representation. By calling this function multiple times you can convert
 304  * data in chunks to avoid having to have the full encoded data in memory.
 305  *
 306  * The output buffer must be large enough to fit all the data that will
 307  * be written to it. Since base64 encodes 3 bytes in 4 chars you need
 308  * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
 309  * state).
 310  *
 311  * Return value: The number of bytes of output that was written
 312  *
 313  * Since: 2.12
 314  **/
 315 gsize
 316 g_base64_decode_step (const gchar  *in,
 317                       gsize         len,
 318                       guchar       *out,
 319                       gint         *state,
 320                       guint        *save)
 321 {
 322   const guchar *inptr;
 323   guchar *outptr;
 324   const guchar *inend;
 325   guchar c, rank;
 326   guchar last[2];
 327   unsigned int v;
 328   int i;
 329
 330   g_return_val_if_fail (in != NULL, 0);
 331   g_return_val_if_fail (out != NULL, 0);
 332   g_return_val_if_fail (state != NULL, 0);
 333   g_return_val_if_fail (save != NULL, 0);
 334
 335   if (len <= 0)
 336     return 0;
 337
 338   inend = (const guchar *)in+len;
 339   outptr = out;
 340
 341   /* convert 4 base64 bytes to 3 normal bytes */
 342   v=*save;
 343   i=*state;
 344   inptr = (const guchar *)in;
 345   last[0] = last[1] = 0;
 346   while (inptr < inend)
 347     {
 348       c = *inptr++;
 349       rank = mime_base64_rank [c];
 350       if (rank != 0xff)
 351         {
 352           last[1] = last[0];
 353           last[0] = c;
 354           v = (v<<6) | rank;
 355           i++;
 356           if (i==4)
 357             {
 358               *outptr++ = v>>16;
 359               if (last[1] != '=')
 360                 *outptr++ = v>>8;
 361               if (last[0] != '=')
 362                 *outptr++ = v;
 363               i=0;
 364             }
 365         }
 366     }
 367
 368   *save = v;
 369   *state = i;
 370
 371   return outptr - out;
 372 }
 373
 374 /**
 375  * g_base64_decode:
 376  * @text: zero-terminated string with base64 text to decode
 377  * @out_len: (out): The length of the decoded data is written here
 378  *
 379  * Decode a sequence of Base-64 encoded text into binary data
 380  *
 381  * Return value: (transfer full) (array length=out_len) (element-type guint8):
 382  *               newly allocated buffer containing the binary data
 383  *               that @text represents. The returned buffer must
 384  *               be freed with g_free().
 385  *
 386  * Since: 2.12
 387  */
 388 guchar *
 389 g_base64_decode (const gchar *text,
 390                  gsize       *out_len)
 391 {
 392   guchar *ret;
 393   gsize input_length;
 394   gint state = 0;
 395   guint save = 0;
 396
 397   g_return_val_if_fail (text != NULL, NULL);
 398   g_return_val_if_fail (out_len != NULL, NULL);
 399
 400   input_length = strlen (text);
 401
 402   /* We can use a smaller limit here, since we know the saved state is 0,
 403      +1 used to avoid calling g_malloc0(0), and hence retruning NULL */
 404   ret = g_malloc0 ((input_length / 4) * 3 + 1);
 405
 406   *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
 407
 408   return ret;
 409 }
 410
 411 /**
 412  * g_base64_decode_inplace:
 413  * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
 414  *        string with base64 text to decode
 415  * @out_len: (inout): The length of the decoded data is written here
 416  *
 417  * Decode a sequence of Base-64 encoded text into binary data
 418  * by overwriting the input data.
 419  *
 420  * Return value: (transfer none): The binary data that @text responds. This pointer
 421  *               is the same as the input @text.
 422  *
 423  * Since: 2.20
 424  */
 425 guchar *
 426 g_base64_decode_inplace (gchar *text,
 427                          gsize *out_len)
 428 {
 429   gint input_length, state = 0;
 430   guint save = 0;
 431
 432   g_return_val_if_fail (text != NULL, NULL);
 433   g_return_val_if_fail (out_len != NULL, NULL);
 434
 435   input_length = strlen (text);
 436
 437   g_return_val_if_fail (input_length > 1, NULL);
 438
 439   *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
 440
 441   return (guchar *) text;
 442 }