glib/gbase64.c

   1 /* gbase64.c - Base64 encoding/decoding
   2  *
   3  *  Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
   4  *  Copyright (C) 2000-2003 Ximian Inc.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public License
  17  * along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  *
  19  * This is based on code in camel, written by:
  20  *    Michael Zucchi <notzed@ximian.com>
  21  *    Jeffrey Stedfast <fejj@ximian.com>
  22  */
  23
  24 #include "config.h"
  25
  26 #include <string.h>
  27
  28 #include "gbase64.h"
  29 #include "gtestutils.h"
  30 #include "glibintl.h"
  31
  32
  33 /**
  34  * SECTION:base64
  35  * @title: Base64 Encoding
  36  * @short_description: encodes and decodes data in Base64 format
  37  *
  38  * Base64 is an encoding that allows a sequence of arbitrary bytes to be
  39  * encoded as a sequence of printable ASCII characters. For the definition
  40  * of Base64, see
  41  * [RFC 1421](http://www.ietf.org/rfc/rfc1421.txt)
  42  * or
  43  * [RFC 2045](http://www.ietf.org/rfc/rfc2045.txt).
  44  * Base64 is most commonly used as a MIME transfer encoding
  45  * for email.
  46  *
  47  * GLib supports incremental encoding using g_base64_encode_step() and
  48  * g_base64_encode_close(). Incremental decoding can be done with
  49  * g_base64_decode_step(). To encode or decode data in one go, use
  50  * g_base64_encode() or g_base64_decode(). To avoid memory allocation when
  51  * decoding, you can use g_base64_decode_inplace().
  52  *
  53  * Support for Base64 encoding has been added in GLib 2.12.
  54  */
  55
/* The 64 characters of the Base64 alphabet, indexed by 6-bit value (0-63).
 * The encoder uses it to turn each 6-bit group into an output character. */
static const char base64_alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  58
  59 /**
  60  * g_base64_encode_step:
  61  * @in: (array length=len) (element-type guint8): the binary data to encode
  62  * @len: the length of @in
  63  * @break_lines: whether to break long lines
  64  * @out: (out) (array) (element-type guint8): pointer to destination buffer
  65  * @state: (inout): Saved state between steps, initialize to 0
  66  * @save: (inout): Saved state between steps, initialize to 0
  67  *
  68  * Incrementally encode a sequence of binary data into its Base-64 stringified
  69  * representation. By calling this function multiple times you can convert
  70  * data in chunks to avoid having to have the full encoded data in memory.
  71  *
  72  * When all of the data has been converted you must call
  73  * g_base64_encode_close() to flush the saved state.
  74  *
  75  * The output buffer must be large enough to fit all the data that will
  76  * be written to it. Due to the way base64 encodes you will need
  77  * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
  78  * non-zero state). If you enable line-breaking you will need at least:
  79  * ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space.
  80  *
  81  * @break_lines is typically used when putting base64-encoded data in emails.
  82  * It breaks the lines at 76 columns instead of putting all of the text on
  83  * the same line. This avoids problems with long lines in the email system.
  84  * Note however that it breaks the lines with `LF` characters, not
  85  * `CR LF` sequences, so the result cannot be passed directly to SMTP
  86  * or certain other protocols.
  87  *
  88  * Returns: The number of bytes of output that was written
  89  *
  90  * Since: 2.12
  91  */
  92 gsize
  93 g_base64_encode_step (const guchar *in,
  94                       gsize         len,
  95                       gboolean      break_lines,
  96                       gchar        *out,
  97                       gint         *state,
  98                       gint         *save)
  99 {
 100   char *outptr;
 101   const guchar *inptr;
 102
 103   g_return_val_if_fail (in != NULL || len == 0, 0);
 104   g_return_val_if_fail (out != NULL, 0);
 105   g_return_val_if_fail (state != NULL, 0);
 106   g_return_val_if_fail (save != NULL, 0);
 107
 108   if (len == 0)
 109     return 0;
 110
 111   inptr = in;
 112   outptr = out;
 113
 114   if (len + ((char *) save) [0] > 2)
 115     {
 116       const guchar *inend = in+len-2;
 117       int c1, c2, c3;
 118       int already;
 119
 120       already = *state;
 121
 122       switch (((char *) save) [0])
 123         {
 124         case 1:
 125           c1 = ((unsigned char *) save) [1];
 126           goto skip1;
 127         case 2:
 128           c1 = ((unsigned char *) save) [1];
 129           c2 = ((unsigned char *) save) [2];
 130           goto skip2;
 131         }
 132
 133       /*
 134        * yes, we jump into the loop, no i'm not going to change it,
 135        * it's beautiful!
 136        */
 137       while (inptr < inend)
 138         {
 139           c1 = *inptr++;
 140         skip1:
 141           c2 = *inptr++;
 142         skip2:
 143           c3 = *inptr++;
 144           *outptr++ = base64_alphabet [ c1 >> 2 ];
 145           *outptr++ = base64_alphabet [ c2 >> 4 |
 146                                         ((c1&0x3) << 4) ];
 147           *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
 148                                         (c3 >> 6) ];
 149           *outptr++ = base64_alphabet [ c3 & 0x3f ];
 150           /* this is a bit ugly ... */
 151           if (break_lines && (++already) >= 19)
 152             {
 153               *outptr++ = '\n';
 154               already = 0;
 155             }
 156         }
 157
 158       ((char *)save)[0] = 0;
 159       len = 2 - (inptr - inend);
 160       *state = already;
 161     }
 162
 163   g_assert (len == 0 || len == 1 || len == 2);
 164
 165     {
 166       char *saveout;
 167
 168       /* points to the slot for the next char to save */
 169       saveout = & (((char *)save)[1]) + ((char *)save)[0];
 170
 171       /* len can only be 0 1 or 2 */
 172       switch(len)
 173         {
 174         case 2:
 175           *saveout++ = *inptr++;
 176           G_GNUC_FALLTHROUGH;
 177         case 1:
 178           *saveout++ = *inptr++;
 179         }
 180       ((char *)save)[0] += len;
 181     }
 182
 183   return outptr - out;
 184 }
 185
 186 /**
 187  * g_base64_encode_close:
 188  * @break_lines: whether to break long lines
 189  * @out: (out) (array) (element-type guint8): pointer to destination buffer
 190  * @state: (inout): Saved state from g_base64_encode_step()
 191  * @save: (inout): Saved state from g_base64_encode_step()
 192  *
 193  * Flush the status from a sequence of calls to g_base64_encode_step().
 194  *
 195  * The output buffer must be large enough to fit all the data that will
 196  * be written to it. It will need up to 4 bytes, or up to 5 bytes if
 197  * line-breaking is enabled.
 198  *
 199  * The @out array will not be automatically nul-terminated.
 200  *
 201  * Returns: The number of bytes of output that was written
 202  *
 203  * Since: 2.12
 204  */
 205 gsize
 206 g_base64_encode_close (gboolean  break_lines,
 207                        gchar    *out,
 208                        gint     *state,
 209                        gint     *save)
 210 {
 211   int c1, c2;
 212   char *outptr = out;
 213
 214   g_return_val_if_fail (out != NULL, 0);
 215   g_return_val_if_fail (state != NULL, 0);
 216   g_return_val_if_fail (save != NULL, 0);
 217
 218   c1 = ((unsigned char *) save) [1];
 219   c2 = ((unsigned char *) save) [2];
 220
 221   switch (((char *) save) [0])
 222     {
 223     case 2:
 224       outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
 225       g_assert (outptr [2] != 0);
 226       goto skip;
 227     case 1:
 228       outptr[2] = '=';
 229       c2 = 0;  /* saved state here is not relevant */
 230     skip:
 231       outptr [0] = base64_alphabet [ c1 >> 2 ];
 232       outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
 233       outptr [3] = '=';
 234       outptr += 4;
 235       break;
 236     }
 237   if (break_lines)
 238     *outptr++ = '\n';
 239
 240   *save = 0;
 241   *state = 0;
 242
 243   return outptr - out;
 244 }
 245
 246 /**
 247  * g_base64_encode:
 248  * @data: (array length=len) (element-type guint8) (nullable): the binary data to encode
 249  * @len: the length of @data
 250  *
 251  * Encode a sequence of binary data into its Base-64 stringified
 252  * representation.
 253  *
 254  * Returns: (transfer full): a newly allocated, zero-terminated Base-64
 255  *               encoded string representing @data. The returned string must
 256  *               be freed with g_free().
 257  *
 258  * Since: 2.12
 259  */
 260 gchar *
 261 g_base64_encode (const guchar *data,
 262                  gsize         len)
 263 {
 264   gchar *out;
 265   gint state = 0, outlen;
 266   gint save = 0;
 267
 268   g_return_val_if_fail (data != NULL || len == 0, NULL);
 269
 270   /* We can use a smaller limit here, since we know the saved state is 0,
 271      +1 is needed for trailing \0, also check for unlikely integer overflow */
 272   g_return_val_if_fail (len < ((G_MAXSIZE - 1) / 4 - 1) * 3, NULL);
 273
 274   out = g_malloc ((len / 3 + 1) * 4 + 1);
 275
 276   outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
 277   outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
 278   out[outlen] = '\0';
 279
 280   return (gchar *) out;
 281 }
 282
/* Decoding table: maps every possible input byte to its 6-bit Base64 value.
 * Bytes that are not part of the alphabet are marked with 255 and are
 * skipped by g_base64_decode_step(). The padding character '=' maps to 0,
 * so it is accepted as input and contributes zero bits. */
static const unsigned char mime_base64_rank[256] = {
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
   52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
  255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
 301
 302 /**
 303  * g_base64_decode_step: (skip)
 304  * @in: (array length=len) (element-type guint8): binary input data
 305  * @len: max length of @in data to decode
 306  * @out: (out caller-allocates) (array) (element-type guint8): output buffer
 307  * @state: (inout): Saved state between steps, initialize to 0
 308  * @save: (inout): Saved state between steps, initialize to 0
 309  *
 310  * Incrementally decode a sequence of binary data from its Base-64 stringified
 311  * representation. By calling this function multiple times you can convert
 312  * data in chunks to avoid having to have the full encoded data in memory.
 313  *
 314  * The output buffer must be large enough to fit all the data that will
 315  * be written to it. Since base64 encodes 3 bytes in 4 chars you need
 316  * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
 317  * state).
 318  *
 319  * Returns: The number of bytes of output that was written
 320  *
 321  * Since: 2.12
 322  **/
 323 gsize
 324 g_base64_decode_step (const gchar  *in,
 325                       gsize         len,
 326                       guchar       *out,
 327                       gint         *state,
 328                       guint        *save)
 329 {
 330   const guchar *inptr;
 331   guchar *outptr;
 332   const guchar *inend;
 333   guchar c, rank;
 334   guchar last[2];
 335   unsigned int v;
 336   int i;
 337
 338   g_return_val_if_fail (in != NULL || len == 0, 0);
 339   g_return_val_if_fail (out != NULL, 0);
 340   g_return_val_if_fail (state != NULL, 0);
 341   g_return_val_if_fail (save != NULL, 0);
 342
 343   if (len == 0)
 344     return 0;
 345
 346   inend = (const guchar *)in+len;
 347   outptr = out;
 348
 349   /* convert 4 base64 bytes to 3 normal bytes */
 350   v=*save;
 351   i=*state;
 352
 353   last[0] = last[1] = 0;
 354
 355   /* we use the sign in the state to determine if we got a padding character
 356      in the previous sequence */
 357   if (i < 0)
 358     {
 359       i = -i;
 360       last[0] = '=';
 361     }
 362
 363   inptr = (const guchar *)in;
 364   while (inptr < inend)
 365     {
 366       c = *inptr++;
 367       rank = mime_base64_rank [c];
 368       if (rank != 0xff)
 369         {
 370           last[1] = last[0];
 371           last[0] = c;
 372           v = (v<<6) | rank;
 373           i++;
 374           if (i==4)
 375             {
 376               *outptr++ = v>>16;
 377               if (last[1] != '=')
 378                 *outptr++ = v>>8;
 379               if (last[0] != '=')
 380                 *outptr++ = v;
 381               i=0;
 382             }
 383         }
 384     }
 385
 386   *save = v;
 387   *state = last[0] == '=' ? -i : i;
 388
 389   return outptr - out;
 390 }
 391
 392 /**
 393  * g_base64_decode:
 394  * @text: (not nullable): zero-terminated string with base64 text to decode
 395  * @out_len: (out): The length of the decoded data is written here
 396  *
 397  * Decode a sequence of Base-64 encoded text into binary data.  Note
 398  * that the returned binary data is not necessarily zero-terminated,
 399  * so it should not be used as a character string.
 400  *
 401  * Returns: (transfer full) (array length=out_len) (element-type guint8):
 402  *               newly allocated buffer containing the binary data
 403  *               that @text represents. The returned buffer must
 404  *               be freed with g_free().
 405  *
 406  * Since: 2.12
 407  */
 408 guchar *
 409 g_base64_decode (const gchar *text,
 410                  gsize       *out_len)
 411 {
 412   guchar *ret;
 413   gsize input_length;
 414   gint state = 0;
 415   guint save = 0;
 416
 417   g_return_val_if_fail (text != NULL, NULL);
 418   g_return_val_if_fail (out_len != NULL, NULL);
 419
 420   input_length = strlen (text);
 421
 422   /* We can use a smaller limit here, since we know the saved state is 0,
 423      +1 used to avoid calling g_malloc0(0), and hence returning NULL */
 424   ret = g_malloc0 ((input_length / 4) * 3 + 1);
 425
 426   *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
 427
 428   return ret;
 429 }
 430
 431 /**
 432  * g_base64_decode_inplace:
 433  * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
 434  *        string with base64 text to decode
 435  * @out_len: (inout): The length of the decoded data is written here
 436  *
 437  * Decode a sequence of Base-64 encoded text into binary data
 438  * by overwriting the input data.
 439  *
 440  * Returns: (transfer none): The binary data that @text responds. This pointer
 441  *               is the same as the input @text.
 442  *
 443  * Since: 2.20
 444  */
 445 guchar *
 446 g_base64_decode_inplace (gchar *text,
 447                          gsize *out_len)
 448 {
 449   gint input_length, state = 0;
 450   guint save = 0;
 451
 452   g_return_val_if_fail (text != NULL, NULL);
 453   g_return_val_if_fail (out_len != NULL, NULL);
 454
 455   input_length = strlen (text);
 456
 457   g_return_val_if_fail (input_length > 1, NULL);
 458
 459   *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
 460
 461   return (guchar *) text;
 462 }