glib/gbase64.c

   1 /* gbase64.c - Base64 encoding/decoding
   2  *
   3  *  Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
   4  *  Copyright (C) 2000-2003 Ximian Inc.
   5  *
   6  * SPDX-License-Identifier: LGPL-2.1-or-later
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public License
  19  * along with this library; if not, see <http://www.gnu.org/licenses/>.
  20  *
  21  * This is based on code in camel, written by:
  22  *    Michael Zucchi <notzed@ximian.com>
  23  *    Jeffrey Stedfast <fejj@ximian.com>
  24  */
  25
  26 #include "config.h"
  27
  28 #include <string.h>
  29
  30 #include "gbase64.h"
  31 #include "gtestutils.h"
  32 #include "glibintl.h"
  33
  34
  35 /**
  36  * SECTION:base64
  37  * @title: Base64 Encoding
  38  * @short_description: encodes and decodes data in Base64 format
  39  *
  40  * Base64 is an encoding that allows a sequence of arbitrary bytes to be
  41  * encoded as a sequence of printable ASCII characters. For the definition
  42  * of Base64, see
  43  * [RFC 1421](http://www.ietf.org/rfc/rfc1421.txt)
  44  * or
  45  * [RFC 2045](http://www.ietf.org/rfc/rfc2045.txt).
  46  * Base64 is most commonly used as a MIME transfer encoding
  47  * for email.
  48  *
  49  * GLib supports incremental encoding using g_base64_encode_step() and
  50  * g_base64_encode_close(). Incremental decoding can be done with
  51  * g_base64_decode_step(). To encode or decode data in one go, use
  52  * g_base64_encode() or g_base64_decode(). To avoid memory allocation when
  53  * decoding, you can use g_base64_decode_inplace().
  54  *
  55  * Support for Base64 encoding has been added in GLib 2.12.
  56  */
  57
/* The 64 output characters used by the encoder, indexed by 6-bit value
 * (0..63). The padding character '=' is not part of this table; it is
 * written explicitly by g_base64_encode_close(). */
static const char base64_alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  60
  61 /**
  62  * g_base64_encode_step:
  63  * @in: (array length=len) (element-type guint8): the binary data to encode
  64  * @len: the length of @in
  65  * @break_lines: whether to break long lines
  66  * @out: (out) (array) (element-type guint8): pointer to destination buffer
  67  * @state: (inout): Saved state between steps, initialize to 0
  68  * @save: (inout): Saved state between steps, initialize to 0
  69  *
  70  * Incrementally encode a sequence of binary data into its Base-64 stringified
  71  * representation. By calling this function multiple times you can convert
  72  * data in chunks to avoid having to have the full encoded data in memory.
  73  *
  74  * When all of the data has been converted you must call
  75  * g_base64_encode_close() to flush the saved state.
  76  *
  77  * The output buffer must be large enough to fit all the data that will
  78  * be written to it. Due to the way base64 encodes you will need
  79  * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
  80  * non-zero state). If you enable line-breaking you will need at least:
  81  * ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space.
  82  *
  83  * @break_lines is typically used when putting base64-encoded data in emails.
  84  * It breaks the lines at 76 columns instead of putting all of the text on
  85  * the same line. This avoids problems with long lines in the email system.
  86  * Note however that it breaks the lines with `LF` characters, not
  87  * `CR LF` sequences, so the result cannot be passed directly to SMTP
  88  * or certain other protocols.
  89  *
  90  * Returns: The number of bytes of output that was written
  91  *
  92  * Since: 2.12
  93  */
  94 gsize
  95 g_base64_encode_step (const guchar *in,
  96                       gsize         len,
  97                       gboolean      break_lines,
  98                       gchar        *out,
  99                       gint         *state,
 100                       gint         *save)
 101 {
 102   char *outptr;
 103   const guchar *inptr;
 104
 105   g_return_val_if_fail (in != NULL || len == 0, 0);
 106   g_return_val_if_fail (out != NULL, 0);
 107   g_return_val_if_fail (state != NULL, 0);
 108   g_return_val_if_fail (save != NULL, 0);
 109
 110   if (len == 0)
 111     return 0;
 112
 113   inptr = in;
 114   outptr = out;
 115
 116   if (len + ((char *) save) [0] > 2)
 117     {
 118       const guchar *inend = in+len-2;
 119       int c1, c2, c3;
 120       int already;
 121
 122       already = *state;
 123
 124       switch (((char *) save) [0])
 125         {
 126         case 1:
 127           c1 = ((unsigned char *) save) [1];
 128           goto skip1;
 129         case 2:
 130           c1 = ((unsigned char *) save) [1];
 131           c2 = ((unsigned char *) save) [2];
 132           goto skip2;
 133         }
 134
 135       /*
 136        * yes, we jump into the loop, no i'm not going to change it,
 137        * it's beautiful!
 138        */
 139       while (inptr < inend)
 140         {
 141           c1 = *inptr++;
 142         skip1:
 143           c2 = *inptr++;
 144         skip2:
 145           c3 = *inptr++;
 146           *outptr++ = base64_alphabet [ c1 >> 2 ];
 147           *outptr++ = base64_alphabet [ c2 >> 4 |
 148                                         ((c1&0x3) << 4) ];
 149           *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
 150                                         (c3 >> 6) ];
 151           *outptr++ = base64_alphabet [ c3 & 0x3f ];
 152           /* this is a bit ugly ... */
 153           if (break_lines && (++already) >= 19)
 154             {
 155               *outptr++ = '\n';
 156               already = 0;
 157             }
 158         }
 159
 160       ((char *)save)[0] = 0;
 161       len = 2 - (inptr - inend);
 162       *state = already;
 163     }
 164
 165   g_assert (len == 0 || len == 1 || len == 2);
 166
 167     {
 168       char *saveout;
 169
 170       /* points to the slot for the next char to save */
 171       saveout = & (((char *)save)[1]) + ((char *)save)[0];
 172
 173       /* len can only be 0 1 or 2 */
 174       switch(len)
 175         {
 176         case 2:
 177           *saveout++ = *inptr++;
 178           G_GNUC_FALLTHROUGH;
 179         case 1:
 180           *saveout++ = *inptr++;
 181         }
 182       ((char *)save)[0] += len;
 183     }
 184
 185   return outptr - out;
 186 }
 187
 188 /**
 189  * g_base64_encode_close:
 190  * @break_lines: whether to break long lines
 191  * @out: (out) (array) (element-type guint8): pointer to destination buffer
 192  * @state: (inout): Saved state from g_base64_encode_step()
 193  * @save: (inout): Saved state from g_base64_encode_step()
 194  *
 195  * Flush the status from a sequence of calls to g_base64_encode_step().
 196  *
 197  * The output buffer must be large enough to fit all the data that will
 198  * be written to it. It will need up to 4 bytes, or up to 5 bytes if
 199  * line-breaking is enabled.
 200  *
 201  * The @out array will not be automatically nul-terminated.
 202  *
 203  * Returns: The number of bytes of output that was written
 204  *
 205  * Since: 2.12
 206  */
 207 gsize
 208 g_base64_encode_close (gboolean  break_lines,
 209                        gchar    *out,
 210                        gint     *state,
 211                        gint     *save)
 212 {
 213   int c1, c2;
 214   char *outptr = out;
 215
 216   g_return_val_if_fail (out != NULL, 0);
 217   g_return_val_if_fail (state != NULL, 0);
 218   g_return_val_if_fail (save != NULL, 0);
 219
 220   c1 = ((unsigned char *) save) [1];
 221   c2 = ((unsigned char *) save) [2];
 222
 223   switch (((char *) save) [0])
 224     {
 225     case 2:
 226       outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
 227       g_assert (outptr [2] != 0);
 228       goto skip;
 229     case 1:
 230       outptr[2] = '=';
 231       c2 = 0;  /* saved state here is not relevant */
 232     skip:
 233       outptr [0] = base64_alphabet [ c1 >> 2 ];
 234       outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
 235       outptr [3] = '=';
 236       outptr += 4;
 237       break;
 238     }
 239   if (break_lines)
 240     *outptr++ = '\n';
 241
 242   *save = 0;
 243   *state = 0;
 244
 245   return outptr - out;
 246 }
 247
 248 /**
 249  * g_base64_encode:
 250  * @data: (array length=len) (element-type guint8) (nullable): the binary data to encode
 251  * @len: the length of @data
 252  *
 253  * Encode a sequence of binary data into its Base-64 stringified
 254  * representation.
 255  *
 256  * Returns: (transfer full): a newly allocated, zero-terminated Base-64
 257  *               encoded string representing @data. The returned string must
 258  *               be freed with g_free().
 259  *
 260  * Since: 2.12
 261  */
 262 gchar *
 263 g_base64_encode (const guchar *data,
 264                  gsize         len)
 265 {
 266   gchar *out;
 267   gint state = 0, outlen;
 268   gint save = 0;
 269
 270   g_return_val_if_fail (data != NULL || len == 0, NULL);
 271
 272   /* We can use a smaller limit here, since we know the saved state is 0,
 273      +1 is needed for trailing \0, also check for unlikely integer overflow */
 274   g_return_val_if_fail (len < ((G_MAXSIZE - 1) / 4 - 1) * 3, NULL);
 275
 276   out = g_malloc ((len / 3 + 1) * 4 + 1);
 277
 278   outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
 279   outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
 280   out[outlen] = '\0';
 281
 282   return (gchar *) out;
 283 }
 284
/* Decoding table: maps every possible input byte to its 6-bit Base64
 * value. Entries of 255 (0xff) mark bytes that are not part of the
 * alphabet; g_base64_decode_step() skips those. The padding character
 * '=' maps to 0, so it is accepted as input and contributes zero bits;
 * the decoder separately tracks whether the last characters seen were
 * '=' to decide how many output bytes to emit. */
static const unsigned char mime_base64_rank[256] = {
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
   52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
  255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
 303
 304 /**
 305  * g_base64_decode_step: (skip)
 306  * @in: (array length=len) (element-type guint8): binary input data
 307  * @len: max length of @in data to decode
 308  * @out: (out caller-allocates) (array) (element-type guint8): output buffer
 309  * @state: (inout): Saved state between steps, initialize to 0
 310  * @save: (inout): Saved state between steps, initialize to 0
 311  *
 312  * Incrementally decode a sequence of binary data from its Base-64 stringified
 313  * representation. By calling this function multiple times you can convert
 314  * data in chunks to avoid having to have the full encoded data in memory.
 315  *
 316  * The output buffer must be large enough to fit all the data that will
 317  * be written to it. Since base64 encodes 3 bytes in 4 chars you need
 318  * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
 319  * state).
 320  *
 321  * Returns: The number of bytes of output that was written
 322  *
 323  * Since: 2.12
 324  **/
 325 gsize
 326 g_base64_decode_step (const gchar  *in,
 327                       gsize         len,
 328                       guchar       *out,
 329                       gint         *state,
 330                       guint        *save)
 331 {
 332   const guchar *inptr;
 333   guchar *outptr;
 334   const guchar *inend;
 335   guchar c, rank;
 336   guchar last[2];
 337   unsigned int v;
 338   int i;
 339
 340   g_return_val_if_fail (in != NULL || len == 0, 0);
 341   g_return_val_if_fail (out != NULL, 0);
 342   g_return_val_if_fail (state != NULL, 0);
 343   g_return_val_if_fail (save != NULL, 0);
 344
 345   if (len == 0)
 346     return 0;
 347
 348   inend = (const guchar *)in+len;
 349   outptr = out;
 350
 351   /* convert 4 base64 bytes to 3 normal bytes */
 352   v=*save;
 353   i=*state;
 354
 355   last[0] = last[1] = 0;
 356
 357   /* we use the sign in the state to determine if we got a padding character
 358      in the previous sequence */
 359   if (i < 0)
 360     {
 361       i = -i;
 362       last[0] = '=';
 363     }
 364
 365   inptr = (const guchar *)in;
 366   while (inptr < inend)
 367     {
 368       c = *inptr++;
 369       rank = mime_base64_rank [c];
 370       if (rank != 0xff)
 371         {
 372           last[1] = last[0];
 373           last[0] = c;
 374           v = (v<<6) | rank;
 375           i++;
 376           if (i==4)
 377             {
 378               *outptr++ = v>>16;
 379               if (last[1] != '=')
 380                 *outptr++ = v>>8;
 381               if (last[0] != '=')
 382                 *outptr++ = v;
 383               i=0;
 384             }
 385         }
 386     }
 387
 388   *save = v;
 389   *state = last[0] == '=' ? -i : i;
 390
 391   return outptr - out;
 392 }
 393
 394 /**
 395  * g_base64_decode:
 396  * @text: (not nullable): zero-terminated string with base64 text to decode
 397  * @out_len: (out): The length of the decoded data is written here
 398  *
 399  * Decode a sequence of Base-64 encoded text into binary data.  Note
 400  * that the returned binary data is not necessarily zero-terminated,
 401  * so it should not be used as a character string.
 402  *
 403  * Returns: (transfer full) (array length=out_len) (element-type guint8):
 404  *               newly allocated buffer containing the binary data
 405  *               that @text represents. The returned buffer must
 406  *               be freed with g_free().
 407  *
 408  * Since: 2.12
 409  */
 410 guchar *
 411 g_base64_decode (const gchar *text,
 412                  gsize       *out_len)
 413 {
 414   guchar *ret;
 415   gsize input_length;
 416   gint state = 0;
 417   guint save = 0;
 418
 419   g_return_val_if_fail (text != NULL, NULL);
 420   g_return_val_if_fail (out_len != NULL, NULL);
 421
 422   input_length = strlen (text);
 423
 424   /* We can use a smaller limit here, since we know the saved state is 0,
 425      +1 used to avoid calling g_malloc0(0), and hence returning NULL */
 426   ret = g_malloc0 ((input_length / 4) * 3 + 1);
 427
 428   *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
 429
 430   return ret;
 431 }
 432
 433 /**
 434  * g_base64_decode_inplace:
 435  * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
 436  *        string with base64 text to decode
 437  * @out_len: (inout): The length of the decoded data is written here
 438  *
 439  * Decode a sequence of Base-64 encoded text into binary data
 440  * by overwriting the input data.
 441  *
 442  * Returns: (transfer none): The binary data that @text responds. This pointer
 443  *               is the same as the input @text.
 444  *
 445  * Since: 2.20
 446  */
 447 guchar *
 448 g_base64_decode_inplace (gchar *text,
 449                          gsize *out_len)
 450 {
 451   gint input_length, state = 0;
 452   guint save = 0;
 453
 454   g_return_val_if_fail (text != NULL, NULL);
 455   g_return_val_if_fail (out_len != NULL, NULL);
 456
 457   input_length = strlen (text);
 458
 459   g_return_val_if_fail (input_length > 1, NULL);
 460
 461   *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
 462
 463   return (guchar *) text;
 464 }