libdispatch update
[platform/upstream/gcd.git] / dispatch-1.0 / src / transform.c
1 /*
2  * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20
21 #include "internal.h"
22
23 #include <libkern/OSByteOrder.h>
24
// Select which concrete UTF-16 format type corresponds to the host byte
// order (_HOST) and which to the byte-swapped order (_REV), based on the
// compiler's predefined endianness macros.
#ifdef __LITTLE_ENDIAN__
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16LE
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16BE
#else
#ifdef __BIG_ENDIAN__
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16BE
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16LE
#endif
#endif
32
// Data format identifiers; each one occupies its own bit.
enum {
        _DISPATCH_DATA_FORMAT_NONE    = 1 << 0,
        _DISPATCH_DATA_FORMAT_UTF8    = 1 << 1,
        _DISPATCH_DATA_FORMAT_UTF16LE = 1 << 2,
        _DISPATCH_DATA_FORMAT_UTF16BE = 1 << 3,
        _DISPATCH_DATA_FORMAT_UTF_ANY = 1 << 4,
        _DISPATCH_DATA_FORMAT_BASE32  = 1 << 5,
        _DISPATCH_DATA_FORMAT_BASE64  = 1 << 6,
};
42
43 #pragma mark -
44 #pragma mark baseXX tables
45
// Base32 alphabet (RFC 4648): the 32 symbols 'A'-'Z' and '2'-'7', indexed by
// the 5-bit value being encoded. '8' and '9' are not part of the alphabet.
static const char base32_encode_table[] =
                "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
48
// Base32 decode table, indexed by ASCII code (0 .. 'Z').
//   0..31 : value of an alphabet character ('A'-'Z', '2'-'7')
//   -2    : the padding character '='
//   -1    : not a base32 character
// Declared `signed char` so the negative sentinels compare correctly even
// where plain `char` is unsigned.
static const signed char base32_decode_table[] = {
        /*   0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  16 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  32 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* '0' */ -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -2, -1, -1,
        /* '@' */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
};
// Number of entries in base32_decode_table; larger input bytes are invalid.
static const ssize_t base32_decode_table_size = sizeof(base32_decode_table)
                / sizeof(*base32_decode_table);
59
// Base64 alphabet, indexed by the 6-bit value being encoded.
static const char base64_encode_table[] =
                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                "abcdefghijklmnopqrstuvwxyz"
                "0123456789"
                "+/";
62
// Base64 decode table, indexed by ASCII code (0 .. 'z').
//   0..63 : value of an alphabet character
//   -2    : the padding character '='
//   -1    : not a base64 character
// Declared `signed char` so the negative sentinels compare correctly even
// where plain `char` is unsigned.
static const signed char base64_decode_table[] = {
        /*   0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /*  16 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
        /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
        /* '@' */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
        /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

// Number of entries in base64_decode_table; larger input bytes are invalid.
static const ssize_t base64_decode_table_size = sizeof(base64_decode_table)
                / sizeof(*base64_decode_table);
77
78 #pragma mark -
79 #pragma mark dispatch_transform_buffer
80
81 typedef struct dispatch_transform_buffer_s {
82         dispatch_data_t data;
83         uint8_t *start;
84         union {
85                 uint8_t *u8;
86                 uint16_t *u16;
87         } ptr;
88         size_t size;
89 } dispatch_transform_buffer_s;
90
// Saturating size_t multiplication: returns a * b, or SIZE_MAX when the
// product does not fit in a size_t.
static size_t
_dispatch_transform_sizet_mul(size_t a, size_t b)
{
        if (a != 0 && b > SIZE_MAX / a) {
                return SIZE_MAX;
        }
        return a * b;
}
100
101 #define BUFFER_MALLOC_MAX (100*1024*1024)
102
103 static bool
104 _dispatch_transform_buffer_new(dispatch_transform_buffer_s *buffer,
105                 size_t required, size_t size)
106 {
107         size_t remaining = buffer->size - (buffer->ptr.u8 - buffer->start);
108         if (required == 0 || remaining < required) {
109                 if (buffer->start) {
110                         if (buffer->ptr.u8 > buffer->start) {
111                                 dispatch_data_t _new = dispatch_data_create(buffer->start,
112                                                 buffer->ptr.u8 - buffer->start, NULL,
113                                                 DISPATCH_DATA_DESTRUCTOR_FREE);
114                                 dispatch_data_t _concat = dispatch_data_create_concat(
115                                                 buffer->data, _new);
116                                 dispatch_release(_new);
117                                 dispatch_release(buffer->data);
118                                 buffer->data = _concat;
119                         } else {
120                                 free(buffer->start);
121                         }
122                 }
123                 buffer->size = required + size;
124                 buffer->start = NULL;
125                 if (buffer->size > 0) {
126                         if (buffer->size > BUFFER_MALLOC_MAX) {
127                                 return false;
128                         }
129                         buffer->start = (uint8_t*)malloc(buffer->size);
130                         if (buffer->start == NULL) {
131                                 return false;
132                         }
133                 }
134                 buffer->ptr.u8 = buffer->start;
135         }
136         return true;
137 }
138
139 #pragma mark -
140 #pragma mark dispatch_transform_helpers
141
142 static dispatch_data_t
143 _dispatch_data_subrange_map(dispatch_data_t data, const void **ptr,
144                 size_t offset, size_t size)
145 {
146         dispatch_data_t subrange, map = NULL;
147
148         subrange = dispatch_data_create_subrange(data, offset, size);
149         if (dispatch_data_get_size(subrange) == size) {
150                 map = dispatch_data_create_map(subrange, ptr, NULL);
151         }
152         dispatch_release(subrange);
153         return map;
154 }
155
156 static dispatch_data_format_type_t
157 _dispatch_transform_detect_utf(dispatch_data_t data)
158 {
159         const void *p;
160         dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 0, 2);
161
162         if (subrange == NULL) {
163                 return NULL;
164         }
165
166         const uint16_t ch = *(const uint16_t *)p;
167         dispatch_data_format_type_t type = DISPATCH_DATA_FORMAT_TYPE_UTF8;
168
169         if (ch == 0xfeff) {
170                 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST;
171         } else if (ch == 0xfffe) {
172                 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_REV;
173         }
174
175         dispatch_release(subrange);
176
177         return type;
178 }
179
180 static uint16_t
181 _dispatch_transform_swap_to_host(uint16_t x, int32_t byteOrder)
182 {
183         if (byteOrder == OSLittleEndian) {
184                 return OSSwapLittleToHostInt16(x);
185         }
186         return OSSwapBigToHostInt16(x);
187 }
188
189 static uint16_t
190 _dispatch_transform_swap_from_host(uint16_t x, int32_t byteOrder)
191 {
192         if (byteOrder == OSLittleEndian) {
193                 return OSSwapHostToLittleInt16(x);
194         }
195         return OSSwapHostToBigInt16(x);
196 }
197
198 #pragma mark -
199 #pragma mark UTF-8
200
// Returns the length (1-4) of the UTF-8 sequence introduced by lead byte
// `byte`, or 0 when `byte` cannot start a sequence.
static uint8_t
_dispatch_transform_utf8_length(uint8_t byte)
{
        if (byte < 0x80) {
                return 1;       // 0xxxxxxx
        }
        if (byte < 0xc0) {
                return 0;       // 10xxxxxx: continuation byte, not a lead byte
        }
        if (byte < 0xe0) {
                return 2;       // 110xxxxx
        }
        if (byte < 0xf0) {
                return 3;       // 1110xxxx
        }
        if (byte < 0xf8) {
                return 4;       // 11110xxx
        }
        return 0;               // 11111xxx
}
215
/*
 * Decodes the UTF-8 sequence starting at `bytes` into a code point.
 *
 * The sequence length is taken from the lead byte, and that many bytes must be
 * readable at `bytes`. Continuation bytes are not validated; only their low
 * six bits are used.
 *
 * Returns 0 without reading any further when the lead byte cannot start a
 * sequence. (Previously a length of 0 was decremented, wrapped around to 255
 * and caused 255 bytes to be read.)
 */
static uint32_t
_dispatch_transform_read_utf8_sequence(const uint8_t *bytes)
{
        uint8_t seq_length = _dispatch_transform_utf8_length(*bytes);
        uint32_t wch;

        // Payload bits carried by the lead byte.
        switch (seq_length) {
        case 4:
                wch = (*bytes & 0x7);
                break;
        case 3:
                wch = (*bytes & 0xf);
                break;
        case 2:
                wch = (*bytes & 0x1f);
                break;
        case 1:
                return (*bytes & 0x7f);
        default:
                // Not a utf-8 sequence
                return 0;
        }

        // Each continuation byte contributes six more bits.
        for (uint8_t k = 1; k < seq_length; k++) {
                wch = (wch << 6) | (bytes[k] & 0x3f);
        }
        return wch;
}
257
258 #pragma mark -
259 #pragma mark UTF-16
260
261 static dispatch_data_t
262 _dispatch_transform_to_utf16(dispatch_data_t data, int32_t byteOrder)
263 {
264         __block size_t skip = 0;
265
266         __block dispatch_transform_buffer_s buffer = {
267                 .data = dispatch_data_empty,
268         };
269
270         bool success = dispatch_data_apply(data, ^(
271                         DISPATCH_UNUSED dispatch_data_t region,
272                         size_t offset, const void *_buffer, size_t size) {
273                 const uint8_t *src = _buffer;
274                 size_t i;
275
276                 if (offset == 0) {
277                         size_t dest_size = 2 + _dispatch_transform_sizet_mul(size,
278                                         sizeof(uint16_t));
279                         if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
280                                 return (bool)false;
281                         }
282                         // Insert BOM
283                         *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(0xfeff,
284                                         byteOrder);
285                 }
286
287                 // Skip is incremented if the previous block read-ahead into our block
288                 if (skip >= size) {
289                         skip -= size;
290                         return (bool)true;
291                 } else if (skip > 0) {
292                         src += skip;
293                         size -= skip;
294                         skip = 0;
295                 }
296
297                 for (i = 0; i < size;) {
298                         uint32_t wch = 0;
299                         uint8_t byte_size = _dispatch_transform_utf8_length(*src);
300
301                         if (byte_size == 0) {
302                                 return (bool)false;
303                         } else if (byte_size + i > size) {
304                                 // UTF-8 byte sequence spans over into the next block(s)
305                                 const void *p;
306                                 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p,
307                                                 offset + i, byte_size);
308                                 if (subrange == NULL) {
309                                         return (bool)false;
310                                 }
311
312                                 wch = _dispatch_transform_read_utf8_sequence(p);
313                                 skip += byte_size - (size - i);
314                                 src += byte_size;
315                                 i = size;
316
317                                 dispatch_release(subrange);
318                         } else {
319                                 wch = _dispatch_transform_read_utf8_sequence(src);
320                                 src += byte_size;
321                                 i += byte_size;
322                         }
323
324                         size_t next = _dispatch_transform_sizet_mul(size - i, sizeof(uint16_t));
325                         if (wch >= 0xd800 && wch < 0xdfff) {
326                                 // Illegal range (surrogate pair)
327                                 return (bool)false;
328                         } else if (wch >= 0x10000) {
329                                 // Surrogate pair
330                                 if (!_dispatch_transform_buffer_new(&buffer, 2 *
331                                                 sizeof(uint16_t), next)) {
332                                         return (bool)false;
333                                 }
334                                 wch -= 0x10000;
335                                 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
336                                                 ((wch >> 10) & 0x3ff) + 0xd800, byteOrder);
337                                 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
338                                                 (wch & 0x3ff) + 0xdc00, byteOrder);
339                         } else {
340                                 if (!_dispatch_transform_buffer_new(&buffer, 1 *
341                                                 sizeof(uint16_t), next)) {
342                                         return (bool)false;
343                                 }
344                                 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
345                                                 (wch & 0xffff), byteOrder);
346                         }
347                 }
348
349                 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
350
351                 return (bool)true;
352         });
353
354         if (!success) {
355                 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
356                 dispatch_release(buffer.data);
357                 return NULL;
358         }
359
360         return buffer.data;
361 }
362
363 static dispatch_data_t
364 _dispatch_transform_from_utf16(dispatch_data_t data, int32_t byteOrder)
365 {
366         __block size_t skip = 0;
367
368         __block dispatch_transform_buffer_s buffer = {
369                 .data = dispatch_data_empty,
370         };
371
372         bool success = dispatch_data_apply(data, ^(
373                         DISPATCH_UNUSED dispatch_data_t region, size_t offset,
374                         const void *_buffer, size_t size) {
375                 const uint16_t *src = _buffer;
376
377                 if (offset == 0) {
378                         // Assume first buffer will be mostly single-byte UTF-8 sequences
379                         size_t dest_size = _dispatch_transform_sizet_mul(size, 2) / 3;
380                         if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
381                                 return (bool)false;
382                         }
383                 }
384
385                 size_t i = 0, max = size / 2;
386
387                 // Skip is incremented if the previous block read-ahead into our block
388                 if (skip >= size) {
389                         skip -= size;
390                         return (bool)true;
391                 } else if (skip > 0) {
392                         src = (uint16_t *)(((uint8_t *)src) + skip);
393                         size -= skip;
394                         max = (size / 2);
395                         skip = 0;
396                 }
397
398                 // If the buffer is an odd size, allow read ahead into the next region
399                 if ((size % 2) != 0) {
400                         max += 1;
401                 }
402
403                 for (i = 0; i < max; i++) {
404                         uint32_t wch = 0;
405                         uint16_t ch;
406
407                         if ((i == (max - 1)) && (max > (size / 2))) {
408                                 // Last byte of an odd sized range
409                                 const void *p;
410                                 dispatch_data_t range = _dispatch_data_subrange_map(data, &p,
411                                                 offset + (i * 2), 2);
412                                 if (range == NULL) {
413                                         return (bool)false;
414                                 }
415                                 ch = _dispatch_transform_swap_to_host(*(uint64_t*)p, byteOrder);
416                                 dispatch_release(range);
417                                 skip += 1;
418                         } else {
419                                 ch =  _dispatch_transform_swap_to_host(src[i], byteOrder);
420                         }
421
422                         if (ch == 0xfffe && offset == 0 && i == 0) {
423                                 // Wrong-endian BOM at beginning of data
424                                 return (bool)false;
425                         } else if (ch == 0xfeff && offset == 0 && i == 0) {
426                                 // Correct-endian BOM, skip it
427                                 continue;
428                         }
429
430                         if ((ch >= 0xd800) && (ch <= 0xdbff)) {
431                                 // Surrogate pair
432                                 wch = ((ch - 0xd800) << 10);
433                                 if (++i >= max) {
434                                         // Surrogate byte isn't in this block
435                                         const void *p;
436                                         dispatch_data_t range = _dispatch_data_subrange_map(data,
437                                                         &p, offset + (i * 2), 2);
438                                         if (range == NULL) {
439                                                 return (bool)false;
440                                         }
441                                         ch = _dispatch_transform_swap_to_host(*(uint16_t *)p,
442                                                         byteOrder);
443                                         dispatch_release(range);
444                                         skip += 2;
445                                 } else {
446                                         ch = _dispatch_transform_swap_to_host(src[i], byteOrder);
447                                 }
448                                 if (!((ch >= 0xdc00) && (ch <= 0xdfff))) {
449                                         return (bool)false;
450                                 }
451                                 wch = (wch | (ch & 0x3ff));
452                                 wch += 0x10000;
453                         } else if ((ch >= 0xdc00) && (ch <= 0xdfff)) {
454                                 return (bool)false;
455                         } else {
456                                 wch = ch;
457                         }
458
459                         size_t next = _dispatch_transform_sizet_mul(max - i, 2);
460                         if (wch < 0x80) {
461                                 if (!_dispatch_transform_buffer_new(&buffer, 1, next)) {
462                                         return (bool)false;
463                                 }
464                                 *(buffer.ptr.u8)++ = (uint8_t)(wch & 0xff);
465                         } else if (wch < 0x800) {
466                                 if (!_dispatch_transform_buffer_new(&buffer, 2, next)) {
467                                         return (bool)false;
468                                 }
469                                 *(buffer.ptr.u8)++ = (uint8_t)(0xc0 | (wch >> 6));
470                                 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
471                         } else if (wch < 0x10000) {
472                                 if (!_dispatch_transform_buffer_new(&buffer, 3, next)) {
473                                         return (bool)false;
474                                 }
475                                 *(buffer.ptr.u8)++ = (uint8_t)(0xe0 | (wch >> 12));
476                                 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
477                                 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
478                         } else if (wch < 0x200000) {
479                                 if (!_dispatch_transform_buffer_new(&buffer, 4, next)) {
480                                         return (bool)false;
481                                 }
482                                 *(buffer.ptr.u8)++ = (uint8_t)(0xf0 | (wch >> 18));
483                                 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 12) & 0x3f));
484                                 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
485                                 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
486                         }
487                 }
488
489                 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
490
491                 return (bool)true;
492         });
493
494         if (!success) {
495                 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
496                 dispatch_release(buffer.data);
497                 return NULL;
498         }
499
500         return buffer.data;
501 }
502
503 static dispatch_data_t
504 _dispatch_transform_from_utf16le(dispatch_data_t data)
505 {
506         return _dispatch_transform_from_utf16(data, OSLittleEndian);
507 }
508
509 static dispatch_data_t
510 _dispatch_transform_from_utf16be(dispatch_data_t data)
511 {
512         return _dispatch_transform_from_utf16(data, OSBigEndian);
513 }
514
515 static dispatch_data_t
516 _dispatch_transform_to_utf16le(dispatch_data_t data)
517 {
518         return _dispatch_transform_to_utf16(data, OSLittleEndian);
519 }
520
521 static dispatch_data_t
522 _dispatch_transform_to_utf16be(dispatch_data_t data)
523 {
524         return _dispatch_transform_to_utf16(data, OSBigEndian);
525 }
526
527 #pragma mark -
528 #pragma mark base32
529
530 static dispatch_data_t
531 _dispatch_transform_from_base32(dispatch_data_t data)
532 {
533         __block uint64_t x = 0, count = 0, pad = 0;
534
535         __block dispatch_data_t rv = dispatch_data_empty;
536
537         bool success = dispatch_data_apply(data, ^(
538                         DISPATCH_UNUSED dispatch_data_t region,
539                         DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
540                 size_t i, dest_size = (size * 5) / 8;
541
542                 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(char));
543                 uint8_t *ptr = dest;
544                 if (dest == NULL) {
545                         return (bool)false;
546                 }
547
548                 const uint8_t *bytes = buffer;
549
550                 for (i = 0; i < size; i++) {
551                         if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
552                                 continue;
553                         }
554
555                         ssize_t index = bytes[i];
556                         if (index >= base32_decode_table_size ||
557                                         base32_decode_table[index] == -1) {
558                                 free(dest);
559                                 return (bool)false;
560                         }
561                         count++;
562
563                         char value = base32_decode_table[index];
564                         if (value == -2) {
565                                 value = 0;
566                                 pad++;
567                         }
568
569                         x <<= 5;
570                         x += value;
571
572                         if ((count & 0x7) == 0) {
573                                 *ptr++ = (x >> 32) & 0xff;
574                                 *ptr++ = (x >> 24) & 0xff;
575                                 *ptr++ = (x >> 16) & 0xff;
576                                 *ptr++ = (x >> 8) & 0xff;
577                                 *ptr++ = x & 0xff;
578                         }
579                 }
580
581                 size_t final = (ptr - dest);
582                 switch (pad) {
583                 case 1:
584                         final -= 1;
585                         break;
586                 case 3:
587                         final -= 2;
588                         break;
589                 case 4:
590                         final -= 3;
591                         break;
592                 case 6:
593                         final -= 4;
594                         break;
595                 }
596
597                 dispatch_data_t val = dispatch_data_create(dest, final, NULL,
598                                 DISPATCH_DATA_DESTRUCTOR_FREE);
599                 dispatch_data_t concat = dispatch_data_create_concat(rv, val);
600
601                 dispatch_release(val);
602                 dispatch_release(rv);
603                 rv = concat;
604
605                 return (bool)true;
606         });
607
608         if (!success) {
609                 dispatch_release(rv);
610                 return NULL;
611         }
612
613         return rv;
614 }
615
616 static dispatch_data_t
617 _dispatch_transform_to_base32(dispatch_data_t data)
618 {
619         size_t total = dispatch_data_get_size(data);
620         __block size_t count = 0;
621
622         size_t dest_size = ((total + 4) * 8) / 5;
623         dest_size -= dest_size % 8;
624
625         uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
626         if (dest == NULL) {
627                 return NULL;
628         }
629
630         __block uint8_t *ptr = dest;
631
632         /*
633                                             0        1        2        3        4
634          8-bit bytes:   xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy
635          5-bit chunks:  aaaaabbb bbcccccd ddddeeee efffffgg ggghhhhh
636          */
637
638         bool success = dispatch_data_apply(data, ^(
639                         DISPATCH_UNUSED dispatch_data_t region,
640                         size_t offset, const void *buffer, size_t size) {
641                 const uint8_t *bytes = buffer;
642                 size_t i;
643
644                 for (i = 0; i < size; i++, count++) {
645                         uint8_t curr = bytes[i], last = 0;
646
647                         if ((count % 5) != 0) {
648                                 if (i == 0) {
649                                         const void *p;
650                                         dispatch_data_t subrange = _dispatch_data_subrange_map(data,
651                                                         &p, offset - 1, 1);
652                                         if (subrange == NULL) {
653                                                 return (bool)false;
654                                         }
655                                         last = *(uint8_t*)p;
656                                         dispatch_release(subrange);
657                                 } else {
658                                         last = bytes[i - 1];
659                                 }
660                         }
661
662                         switch (count % 5) {
663                         case 0:
664                                 // a
665                                 *ptr++ = base32_encode_table[(curr >> 3) & 0x1f];
666                                 break;
667                         case 1:
668                                 // b + c
669                                 *ptr++ = base32_encode_table[((last << 2)|(curr >> 6)) & 0x1f];
670                                 *ptr++ = base32_encode_table[(curr >> 1) & 0x1f];
671                                 break;
672                         case 2:
673                                 // d
674                                 *ptr++ = base32_encode_table[((last << 4)|(curr >> 4)) & 0x1f];
675                                 break;
676                         case 3:
677                                 // e + f
678                                 *ptr++ = base32_encode_table[((last << 1)|(curr >> 7)) & 0x1f];
679                                 *ptr++ = base32_encode_table[(curr >> 2) & 0x1f];
680                                 break;
681                         case 4:
682                                 // g + h
683                                 *ptr++ = base32_encode_table[((last << 3)|(curr >> 5)) & 0x1f];
684                                 *ptr++ = base32_encode_table[curr & 0x1f];
685                                 break;
686                         }
687                 }
688
689                 // Last region, insert padding bytes, if needed
690                 if (offset + size == total) {
691                         switch (count % 5) {
692                         case 0:
693                                 break;
694                         case 1:
695                                 // b[4:2]
696                                 *ptr++ = base32_encode_table[(bytes[size-1] << 2) & 0x1c];
697                                 break;
698                         case 2:
699                                 // d[4]
700                                 *ptr++ = base32_encode_table[(bytes[size-1] << 4) & 0x10];
701                                 break;
702                         case 3:
703                                 // e[4:1]
704                                 *ptr++ = base32_encode_table[(bytes[size-1] << 1) & 0x1e];
705                                 break;
706                         case 4:
707                                 // g[4:3]
708                                 *ptr++ = base32_encode_table[bytes[size-1] & 0x18];
709                                 break;
710                         }
711                         switch (count % 5) {
712                         case 0:
713                                 break;
714                         case 1:
715                                 *ptr++ = '='; // c
716                                 *ptr++ = '='; // d
717                         case 2:
718                                 *ptr++ = '='; // e
719                         case 3:
720                                 *ptr++ = '='; // f
721                                 *ptr++ = '='; // g
722                         case 4:
723                                 *ptr++ = '='; // h
724                                 break;
725                         }
726                 }
727
728                 return (bool)true;
729         });
730
731         if (!success) {
732                 free(dest);
733                 return NULL;
734         }
735         return dispatch_data_create(dest, dest_size, NULL,
736                         DISPATCH_DATA_DESTRUCTOR_FREE);
737 }
738
739 #pragma mark -
740 #pragma mark base64
741
742 static dispatch_data_t
743 _dispatch_transform_from_base64(dispatch_data_t data)
744 {
745         __block uint64_t x = 0, count = 0;
746         __block size_t pad = 0;
747
748         __block dispatch_data_t rv = dispatch_data_empty;
749
750         bool success = dispatch_data_apply(data, ^(
751                         DISPATCH_UNUSED dispatch_data_t region,
752                         DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
753                 size_t i, dest_size = (size * 3) / 4;
754
755                 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
756                 uint8_t *ptr = dest;
757                 if (dest == NULL) {
758                         return (bool)false;
759                 }
760
761                 const uint8_t *bytes = buffer;
762
763                 for (i = 0; i < size; i++) {
764                         if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
765                                 continue;
766                         }
767
768                         ssize_t index = bytes[i];
769                         if (index >= base64_decode_table_size ||
770                                         base64_decode_table[index] == -1) {
771                                 free(dest);
772                                 return (bool)false;
773                         }
774                         count++;
775
776                         char value = base64_decode_table[index];
777                         if (value == -2) {
778                                 value = 0;
779                                 pad++;
780                         }
781
782                         x <<= 6;
783                         x += value;
784
785                         if ((count & 0x3) == 0) {
786                                 *ptr++ = (x >> 16) & 0xff;
787                                 *ptr++ = (x >> 8) & 0xff;
788                                 *ptr++ = x & 0xff;
789                         }
790                 }
791
792                 size_t final = (ptr - dest);
793                 if (pad > 0) {
794                         // 2 bytes of pad means only had one char in final group
795                         final -= pad;
796                 }
797
798                 dispatch_data_t val = dispatch_data_create(dest, final, NULL,
799                                 DISPATCH_DATA_DESTRUCTOR_FREE);
800                 dispatch_data_t concat = dispatch_data_create_concat(rv, val);
801
802                 dispatch_release(val);
803                 dispatch_release(rv);
804                 rv = concat;
805
806                 return (bool)true;
807         });
808
809         if (!success) {
810                 dispatch_release(rv);
811                 return NULL;
812         }
813
814         return rv;
815 }
816
817 static dispatch_data_t
818 _dispatch_transform_to_base64(dispatch_data_t data)
819 {
820         // RFC 4648 states that we should not linebreak
821         // http://tools.ietf.org/html/rfc4648
822         size_t total = dispatch_data_get_size(data);
823         __block size_t count = 0;
824
825         size_t dest_size = ((total + 2) * 4) / 3;
826         dest_size -= dest_size % 4;
827
828         uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
829         if (dest == NULL) {
830                 return NULL;
831         }
832
833         __block uint8_t *ptr = dest;
834
835         /*
836          * 3 8-bit bytes:       xxxxxxxx yyyyyyyy zzzzzzzz
837          * 4 6-bit chunks:      aaaaaabb bbbbcccc ccdddddd
838          */
839
840         bool success = dispatch_data_apply(data, ^(
841                         DISPATCH_UNUSED dispatch_data_t region,
842                         size_t offset, const void *buffer, size_t size) {
843                 const uint8_t *bytes = buffer;
844                 size_t i;
845
846                 for (i = 0; i < size; i++, count++) {
847                         uint8_t curr = bytes[i], last = 0;
848
849                         if ((count % 3) != 0) {
850                                 if (i == 0) {
851                                         const void *p;
852                                         dispatch_data_t subrange = _dispatch_data_subrange_map(data,
853                                                 &p, offset - 1, 1);
854                                         if (subrange == NULL) {
855                                                 return (bool)false;
856                                         }
857                                         last = *(uint8_t*)p;
858                                         dispatch_release(subrange);
859                                 } else {
860                                         last = bytes[i - 1];
861                                 }
862                         }
863
864                         switch (count % 3) {
865                         case 0:
866                                 *ptr++ = base64_encode_table[(curr >> 2) & 0x3f];
867                                 break;
868                         case 1:
869                                 *ptr++ = base64_encode_table[((last << 4)|(curr >> 4)) & 0x3f];
870                                 break;
871                         case 2:
872                                 *ptr++ = base64_encode_table[((last << 2)|(curr >> 6)) & 0x3f];
873                                 *ptr++ = base64_encode_table[(curr & 0x3f)];
874                                 break;
875                         }
876                 }
877
878                 // Last region, insert padding bytes, if needed
879                 if (offset + size == total) {
880                         switch (count % 3) {
881                         case 0:
882                                 break;
883                         case 1:
884                                 *ptr++ = base64_encode_table[(bytes[size-1] << 4) & 0x30];
885                                 *ptr++ = '=';
886                                 *ptr++ = '=';
887                                 break;
888                         case 2:
889                                 *ptr++ = base64_encode_table[(bytes[size-1] << 2) & 0x3c];
890                                 *ptr++ = '=';
891                                 break;
892                         }
893                 }
894
895                 return (bool)true;
896         });
897
898         if (!success) {
899                 free(dest);
900                 return NULL;
901         }
902         return dispatch_data_create(dest, dest_size, NULL,
903                         DISPATCH_DATA_DESTRUCTOR_FREE);
904 }
905
906 #pragma mark -
907 #pragma mark dispatch_data_transform
908
909 dispatch_data_t
910 dispatch_data_create_with_transform(dispatch_data_t data,
911                 dispatch_data_format_type_t input, dispatch_data_format_type_t output)
912 {
913         if (input->type == _DISPATCH_DATA_FORMAT_UTF_ANY) {
914                 input = _dispatch_transform_detect_utf(data);
915                 if (input == NULL) {
916                         return NULL;
917                 }
918         }
919
920         if ((input->type & ~output->input_mask) != 0) {
921                 return NULL;
922         }
923
924         if ((output->type & ~input->output_mask) != 0) {
925                 return NULL;
926         }
927
928         if (dispatch_data_get_size(data) == 0) {
929                 return data;
930         }
931
932         dispatch_data_t temp1;
933         if (input->decode) {
934                 temp1 = input->decode(data);
935         } else {
936                 dispatch_retain(data);
937                 temp1 = data;
938         }
939
940         if (!temp1) {
941                 return NULL;
942         }
943
944         dispatch_data_t temp2;
945         if (output->encode) {
946                 temp2 = output->encode(temp1);
947         } else {
948                 dispatch_retain(temp1);
949                 temp2 = temp1;
950         }
951
952         dispatch_release(temp1);
953         return temp2;
954 }
955
956 const struct dispatch_data_format_type_s _dispatch_data_format_type_none = {
957         .type = _DISPATCH_DATA_FORMAT_NONE,
958         .input_mask = ~0,
959         .output_mask = ~0,
960         .decode = NULL,
961         .encode = NULL,
962 };
963
964 const struct dispatch_data_format_type_s _dispatch_data_format_type_base32 = {
965         .type = _DISPATCH_DATA_FORMAT_BASE32,
966         .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
967                         _DISPATCH_DATA_FORMAT_BASE64),
968         .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
969                         _DISPATCH_DATA_FORMAT_BASE64),
970         .decode = _dispatch_transform_from_base32,
971         .encode = _dispatch_transform_to_base32,
972 };
973
974 const struct dispatch_data_format_type_s _dispatch_data_format_type_base64 = {
975         .type = _DISPATCH_DATA_FORMAT_BASE64,
976         .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
977                         _DISPATCH_DATA_FORMAT_BASE64),
978         .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
979                         _DISPATCH_DATA_FORMAT_BASE64),
980         .decode = _dispatch_transform_from_base64,
981         .encode = _dispatch_transform_to_base64,
982 };
983
984 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16le = {
985         .type = _DISPATCH_DATA_FORMAT_UTF16LE,
986         .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
987                         _DISPATCH_DATA_FORMAT_UTF16LE),
988         .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
989                         _DISPATCH_DATA_FORMAT_UTF16LE),
990         .decode = _dispatch_transform_from_utf16le,
991         .encode = _dispatch_transform_to_utf16le,
992 };
993
994 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16be = {
995         .type = _DISPATCH_DATA_FORMAT_UTF16BE,
996         .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
997                         _DISPATCH_DATA_FORMAT_UTF16LE),
998         .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
999                         _DISPATCH_DATA_FORMAT_UTF16LE),
1000         .decode = _dispatch_transform_from_utf16be,
1001         .encode = _dispatch_transform_to_utf16be,
1002 };
1003
1004 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf8 = {
1005         .type = _DISPATCH_DATA_FORMAT_UTF8,
1006         .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1007                         _DISPATCH_DATA_FORMAT_UTF16LE),
1008         .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1009                         _DISPATCH_DATA_FORMAT_UTF16LE),
1010         .decode = NULL,
1011         .encode = NULL,
1012 };
1013
1014 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf_any = {
1015         .type = _DISPATCH_DATA_FORMAT_UTF_ANY,
1016         .input_mask = 0,
1017         .output_mask = 0,
1018         .decode = NULL,
1019         .encode = NULL,
1020 };