3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 # define OFF16(p) ((char *) (p))
117 # define OFF32(p) ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119 # define OFF16(p) ((char*)(p))
120 # define OFF32(p) ((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
125 # error "bad cray byte order"
128 #define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
138 # error "Unsupported byteorder"
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142 logic adding to deal with any mixed-endian transformations needed.
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
149 if (UNLIKELY(utf8)) { \
150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
151 (char *) (buf), len, datumtype)) break; \
153 if (UNLIKELY(needs_swap)) \
154 S_reverse_copy(s, (char *) (buf), len); \
156 Copy(s, (char *) (buf), len, char); \
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
220 #define TYPE_IS_SHRIEKING 0x100
221 #define TYPE_IS_BIG_ENDIAN 0x200
222 #define TYPE_IS_LITTLE_ENDIAN 0x400
223 #define TYPE_IS_PACK 0x800
224 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
225 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
226 #define TYPE_NO_MODIFIERS(t) ((U8) (t))
228 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
231 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
233 #define PACK_SIZE_CANNOT_CSUM 0x80
234 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
235 #define PACK_SIZE_MASK 0x3F
237 #include "packsizetables.inc"
240 S_reverse_copy(const char *src, char *dest, STRLEN len)
248 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
258 if (retlen == (STRLEN) -1)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
281 const char *from = *s;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
287 if (UNLIKELY(needs_swap))
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1) {
294 from += UTF8_SAFE_SKIP(from, end);
296 } else from += retlen;
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
304 *(U8 *)buf++ = (U8)val;
306 /* We have enough characters for the buffer. Did we have problems ? */
309 /* Rewalk the string fragment while warning */
311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
316 if (from > end) from = end;
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
356 Copy(buf, cur, len, char); \
361 #define SAFE_UTF8_EXPAND(var) \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
368 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
375 #define GROWING(utf8, cat, start, cur, in_len) \
377 STRLEN glen = (in_len); \
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
383 (start) = sv_exp_grow(cat, glen); \
384 (cur) = (start) + SvCUR(cat); \
388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
390 const STRLEN glen = (in_len); \
392 if (utf8) SAFE_UTF8_EXPAND(gl); \
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
395 SvCUR_set((cat), (cur) - (start)); \
396 (start) = sv_exp_grow(cat, gl); \
397 (cur) = (start) + SvCUR(cat); \
399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
402 #define PUSH_BYTE(utf8, s, byte) \
405 const U8 au8 = (byte); \
406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407 } else *(U8 *)(s)++ = (byte); \
410 /* Only to be used inside a loop (see the break) */
411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
416 if (retlen == (STRLEN) -1) { \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
423 static const char *_action( const tempsym_t* symptr )
425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
428 /* Returns the sizeof() struct described by pat */
430 S_measure_struct(pTHX_ tempsym_t* symptr)
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
436 while (next_symbol(symptr)) {
439 switch (symptr->howlen) {
441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
445 /* e_no_len and e_number */
446 len = symptr->length;
450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
456 /* diag_listed_as: Invalid type '%s' in %s */
457 Perl_croak(aTHX_ "Invalid type '%c' in %s",
458 (int)TYPE_NO_MODIFIERS(symptr->code),
460 case '.' | TYPE_IS_SHRIEKING:
461 case '@' | TYPE_IS_SHRIEKING:
465 case 'U': /* XXXX Is it correct? */
468 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
469 (int) TYPE_NO_MODIFIERS(symptr->code),
476 tempsym_t savsym = *symptr;
477 symptr->patptr = savsym.grpbeg;
478 symptr->patend = savsym.grpend;
479 /* XXXX Theoretically, we need to measure many times at
480 different positions, since the subexpression may contain
481 alignment commands, but be not of aligned length.
482 Need to detect this and croak(). */
483 size = measure_struct(symptr);
487 case 'X' | TYPE_IS_SHRIEKING:
488 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
490 if (!len) /* Avoid division by 0 */
492 len = total % len; /* Assumed: the start is aligned. */
497 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
499 case 'x' | TYPE_IS_SHRIEKING:
500 if (!len) /* Avoid division by 0 */
502 star = total % len; /* Assumed: the start is aligned. */
503 if (star) /* Other portable ways? */
527 size = sizeof(char*);
537 /* locate matching closing parenthesis or bracket
538 * returns char pointer to char after match, or NULL
541 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
543 PERL_ARGS_ASSERT_GROUP_END;
544 Size_t opened = 0; /* number of pending opened brackets */
546 while (patptr < patend) {
547 const char c = *patptr++;
549 if (opened == 0 && c == ender)
552 while (patptr < patend && *patptr != '\n')
555 } else if (c == '(' || c == '[')
557 else if (c == ')' || c == ']') {
559 Perl_croak(aTHX_ "Mismatched brackets in template");
563 Perl_croak(aTHX_ "No group ending character '%c' found in template",
565 NOT_REACHED; /* NOTREACHED */
569 /* Convert unsigned decimal number to binary.
570 * Expects a pointer to the first digit and address of length variable
571 * Advances char pointer to 1st non-digit char and returns number
574 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
576 SSize_t len = *patptr++ - '0';
578 PERL_ARGS_ASSERT_GET_NUM;
580 while (isDIGIT(*patptr)) {
581 SSize_t nlen = (len * 10) + (*patptr++ - '0');
582 if (nlen < 0 || nlen/10 != len)
583 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
590 /* The marvellous template parsing routine: Using state stored in *symptr,
591 * locates next template code and count
594 S_next_symbol(pTHX_ tempsym_t* symptr )
596 const char* patptr = symptr->patptr;
597 const char* const patend = symptr->patend;
599 PERL_ARGS_ASSERT_NEXT_SYMBOL;
601 symptr->flags &= ~FLAG_SLASH;
603 while (patptr < patend) {
604 if (isSPACE(*patptr))
606 else if (*patptr == '#') {
608 while (patptr < patend && *patptr != '\n')
613 /* We should have found a template code */
614 I32 code = (U8) *patptr++;
615 U32 inherited_modifiers = 0;
617 /* unrecognised characters in pack/unpack formats were made fatal in
618 * 5.004, with an exception added in 5.004_04 for ',' to "just" warn: */
620 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
621 symptr->flags |= FLAG_COMMA;
622 /* diag_listed_as: Invalid type '%s' in %s */
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Invalid type ',' in %s", _action( symptr ) );
629 /* for '(', skip to ')' */
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
647 /* look for modifiers */
648 while (patptr < patend) {
653 modifier = TYPE_IS_SHRIEKING;
654 allowed = "sSiIlLxXnNvV@.";
657 modifier = TYPE_IS_BIG_ENDIAN;
658 allowed = ENDIANNESS_ALLOWED_TYPES;
661 modifier = TYPE_IS_LITTLE_ENDIAN;
662 allowed = ENDIANNESS_ALLOWED_TYPES;
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
675 allowed, _action( symptr ) );
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
683 *patptr, _action( symptr ) );
685 if ((code & modifier)) {
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
696 /* inherit modifiers */
697 code |= inherited_modifiers;
699 /* look for count and/or / */
700 if (patptr < patend) {
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
703 symptr->howlen = e_number;
705 } else if (*patptr == '*') {
707 symptr->howlen = e_star;
709 } else if (*patptr == '[') {
710 const char* lenptr = ++patptr;
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
727 symptr->howlen = e_no_len;
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
735 else if (*patptr == '#') {
737 while (patptr < patend && *patptr != '\n')
742 if (*patptr == '/') {
743 symptr->flags |= FLAG_SLASH;
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
760 symptr->patptr = patptr;
764 symptr->patptr = patptr;
769 There is no way to cleanly handle the case where we should process the
770 string per byte in its upgraded form while it's really in downgraded form
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
775 themselves if they need to do a lot of unpacks like this on it
778 need_utf8(const char *pat, const char *patend)
782 PERL_ARGS_ASSERT_NEED_UTF8;
784 while (pat < patend) {
787 pat = (const char *) memchr(pat, '\n', patend-pat);
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
798 first_symbol(const char *pat, const char *patend) {
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801 while (pat < patend) {
802 if (pat[0] != '#') return pat[0];
804 pat = (const char *) memchr(pat, '\n', patend-pat);
813 =for apidoc unpackstring
815 The engine implementing the C<unpack()> Perl function.
817 Using the template C<pat..patend>, this function unpacks the string
818 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
819 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
820 C<SPAGAIN> after the call to this function). It returns the number of
823 The C<strend> and C<patend> pointers should point to the byte following the
824 last character of each string.
826 Although this function returns its values on the perl argument stack, it
827 doesn't take any parameters from that stack (and thus in particular
828 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
834 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
838 PERL_ARGS_ASSERT_UNPACKSTRING;
840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
848 flags |= FLAG_DO_UTF8;
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
854 TEMPSYM_INIT(&sym, pat, patend, flags);
856 return unpack_rec(&sym, s, s, strend, NULL );
860 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
864 const SSize_t start_sp_offset = SP - PL_stack_base;
866 SSize_t checksum = 0;
869 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
871 bool explicit_length;
872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
875 PERL_ARGS_ASSERT_UNPACK_REC;
877 symptr->strbeg = s - strbeg;
879 while (next_symbol(symptr)) {
882 I32 datumtype = symptr->code;
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
892 switch (howlen = symptr->howlen) {
894 len = strend - strbeg; /* long enough */
897 /* e_no_len and e_number */
898 len = symptr->length;
902 explicit_length = TRUE;
904 beyond = s >= strend;
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
908 /* props nonzero means we can process this letter. */
909 const SSize_t size = props & PACK_SIZE_MASK;
910 const SSize_t howmany = (strend - s) / size;
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
921 needs_swap = NEEDS_SWAP(datumtype);
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
925 /* diag_listed_as: Invalid type '%s' in %s */
926 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
929 if (howlen == e_no_len)
930 len = 16; /* len is not specified */
938 tempsym_t savsym = *symptr;
939 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
940 symptr->flags |= group_modifiers;
941 symptr->patend = savsym.grpend;
942 /* cppcheck-suppress autoVariables */
943 symptr->previous = &savsym;
946 if (len && unpack_only_one) len = 1;
948 symptr->patptr = savsym.grpbeg;
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
953 break; /* No way to continue */
956 savsym.flags = symptr->flags & ~group_modifiers;
960 case '.' | TYPE_IS_SHRIEKING:
964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
968 tempsym_t *group = symptr;
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
979 case '@' | TYPE_IS_SHRIEKING:
981 s = strbeg + symptr->strbeg;
982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
994 Perl_croak(aTHX_ "'@' outside of string in unpack");
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1002 const char *hop, *last;
1004 hop = last = strbeg;
1006 hop += UTF8SKIP(hop);
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1017 len = (s - strbeg) % len;
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1036 case 'x' | TYPE_IS_SHRIEKING: {
1038 if (!len) /* Avoid division by 0 */
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
1042 if (ai32 == 0) break;
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1084 } else if (len > strend - s)
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
1089 const char *ptr, *end;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--) {
1101 && !UTF8_IS_CONTINUATION(*ptr)
1102 && !isSPACE_utf8_safe(ptr, strend))
1107 if (ptr >= s) ptr += UTF8SKIP(ptr);
1110 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1112 for (ptr = s+len-1; ptr >= s; ptr--)
1113 if (*ptr != 0 && !isSPACE(*ptr)) break;
1116 sv = newSVpvn(s, ptr-s);
1117 } else sv = newSVpvn(s, len);
1121 /* Undo any upgrade done due to need_utf8() */
1122 if (!(symptr->flags & FLAG_WAS_UTF8))
1123 sv_utf8_downgrade(sv, 0);
1131 if (howlen == e_star || len > (strend - s) * 8)
1132 len = (strend - s) * 8;
1135 while (len >= 8 && s < strend) {
1136 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1141 cuv += PL_bitcount[*(U8 *)s++];
1144 if (len && s < strend) {
1146 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1147 if (datumtype == 'b')
1149 if (bits & 1) cuv++;
1154 if (bits & 0x80) cuv++;
1161 sv = sv_2mortal(newSV(len ? len : 1));
1164 if (datumtype == 'b') {
1166 const SSize_t ai32 = len;
1167 for (len = 0; len < ai32; len++) {
1168 if (len & 7) bits >>= 1;
1170 if (s >= strend) break;
1171 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1172 } else bits = *(U8 *) s++;
1173 *str++ = bits & 1 ? '1' : '0';
1177 const SSize_t ai32 = len;
1178 for (len = 0; len < ai32; len++) {
1179 if (len & 7) bits <<= 1;
1181 if (s >= strend) break;
1182 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1183 } else bits = *(U8 *) s++;
1184 *str++ = bits & 0x80 ? '1' : '0';
1188 SvCUR_set(sv, str - SvPVX_const(sv));
1195 /* Preliminary length estimate, acceptable for utf8 too */
1196 if (howlen == e_star || len > (strend - s) * 2)
1197 len = (strend - s) * 2;
1199 sv = sv_2mortal(newSV(len ? len : 1));
1203 if (datumtype == 'h') {
1206 for (len = 0; len < ai32; len++) {
1207 if (len & 1) bits >>= 4;
1209 if (s >= strend) break;
1210 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1211 } else bits = * (U8 *) s++;
1213 *str++ = PL_hexdigit[bits & 15];
1217 const SSize_t ai32 = len;
1218 for (len = 0; len < ai32; len++) {
1219 if (len & 1) bits <<= 4;
1221 if (s >= strend) break;
1222 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1223 } else bits = *(U8 *) s++;
1225 *str++ = PL_hexdigit[(bits >> 4) & 15];
1230 SvCUR_set(sv, str - SvPVX_const(sv));
1237 if (explicit_length)
1238 /* Switch to "character" mode */
1239 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1244 while (len-- > 0 && s < strend) {
1249 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1250 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1251 if (retlen == (STRLEN) -1)
1252 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1256 aint = *(U8 *)(s)++;
1257 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1261 else if (checksum > bits_in_uv)
1262 cdouble += (NV)aint;
1270 while (len-- > 0 && s < strend) {
1272 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1273 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1274 if (retlen == (STRLEN) -1)
1275 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1279 else if (checksum > bits_in_uv)
1280 cdouble += (NV) val;
1284 } else if (!checksum)
1286 const U8 ch = *(U8 *) s++;
1289 else if (checksum > bits_in_uv)
1290 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1292 while (len-- > 0) cuv += *(U8 *) s++;
1296 if (explicit_length && howlen != e_star) {
1297 /* Switch to "bytes in UTF-8" mode */
1298 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1300 /* Should be impossible due to the need_utf8() test */
1301 Perl_croak(aTHX_ "U0 mode on a byte string");
1305 if (len > strend - s) len = strend - s;
1307 if (len && unpack_only_one) len = 1;
1311 while (len-- > 0 && s < strend) {
1315 U8 result[UTF8_MAXLEN+1];
1316 const char *ptr = s;
1318 /* Bug: warns about bad utf8 even if we are short on bytes
1319 and will break out of the loop */
1320 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1323 len = UTF8SKIP(result);
1324 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1325 (char *) &result[1], len-1, 'U')) break;
1326 auv = utf8n_to_uvchr(result, len, &retlen,
1327 UTF8_ALLOW_DEFAULT);
1330 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
1331 UTF8_ALLOW_DEFAULT);
1332 if (retlen == (STRLEN) -1)
1333 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1338 else if (checksum > bits_in_uv)
1339 cdouble += (NV) auv;
1344 case 's' | TYPE_IS_SHRIEKING:
1345 #if SHORTSIZE != SIZE16
1348 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV)ashort;
1364 #if U16SIZE > SIZE16
1367 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1368 #if U16SIZE > SIZE16
1374 else if (checksum > bits_in_uv)
1375 cdouble += (NV)ai16;
1380 case 'S' | TYPE_IS_SHRIEKING:
1381 #if SHORTSIZE != SIZE16
1383 unsigned short aushort;
1384 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1387 else if (checksum > bits_in_uv)
1388 cdouble += (NV)aushort;
1401 #if U16SIZE > SIZE16
1404 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1405 if (datumtype == 'n')
1406 au16 = PerlSock_ntohs(au16);
1407 if (datumtype == 'v')
1411 else if (checksum > bits_in_uv)
1412 cdouble += (NV) au16;
1417 case 'v' | TYPE_IS_SHRIEKING:
1418 case 'n' | TYPE_IS_SHRIEKING:
1421 # if U16SIZE > SIZE16
1424 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1425 /* There should never be any byte-swapping here. */
1426 assert(!TYPE_ENDIANNESS(datumtype));
1427 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1428 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1429 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1430 ai16 = (I16) vtohs((U16) ai16);
1433 else if (checksum > bits_in_uv)
1434 cdouble += (NV) ai16;
1440 case 'i' | TYPE_IS_SHRIEKING:
1443 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)aint;
1453 case 'I' | TYPE_IS_SHRIEKING:
1456 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1459 else if (checksum > bits_in_uv)
1460 cdouble += (NV)auint;
1468 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1471 else if (checksum > bits_in_uv)
1480 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1483 else if (checksum > bits_in_uv)
1489 case 'l' | TYPE_IS_SHRIEKING:
1490 #if LONGSIZE != SIZE32
1493 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1496 else if (checksum > bits_in_uv)
1497 cdouble += (NV)along;
1508 #if U32SIZE > SIZE32
1511 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1512 #if U32SIZE > SIZE32
1513 if (ai32 > 2147483647) ai32 -= 4294967296;
1517 else if (checksum > bits_in_uv)
1518 cdouble += (NV)ai32;
1523 case 'L' | TYPE_IS_SHRIEKING:
1524 #if LONGSIZE != SIZE32
1526 unsigned long aulong;
1527 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1530 else if (checksum > bits_in_uv)
1531 cdouble += (NV)aulong;
1544 #if U32SIZE > SIZE32
1547 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1548 if (datumtype == 'N')
1549 au32 = PerlSock_ntohl(au32);
1550 if (datumtype == 'V')
1554 else if (checksum > bits_in_uv)
1555 cdouble += (NV)au32;
1560 case 'V' | TYPE_IS_SHRIEKING:
1561 case 'N' | TYPE_IS_SHRIEKING:
1564 #if U32SIZE > SIZE32
1567 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1568 /* There should never be any byte swapping here. */
1569 assert(!TYPE_ENDIANNESS(datumtype));
1570 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1572 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1573 ai32 = (I32)vtohl((U32)ai32);
1576 else if (checksum > bits_in_uv)
1577 cdouble += (NV)ai32;
1585 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1586 /* newSVpv generates undef if aptr is NULL */
1587 mPUSHs(newSVpv(aptr, 0));
1595 while (len > 0 && s < strend) {
1597 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1598 auv = (auv << 7) | (ch & 0x7f);
1599 /* UTF8_IS_XXXXX not right here because this is a BER, not
1600 * UTF-8 format - using constant 0x80 */
1608 if (++bytes >= sizeof(UV)) { /* promote to string */
1611 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1612 (int)TYPE_DIGITS(UV), auv);
1613 while (s < strend) {
1614 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1615 sv = mul128(sv, (U8)(ch & 0x7f));
1621 t = SvPV_nolen_const(sv);
1630 if ((s >= strend) && bytes)
1631 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1635 if (symptr->howlen == e_star)
1636 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1638 if (s + sizeof(char*) <= strend) {
1640 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1641 /* newSVpvn generates undef if aptr is NULL */
1642 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1645 #if defined(HAS_QUAD) && IVSIZE >= 8
1649 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1651 mPUSHs(newSViv((IV)aquad));
1652 else if (checksum > bits_in_uv)
1653 cdouble += (NV)aquad;
1661 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1663 mPUSHs(newSVuv((UV)auquad));
1664 else if (checksum > bits_in_uv)
1665 cdouble += (NV)auquad;
1671 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1675 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1685 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1695 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1696 datumtype, needs_swap);
1703 #if defined(HAS_LONG_DOUBLE)
1707 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1708 sizeof(aldouble.bytes), datumtype, needs_swap);
1709 /* The most common long double format, the x86 80-bit
1710 * extended precision, has either 2 or 6 unused bytes,
1711 * which may contain garbage, which may contain
1712 * unintentional data. While we do zero the bytes of
1713 * the long double data in pack(), here in unpack() we
1714 * don't, because it's really hard to envision that
1715 * reading the long double off aldouble would be
1716 * affected by the unused bytes.
1718 * Note that trying to unpack 'long doubles' of 'long
1719 * doubles' packed in another system is in the general
1720 * case doomed without having more detail. */
1722 mPUSHn(aldouble.ld);
1724 cdouble += aldouble.ld;
1730 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1731 sv = sv_2mortal(newSV(l));
1738 /* Note that all legal uuencoded strings are ASCII printables, so
1739 * have the same representation under UTF-8 vs not. This means we
1740 * can ignore UTF8ness on legal input. For illegal we stop at the
1741 * first failure, and don't report where/what that is, so again we
1742 * can ignore UTF8ness */
1744 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1748 len = PL_uudmap[*(U8*)s++] & 077;
1750 if (s < strend && ISUUCHAR(*s))
1751 a = PL_uudmap[*(U8*)s++] & 077;
1754 if (s < strend && ISUUCHAR(*s))
1755 b = PL_uudmap[*(U8*)s++] & 077;
1758 if (s < strend && ISUUCHAR(*s))
1759 c = PL_uudmap[*(U8*)s++] & 077;
1762 if (s < strend && ISUUCHAR(*s))
1763 d = PL_uudmap[*(U8*)s++] & 077;
1766 hunk[0] = (char)((a << 2) | (b >> 4));
1767 hunk[1] = (char)((b << 4) | (c >> 2));
1768 hunk[2] = (char)((c << 6) | d);
1770 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1775 else /* possible checksum byte */
1776 if (s + 1 < strend && s[1] == '\n')
1782 } /* End of switch */
1785 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1786 (checksum > bits_in_uv &&
1787 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1790 anv = (NV) (1 << (checksum & 15));
1791 while (checksum >= 16) {
1795 while (cdouble < 0.0)
1797 cdouble = Perl_modf(cdouble / anv, &trouble);
1798 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1799 /* Workaround for powerpc doubledouble modfl bug:
1800 * close to 1.0L and -1.0L cdouble is 0, and trouble
1801 * is cdouble / anv. */
1802 if (trouble != Perl_ceil(trouble)) {
1804 if (cdouble > 1.0L) cdouble -= 1.0L;
1805 if (cdouble < -1.0L) cdouble += 1.0L;
1809 sv = newSVnv(cdouble);
1812 if (checksum < bits_in_uv) {
1813 UV mask = nBIT_MASK(checksum);
1822 if (symptr->flags & FLAG_SLASH){
1823 if (SP - PL_stack_base - start_sp_offset <= 0)
1825 if( next_symbol(symptr) ){
1826 if( symptr->howlen == e_number )
1827 Perl_croak(aTHX_ "Count after length/code in unpack" );
1829 /* ...end of char buffer then no decent length available */
1830 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1832 /* take top of stack (hope it's numeric) */
1835 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1838 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1840 datumtype = symptr->code;
1841 explicit_length = FALSE;
1849 return SP - PL_stack_base - start_sp_offset;
1859 const char *pat = SvPV_const(left, llen);
1860 const char *s = SvPV_const(right, rlen);
1861 const char *strend = s + rlen;
1862 const char *patend = pat + llen;
1866 cnt = unpackstring(pat, patend, s, strend,
1867 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1868 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1871 if ( !cnt && gimme == G_SCALAR )
1872 PUSHs(&PL_sv_undef);
1877 doencodes(U8 *h, const U8 *s, SSize_t len)
1879 *h++ = PL_uuemap[len];
1881 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1882 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1883 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1884 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1889 const U8 r = (len > 1 ? s[1] : '\0');
1890 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1891 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1892 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1893 *h++ = PL_uuemap[0];
1900 S_is_an_int(pTHX_ const char *s, STRLEN l)
1902 SV *result = newSVpvn(s, l);
1903 char *const result_c = SvPV_nolen(result); /* convenience */
1904 char *out = result_c;
1908 PERL_ARGS_ASSERT_IS_AN_INT;
1916 SvREFCNT_dec(result);
1939 SvREFCNT_dec(result);
1945 SvCUR_set(result, out - result_c);
1949 /* pnum must be '\0' terminated */
1951 S_div128(pTHX_ SV *pnum, bool *done)
1954 char * const s = SvPV(pnum, len);
1958 PERL_ARGS_ASSERT_DIV128;
1962 const int i = m * 10 + (*t - '0');
1963 const int r = (i >> 7); /* r < 10 */
1971 SvCUR_set(pnum, (STRLEN) (t - s));
1976 =for apidoc packlist
1978 The engine implementing C<pack()> Perl function.
1984 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1988 PERL_ARGS_ASSERT_PACKLIST;
1990 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1992 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1993 Also make sure any UTF8 flag is loaded */
1994 SvPV_force_nolen(cat);
1996 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1998 (void)pack_rec( cat, &sym, beglist, endlist );
2001 /* like sv_utf8_upgrade, but also repoint the group start markers */
2003 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2006 const char *from_ptr, *from_start, *from_end, **marks, **m;
2007 char *to_start, *to_ptr;
2009 if (SvUTF8(sv)) return;
2011 from_start = SvPVX_const(sv);
2012 from_end = from_start + SvCUR(sv);
2013 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2014 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2015 if (from_ptr == from_end) {
2016 /* Simple case: no character needs to be changed */
2021 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2022 Newx(to_start, len, char);
2023 Copy(from_start, to_start, from_ptr-from_start, char);
2024 to_ptr = to_start + (from_ptr-from_start);
2026 Newx(marks, sym_ptr->level+2, const char *);
2027 for (group=sym_ptr; group; group = group->previous)
2028 marks[group->level] = from_start + group->strbeg;
2029 marks[sym_ptr->level+1] = from_end+1;
2030 for (m = marks; *m < from_ptr; m++)
2031 *m = to_start + (*m-from_start);
2033 for (;from_ptr < from_end; from_ptr++) {
2034 while (*m == from_ptr) *m++ = to_ptr;
2035 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2039 while (*m == from_ptr) *m++ = to_ptr;
2040 if (m != marks + sym_ptr->level+1) {
2043 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2044 "level=%d", m, marks, sym_ptr->level);
2046 for (group=sym_ptr; group; group = group->previous)
2047 group->strbeg = marks[group->level] - to_start;
2052 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2053 from_start -= SvIVX(sv);
2056 SvFLAGS(sv) &= ~SVf_OOK;
2059 Safefree(from_start);
2060 SvPV_set(sv, to_start);
2061 SvCUR_set(sv, to_ptr - to_start);
2066 /* Exponential string grower. Makes string extension effectively O(n)
2067 needed says how many extra bytes we need (not counting the final '\0')
2068 Only grows the string if there is an actual lack of space
2071 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2072 const STRLEN cur = SvCUR(sv);
2073 const STRLEN len = SvLEN(sv);
2076 PERL_ARGS_ASSERT_SV_EXP_GROW;
2078 if (len - cur > needed) return SvPVX(sv);
2079 extend = needed > len ? needed : len;
2080 return SvGROW(sv, len+extend+1);
2084 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2087 if (UNLIKELY(SvAMAGIC(sv)))
2089 if (UNLIKELY(isinfnansv(sv))) {
2090 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2091 const NV nv = SvNV_nomg(sv);
2093 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2095 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2100 #define SvIV_no_inf(sv,d) \
2101 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2102 #define SvUV_no_inf(sv,d) \
2103 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2107 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2109 tempsym_t lookahead;
2110 SSize_t items = endlist - beglist;
2111 bool found = next_symbol(symptr);
2112 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2113 bool warn_utf8 = ckWARN(WARN_UTF8);
2116 PERL_ARGS_ASSERT_PACK_REC;
2118 if (symptr->level == 0 && found && symptr->code == 'U') {
2119 marked_upgrade(aTHX_ cat, symptr);
2120 symptr->flags |= FLAG_DO_UTF8;
2123 symptr->strbeg = SvCUR(cat);
2129 SV *lengthcode = NULL;
2130 I32 datumtype = symptr->code;
2131 howlen_t howlen = symptr->howlen;
2132 char *start = SvPVX(cat);
2133 char *cur = start + SvCUR(cat);
2136 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2137 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2141 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2145 /* e_no_len and e_number */
2146 len = symptr->length;
2151 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2153 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2154 /* We can process this letter. */
2155 STRLEN size = props & PACK_SIZE_MASK;
2156 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2160 /* Look ahead for next symbol. Do we have code/code? */
2161 lookahead = *symptr;
2162 found = next_symbol(&lookahead);
2163 if (symptr->flags & FLAG_SLASH) {
2165 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2166 if (memCHRs("aAZ", lookahead.code)) {
2167 if (lookahead.howlen == e_number) count = lookahead.length;
2170 count = sv_len_utf8(*beglist);
2173 if (lookahead.code == 'Z') count++;
2176 if (lookahead.howlen == e_number && lookahead.length < items)
2177 count = lookahead.length;
2180 lookahead.howlen = e_number;
2181 lookahead.length = count;
2182 lengthcode = sv_2mortal(newSViv(count));
2185 needs_swap = NEEDS_SWAP(datumtype);
2187 /* Code inside the switch must take care to properly update
2188 cat (CUR length and '\0' termination) if it updated *cur and
2189 doesn't simply leave using break */
2190 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2192 /* diag_listed_as: Invalid type '%s' in %s */
2193 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2194 (int) TYPE_NO_MODIFIERS(datumtype));
2196 Perl_croak(aTHX_ "'%%' may not be used in pack");
2198 case '.' | TYPE_IS_SHRIEKING:
2200 if (howlen == e_star) from = start;
2201 else if (len == 0) from = cur;
2203 tempsym_t *group = symptr;
2205 while (--len && group) group = group->previous;
2206 from = group ? start + group->strbeg : start;
2209 len = SvIV_no_inf(fromstr, datumtype);
2211 case '@' | TYPE_IS_SHRIEKING:
2213 from = start + symptr->strbeg;
2215 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2217 while (len && from < cur) {
2218 from += UTF8SKIP(from);
2222 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2224 /* Here we know from == cur */
2226 GROWING(0, cat, start, cur, len);
2227 Zero(cur, len, char);
2229 } else if (from < cur) {
2232 } else goto no_change;
2240 if (len > 0) goto grow;
2241 if (len == 0) goto no_change;
2248 tempsym_t savsym = *symptr;
2249 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2250 symptr->flags |= group_modifiers;
2251 symptr->patend = savsym.grpend;
2253 /* cppcheck-suppress autoVariables */
2254 symptr->previous = &lookahead;
2257 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2258 else symptr->flags &= ~FLAG_PARSE_UTF8;
2259 was_utf8 = SvUTF8(cat);
2260 symptr->patptr = savsym.grpbeg;
2261 beglist = pack_rec(cat, symptr, beglist, endlist);
2262 if (SvUTF8(cat) != was_utf8)
2263 /* This had better be an upgrade while in utf8==0 mode */
2266 if (savsym.howlen == e_star && beglist == endlist)
2267 break; /* No way to continue */
2269 items = endlist - beglist;
2270 lookahead.flags = symptr->flags & ~group_modifiers;
2273 case 'X' | TYPE_IS_SHRIEKING:
2274 if (!len) /* Avoid division by 0 */
2281 hop += UTF8SKIP(hop);
2288 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2292 len = (cur-start) % len;
2296 if (len < 1) goto no_change;
2300 Perl_croak(aTHX_ "'%c' outside of string in pack",
2301 (int) TYPE_NO_MODIFIERS(datumtype));
2302 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2304 Perl_croak(aTHX_ "'%c' outside of string in pack",
2305 (int) TYPE_NO_MODIFIERS(datumtype));
2311 if (cur - start < len)
2312 Perl_croak(aTHX_ "'%c' outside of string in pack",
2313 (int) TYPE_NO_MODIFIERS(datumtype));
2316 if (cur < start+symptr->strbeg) {
2317 /* Make sure group starts don't point into the void */
2319 const STRLEN length = cur-start;
2320 for (group = symptr;
2321 group && length < group->strbeg;
2322 group = group->previous) group->strbeg = length;
2323 lookahead.strbeg = length;
2326 case 'x' | TYPE_IS_SHRIEKING: {
2328 if (!len) /* Avoid division by 0 */
2330 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2331 else ai32 = (cur - start) % len;
2332 if (ai32 == 0) goto no_change;
2344 aptr = SvPV_const(fromstr, fromlen);
2345 if (DO_UTF8(fromstr)) {
2346 const char *end, *s;
2348 if (!utf8 && !SvUTF8(cat)) {
2349 marked_upgrade(aTHX_ cat, symptr);
2350 lookahead.flags |= FLAG_DO_UTF8;
2351 lookahead.strbeg = symptr->strbeg;
2354 cur = start + SvCUR(cat);
2356 if (howlen == e_star) {
2357 if (utf8) goto string_copy;
2361 end = aptr + fromlen;
2362 fromlen = datumtype == 'Z' ? len-1 : len;
2363 while ((SSize_t) fromlen > 0 && s < end) {
2368 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2371 if (datumtype == 'Z') len++;
2377 fromlen = len - fromlen;
2378 if (datumtype == 'Z') fromlen--;
2379 if (howlen == e_star) {
2381 if (datumtype == 'Z') len++;
2383 GROWING(0, cat, start, cur, len);
2384 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2385 datumtype | TYPE_IS_PACK))
2386 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2387 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2388 (int)datumtype, aptr, end, cur, fromlen);
2392 if (howlen == e_star) {
2394 if (datumtype == 'Z') len++;
2396 if (len <= (SSize_t) fromlen) {
2398 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2400 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2402 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2403 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2405 while (fromlen > 0) {
2406 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2412 if (howlen == e_star) {
2414 if (datumtype == 'Z') len++;
2416 if (len <= (SSize_t) fromlen) {
2418 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2420 GROWING(0, cat, start, cur, len);
2421 Copy(aptr, cur, fromlen, char);
2425 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2432 const char *str, *end;
2433 SSize_t l, field_len;
2439 str = SvPV_const(fromstr, fromlen);
2440 end = str + fromlen;
2441 if (DO_UTF8(fromstr)) {
2443 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2445 utf8_source = FALSE;
2446 utf8_flags = 0; /* Unused, but keep compilers happy */
2448 if (howlen == e_star) len = fromlen;
2449 field_len = (len+7)/8;
2450 GROWING(utf8, cat, start, cur, field_len);
2451 if (len > (SSize_t)fromlen) len = fromlen;
2454 if (datumtype == 'B')
2458 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2460 } else bits |= *str++ & 1;
2461 if (l & 7) bits <<= 1;
2463 PUSH_BYTE(utf8, cur, bits);
2468 /* datumtype == 'b' */
2472 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2473 if (val & 1) bits |= 0x80;
2474 } else if (*str++ & 1)
2476 if (l & 7) bits >>= 1;
2478 PUSH_BYTE(utf8, cur, bits);
2484 if (datumtype == 'B')
2485 bits <<= 7 - (l & 7);
2487 bits >>= 7 - (l & 7);
2488 PUSH_BYTE(utf8, cur, bits);
2491 /* Determine how many chars are left in the requested field */
2493 if (howlen == e_star) field_len = 0;
2494 else field_len -= l;
2495 Zero(cur, field_len, char);
2501 const char *str, *end;
2502 SSize_t l, field_len;
2508 str = SvPV_const(fromstr, fromlen);
2509 end = str + fromlen;
2510 if (DO_UTF8(fromstr)) {
2512 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2514 utf8_source = FALSE;
2515 utf8_flags = 0; /* Unused, but keep compilers happy */
2517 if (howlen == e_star) len = fromlen;
2518 field_len = (len+1)/2;
2519 GROWING(utf8, cat, start, cur, field_len);
2520 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2523 if (datumtype == 'H')
2527 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2528 if (val < 256 && isALPHA(val))
2529 bits |= (val + 9) & 0xf;
2532 } else if (isALPHA(*str))
2533 bits |= (*str++ + 9) & 0xf;
2535 bits |= *str++ & 0xf;
2536 if (l & 1) bits <<= 4;
2538 PUSH_BYTE(utf8, cur, bits);
2546 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2547 if (val < 256 && isALPHA(val))
2548 bits |= ((val + 9) & 0xf) << 4;
2550 bits |= (val & 0xf) << 4;
2551 } else if (isALPHA(*str))
2552 bits |= ((*str++ + 9) & 0xf) << 4;
2554 bits |= (*str++ & 0xf) << 4;
2555 if (l & 1) bits >>= 4;
2557 PUSH_BYTE(utf8, cur, bits);
2563 PUSH_BYTE(utf8, cur, bits);
2566 /* Determine how many chars are left in the requested field */
2568 if (howlen == e_star) field_len = 0;
2569 else field_len -= l;
2570 Zero(cur, field_len, char);
2578 aiv = SvIV_no_inf(fromstr, datumtype);
2579 if ((-128 > aiv || aiv > 127))
2580 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2581 "Character in 'c' format wrapped in pack");
2582 PUSH_BYTE(utf8, cur, (U8)aiv);
2587 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2593 aiv = SvIV_no_inf(fromstr, datumtype);
2594 if ((0 > aiv || aiv > 0xff))
2595 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2596 "Character in 'C' format wrapped in pack");
2597 PUSH_BYTE(utf8, cur, (U8)aiv);
2602 U8 in_bytes = (U8)IN_BYTES;
2604 end = start+SvLEN(cat)-1;
2605 if (utf8) end -= UTF8_MAXLEN-1;
2609 auv = SvUV_no_inf(fromstr, datumtype);
2610 if (in_bytes) auv = auv % 0x100;
2615 SvCUR_set(cat, cur - start);
2617 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2618 end = start+SvLEN(cat)-UTF8_MAXLEN;
2620 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2625 SvCUR_set(cat, cur - start);
2626 marked_upgrade(aTHX_ cat, symptr);
2627 lookahead.flags |= FLAG_DO_UTF8;
2628 lookahead.strbeg = symptr->strbeg;
2631 cur = start + SvCUR(cat);
2632 end = start+SvLEN(cat)-UTF8_MAXLEN;
2635 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2636 "Character in 'W' format wrapped in pack");
2641 SvCUR_set(cat, cur - start);
2642 GROWING(0, cat, start, cur, len+1);
2643 end = start+SvLEN(cat)-1;
2645 *(U8 *) cur++ = (U8)auv;
2654 if (!(symptr->flags & FLAG_DO_UTF8)) {
2655 marked_upgrade(aTHX_ cat, symptr);
2656 lookahead.flags |= FLAG_DO_UTF8;
2657 lookahead.strbeg = symptr->strbeg;
2663 end = start+SvLEN(cat);
2664 if (!utf8) end -= UTF8_MAXLEN;
2668 auv = SvUV_no_inf(fromstr, datumtype);
2670 U8 buffer[UTF8_MAXLEN+1], *endb;
2671 endb = uvchr_to_utf8_flags(buffer, auv, 0);
2672 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2674 SvCUR_set(cat, cur - start);
2675 GROWING(0, cat, start, cur,
2676 len+(endb-buffer)*UTF8_EXPAND);
2677 end = start+SvLEN(cat);
2679 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2683 SvCUR_set(cat, cur - start);
2684 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2685 end = start+SvLEN(cat)-UTF8_MAXLEN;
2687 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2692 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2698 anv = SvNV(fromstr);
2699 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2700 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2701 * on Alpha; fake it if we don't have them.
2705 else if (anv < -FLT_MAX)
2707 else afloat = (float)anv;
2709 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2711 afloat = (float)NV_NAN;
2715 /* a simple cast to float is undefined if outside
2716 * the range of values that can be represented */
2717 afloat = (float)(anv > FLT_MAX ? NV_INF :
2718 anv < -FLT_MAX ? -NV_INF : anv);
2721 PUSH_VAR(utf8, cur, afloat, needs_swap);
2729 anv = SvNV(fromstr);
2730 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2731 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2732 * on Alpha; fake it if we don't have them.
2736 else if (anv < -DBL_MAX)
2738 else adouble = (double)anv;
2740 adouble = (double)anv;
2742 PUSH_VAR(utf8, cur, adouble, needs_swap);
2747 Zero(&anv, 1, NV); /* can be long double with unused bits */
2751 /* to work round a gcc/x86 bug; don't use SvNV */
2752 anv.nv = sv_2nv(fromstr);
2753 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2754 && LONG_DOUBLESIZE > 10
2755 /* GCC sometimes overwrites the padding in the
2757 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2760 anv.nv = SvNV(fromstr);
2762 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2766 #if defined(HAS_LONG_DOUBLE)
2769 /* long doubles can have unused bits, which may be nonzero */
2770 Zero(&aldouble, 1, long double);
2774 /* to work round a gcc/x86 bug; don't use SvNV */
2775 aldouble.ld = (long double)sv_2nv(fromstr);
2776 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2777 /* GCC sometimes overwrites the padding in the
2779 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2782 aldouble.ld = (long double)SvNV(fromstr);
2784 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2790 case 'n' | TYPE_IS_SHRIEKING:
2795 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2796 ai16 = PerlSock_htons(ai16);
2797 PUSH16(utf8, cur, &ai16, FALSE);
2800 case 'v' | TYPE_IS_SHRIEKING:
2805 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2807 PUSH16(utf8, cur, &ai16, FALSE);
2810 case 'S' | TYPE_IS_SHRIEKING:
2811 #if SHORTSIZE != SIZE16
2813 unsigned short aushort;
2815 aushort = SvUV_no_inf(fromstr, datumtype);
2816 PUSH_VAR(utf8, cur, aushort, needs_swap);
2826 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2827 PUSH16(utf8, cur, &au16, needs_swap);
2830 case 's' | TYPE_IS_SHRIEKING:
2831 #if SHORTSIZE != SIZE16
2835 ashort = SvIV_no_inf(fromstr, datumtype);
2836 PUSH_VAR(utf8, cur, ashort, needs_swap);
2846 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2847 PUSH16(utf8, cur, &ai16, needs_swap);
2851 case 'I' | TYPE_IS_SHRIEKING:
2855 auint = SvUV_no_inf(fromstr, datumtype);
2856 PUSH_VAR(utf8, cur, auint, needs_swap);
2863 aiv = SvIV_no_inf(fromstr, datumtype);
2864 PUSH_VAR(utf8, cur, aiv, needs_swap);
2871 auv = SvUV_no_inf(fromstr, datumtype);
2872 PUSH_VAR(utf8, cur, auv, needs_swap);
2879 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2880 anv = SvNV_nomg(fromstr);
2884 SvCUR_set(cat, cur - start);
2885 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2888 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2889 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2890 any negative IVs will have already been got by the croak()
2891 above. IOK is untrue for fractions, so we test them
2892 against UV_MAX_P1. */
2893 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2894 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2895 char *in = buf + sizeof(buf);
2896 UV auv = SvUV_nomg(fromstr);
2899 *--in = (char)((auv & 0x7f) | 0x80);
2902 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2903 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2904 in, (buf + sizeof(buf)) - in);
2905 } else if (SvPOKp(fromstr))
2907 else if (SvNOKp(fromstr)) {
2908 /* 10**NV_MAX_10_EXP is the largest power of 10
2909 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2910 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2911 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2912 And with that many bytes only Inf can overflow.
2913 Some C compilers are strict about integral constant
2914 expressions so we conservatively divide by a slightly
2915 smaller integer instead of multiplying by the exact
2916 floating-point value.
2918 #ifdef NV_MAX_10_EXP
2919 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2920 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2922 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2923 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2925 char *in = buf + sizeof(buf);
2927 anv = Perl_floor(anv);
2929 const NV next = Perl_floor(anv / 128);
2930 if (in <= buf) /* this cannot happen ;-) */
2931 Perl_croak(aTHX_ "Cannot compress integer in pack");
2932 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2935 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2936 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2937 in, (buf + sizeof(buf)) - in);
2946 /* Copy string and check for compliance */
2947 from = SvPV_nomg_const(fromstr, len);
2948 if ((norm = is_an_int(from, len)) == NULL)
2949 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2951 Newx(result, len, char);
2954 while (!done) *--in = div128(norm, &done) | 0x80;
2955 result[len - 1] &= 0x7F; /* clear continue bit */
2956 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2957 in, (result + len) - in);
2959 SvREFCNT_dec(norm); /* free norm */
2964 case 'i' | TYPE_IS_SHRIEKING:
2968 aint = SvIV_no_inf(fromstr, datumtype);
2969 PUSH_VAR(utf8, cur, aint, needs_swap);
2972 case 'N' | TYPE_IS_SHRIEKING:
2977 au32 = SvUV_no_inf(fromstr, datumtype);
2978 au32 = PerlSock_htonl(au32);
2979 PUSH32(utf8, cur, &au32, FALSE);
2982 case 'V' | TYPE_IS_SHRIEKING:
2987 au32 = SvUV_no_inf(fromstr, datumtype);
2989 PUSH32(utf8, cur, &au32, FALSE);
2992 case 'L' | TYPE_IS_SHRIEKING:
2993 #if LONGSIZE != SIZE32
2995 unsigned long aulong;
2997 aulong = SvUV_no_inf(fromstr, datumtype);
2998 PUSH_VAR(utf8, cur, aulong, needs_swap);
3008 au32 = SvUV_no_inf(fromstr, datumtype);
3009 PUSH32(utf8, cur, &au32, needs_swap);
3012 case 'l' | TYPE_IS_SHRIEKING:
3013 #if LONGSIZE != SIZE32
3017 along = SvIV_no_inf(fromstr, datumtype);
3018 PUSH_VAR(utf8, cur, along, needs_swap);
3028 ai32 = SvIV_no_inf(fromstr, datumtype);
3029 PUSH32(utf8, cur, &ai32, needs_swap);
3032 #if defined(HAS_QUAD) && IVSIZE >= 8
3037 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3038 PUSH_VAR(utf8, cur, auquad, needs_swap);
3045 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3046 PUSH_VAR(utf8, cur, aquad, needs_swap);
3051 len = 1; /* assume SV is correct length */
3052 GROWING(utf8, cat, start, cur, sizeof(char *));
3059 SvGETMAGIC(fromstr);
3060 if (!SvOK(fromstr)) aptr = NULL;
3062 /* XXX better yet, could spirit away the string to
3063 * a safe spot and hang on to it until the result
3064 * of pack() (and all copies of the result) are
3067 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3068 || (SvPADTMP(fromstr) &&
3069 !SvREADONLY(fromstr)))) {
3070 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3071 "Attempt to pack pointer to temporary value");
3073 if (SvPOK(fromstr) || SvNIOK(fromstr))
3074 aptr = SvPV_nomg_const_nolen(fromstr);
3076 aptr = SvPV_force_flags_nolen(fromstr, 0);
3078 PUSH_VAR(utf8, cur, aptr, needs_swap);
3082 const char *aptr, *aend;
3086 if (len <= 2) len = 45;
3087 else len = len / 3 * 3;
3089 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3090 "Field too wide in 'u' format in pack");
3093 aptr = SvPV_const(fromstr, fromlen);
3094 from_utf8 = DO_UTF8(fromstr);
3096 aend = aptr + fromlen;
3097 fromlen = sv_len_utf8_nomg(fromstr);
3098 } else aend = NULL; /* Unused, but keep compilers happy */
3099 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3100 while (fromlen > 0) {
3103 U8 hunk[1+63/3*4+1];
3105 if ((SSize_t)fromlen > len)
3111 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3112 'u' | TYPE_IS_PACK)) {
3114 SvCUR_set(cat, cur - start);
3115 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3116 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3117 aptr, aend, buffer, todo);
3119 end = doencodes(hunk, (const U8 *)buffer, todo);
3121 end = doencodes(hunk, (const U8 *)aptr, todo);
3124 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3131 SvCUR_set(cat, cur - start);
3133 *symptr = lookahead;
3142 dSP; dMARK; dORIGMARK; dTARGET;
3145 SV *pat_sv = *++MARK;
3146 const char *pat = SvPV_const(pat_sv, fromlen);
3147 const char *patend = pat + fromlen;
3153 packlist(cat, pat, patend, MARK, SP + 1);
3157 const char * result = SvPV_nomg(cat, result_len);
3158 const U8 * error_pos;
3160 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3161 _force_out_malformed_utf8_message(error_pos,
3162 (U8 *) result + result_len,
3166 NOT_REACHED; /* NOTREACHED */
3177 * ex: set ts=8 sts=4 sw=4 et: