From 9baac1a3613bd641a847683d7877b3cfab3244bc Mon Sep 17 00:00:00 2001 From: Slaven Rezic Date: Wed, 10 Jul 2013 14:18:18 +1000 Subject: [PATCH] Data::Dumper: useqq implementation for xs Tests are mainly unchanged, just a "cheat" and a couple of TODOs were removed. --- dist/Data-Dumper/Dumper.pm | 1 - dist/Data-Dumper/Dumper.xs | 85 ++++++++++++++++++++++++++++++++------------- dist/Data-Dumper/t/dumper.t | 17 ++------- 3 files changed, 63 insertions(+), 40 deletions(-) diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm index 7c778dc..e11323a 100644 --- a/dist/Data-Dumper/Dumper.pm +++ b/dist/Data-Dumper/Dumper.pm @@ -221,7 +221,6 @@ sub DESTROY {} sub Dump { return &Dumpxs unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) || - $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) || $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse}); return &Dumpperl; } diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs index b74650a..2a19097 100644 --- a/dist/Data-Dumper/Dumper.xs +++ b/dist/Data-Dumper/Dumper.xs @@ -18,7 +18,7 @@ static I32 num_q (const char *s, STRLEN slen); static I32 esc_q (char *dest, const char *src, STRLEN slen); -static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen); +static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq); static I32 needs_quote(const char *s, STRLEN len); static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, @@ -26,7 +26,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, - I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash); + I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq); #ifndef HvNAME_get #define HvNAME_get HvNAME @@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen) return ret; } +/* this function is also misused for implementing $Useqq */ static I32 -esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) +esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq) { char *r, *rstart; const char *s = src; @@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) int increment; /* this will need EBCDICification */ - for (s = src; s < send; s += increment) { - const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL); + for (s = src; s < send; do_utf8 ? s += increment : s++) { + const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s; /* check for invalid utf8 */ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s); @@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) k <= 0xFFFFFFFF ? 8 : UVSIZE * 4 #endif ); +#ifndef EBCDIC + } else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) { + grow += 2; + } else if (useqq && k <= 31) { + grow += 3; + } else if (useqq && k >= 127) { + grow += 4; +#endif } else if (k == '\\') { backslashes++; } else if (k == '\'') { @@ -205,7 +214,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) normal++; } } - if (grow) { + if (grow || useqq) { /* We have something needing hex. 3 is ""\0 */ sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes + 2*qq_escapables + normal); @@ -213,8 +222,9 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) *r++ = '"'; - for (s = src; s < send; s += UTF8SKIP(s)) { - const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL); + for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) { + const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s; + if (k == '"' || k == '\\' || k == '$' || k == '@') { *r++ = '\\'; @@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) #ifdef EBCDIC if (isprint(k) && k < 256) #else + if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) { + *r++ = '\\'; + switch (k) { + case 7: *r++ = 'a'; break; + case 8: *r++ = 'b'; break; + case 9: *r++ = 't'; break; + case 10: *r++ = 'n'; break; + case 12: *r++ = 'f'; break; + case 13: *r++ = 'r'; break; + case 27: *r++ = 'e'; break; + default: + /* faster than + * r = r + my_sprintf(r, "%o", k); + */ + if (k <= 7) { + *r++ = (char)k + '0'; + } else if (k <= 63) { + *r++ = (char)(k>>3) + '0'; + *r++ = (char)(k&7) + '0'; + } else { + *r++ = (char)(k>>6) + '0'; + *r++ = (char)((k&63)>>3) + '0'; + *r++ = (char)(k&7) + '0'; + } + } + } + else if (k < 0x80) #endif *r++ = (char)k; @@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys, - int use_sparse_seen_hash) + int use_sparse_seen_hash, I32 useqq) { char tmpbuf[128]; U32 i; @@ -524,7 +561,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); sv_catpvn(retval, ")}", 2); } /* plain */ else { @@ -532,7 +569,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); } SvREFCNT_dec(namesv); } @@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); SvREFCNT_dec(namesv); } else if (realtype == SVt_PVAV) { @@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); if (ix < ixmax) sv_catpvn(retval, ",", 1); } @@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, The code is also smaller (22044 vs 22260) because I've been able to pull the common logic out to both sides. */ if (quotekeys || needs_quote(key,keylen)) { - if (do_utf8) { + if (do_utf8 || useqq) { STRLEN ocur = SvCUR(retval); - nlen = esc_q_utf8(aTHX_ retval, key, klen); + nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq); nkey = SvPVX(retval) + ocur; } else { @@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv, postav, levelp, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); SvREFCNT_dec(sname); Safefree(nkey_buffer); if (indent >= 2) @@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, r = SvPVX(retval)+SvCUR(retval); r[0] = '*'; r[1] = '{'; SvCUR_set(retval, SvCUR(retval)+2); - esc_q_utf8(aTHX_ retval, c, i); + esc_q_utf8(aTHX_ retval, c, i, 1, useqq); sv_grow(retval, SvCUR(retval)+2); r = SvPVX(retval)+SvCUR(retval); r[0] = '}'; r[1] = '\0'; @@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, seenhv, postav, &nlevel, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, maxdepth, - sortkeys, use_sparse_seen_hash); + sortkeys, use_sparse_seen_hash, useqq); SvREFCNT_dec(e); } } @@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, else { integer_came_from_string: c = SvPV(val, i); - if (DO_UTF8(val)) - i += esc_q_utf8(aTHX_ retval, c, i); + if (DO_UTF8(val) || useqq) + i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq); else { sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ r = SvPVX(retval) + SvCUR(retval); @@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...) HV *seenhv = NULL; AV *postav, *todumpav, *namesav; I32 level = 0; - I32 indent, terse, i, imax, postlen; + I32 indent, terse, useqq, i, imax, postlen; SV **svp; SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname; SV *freezer, *toaster, *bless, *sortkeys; @@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...) = freezer = toaster = bless = sortkeys = &PL_sv_undef; name = sv_newmortal(); indent = 2; - terse = purity = deepcopy = 0; + terse = purity = deepcopy = useqq = 0; quotekeys = 1; retval = newSVpvn("", 0); @@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...) purity = SvIV(*svp); if ((svp = hv_fetch(hv, "terse", 5, FALSE))) terse = SvTRUE(*svp); -#if 0 /* useqq currently unused */ if ((svp = hv_fetch(hv, "useqq", 5, FALSE))) useqq = SvTRUE(*svp); -#endif if ((svp = hv_fetch(hv, "pad", 3, FALSE))) pad = *svp; if ((svp = hv_fetch(hv, "xpad", 4, FALSE))) @@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...) DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv, postav, &level, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, - bless, maxdepth, sortkeys, use_sparse_seen_hash); + bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq); SPAGAIN; if (indent >= 2 && !terse) diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t index c1e5fe6..5ae287e 100644 --- a/dist/Data-Dumper/t/dumper.t +++ b/dist/Data-Dumper/t/dumper.t @@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000", { local $Data::Dumper::Useqq = 1; TEST q(Dumper($foo)); + TEST q(Data::Dumper::DumperX($foo)) if $XS; } - $WANT = <<"EOT"; -#\$VAR1 = { -# 'abc\0\\'\efg' => 'mno\0', -# 'reftest' => \\\\1 -#}; -EOT - - { - local $Data::Dumper::Useqq = 1; - TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat - } - ############# @@ -1461,7 +1450,7 @@ EOT $foo = [ join "", map chr, 0..255 ]; local $Data::Dumper::Useqq = 1; TEST q(Dumper($foo)), 'All latin1 characters'; - for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS; + TEST q(Data::Dumper::DumperX($foo)) if $XS; } ############# 372 @@ -1481,7 +1470,7 @@ EOT TEST q(Dumper($foo)), 'All latin1 characters with utf8 flag including a wide character'; } - for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS; + TEST q(Data::Dumper::DumperX($foo)) if $XS; } ############# 378 -- 2.7.4