I32 matches = 0;
STRLEN len;
short *tbl;
+ I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
UV c;
STRLEN ulen;
c = utf8_to_uv(s, send - s, &ulen, 0);
- if (c < 0x100 && tbl[c] >= 0)
+ if (c < 0x100) {
+ if (tbl[c] >= 0)
+ matches++;
+ } else if (complement)
matches++;
s += ulen;
}
I32 isutf8;
I32 matches = 0;
I32 grows = PL_op->op_private & OPpTRANS_GROWS;
- STRLEN len;
+ I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
+ I32 del = PL_op->op_private & OPpTRANS_DELETE;
+ STRLEN len, rlen;
short *tbl;
I32 ch;
else
d = s;
dstart = d;
+ if (complement && !del)
+ rlen = tbl[0x100];
#ifdef MACOS_TRADITIONAL
#define comp CoMP /* "comp" is a keyword in some compilers ... */
STRLEN len;
UV comp = utf8_to_uv_simple(s, &len);
- if (comp > 0xff) { /* always unmapped */
- Copy(s, d, len, U8);
- d += len;
+ if (comp > 0xff) {
+ if (!complement) {
+ Copy(s, d, len, U8);
+ d += len;
+ }
+ else {
+ matches++;
+ if (!del) {
+ ch = (comp - 0x100 < rlen) ?
+ tbl[comp+1] : tbl[0x100+rlen];
+ if (ch != pch) {
+ d = uv_to_utf8(d, ch);
+ pch = ch;
+ }
+ s += len;
+ continue;
+ }
+ }
}
else if ((ch = tbl[comp]) >= 0) {
matches++;
while (s < send) {
STRLEN len;
UV comp = utf8_to_uv_simple(s, &len);
- if (comp > 0xff) { /* always unmapped */
- Copy(s, d, len, U8);
- d += len;
+ if (comp > 0xff) {
+ if (!complement) {
+ Copy(s, d, len, U8);
+ d += len;
+ }
+ else {
+ matches++;
+ if (!del) {
+ if (comp - 0x100 < rlen)
+ d = uv_to_utf8(d, tbl[comp+1]);
+ else
+ d = uv_to_utf8(d, tbl[0x100+rlen]);
+ }
+ }
}
else if ((ch = tbl[comp]) >= 0) {
d = uv_to_utf8(d, ch);
}
if (uv < none) {
matches++;
- d = uv_to_utf8(d, uv);
s += UTF8SKIP(s);
+ d = uv_to_utf8(d, uv);
continue;
}
else if (uv == none) { /* "none" is unmapped character */
}
}
}
+ if (!del) {
+ if (j >= rlen)
+ j = rlen - 1;
+ else
+ cPVOPo->op_pv = (char*)Renew(tbl, 0x101+rlen-j, short);
+ tbl[0x100] = rlen - j;
+ for (i=0; i < rlen - j; i++)
+ tbl[0x101+i] = r[j+i];
+ }
}
else {
if (!rlen && !del) {
@INC = '../lib';
}
-print "1..51\n";
+print "1..55\n";
$_ = "abcdefghijklmnopqrstuvwxyz";
use utf8;
}
# 11 - changing UTF8 characters in a UTF8 string, same length.
-$l = chr(300); $r = chr(400);
+my $l = chr(300); my $r = chr(400);
$x = 200.300.400;
$x =~ tr/\x{12c}/\x{190}/;
printf "not (%vd) ", $x if $x ne 200.400.400 or length $x != 3;
print "not " unless sprintf("%vd", $a) eq '196.172.200';
print "ok 49\n";
-# UTF8 range
+# UTF8 range tests from Inaba Hiroto
($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
print "not " unless $a eq v192.196.172.194.197.172;
($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
print "not " unless $a eq v300.300.172.302.301.172;
print "ok 51\n";
+
+# UTF8 range tests from Karsten Sperling (patch #9008 required)
+
+($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
+print "not " unless $a eq "X";
+print "ok 52\n";
+
+($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
+print "not " unless $a eq "X";
+print "ok 53\n";
+
+($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
+print "not " unless $a eq "X";
+print "ok 54\n";
+
+($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
+print "not " unless $a eq "X";
+print "ok 55\n";
+
I32 min; /* first character in range */
I32 max; /* last character in range */
+ if (utf) {
+ char *c = (char*)utf8_hop((U8*)d, -1);
+ char *e = d++;
+ while (e-- > c)
+ *(e + 1) = *e;
+ *c = 0xff;
+ /* mark the range as done, and continue */
+ dorange = FALSE;
+ didrange = TRUE;
+ continue;
+ }
i = d - SvPVX(sv); /* remember current offset */
SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
d = SvPVX(sv) + i; /* refresh d after realloc */
char *src, *dst;
d = SvGROW(sv,
- SvCUR(sv) + hicount + 1) +
+ SvLEN(sv) + hicount + 1) +
(d - old_pvx);
src = d - 1;
if (len > e - s + 4) {
char *odest = SvPVX(sv);
- SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
+ SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
d = SvPVX(sv) + (d - odest);
}
Copy(str, d, len, char);
Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
- New(803,tbl,256,short);
- o = newPVOP(OP_TRANS, 0, (char*)tbl);
-
complement = del = squash = 0;
while (strchr("cds", *s)) {
if (*s == 'c')
squash = OPpTRANS_SQUASH;
s++;
}
+
+ New(803, tbl, complement&&!del?258:256, short);
+ o = newPVOP(OP_TRANS, 0, (char*)tbl);
o->op_private = del|squash|complement|
(DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
(DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);