if none of the characters in the string are > 0xff,
the result is a complemented byte string, not a (UTF-8)
char string. Based on the summary in
Subject: Re: [ID
20000918.005] ~ on wide chars
Message-ID: <jSDD6gzkgi/T092yn@efn.org>
This should give us the maximum backward (pre-char string)
compatibility and utf8 compatibility. The other alternative
would be to limit the bit complement to be always byte only,
taking the least significant byte of the chars.
p4raw-id: //depot/perl@7665
if (left_utf && !right_utf)
sv_utf8_upgrade(right);
- if (!left_utf && right_utf)
+ else if (!left_utf && right_utf)
sv_utf8_upgrade(left);
if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
=item *
+The bit string operators C<& | ^ ~> can operate on character data.
+However, for backward compatibility reasons (bit string operations
+when the characters all are less than 256 in ordinal value) one cannot
+mix C<~> (the bit complement) and characters both less than 256 and
+equal or greater than 256. Most importantly, the DeMorgan's laws
+(C<~($x|$y) eq ~$x&~$y>, C<~($x&$y) eq ~$x|~$y>) won't hold.
+Another way to look at this is that the complement cannot return
+B<both> the 8-bit (byte) wide bit complement, and the full character
+wide bit complement.
+
+=item *
+
And finally, C<scalar reverse()> reverses by character rather than by byte.
=back
tmps = (U8*)SvPV_force(TARG, len);
anum = len;
if (SvUTF8(TARG)) {
- /* Calculate exact length, let's not estimate */
+ /* Calculate exact length, let's not estimate. */
STRLEN targlen = 0;
U8 *result;
U8 *send;
STRLEN l;
+ UV nchar = 0;
+ UV nwide = 0;
send = tmps + len;
while (tmps < send) {
UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
tmps += UTF8SKIP(tmps);
targlen += UNISKIP(~c);
+ nchar++;
+ if (c > 0xff)
+ nwide++;
}
/* Now rewind strings and write them. */
tmps -= len;
- Newz(0, result, targlen + 1, U8);
- while (tmps < send) {
- UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
- tmps += UTF8SKIP(tmps);
- result = uv_to_utf8(result,(UV)~c);
+
+ if (nwide) {
+ Newz(0, result, targlen + 1, U8);
+ while (tmps < send) {
+ UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ tmps += UTF8SKIP(tmps);
+ result = uv_to_utf8(result, ~c);
+ }
+ *result = '\0';
+ result -= targlen;
+ sv_setpvn(TARG, (char*)result, targlen);
+ SvUTF8_on(TARG);
+ }
+ else {
+ Newz(0, result, nchar + 1, U8);
+ while (tmps < send) {
+ U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ tmps += UTF8SKIP(tmps);
+ *result++ = ~c;
+ }
+ *result = '\0';
+ result -= nchar;
+ sv_setpvn(TARG, (char*)result, nchar);
}
- *result = '\0';
- result -= targlen;
- sv_setpvn(TARG, (char*)result, targlen);
- SvUTF8_on(TARG);
Safefree(result);
SETs(TARG);
RETURN;
@INC = '../lib';
}
-print "1..38\n";
+print "1..40\n";
# numerics
print ((0xdead & 0xbeef) == 0x9ead ? "ok 1\n" : "not ok 1\n");
if $a ne chr(~$_) or length($a) != 1 or ~$a ne chr($_);
}
if (@not36) {
- print "# test 36 failed: @not36\n";
+ print "# test 36 failed\n";
print "not ";
}
print "ok 36\n";
push @not37, sprintf("%#03X %#03X", $i, $j)
if $a ne chr(~$i).chr(~$j) or
length($a) != 2 or
- ~$a ne chr($i).chr($j);
+ ~$a ne chr($i).chr($j);
}
}
if (@not37) {
- print "# test 37 failed: @not37\n";
+ print "# test 37 failed\n";
print "not ";
}
print "ok 37\n";
print "not " unless ~chr(~0) eq "\0";
print "ok 38\n";
+
+my @not39;
+
+for my $i (0x100..0x120) {
+ for my $j (0x100...0x120) {
+ push @not39, sprintf("%#03X %#03X", $i, $j)
+ if ~(chr($i)|chr($j)) ne (~chr($i)&~chr($j));
+ }
+}
+if (@not39) {
+ print "# test 39 failed\n";
+ print "not ";
+}
+print "ok 39\n";
+
+my @not40;
+
+for my $i (0x100..0x120) {
+ for my $j (0x100...0x120) {
+ push @not40, sprintf("%#03X %#03X", $i, $j)
+ if ~(chr($i)&chr($j)) ne (~chr($i)|~chr($j));
+ }
+}
+if (@not40) {
+ print "# test 40 failed\n";
+ print "not ";
+}
+print "ok 40\n";