}
if (charcount < recsize) {
- /* read the rest of the current character, and maybe the
- beginning of the next, if we need it */
- STRLEN readsize = (charstart ? 0 : skip - (bend - bufp))
- + (charcount + 1 < recsize);
+ STRLEN readsize;
STRLEN bufp_offset = bufp - buffer;
SSize_t morebytesread;
+ /* Originally I read enough to fill any incomplete
+ character and the first byte of the next
+ character if needed, but if there are many
+ multi-byte encoded characters we end up
+ making a read call for every character beyond
+ the original read size.
+
+ So instead, read the rest of the character if
+ any, and enough bytes to match at least the
+ start bytes for each character we're going to
+ read.
+ */
+ if (charstart)
+ readsize = recsize - charcount;
+ else
+ readsize = skip - (bend - bufp) + recsize - charcount - 1;
buffer = SvGROW(sv, append + bytesread + readsize + 1) + append;
bend = buffer + bytesread;
morebytesread = PerlIO_read(fp, bend, readsize);
no utf8; # needed for use utf8 not griping about the raw octets
-plan(tests => 59);
+plan(tests => 61);
$| = 1;
open F, ">:utf8", $a_file;
print F "foo\xE4";
print F "bar\xFE";
+ print F "\xC0\xC8\xCC\xD2";
+ print F "a\xE4ab";
print F "a\xE4a";
close F;
open F, "<:utf8", $a_file;
$line .= <F>;
is($line, "foo\xE4bar\xFE", "rcatline with \$/ = \\4");
$line = <F>;
+ is($line, "\xC0\xC8\xCC\xD2", "readline with several encoded characters");
+ $line = <F>;
+ is($line, "a\xE4ab", "readline with another boundary condition");
+ $line = <F>;
is($line, "a\xE4a", "readline with boundary condition");
close F;