From 6d51015587940c2032a6533d886163f69ca028f9 Mon Sep 17 00:00:00 2001 From: Jan Dubois Date: Wed, 13 Apr 2011 17:02:39 -0700 Subject: [PATCH] [perl #88420] BOM support on Windows broken in 5.13.11 When Perl reads the script in text mode, then the tell() position on the script handle may include stripped carriage return characters. Therefore the file position after reading the first line of the script may be one larger than the length of the input buffer. --- MANIFEST | 1 + t/io/bom.t | 14 ++++++++++++++ toke.c | 8 +++++++- 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 t/io/bom.t diff --git a/MANIFEST b/MANIFEST index 0e02719..a28ada3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4683,6 +4683,7 @@ time64_config.h 64 bit clean time.h (configuration) time64.h 64 bit clean time.h (header) t/io/argv.t See if ARGV stuff works t/io/binmode.t See if binmode() works +t/io/bom.t See if scripts can start with a byte order mark t/io/crlf.t See if :crlf works t/io/crlf_through.t See if pipe passes data intact with :crlf t/io/data.t See if DATA works diff --git a/t/io/bom.t b/t/io/bom.t new file mode 100644 index 0000000..dd1be65 --- /dev/null +++ b/t/io/bom.t @@ -0,0 +1,14 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { require "./test.pl"; } + +plan(tests => 1); + +# It is important that the script contains at least one newline character +# that can be expanded to \r\n on DOSish systems. +fresh_perl_is("\xEF\xBB\xBFprint 1;\nprint 2", "12", {}, "script starts with a BOM" ); diff --git a/toke.c b/toke.c index c4cda7b..0f08d42 100644 --- a/toke.c +++ b/toke.c @@ -4792,7 +4792,13 @@ Perl_yylex(pTHX) *(U8*)s == 0xEF || *(U8*)s >= 0xFE || s[1] == 0)) { - bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr); + IV offset = (IV)PerlIO_tell(PL_rsfp); + bof = (offset == SvCUR(PL_linestr)); +#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) + /* offset may include swallowed CR */ + if (!bof) + bof = (offset == SvCUR(PL_linestr)+1); +#endif if (bof) { PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); s = swallow_bom((U8*)s); -- 2.7.4