From 78ae23f54fc4e301b66d2a32897cb42fa2633fe7 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Fri, 14 Jul 2000 01:33:59 +0000 Subject: [PATCH] Fix the BOM bug: not a byteorder bug, a signedness bug. p4raw-id: //depot/cfgperl@6394 --- embed.pl | 2 +- global.sym | 5 +++++ proto.h | 2 +- toke.c | 20 ++++++++++---------- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/embed.pl b/embed.pl index 3d4f3bb..ccb294d 100755 --- a/embed.pl +++ b/embed.pl @@ -2475,7 +2475,7 @@ s |char* |scan_trans |char *start s |char* |scan_word |char *s|char *dest|STRLEN destlen \ |int allow_package|STRLEN *slp s |char* |skipspace |char *s -s |char* |swallow_bom |char *s +s |char* |swallow_bom |U8 *s s |void |checkcomma |char *s|char *name|char *what s |void |force_ident |char *s|int kind s |void |incline |char *s diff --git a/global.sym b/global.sym index 719e50a..6ee8fc4 100644 --- a/global.sym +++ b/global.sym @@ -21,6 +21,7 @@ Perl_get_context Perl_set_context Perl_amagic_call Perl_Gv_AMupdate +Perl_apply_attrs_string Perl_avhv_delete_ent Perl_avhv_exists_ent Perl_avhv_fetch_ent @@ -185,6 +186,7 @@ Perl_to_uni_upper_lc Perl_to_uni_title_lc Perl_to_uni_lower_lc Perl_is_utf8_char +Perl_is_utf8_string Perl_is_utf8_alnum Perl_is_utf8_alnumc Perl_is_utf8_idfirst @@ -460,6 +462,8 @@ Perl_utf16_to_utf8 Perl_utf16_to_utf8_reversed Perl_utf8_distance Perl_utf8_hop +Perl_utf8_to_bytes +Perl_bytes_to_utf8 Perl_utf8_to_uv Perl_uv_to_utf8 Perl_warn @@ -543,3 +547,4 @@ Perl_ptr_table_fetch Perl_ptr_table_store Perl_ptr_table_split Perl_sys_intern_clear +Perl_sys_intern_init diff --git a/proto.h b/proto.h index bd222fe..358f530 100644 --- a/proto.h +++ b/proto.h @@ -1228,7 +1228,7 @@ STATIC char* S_scan_subst(pTHX_ char *start); STATIC char* S_scan_trans(pTHX_ char *start); STATIC char* S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp); STATIC char* S_skipspace(pTHX_ char *s); -STATIC char* S_swallow_bom(pTHX_ char *s); +STATIC char* S_swallow_bom(pTHX_ U8 *s); STATIC void S_checkcomma(pTHX_ char *s, char *name, char *what); STATIC void S_force_ident(pTHX_ char *s, int kind); STATIC void S_incline(pTHX_ char *s); diff --git a/toke.c b/toke.c index b312050..2a5df63 100644 --- a/toke.c +++ b/toke.c @@ -7406,20 +7406,20 @@ Perl_yyerror(pTHX_ char *s) STATIC char* -S_swallow_bom(pTHX_ char *s) { +S_swallow_bom(pTHX_ U8 *s) { STRLEN slen; slen = SvCUR(PL_linestr); switch (*s) { - case -1: - if ((s[1] & 255) == 254) { + case 0xFF: + if (s[1] == 0xFE) { /* UTF-16 little-endian */ #ifdef PERL_UTF16_FILTER U8 *news; #endif - s+=2; - if (*s == 0 && s[1] == 0) /* UTF-32 little-endian */ + if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ Perl_croak(aTHX_ "Unsupported script encoding"); #ifdef PERL_UTF16_FILTER + s+=2; filter_add(S_utf16rev_textfilter, NULL); New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8); PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s); @@ -7430,8 +7430,8 @@ S_swallow_bom(pTHX_ char *s) { } break; - case -2: - if ((s[1] & 255) == 255) { /* UTF-16 big-endian */ + case 0xFE: + if (s[1] == 0xFF) { /* UTF-16 big-endian */ #ifdef PERL_UTF16_FILTER U8 *news; filter_add(S_utf16_textfilter, NULL); @@ -7444,14 +7444,14 @@ S_swallow_bom(pTHX_ char *s) { } break; - case -17: - if ( slen>2 && (s[1] & 255) == 187 && (s[2] & 255) == 191) { + case 0xEF: + if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { s+=3; /* UTF-8 */ } break; case 0: if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */ - s[2] & 255 == 254 && s[3] & 255 == 255) + s[2] == 0xFE && s[3] == 0xFF) Perl_croak(aTHX_ "Unsupported script encoding"); } return s; -- 2.7.4