From 45ec036eed0e74556fa7c8362469405d6f752898 Mon Sep 17 00:00:00 2001 From: Per Bothner Date: Sat, 13 Mar 1999 11:21:38 -0800 Subject: [PATCH] lex.c (java_read_char): UNGET invalid non-initial utf8 character. * lex.c (java_read_char): UNGET invalid non-initial utf8 character. * lex.h (UNGETC): Change misleading macro. From-SVN: r25753 --- gcc/java/lex.c | 13 ++++++++++++- gcc/java/lex.h | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/gcc/java/lex.c b/gcc/java/lex.c index 8c40d47..54708ad 100644 --- a/gcc/java/lex.c +++ b/gcc/java/lex.c @@ -227,6 +227,7 @@ java_read_char () c1 = GETC (); if ((c1 & 0xc0) == 0x80) return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f)); + c = c1; } else if ((c & 0xf0) == 0xe0) { @@ -237,8 +238,18 @@ java_read_char () if ((c2 & 0xc0) == 0x80) return (unicode_t)(((c & 0xf) << 12) + (( c1 & 0x3f) << 6) + (c2 & 0x3f)); + else + c = c2; } + else + c = c1; } + /* We looked for a UTF8 multi-byte sequence (since we saw an initial + byte with the high bit set), but found invalid bytes instead. + If the most recent byte was ASCII (and not EOF), we should + unget it, in case it was a comment terminator or other delimiter. */ + if ((c & 0x80) == 0) + UNGETC (c); return BAD_UTF8_VALUE; } } @@ -308,7 +319,7 @@ java_read_unicode (term_context, unicode_escape_p) return (term_context ? unicode : (java_lineterminator (c) ? '\n' : unicode)); } - UNGETC (c); + ctxp->unget_utf8_value = c; } return (unicode_t)'\\'; } diff --git a/gcc/java/lex.h b/gcc/java/lex.h index 42cb1c4..d40196e 100644 --- a/gcc/java/lex.h +++ b/gcc/java/lex.h @@ -99,8 +99,8 @@ typedef struct _java_lc { #define JAVA_LINE_MAX 80 -/* Macro to read and unread chars */ -#define UNGETC(c) ctxp->unget_utf8_value = (c); +/* Macro to read and unread bytes */ +#define UNGETC(c) ungetc(c, finput) #define GETC() getc(finput) /* Build a location compound integer */ -- 2.7.4