From 45ec036eed0e74556fa7c8362469405d6f752898 Mon Sep 17 00:00:00 2001
From: Per Bothner <bothner@gcc.gnu.org>
Date: Sat, 13 Mar 1999 11:21:38 -0800
Subject: [PATCH] lex.c (java_read_char): UNGET invalid non-initial utf8
 character.

	* lex.c (java_read_char):  UNGET invalid non-initial utf8 character.
	* lex.h (UNGETC):  Change misleading macro.

From-SVN: r25753
---
 gcc/java/lex.c | 13 ++++++++++++-
 gcc/java/lex.h |  4 ++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 8c40d47..54708ad 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -227,6 +227,7 @@ java_read_char ()
           c1 = GETC ();
 	  if ((c1 & 0xc0) == 0x80)
 	    return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
+	  c = c1;
 	}
       else if ((c & 0xf0) == 0xe0)
         {
@@ -237,8 +238,18 @@ java_read_char ()
 	      if ((c2 & 0xc0) == 0x80)
 	        return (unicode_t)(((c & 0xf) << 12) + 
 				   (( c1 & 0x3f) << 6) + (c2 & 0x3f));
+	      else
+		c = c2;
 	    }
+	  else
+	    c = c1;
 	}
+      /* We looked for a UTF8 multi-byte sequence (since we saw an initial
+	 byte with the high bit set), but found invalid bytes instead.
+	 If the most recent byte was ASCII (and not EOF), we should
+	 unget it, in case it was a comment terminator or other delimiter. */
+      if ((c & 0x80) == 0)
+	UNGETC (c);
       return BAD_UTF8_VALUE;
     }
 }
@@ -308,7 +319,7 @@ java_read_unicode (term_context, unicode_escape_p)
 	  return (term_context ? unicode :
 		  (java_lineterminator (c) ? '\n' : unicode));
 	}
-      UNGETC (c);
+      ctxp->unget_utf8_value = c;
     }
   return (unicode_t)'\\';
 }
diff --git a/gcc/java/lex.h b/gcc/java/lex.h
index 42cb1c4..d40196e 100644
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -99,8 +99,8 @@ typedef struct _java_lc {
 
 #define JAVA_LINE_MAX 80
 
-/* Macro to read and unread chars */
-#define UNGETC(c) ctxp->unget_utf8_value = (c);
+/* Macros to read and unread bytes */
+#define UNGETC(c) ungetc(c, finput)
 #define GETC()    getc(finput)
 
 /* Build a location compound integer */
-- 
2.7.4