+2009-05-04 Joseph Myers <joseph@codesourcery.com>
+
+ * intl.c (locale_encoding, locale_utf8): New.
+ (gcc_init_libintl): Initialize locale_encoding and locale_utf8.
+ * intl.h (locale_encoding, locale_utf8): Declare.
+ * pretty-print.c: Include ggc.h. Include iconv.h if HAVE_ICONV.
+ (pp_base_tree_identifier, decode_utf8_char, identifier_to_locale):
+ New.
+ * pretty-print.h (pp_identifier): Call identifier_to_locale on ID
+ argument.
+ (pp_tree_identifier): Define to call pp_base_tree_identifier.
+ (pp_base_tree_identifier): Declare as function.
+ (identifier_to_locale): Declare.
+ * Makefile.in (pretty-print.o): Update dependencies.
+ * varasm.c (finish_aliases_1): Use %qE for identifiers in
+ diagnostics.
+
2009-05-04 Richard Guenther <rguenther@suse.de>
PR middle-end/40015
pointer-set.o: pointer-set.c pointer-set.h $(CONFIG_H) $(SYSTEM_H)
hooks.o: hooks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(HOOKS_H)
pretty-print.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h intl.h $(PRETTY_PRINT_H) \
- $(TREE_H)
+ $(TREE_H) $(GGC_H)
errors.o : errors.c $(CONFIG_H) $(SYSTEM_H) errors.h $(BCONFIG_H)
dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h errors.h $(DBGCNT_H) \
$(TM_H) $(RTL_H) output.h
/* Message translation utilities.
- Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008
+ Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of GCC.
/* Closing quotation mark for diagnostics. */
const char *close_quote = "'";
+/* The name of the locale encoding. */
+const char *locale_encoding = NULL;
+
+/* Whether the locale is using UTF-8. */
+bool locale_utf8 = false;
+
#ifdef ENABLE_NLS
/* Initialize the translation library for GCC. This performs the
/* Closing quotation mark. */
close_quote = _("'");
- if (!strcmp (open_quote, "`") && !strcmp (close_quote, "'"))
- {
#if defined HAVE_LANGINFO_CODESET
- const char *encoding;
+ locale_encoding = nl_langinfo (CODESET);
+ if (locale_encoding != NULL
+ && (!strcasecmp (locale_encoding, "utf-8")
+ || !strcasecmp (locale_encoding, "utf8")))
+ locale_utf8 = true;
#endif
+
+ if (!strcmp (open_quote, "`") && !strcmp (close_quote, "'"))
+ {
/* Untranslated quotes that it may be possible to replace with
U+2018 and U+2019; but otherwise use "'" instead of "`" as
opening quote. */
open_quote = "'";
#if defined HAVE_LANGINFO_CODESET
- encoding = nl_langinfo (CODESET);
- if (encoding != NULL
- && (!strcasecmp (encoding, "utf-8")
- || !strcasecmp (encoding, "utf8")))
+ if (locale_utf8)
{
open_quote = "\xe2\x80\x98";
close_quote = "\xe2\x80\x99";
/* intl.h - internationalization
- Copyright 1998, 2001, 2003, 2004, 2007 Free Software Foundation, Inc.
+ Copyright 1998, 2001, 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
extern const char *open_quote;
extern const char *close_quote;
+extern const char *locale_encoding;
+extern bool locale_utf8;
#endif /* intl.h */
#include "intl.h"
#include "pretty-print.h"
#include "tree.h"
+#include "ggc.h"
+
+#if HAVE_ICONV
+#include <iconv.h>
+#endif
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
pp_base (pp)->padding = pp_none;
}
}
+
+/* Print the identifier ID to the pretty-printer PP.  The text of ID
+ (IDENTIFIER_POINTER (ID)) is first passed through
+ identifier_to_locale, and it is the converted string, measured
+ with strlen, that is appended to PP.  */
+
+void
+pp_base_tree_identifier (pretty_printer *pp, tree id)
+{
+ const char *text = identifier_to_locale (IDENTIFIER_POINTER (id));
+ pp_append_text (pp, text, text + strlen (text));
+}
+\f
+/* The string starting at P has LEN (at least 1) bytes left; if they
+ start with a valid UTF-8 sequence, return the length of that
+ sequence and set *VALUE to the value of that sequence, and
+ otherwise return 0 and set *VALUE to (unsigned int) -1.
+
+ A first byte with the top bit clear is a one-byte sequence whose
+ value is the byte itself.  Otherwise the number of leading 1 bits
+ in the first byte gives the sequence length, which must be from 2
+ to 6 and no greater than LEN, and every following byte must have
+ the form 10xxxxxx.  A sequence is also rejected if its value would
+ fit in a shorter sequence (an overlong encoding) or lies in the
+ range 0xD800-0xDFFF.  Values above 0x10FFFF are not rejected.
+
+ Aborts if LEN is 0.  NOTE(review): *P is read to initialize T
+ before LEN is checked.  */
+
+static int
+decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
+{
+ unsigned int t = *p;
+
+ if (len == 0)
+ abort ();
+ if (t & 0x80)
+ {
+ size_t utf8_len = 0;
+ unsigned int ch;
+ size_t i;
+ for (t = *p; t & 0x80; t <<= 1) /* Count leading 1 bits of *P.  */
+ utf8_len++;
+
+ if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
+ {
+ *value = (unsigned int) -1;
+ return 0;
+ }
+ ch = *p & ((1 << (7 - utf8_len)) - 1); /* Low 7 - UTF8_LEN bits of *P.  */
+ for (i = 1; i < utf8_len; i++)
+ {
+ unsigned int u = p[i];
+ if ((u & 0xC0) != 0x80) /* Not of the form 10xxxxxx.  */
+ {
+ *value = (unsigned int) -1;
+ return 0;
+ }
+ ch = (ch << 6) | (u & 0x3F); /* Append the low six bits.  */
+ }
+ if ( (ch <= 0x7F && utf8_len > 1)
+ || (ch <= 0x7FF && utf8_len > 2)
+ || (ch <= 0xFFFF && utf8_len > 3)
+ || (ch <= 0x1FFFFF && utf8_len > 4)
+ || (ch <= 0x3FFFFFF && utf8_len > 5)
+ || (ch >= 0xD800 && ch <= 0xDFFF)) /* Overlong, or 0xD800-0xDFFF.  */
+ {
+ *value = (unsigned int) -1;
+ return 0;
+ }
+ *value = ch;
+ return utf8_len;
+ }
+ else
+ {
+ *value = t;
+ return 1;
+ }
+}
+
+/* Given IDENT, an identifier in the internal encoding, return a
+ version of IDENT suitable for diagnostics in the locale character
+ set: either IDENT itself, or a garbage-collected string converted
+ to the locale character set and using escape sequences if not
+ representable in the locale character set or containing control
+ characters or invalid byte sequences. Existing backslashes in
+ IDENT are not doubled, so the result may not uniquely specify the
+ contents of an arbitrary byte sequence identifier.
+
+ The cases are tried in this order:
+ 1. IDENT does not decode as UTF-8 with decode_utf8_char, or
+ contains a character in the ranges 0x00-0x1F or 0x7F-0x9F:
+ every byte outside 0x20-0x7E is written as a backslash followed
+ by three octal digits.
+ 2. IDENT consists only of one-byte characters, or locale_utf8 is
+ set: IDENT itself is returned and nothing is allocated.
+ 3. When built with ENABLE_NLS, HAVE_LANGINFO_CODESET and
+ HAVE_ICONV, and locale_encoding is non-null: IDENT is converted
+ from UTF-8 to locale_encoding with iconv.  The result is used
+ only if iconv_open succeeds, all input is consumed, and iconv
+ returns 0.
+ 4. Otherwise each multi-byte character is written as a backslash,
+ 'U' and eight lowercase hexadecimal digits; one-byte characters
+ are copied unchanged.  */
+
+const char *
+identifier_to_locale (const char *ident)
+{
+ const unsigned char *uid = (const unsigned char *) ident;
+ size_t idlen = strlen (ident);
+ bool valid_printable_utf8 = true;
+ bool all_ascii = true;
+ size_t i;
+
+ for (i = 0; i < idlen;)
+ {
+ unsigned int c;
+ size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
+ if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F))
+ {
+ valid_printable_utf8 = false;
+ break;
+ }
+ if (utf8_len > 1)
+ all_ascii = false;
+ i += utf8_len;
+ }
+
+ /* If IDENT contains invalid UTF-8 sequences (which may occur with
+ attributes putting arbitrary byte sequences in identifiers), or
+ control characters, we use octal escape sequences for all bytes
+ outside printable ASCII.  Each input byte produces at most four
+ output bytes, hence the 4 * IDLEN + 1 allocation.  */
+ if (!valid_printable_utf8)
+ {
+ char *ret = GGC_NEWVEC (char, 4 * idlen + 1);
+ char *p = ret;
+ for (i = 0; i < idlen; i++)
+ {
+ if (uid[i] > 0x1F && uid[i] < 0x7F)
+ *p++ = uid[i];
+ else
+ {
+ sprintf (p, "\\%03o", uid[i]);
+ p += 4;
+ }
+ }
+ *p = 0;
+ return ret;
+ }
+
+ /* Otherwise, if it is valid printable ASCII, or printable UTF-8
+ with the locale character set being UTF-8, IDENT is used
+ unchanged (the caller's own pointer is returned).  */
+ if (all_ascii || locale_utf8)
+ return ident;
+
+ /* Otherwise IDENT is converted to the locale character set if
+ possible.  If any step below fails, control falls through to
+ the UCN fallback after the #endif.  */
+#if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
+ if (locale_encoding != NULL)
+ {
+ iconv_t cd = iconv_open (locale_encoding, "UTF-8");
+ bool conversion_ok = true;
+ char *ret = NULL;
+ if (cd != (iconv_t) -1)
+ {
+ size_t ret_alloc = 4 * idlen + 1;
+ for (;;)
+ {
+ /* Repeat the whole conversion process as needed with
+ larger buffers so non-reversible transformations can
+ always be detected.  Each pass starts again from the
+ beginning of IDENT with a freshly allocated buffer of
+ RET_ALLOC bytes, one of which is reserved for the
+ terminating null; on E2BIG the buffer is freed,
+ RET_ALLOC is doubled and the pass is restarted.
+ NOTE(review): errno is also tested when iconv did
+ not return (size_t) -1 but left input unconsumed; in
+ that case errno was not set by this call -- confirm
+ whether that combination can occur.  */
+ ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
+ char *outbuf;
+ size_t inbytesleft = idlen;
+ size_t outbytesleft = ret_alloc - 1;
+ size_t iconv_ret;
+
+ ret = GGC_NEWVEC (char, ret_alloc);
+ outbuf = ret;
+
+ if (iconv (cd, 0, 0, 0, 0) == (size_t) -1) /* Reset CD before each pass.  */
+ {
+ conversion_ok = false;
+ break;
+ }
+
+ iconv_ret = iconv (cd, &inbuf, &inbytesleft,
+ &outbuf, &outbytesleft);
+ if (iconv_ret == (size_t) -1 || inbytesleft != 0)
+ {
+ if (errno == E2BIG)
+ {
+ ret_alloc *= 2;
+ ggc_free (ret);
+ ret = NULL;
+ continue;
+ }
+ else
+ {
+ conversion_ok = false;
+ break;
+ }
+ }
+ else if (iconv_ret != 0) /* Some conversions were non-reversible.  */
+ {
+ conversion_ok = false;
+ break;
+ }
+ /* Return to initial shift state, writing any bytes that
+ requires into the output buffer.  */
+ if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
+ {
+ if (errno == E2BIG)
+ {
+ ret_alloc *= 2;
+ ggc_free (ret);
+ ret = NULL;
+ continue;
+ }
+ else
+ {
+ conversion_ok = false;
+ break;
+ }
+ }
+ *outbuf = 0;
+ break;
+ }
+ iconv_close (cd);
+ if (conversion_ok)
+ return ret;
+ }
+ }
+#endif
+
+ /* Otherwise, convert non-ASCII characters in IDENT to UCNs.  Each
+ escape is ten bytes long, and the buffer allows ten bytes per
+ input byte plus the terminating null.  IDENT has already been
+ validated by the first loop, so decode_utf8_char does not return
+ 0 here.  */
+ {
+ char *ret = GGC_NEWVEC (char, 10 * idlen + 1);
+ char *p = ret;
+ for (i = 0; i < idlen;)
+ {
+ unsigned int c;
+ size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
+ if (utf8_len == 1)
+ *p++ = uid[i];
+ else
+ {
+ sprintf (p, "\\U%08x", c);
+ p += 10;
+ }
+ i += utf8_len;
+ }
+ *p = 0;
+ return ret;
+ }
+}
pp_scalar (PP, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) I)
#define pp_pointer(PP, P) pp_scalar (PP, "%p", P)
-#define pp_identifier(PP, ID) pp_string (PP, ID)
+#define pp_identifier(PP, ID) pp_string (PP, identifier_to_locale (ID))
#define pp_tree_identifier(PP, T) \
- pp_append_text(PP, IDENTIFIER_POINTER (T), \
- IDENTIFIER_POINTER (T) + IDENTIFIER_LENGTH (T))
+ pp_base_tree_identifier (pp_base (PP), T)
#define pp_unsupported_tree(PP, T) \
pp_verbatim (pp_base (PP), "#%qs not supported by %s#", \
extern void pp_base_string (pretty_printer *, const char *);
extern void pp_write_text_to_stream (pretty_printer *pp);
extern void pp_base_maybe_space (pretty_printer *);
+extern void pp_base_tree_identifier (pretty_printer *, tree);
/* Switch into verbatim mode and return the old mode. */
static inline pp_wrapping_mode_t
}
#define pp_set_verbatim_wrapping(PP) pp_set_verbatim_wrapping_ (pp_base (PP))
+extern const char *identifier_to_locale (const char *);
+
#endif /* GCC_PRETTY_PRINT_H */
+2009-05-04 Joseph Myers <joseph@codesourcery.com>
+
+ * gcc.dg/attr-alias-5.c, gcc.dg/ucnid-7.c: New tests.
+
2009-05-03 Eric Botcazou <ebotcazou@adacore.com>
* gcc.target/sparc/fpmul-2.c: Replace final_cleanup with optimized.
--- /dev/null
+/* Verify diagnostics for aliases to strings containing extended
+ identifiers or bad characters. */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99" } */
+/* { dg-require-alias "" } */
+
+void f0 (void) __attribute__((alias("\xa1"))); /* { dg-error "undefined symbol '\\\\241'" } */
+void f1 (void) __attribute__((alias("\u00e9"))); /* { dg-error "undefined symbol '\\\\U000000e9'" } */
+void f2 (void) __attribute__((alias("\uffff"))); /* { dg-error "undefined symbol '\\\\U0000ffff'" } */
+void f3 (void) __attribute__((alias("\U000fffff"))); /* { dg-error "undefined symbol '\\\\U000fffff'" } */
+void f4 (void) __attribute__((alias("\U00ffffff"))); /* { dg-error "undefined symbol '\\\\U00ffffff'" } */
+void f5 (void) __attribute__((alias("\U0fffffff"))); /* { dg-error "undefined symbol '\\\\U0fffffff'" } */
--- /dev/null
+/* Verify diagnostics for extended identifiers refer to UCNs (in the C
+ locale). */
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -fextended-identifiers" } */
+
+void *p = &\u00e9; /* { dg-error "'\\\\U000000e9' undeclared" } */
+void *q = &\u1e00; /* { dg-error "'\\\\U00001e00' undeclared" } */
if (target_decl == NULL)
{
if (! lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)))
- error ("%q+D aliased to undefined symbol %qs",
- p->decl, IDENTIFIER_POINTER (p->target));
+ error ("%q+D aliased to undefined symbol %qE",
+ p->decl, p->target);
}
else if (DECL_EXTERNAL (target_decl)
&& ! lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)))
- error ("%q+D aliased to external symbol %qs",
- p->decl, IDENTIFIER_POINTER (p->target));
+ error ("%q+D aliased to external symbol %qE",
+ p->decl, p->target);
}
}