gdb/charset.c

   1 /* Character set conversion support for GDB.
   2
   3    Copyright (C) 2001-2014 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "defs.h"
  21 #include "charset.h"
  22 #include "gdbcmd.h"
  23 #include "gdb_obstack.h"
  24 #include "gdb_wait.h"
  25 #include "charset-list.h"
  26 #include "vec.h"
  27 #include "environ.h"
  28 #include "arch-utils.h"
  29 #include "gdb_vecs.h"
  30
  31 #include <string.h>
  32 #include <ctype.h>
  33
  34 #ifdef USE_WIN32API
  35 #include <windows.h>
  36 #endif
  37 \f
  38 /* How GDB's character set support works
  39
  40    GDB has three global settings:
  41
  42    - The `current host character set' is the character set GDB should
  43      use in talking to the user, and which (hopefully) the user's
  44      terminal knows how to display properly.  Most users should not
  45      change this.
  46
  47    - The `current target character set' is the character set the
  48      program being debugged uses.
  49
  50    - The `current target wide character set' is the wide character set
  51      the program being debugged uses, that is, the encoding used for
  52      wchar_t.
  53
  54    There are commands to set each of these, and mechanisms for
  55    choosing reasonable default values.  GDB has a global list of
  56    character sets that it can use as its host or target character
  57    sets.
  58
  59    The header file `charset.h' declares various functions that
  60    different pieces of GDB need to perform tasks like:
  61
  62    - printing target strings and characters to the user's terminal
  63      (mostly target->host conversions),
  64
  65    - building target-appropriate representations of strings and
  66      characters the user enters in expressions (mostly host->target
  67      conversions),
  68
  69      and so on.
  70
  71    To avoid excessive code duplication and maintenance efforts,
  72    GDB simply requires a capable iconv function.  Users on platforms
  73    without a suitable iconv can use the GNU iconv library.  */
  74
  75 \f
  76 #ifdef PHONY_ICONV
  77
  78 /* Provide a phony iconv that does as little as possible.  Also,
  79    arrange for there to be a single available character set.  */
  80
  81 #undef GDB_DEFAULT_HOST_CHARSET
  82 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
  83 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
  84 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
  85 #undef DEFAULT_CHARSET_NAMES
  86 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
  87
  88 #undef iconv_t
  89 #define iconv_t int
  90 #undef iconv_open
  91 #define iconv_open phony_iconv_open
  92 #undef iconv
  93 #define iconv phony_iconv
  94 #undef iconv_close
  95 #define iconv_close phony_iconv_close
  96
  97 #undef ICONV_CONST
  98 #define ICONV_CONST const
  99
 100 /* Some systems don't have EILSEQ, so we define it here, but not as
 101    EINVAL, because callers of `iconv' want to distinguish EINVAL and
 102    EILSEQ.  This is what iconv.h from libiconv does as well.  Note
 103    that wchar.h may also define EILSEQ, so this needs to be after we
 104    include wchar.h, which happens in defs.h through gdb_wchar.h.  */
 105 #ifndef EILSEQ
 106 #define EILSEQ ENOENT
 107 #endif
 108
 109 static iconv_t
 110 phony_iconv_open (const char *to, const char *from)
 111 {
 112   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
 113      We allow conversions to wchar_t and the host charset.  */
 114   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
 115       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
 116     return -1;
 117   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
 118     return -1;
 119
 120   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
 121      used as a flag in calls to iconv.  */
 122   return !strcmp (from, "UTF-32BE");
 123 }
 124
 125 static int
 126 phony_iconv_close (iconv_t arg)
 127 {
 128   return 0;
 129 }
 130
 131 static size_t
 132 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 133              char **outbuf, size_t *outbytesleft)
 134 {
 135   if (utf_flag)
 136     {
 137       while (*inbytesleft >= 4)
 138         {
 139           size_t j;
 140           unsigned long c = 0;
 141
 142           for (j = 0; j < 4; ++j)
 143             {
 144               c <<= 8;
 145               c += (*inbuf)[j] & 0xff;
 146             }
 147
 148           if (c >= 256)
 149             {
 150               errno = EILSEQ;
 151               return -1;
 152             }
 153           **outbuf = c & 0xff;
 154           ++*outbuf;
 155           --*outbytesleft;
 156
 157           ++*inbuf;
 158           *inbytesleft -= 4;
 159         }
 160       if (*inbytesleft < 4)
 161         {
 162           errno = EINVAL;
 163           return -1;
 164         }
 165     }
 166   else
 167     {
 168       /* In all other cases we simply copy input bytes to the
 169          output.  */
 170       size_t amt = *inbytesleft;
 171
 172       if (amt > *outbytesleft)
 173         amt = *outbytesleft;
 174       memcpy (*outbuf, *inbuf, amt);
 175       *inbuf += amt;
 176       *outbuf += amt;
 177       *inbytesleft -= amt;
 178       *outbytesleft -= amt;
 179     }
 180
 181   if (*inbytesleft)
 182     {
 183       errno = E2BIG;
 184       return -1;
 185     }
 186
 187   /* The number of non-reversible conversions -- but they were all
 188      reversible.  */
 189   return 0;
 190 }
 191
 192 #endif
 193
 194
 195 \f
 196 /* The global lists of character sets and translations.  */
 197
 198
 199 #ifndef GDB_DEFAULT_TARGET_CHARSET
 200 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
 201 #endif
 202
 203 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
 204 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
 205 #endif
 206
 207 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
 208 static const char *host_charset_name = "auto";
 209 static void
 210 show_host_charset_name (struct ui_file *file, int from_tty,
 211                         struct cmd_list_element *c,
 212                         const char *value)
 213 {
 214   if (!strcmp (value, "auto"))
 215     fprintf_filtered (file,
 216                       _("The host character set is \"auto; currently %s\".\n"),
 217                       auto_host_charset_name);
 218   else
 219     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
 220 }
 221
 222 static const char *target_charset_name = "auto";
 223 static void
 224 show_target_charset_name (struct ui_file *file, int from_tty,
 225                           struct cmd_list_element *c, const char *value)
 226 {
 227   if (!strcmp (value, "auto"))
 228     fprintf_filtered (file,
 229                       _("The target character set is \"auto; "
 230                         "currently %s\".\n"),
 231                       gdbarch_auto_charset (get_current_arch ()));
 232   else
 233     fprintf_filtered (file, _("The target character set is \"%s\".\n"),
 234                       value);
 235 }
 236
 237 static const char *target_wide_charset_name = "auto";
 238 static void
 239 show_target_wide_charset_name (struct ui_file *file,
 240                                int from_tty,
 241                                struct cmd_list_element *c,
 242                                const char *value)
 243 {
 244   if (!strcmp (value, "auto"))
 245     fprintf_filtered (file,
 246                       _("The target wide character set is \"auto; "
 247                         "currently %s\".\n"),
 248                       gdbarch_auto_wide_charset (get_current_arch ()));
 249   else
 250     fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
 251                       value);
 252 }
 253
 254 static const char *default_charset_names[] =
 255 {
 256   DEFAULT_CHARSET_NAMES
 257   0
 258 };
 259
 260 static const char **charset_enum;
 261
 262 \f
 263 /* If the target wide character set has big- or little-endian
 264    variants, these are the corresponding names.  */
 265 static const char *target_wide_charset_be_name;
 266 static const char *target_wide_charset_le_name;
 267
 268 /* The architecture for which the BE- and LE-names are valid.  */
 269 static struct gdbarch *be_le_arch;
 270
 271 /* A helper function which sets the target wide big- and little-endian
 272    character set names, if possible.  */
 273
 274 static void
 275 set_be_le_names (struct gdbarch *gdbarch)
 276 {
 277   int i, len;
 278   const char *target_wide;
 279
 280   if (be_le_arch == gdbarch)
 281     return;
 282   be_le_arch = gdbarch;
 283
 284   target_wide_charset_le_name = NULL;
 285   target_wide_charset_be_name = NULL;
 286
 287   target_wide = target_wide_charset_name;
 288   if (!strcmp (target_wide, "auto"))
 289     target_wide = gdbarch_auto_wide_charset (gdbarch);
 290
 291   len = strlen (target_wide);
 292   for (i = 0; charset_enum[i]; ++i)
 293     {
 294       if (strncmp (target_wide, charset_enum[i], len))
 295         continue;
 296       if ((charset_enum[i][len] == 'B'
 297            || charset_enum[i][len] == 'L')
 298           && charset_enum[i][len + 1] == 'E'
 299           && charset_enum[i][len + 2] == '\0')
 300         {
 301           if (charset_enum[i][len] == 'B')
 302             target_wide_charset_be_name = charset_enum[i];
 303           else
 304             target_wide_charset_le_name = charset_enum[i];
 305         }
 306     }
 307 }
 308
 309 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
 310    target-wide-charset', 'set charset' sfunc's.  */
 311
 312 static void
 313 validate (struct gdbarch *gdbarch)
 314 {
 315   iconv_t desc;
 316   const char *host_cset = host_charset ();
 317   const char *target_cset = target_charset (gdbarch);
 318   const char *target_wide_cset = target_wide_charset_name;
 319
 320   if (!strcmp (target_wide_cset, "auto"))
 321     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
 322
 323   desc = iconv_open (target_wide_cset, host_cset);
 324   if (desc == (iconv_t) -1)
 325     error (_("Cannot convert between character sets `%s' and `%s'"),
 326            target_wide_cset, host_cset);
 327   iconv_close (desc);
 328
 329   desc = iconv_open (target_cset, host_cset);
 330   if (desc == (iconv_t) -1)
 331     error (_("Cannot convert between character sets `%s' and `%s'"),
 332            target_cset, host_cset);
 333   iconv_close (desc);
 334
 335   /* Clear the cache.  */
 336   be_le_arch = NULL;
 337 }
 338
 339 /* This is the sfunc for the 'set charset' command.  */
 340 static void
 341 set_charset_sfunc (char *charset, int from_tty,
 342                    struct cmd_list_element *c)
 343 {
 344   /* CAREFUL: set the target charset here as well.  */
 345   target_charset_name = host_charset_name;
 346   validate (get_current_arch ());
 347 }
 348
 349 /* 'set host-charset' command sfunc.  We need a wrapper here because
 350    the function needs to have a specific signature.  */
 351 static void
 352 set_host_charset_sfunc (char *charset, int from_tty,
 353                         struct cmd_list_element *c)
 354 {
 355   validate (get_current_arch ());
 356 }
 357
 358 /* Wrapper for the 'set target-charset' command.  */
 359 static void
 360 set_target_charset_sfunc (char *charset, int from_tty,
 361                           struct cmd_list_element *c)
 362 {
 363   validate (get_current_arch ());
 364 }
 365
 366 /* Wrapper for the 'set target-wide-charset' command.  */
 367 static void
 368 set_target_wide_charset_sfunc (char *charset, int from_tty,
 369                                struct cmd_list_element *c)
 370 {
 371   validate (get_current_arch ());
 372 }
 373
/* sfunc for the 'show charset' command.  Reports all three charset
   settings -- host, target and target wide -- to FILE by calling the
   individual show functions with the current value of each setting.
   NAME is unused.  */
static void
show_charset (struct ui_file *file, int from_tty,
              struct cmd_list_element *c,
              const char *name)
{
  show_host_charset_name (file, from_tty, c, host_charset_name);
  show_target_charset_name (file, from_tty, c, target_charset_name);
  show_target_wide_charset_name (file, from_tty, c,
                                 target_wide_charset_name);
}
 385
 386 \f
 387 /* Accessor functions.  */
 388
 389 const char *
 390 host_charset (void)
 391 {
 392   if (!strcmp (host_charset_name, "auto"))
 393     return auto_host_charset_name;
 394   return host_charset_name;
 395 }
 396
 397 const char *
 398 target_charset (struct gdbarch *gdbarch)
 399 {
 400   if (!strcmp (target_charset_name, "auto"))
 401     return gdbarch_auto_charset (gdbarch);
 402   return target_charset_name;
 403 }
 404
 405 const char *
 406 target_wide_charset (struct gdbarch *gdbarch)
 407 {
 408   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
 409
 410   set_be_le_names (gdbarch);
 411   if (byte_order == BFD_ENDIAN_BIG)
 412     {
 413       if (target_wide_charset_be_name)
 414         return target_wide_charset_be_name;
 415     }
 416   else
 417     {
 418       if (target_wide_charset_le_name)
 419         return target_wide_charset_le_name;
 420     }
 421
 422   if (!strcmp (target_wide_charset_name, "auto"))
 423     return gdbarch_auto_wide_charset (gdbarch);
 424
 425   return target_wide_charset_name;
 426 }
 427
 428 \f
 429 /* Host character set management.  For the time being, we assume that
 430    the host character set is some superset of ASCII.  */
 431
 432 char
 433 host_letter_to_control_character (char c)
 434 {
 435   if (c == '?')
 436     return 0177;
 437   return c & 0237;
 438 }
 439
 440 /* Convert a host character, C, to its hex value.  C must already have
 441    been validated using isxdigit.  */
 442
 443 int
 444 host_hex_value (char c)
 445 {
 446   if (isdigit (c))
 447     return c - '0';
 448   if (c >= 'a' && c <= 'f')
 449     return 10 + c - 'a';
 450   gdb_assert (c >= 'A' && c <= 'F');
 451   return 10 + c - 'A';
 452 }
 453
 454 \f
 455 /* Public character management functions.  */
 456
 457 /* A cleanup function which is run to close an iconv descriptor.  */
 458
 459 static void
 460 cleanup_iconv (void *p)
 461 {
 462   iconv_t *descp = p;
 463   iconv_close (*descp);
 464 }
 465
 466 void
 467 convert_between_encodings (const char *from, const char *to,
 468                            const gdb_byte *bytes, unsigned int num_bytes,
 469                            int width, struct obstack *output,
 470                            enum transliterations translit)
 471 {
 472   iconv_t desc;
 473   struct cleanup *cleanups;
 474   size_t inleft;
 475   ICONV_CONST char *inp;
 476   unsigned int space_request;
 477
 478   /* Often, the host and target charsets will be the same.  */
 479   if (!strcmp (from, to))
 480     {
 481       obstack_grow (output, bytes, num_bytes);
 482       return;
 483     }
 484
 485   desc = iconv_open (to, from);
 486   if (desc == (iconv_t) -1)
 487     perror_with_name (_("Converting character sets"));
 488   cleanups = make_cleanup (cleanup_iconv, &desc);
 489
 490   inleft = num_bytes;
 491   inp = (ICONV_CONST char *) bytes;
 492
 493   space_request = num_bytes;
 494
 495   while (inleft > 0)
 496     {
 497       char *outp;
 498       size_t outleft, r;
 499       int old_size;
 500
 501       old_size = obstack_object_size (output);
 502       obstack_blank (output, space_request);
 503
 504       outp = (char *) obstack_base (output) + old_size;
 505       outleft = space_request;
 506
 507       r = iconv (desc, &inp, &inleft, &outp, &outleft);
 508
 509       /* Now make sure that the object on the obstack only includes
 510          bytes we have converted.  */
 511       obstack_blank (output, - (int) outleft);
 512
 513       if (r == (size_t) -1)
 514         {
 515           switch (errno)
 516             {
 517             case EILSEQ:
 518               {
 519                 int i;
 520
 521                 /* Invalid input sequence.  */
 522                 if (translit == translit_none)
 523                   error (_("Could not convert character "
 524                            "to `%s' character set"), to);
 525
 526                 /* We emit escape sequence for the bytes, skip them,
 527                    and try again.  */
 528                 for (i = 0; i < width; ++i)
 529                   {
 530                     char octal[5];
 531
 532                     xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
 533                     obstack_grow_str (output, octal);
 534
 535                     ++inp;
 536                     --inleft;
 537                   }
 538               }
 539               break;
 540
 541             case E2BIG:
 542               /* We ran out of space in the output buffer.  Make it
 543                  bigger next time around.  */
 544               space_request *= 2;
 545               break;
 546
 547             case EINVAL:
 548               /* Incomplete input sequence.  FIXME: ought to report this
 549                  to the caller somehow.  */
 550               inleft = 0;
 551               break;
 552
 553             default:
 554               perror_with_name (_("Internal error while "
 555                                   "converting character sets"));
 556             }
 557         }
 558     }
 559
 560   do_cleanups (cleanups);
 561 }
 562
 563 \f
 564
/* An iterator that returns host wchar_t's from a target string.  */
struct wchar_iterator
{
  /* The underlying iconv descriptor.  */
  iconv_t desc;

  /* The input string.  This pointer is advanced as characters are
     converted.  */
  const gdb_byte *input;
  /* The number of bytes remaining in the input.  */
  size_t bytes;

  /* The width, in bytes, of an input character.  */
  size_t width;

  /* The output buffer and its size, counted in gdb_wchar_t
     elements.  */
  gdb_wchar_t *out;
  size_t out_size;
};
 583
 584 /* Create a new iterator.  */
 585 struct wchar_iterator *
 586 make_wchar_iterator (const gdb_byte *input, size_t bytes,
 587                      const char *charset, size_t width)
 588 {
 589   struct wchar_iterator *result;
 590   iconv_t desc;
 591
 592   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
 593   if (desc == (iconv_t) -1)
 594     perror_with_name (_("Converting character sets"));
 595
 596   result = XNEW (struct wchar_iterator);
 597   result->desc = desc;
 598   result->input = input;
 599   result->bytes = bytes;
 600   result->width = width;
 601
 602   result->out = XNEW (gdb_wchar_t);
 603   result->out_size = 1;
 604
 605   return result;
 606 }
 607
 608 static void
 609 do_cleanup_iterator (void *p)
 610 {
 611   struct wchar_iterator *iter = p;
 612
 613   iconv_close (iter->desc);
 614   xfree (iter->out);
 615   xfree (iter);
 616 }
 617
/* Register a cleanup that runs do_cleanup_iterator on ITER, and
   return the cleanup handle produced by make_cleanup.  */

struct cleanup *
make_cleanup_wchar_iterator (struct wchar_iterator *iter)
{
  return make_cleanup (do_cleanup_iterator, iter);
}
 623
 624 int
 625 wchar_iterate (struct wchar_iterator *iter,
 626                enum wchar_iterate_result *out_result,
 627                gdb_wchar_t **out_chars,
 628                const gdb_byte **ptr,
 629                size_t *len)
 630 {
 631   size_t out_request;
 632
 633   /* Try to convert some characters.  At first we try to convert just
 634      a single character.  The reason for this is that iconv does not
 635      necessarily update its outgoing arguments when it encounters an
 636      invalid input sequence -- but we want to reliably report this to
 637      our caller so it can emit an escape sequence.  */
 638   out_request = 1;
 639   while (iter->bytes > 0)
 640     {
 641       ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
 642       char *outptr = (char *) &iter->out[0];
 643       const gdb_byte *orig_inptr = iter->input;
 644       size_t orig_in = iter->bytes;
 645       size_t out_avail = out_request * sizeof (gdb_wchar_t);
 646       size_t num;
 647       size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
 648
 649       iter->input = (gdb_byte *) inptr;
 650
 651       if (r == (size_t) -1)
 652         {
 653           switch (errno)
 654             {
 655             case EILSEQ:
 656               /* Invalid input sequence.  We still might have
 657                  converted a character; if so, return it.  */
 658               if (out_avail < out_request * sizeof (gdb_wchar_t))
 659                 break;
 660
 661               /* Otherwise skip the first invalid character, and let
 662                  the caller know about it.  */
 663               *out_result = wchar_iterate_invalid;
 664               *ptr = iter->input;
 665               *len = iter->width;
 666               iter->input += iter->width;
 667               iter->bytes -= iter->width;
 668               return 0;
 669
 670             case E2BIG:
 671               /* We ran out of space.  We still might have converted a
 672                  character; if so, return it.  Otherwise, grow the
 673                  buffer and try again.  */
 674               if (out_avail < out_request * sizeof (gdb_wchar_t))
 675                 break;
 676
 677               ++out_request;
 678               if (out_request > iter->out_size)
 679                 {
 680                   iter->out_size = out_request;
 681                   iter->out = xrealloc (iter->out,
 682                                         out_request * sizeof (gdb_wchar_t));
 683                 }
 684               continue;
 685
 686             case EINVAL:
 687               /* Incomplete input sequence.  Let the caller know, and
 688                  arrange for future calls to see EOF.  */
 689               *out_result = wchar_iterate_incomplete;
 690               *ptr = iter->input;
 691               *len = iter->bytes;
 692               iter->bytes = 0;
 693               return 0;
 694
 695             default:
 696               perror_with_name (_("Internal error while "
 697                                   "converting character sets"));
 698             }
 699         }
 700
 701       /* We converted something.  */
 702       num = out_request - out_avail / sizeof (gdb_wchar_t);
 703       *out_result = wchar_iterate_ok;
 704       *out_chars = iter->out;
 705       *ptr = orig_inptr;
 706       *len = orig_in - iter->bytes;
 707       return num;
 708     }
 709
 710   /* Really done.  */
 711   *out_result = wchar_iterate_eof;
 712   return -1;
 713 }
 714
 715 \f
/* The charset.c module initialization function.  */

extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */

/* The vector of character set names.  _initialize_charset pushes
   "auto" first, and find_charset_names appends the names it finds
   followed by a terminating NULL.  */
static VEC (char_ptr) *charsets;
 721
 722 #ifdef PHONY_ICONV
 723
/* Variant of find_charset_names for the phony iconv: append
   GDB_DEFAULT_HOST_CHARSET, and then the terminating NULL, to
   CHARSETS.  */

static void
find_charset_names (void)
{
  VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
  VEC_safe_push (char_ptr, charsets, NULL);
}
 730
 731 #else /* PHONY_ICONV */
 732
 733 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
 734    provides different symbols in the static and dynamic libraries.
 735    So, configure may see libiconvlist but not iconvlist.  But, calling
 736    iconvlist is the right thing to do and will work.  Hence we do a
 737    check here but unconditionally call iconvlist below.  */
 738 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
 739
 740 /* A helper function that adds some character sets to the vector of
 741    all character sets.  This is a callback function for iconvlist.  */
 742
 743 static int
 744 add_one (unsigned int count, const char *const *names, void *data)
 745 {
 746   unsigned int i;
 747
 748   for (i = 0; i < count; ++i)
 749     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
 750
 751   return 0;
 752 }
 753
/* Variant of find_charset_names that uses iconvlist: add_one
   appends every reported name to CHARSETS, after which the
   terminating NULL is appended.  */

static void
find_charset_names (void)
{
  iconvlist (add_one, NULL);
  VEC_safe_push (char_ptr, charsets, NULL);
}
 760
 761 #else
 762
 763 /* Return non-zero if LINE (output from iconv) should be ignored.
 764    Older iconv programs (e.g. 2.2.2) include the human readable
 765    introduction even when stdout is not a tty.  Newer versions omit
 766    the intro if stdout is not a tty.  */
 767
 768 static int
 769 ignore_line_p (const char *line)
 770 {
 771   /* This table is used to filter the output.  If this text appears
 772      anywhere in the line, it is ignored (strstr is used).  */
 773   static const char * const ignore_lines[] =
 774     {
 775       "The following",
 776       "not necessarily",
 777       "the FROM and TO",
 778       "listed with several",
 779       NULL
 780     };
 781   int i;
 782
 783   for (i = 0; ignore_lines[i] != NULL; ++i)
 784     {
 785       if (strstr (line, ignore_lines[i]) != NULL)
 786         return 1;
 787     }
 788
 789   return 0;
 790 }
 791
 792 static void
 793 find_charset_names (void)
 794 {
 795   struct pex_obj *child;
 796   char *args[3];
 797   int err, status;
 798   int fail = 1;
 799   int flags;
 800   struct gdb_environ *iconv_env;
 801   char *iconv_program;
 802
 803   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
 804      not a tty.  We need to recognize it and ignore it.  This text is
 805      subject to translation, so force LANGUAGE=C.  */
 806   iconv_env = make_environ ();
 807   init_environ (iconv_env);
 808   set_in_environ (iconv_env, "LANGUAGE", "C");
 809   set_in_environ (iconv_env, "LC_ALL", "C");
 810
 811   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
 812
 813 #ifdef ICONV_BIN
 814   {
 815     char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
 816                                               ICONV_BIN_RELOCATABLE);
 817     iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
 818     xfree (iconv_dir);
 819   }
 820 #else
 821   iconv_program = xstrdup ("iconv");
 822 #endif
 823   args[0] = iconv_program;
 824   args[1] = "-l";
 825   args[2] = NULL;
 826   flags = PEX_STDERR_TO_STDOUT;
 827 #ifndef ICONV_BIN
 828   flags |= PEX_SEARCH;
 829 #endif
 830   /* Note that we simply ignore errors here.  */
 831   if (!pex_run_in_environment (child, flags,
 832                                args[0], args, environ_vector (iconv_env),
 833                                NULL, NULL, &err))
 834     {
 835       FILE *in = pex_read_output (child, 0);
 836
 837       /* POSIX says that iconv -l uses an unspecified format.  We
 838          parse the glibc and libiconv formats; feel free to add others
 839          as needed.  */
 840
 841       while (in != NULL && !feof (in))
 842         {
 843           /* The size of buf is chosen arbitrarily.  */
 844           char buf[1024];
 845           char *start, *r;
 846           int len;
 847
 848           r = fgets (buf, sizeof (buf), in);
 849           if (!r)
 850             break;
 851           len = strlen (r);
 852           if (len <= 3)
 853             continue;
 854           if (ignore_line_p (r))
 855             continue;
 856
 857           /* Strip off the newline.  */
 858           --len;
 859           /* Strip off one or two '/'s.  glibc will print lines like
 860              "8859_7//", but also "10646-1:1993/UCS4/".  */
 861           if (buf[len - 1] == '/')
 862             --len;
 863           if (buf[len - 1] == '/')
 864             --len;
 865           buf[len] = '\0';
 866
 867           /* libiconv will print multiple entries per line, separated
 868              by spaces.  Older iconvs will print multiple entries per
 869              line, indented by two spaces, and separated by ", "
 870              (i.e. the human readable form).  */
 871           start = buf;
 872           while (1)
 873             {
 874               int keep_going;
 875               char *p;
 876
 877               /* Skip leading blanks.  */
 878               for (p = start; *p && *p == ' '; ++p)
 879                 ;
 880               start = p;
 881               /* Find the next space, comma, or end-of-line.  */
 882               for ( ; *p && *p != ' ' && *p != ','; ++p)
 883                 ;
 884               /* Ignore an empty result.  */
 885               if (p == start)
 886                 break;
 887               keep_going = *p;
 888               *p = '\0';
 889               VEC_safe_push (char_ptr, charsets, xstrdup (start));
 890               if (!keep_going)
 891                 break;
 892               /* Skip any extra spaces.  */
 893               for (start = p + 1; *start && *start == ' '; ++start)
 894                 ;
 895             }
 896         }
 897
 898       if (pex_get_status (child, 1, &status)
 899           && WIFEXITED (status) && !WEXITSTATUS (status))
 900         fail = 0;
 901
 902     }
 903
 904   xfree (iconv_program);
 905   pex_free (child);
 906   free_environ (iconv_env);
 907
 908   if (fail)
 909     {
 910       /* Some error occurred, so drop the vector.  */
 911       free_char_ptr_vec (charsets);
 912       charsets = NULL;
 913     }
 914   else
 915     VEC_safe_push (char_ptr, charsets, NULL);
 916 }
 917
 918 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
 919 #endif /* PHONY_ICONV */
 920
/* The "auto" target charset used by default_auto_charset.  It
   starts out as GDB_DEFAULT_TARGET_CHARSET.  */
static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

/* Return the target character set that "auto" stands for by
   default, i.e. the current value of auto_target_charset_name.  */

const char *
default_auto_charset (void)
{
  return auto_target_charset_name;
}
 929
/* Return the target wide character set that "auto" stands for by
   default, which is always GDB_DEFAULT_TARGET_WIDE_CHARSET.  */

const char *
default_auto_wide_charset (void)
{
  return GDB_DEFAULT_TARGET_WIDE_CHARSET;
}
 935
 936
 937 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
 938 /* Macro used for UTF or UCS endianness suffix.  */
 939 #if WORDS_BIGENDIAN
 940 #define ENDIAN_SUFFIX "BE"
 941 #else
 942 #define ENDIAN_SUFFIX "LE"
 943 #endif
 944
/* The code below serves to generate a compile time error if
   gdb_wchar_t type is not of size 2 nor 4, despite the fact that
   macro __STDC_ISO_10646__ is defined.
   This is better than a gdb_assert call, because GDB cannot handle
   strings correctly if this size is different.

   The check works by declaring an array whose size is 1 when the
   condition holds and -1, which is not a valid array size, when it
   does not.  */

extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
                                       || sizeof (gdb_wchar_t) == 4)
                                      ? 1 : -1];
 954
 955 /* intermediate_encoding returns the charset used internally by
 956    GDB to convert between target and host encodings. As the test above
 957    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
 958    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
 959    otherwise an error is generated.  */
 960
 961 const char *
 962 intermediate_encoding (void)
 963 {
 964   iconv_t desc;
 965   static const char *stored_result = NULL;
 966   char *result;
 967
 968   if (stored_result)
 969     return stored_result;
 970   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
 971                        ENDIAN_SUFFIX);
 972   /* Check that the name is supported by iconv_open.  */
 973   desc = iconv_open (result, host_charset ());
 974   if (desc != (iconv_t) -1)
 975     {
 976       iconv_close (desc);
 977       stored_result = result;
 978       return result;
 979     }
 980   /* Not valid, free the allocated memory.  */
 981   xfree (result);
 982   /* Second try, with UCS-2 type.  */
 983   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
 984                        ENDIAN_SUFFIX);
 985   /* Check that the name is supported by iconv_open.  */
 986   desc = iconv_open (result, host_charset ());
 987   if (desc != (iconv_t) -1)
 988     {
 989       iconv_close (desc);
 990       stored_result = result;
 991       return result;
 992     }
 993   /* Not valid, free the allocated memory.  */
 994   xfree (result);
 995   /* No valid charset found, generate error here.  */
 996   error (_("Unable to find a vaild charset for string conversions"));
 997 }
 998
 999 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
1000
1001 void
1002 _initialize_charset (void)
1003 {
1004   /* The first element is always "auto".  */
1005   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
1006   find_charset_names ();
1007
1008   if (VEC_length (char_ptr, charsets) > 1)
1009     charset_enum = (const char **) VEC_address (char_ptr, charsets);
1010   else
1011     charset_enum = default_charset_names;
1012
1013 #ifndef PHONY_ICONV
1014 #ifdef HAVE_LANGINFO_CODESET
1015   /* The result of nl_langinfo may be overwritten later.  This may
1016      leak a little memory, if the user later changes the host charset,
1017      but that doesn't matter much.  */
1018   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1019   /* Solaris will return `646' here -- but the Solaris iconv then does
1020      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
1021      which GNU libiconv doesn't like (infinite loop).  */
1022   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
1023     auto_host_charset_name = "ASCII";
1024   auto_target_charset_name = auto_host_charset_name;
1025 #elif defined (USE_WIN32API)
1026   {
1027     /* "CP" + x<=5 digits + paranoia.  */
1028     static char w32_host_default_charset[16];
1029
1030     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1031               "CP%d", GetACP());
1032     auto_host_charset_name = w32_host_default_charset;
1033     auto_target_charset_name = auto_host_charset_name;
1034   }
1035 #endif
1036 #endif
1037
1038   add_setshow_enum_cmd ("charset", class_support,
1039                         charset_enum, &host_charset_name, _("\
1040 Set the host and target character sets."), _("\
1041 Show the host and target character sets."), _("\
1042 The `host character set' is the one used by the system GDB is running on.\n\
1043 The `target character set' is the one used by the program being debugged.\n\
1044 You may only use supersets of ASCII for your host character set; GDB does\n\
1045 not support any others.\n\
1046 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
1047                         /* Note that the sfunc below needs to set
1048                            target_charset_name, because the 'set
1049                            charset' command sets two variables.  */
1050                         set_charset_sfunc,
1051                         show_charset,
1052                         &setlist, &showlist);
1053
1054   add_setshow_enum_cmd ("host-charset", class_support,
1055                         charset_enum, &host_charset_name, _("\
1056 Set the host character set."), _("\
1057 Show the host character set."), _("\
1058 The `host character set' is the one used by the system GDB is running on.\n\
1059 You may only use supersets of ASCII for your host character set; GDB does\n\
1060 not support any others.\n\
1061 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
1062                         set_host_charset_sfunc,
1063                         show_host_charset_name,
1064                         &setlist, &showlist);
1065
1066   add_setshow_enum_cmd ("target-charset", class_support,
1067                         charset_enum, &target_charset_name, _("\
1068 Set the target character set."), _("\
1069 Show the target character set."), _("\
1070 The `target character set' is the one used by the program being debugged.\n\
1071 GDB translates characters and strings between the host and target\n\
1072 character sets as needed.\n\
1073 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
1074                         set_target_charset_sfunc,
1075                         show_target_charset_name,
1076                         &setlist, &showlist);
1077
1078   add_setshow_enum_cmd ("target-wide-charset", class_support,
1079                         charset_enum, &target_wide_charset_name,
1080                         _("\
1081 Set the target wide character set."), _("\
1082 Show the target wide character set."), _("\
1083 The `target wide character set' is the one used by the program being debugged.\
1084 \nIn particular it is the encoding used by `wchar_t'.\n\
1085 GDB translates characters and strings between the host and target\n\
1086 character sets as needed.\n\
1087 To see a list of the character sets GDB supports, type\n\
1088 `set target-wide-charset'<TAB>"),
1089                         set_target_wide_charset_sfunc,
1090                         show_target_wide_charset_name,
1091                         &setlist, &showlist);
1092 }