gdb/charset.c

   1 /* Character set conversion support for GDB.
   2
   3    Copyright (C) 2001-2014 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "defs.h"
  21 #include "charset.h"
  22 #include "gdbcmd.h"
  23 #include "gdb_obstack.h"
  24 #include "gdb_wait.h"
  25 #include "charset-list.h"
  26 #include "vec.h"
  27 #include "environ.h"
  28 #include "arch-utils.h"
  29 #include "gdb_vecs.h"
  30 #include <ctype.h>
  31
  32 #ifdef USE_WIN32API
  33 #include <windows.h>
  34 #endif
  35 \f
  36 /* How GDB's character set support works
  37
  38    GDB has three global settings:
  39
  40    - The `current host character set' is the character set GDB should
  41      use in talking to the user, and which (hopefully) the user's
  42      terminal knows how to display properly.  Most users should not
  43      change this.
  44
  45    - The `current target character set' is the character set the
  46      program being debugged uses.
  47
  48    - The `current target wide character set' is the wide character set
  49      the program being debugged uses, that is, the encoding used for
  50      wchar_t.
  51
  52    There are commands to set each of these, and mechanisms for
  53    choosing reasonable default values.  GDB has a global list of
  54    character sets that it can use as its host or target character
  55    sets.
  56
  57    The header file `charset.h' declares various functions that
  58    different pieces of GDB need to perform tasks like:
  59
  60    - printing target strings and characters to the user's terminal
  61      (mostly target->host conversions),
  62
  63    - building target-appropriate representations of strings and
  64      characters the user enters in expressions (mostly host->target
  65      conversions),
  66
  67      and so on.
  68
  69    To avoid excessive code duplication and maintenance efforts,
  70    GDB simply requires a capable iconv function.  Users on platforms
  71    without a suitable iconv can use the GNU iconv library.  */
  72
  73 \f
  74 #ifdef PHONY_ICONV
  75
  76 /* Provide a phony iconv that does as little as possible.  Also,
  77    arrange for there to be a single available character set.  */
  78
  79 #undef GDB_DEFAULT_HOST_CHARSET
  80 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
  81 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
  82 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
  83 #undef DEFAULT_CHARSET_NAMES
  84 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
  85
  86 #undef iconv_t
  87 #define iconv_t int
  88 #undef iconv_open
  89 #define iconv_open phony_iconv_open
  90 #undef iconv
  91 #define iconv phony_iconv
  92 #undef iconv_close
  93 #define iconv_close phony_iconv_close
  94
  95 #undef ICONV_CONST
  96 #define ICONV_CONST const
  97
  98 /* Some systems don't have EILSEQ, so we define it here, but not as
  99    EINVAL, because callers of `iconv' want to distinguish EINVAL and
 100    EILSEQ.  This is what iconv.h from libiconv does as well.  Note
 101    that wchar.h may also define EILSEQ, so this needs to be after we
 102    include wchar.h, which happens in defs.h through gdb_wchar.h.  */
 103 #ifndef EILSEQ
 104 #define EILSEQ ENOENT
 105 #endif
 106
 107 static iconv_t
 108 phony_iconv_open (const char *to, const char *from)
 109 {
 110   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
 111      We allow conversions to wchar_t and the host charset.  */
 112   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
 113       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
 114     return -1;
 115   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
 116     return -1;
 117
 118   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
 119      used as a flag in calls to iconv.  */
 120   return !strcmp (from, "UTF-32BE");
 121 }
 122
 123 static int
 124 phony_iconv_close (iconv_t arg)
 125 {
 126   return 0;
 127 }
 128
 129 static size_t
 130 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 131              char **outbuf, size_t *outbytesleft)
 132 {
 133   if (utf_flag)
 134     {
 135       while (*inbytesleft >= 4)
 136         {
 137           size_t j;
 138           unsigned long c = 0;
 139
 140           for (j = 0; j < 4; ++j)
 141             {
 142               c <<= 8;
 143               c += (*inbuf)[j] & 0xff;
 144             }
 145
 146           if (c >= 256)
 147             {
 148               errno = EILSEQ;
 149               return -1;
 150             }
 151           **outbuf = c & 0xff;
 152           ++*outbuf;
 153           --*outbytesleft;
 154
 155           ++*inbuf;
 156           *inbytesleft -= 4;
 157         }
 158       if (*inbytesleft < 4)
 159         {
 160           errno = EINVAL;
 161           return -1;
 162         }
 163     }
 164   else
 165     {
 166       /* In all other cases we simply copy input bytes to the
 167          output.  */
 168       size_t amt = *inbytesleft;
 169
 170       if (amt > *outbytesleft)
 171         amt = *outbytesleft;
 172       memcpy (*outbuf, *inbuf, amt);
 173       *inbuf += amt;
 174       *outbuf += amt;
 175       *inbytesleft -= amt;
 176       *outbytesleft -= amt;
 177     }
 178
 179   if (*inbytesleft)
 180     {
 181       errno = E2BIG;
 182       return -1;
 183     }
 184
 185   /* The number of non-reversible conversions -- but they were all
 186      reversible.  */
 187   return 0;
 188 }
 189
 190 #endif
 191
 192
 193 \f
 194 /* The global lists of character sets and translations.  */
 195
 196
 197 #ifndef GDB_DEFAULT_TARGET_CHARSET
 198 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
 199 #endif
 200
 201 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
 202 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
 203 #endif
 204
 205 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
 206 static const char *host_charset_name = "auto";
 207 static void
 208 show_host_charset_name (struct ui_file *file, int from_tty,
 209                         struct cmd_list_element *c,
 210                         const char *value)
 211 {
 212   if (!strcmp (value, "auto"))
 213     fprintf_filtered (file,
 214                       _("The host character set is \"auto; currently %s\".\n"),
 215                       auto_host_charset_name);
 216   else
 217     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
 218 }
 219
 220 static const char *target_charset_name = "auto";
 221 static void
 222 show_target_charset_name (struct ui_file *file, int from_tty,
 223                           struct cmd_list_element *c, const char *value)
 224 {
 225   if (!strcmp (value, "auto"))
 226     fprintf_filtered (file,
 227                       _("The target character set is \"auto; "
 228                         "currently %s\".\n"),
 229                       gdbarch_auto_charset (get_current_arch ()));
 230   else
 231     fprintf_filtered (file, _("The target character set is \"%s\".\n"),
 232                       value);
 233 }
 234
 235 static const char *target_wide_charset_name = "auto";
 236 static void
 237 show_target_wide_charset_name (struct ui_file *file,
 238                                int from_tty,
 239                                struct cmd_list_element *c,
 240                                const char *value)
 241 {
 242   if (!strcmp (value, "auto"))
 243     fprintf_filtered (file,
 244                       _("The target wide character set is \"auto; "
 245                         "currently %s\".\n"),
 246                       gdbarch_auto_wide_charset (get_current_arch ()));
 247   else
 248     fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
 249                       value);
 250 }
 251
 252 static const char *default_charset_names[] =
 253 {
 254   DEFAULT_CHARSET_NAMES
 255   0
 256 };
 257
 258 static const char **charset_enum;
 259
 260 \f
 261 /* If the target wide character set has big- or little-endian
 262    variants, these are the corresponding names.  */
 263 static const char *target_wide_charset_be_name;
 264 static const char *target_wide_charset_le_name;
 265
 266 /* The architecture for which the BE- and LE-names are valid.  */
 267 static struct gdbarch *be_le_arch;
 268
 269 /* A helper function which sets the target wide big- and little-endian
 270    character set names, if possible.  */
 271
 272 static void
 273 set_be_le_names (struct gdbarch *gdbarch)
 274 {
 275   int i, len;
 276   const char *target_wide;
 277
 278   if (be_le_arch == gdbarch)
 279     return;
 280   be_le_arch = gdbarch;
 281
 282   target_wide_charset_le_name = NULL;
 283   target_wide_charset_be_name = NULL;
 284
 285   target_wide = target_wide_charset_name;
 286   if (!strcmp (target_wide, "auto"))
 287     target_wide = gdbarch_auto_wide_charset (gdbarch);
 288
 289   len = strlen (target_wide);
 290   for (i = 0; charset_enum[i]; ++i)
 291     {
 292       if (strncmp (target_wide, charset_enum[i], len))
 293         continue;
 294       if ((charset_enum[i][len] == 'B'
 295            || charset_enum[i][len] == 'L')
 296           && charset_enum[i][len + 1] == 'E'
 297           && charset_enum[i][len + 2] == '\0')
 298         {
 299           if (charset_enum[i][len] == 'B')
 300             target_wide_charset_be_name = charset_enum[i];
 301           else
 302             target_wide_charset_le_name = charset_enum[i];
 303         }
 304     }
 305 }
 306
 307 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
 308    target-wide-charset', 'set charset' sfunc's.  */
 309
 310 static void
 311 validate (struct gdbarch *gdbarch)
 312 {
 313   iconv_t desc;
 314   const char *host_cset = host_charset ();
 315   const char *target_cset = target_charset (gdbarch);
 316   const char *target_wide_cset = target_wide_charset_name;
 317
 318   if (!strcmp (target_wide_cset, "auto"))
 319     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
 320
 321   desc = iconv_open (target_wide_cset, host_cset);
 322   if (desc == (iconv_t) -1)
 323     error (_("Cannot convert between character sets `%s' and `%s'"),
 324            target_wide_cset, host_cset);
 325   iconv_close (desc);
 326
 327   desc = iconv_open (target_cset, host_cset);
 328   if (desc == (iconv_t) -1)
 329     error (_("Cannot convert between character sets `%s' and `%s'"),
 330            target_cset, host_cset);
 331   iconv_close (desc);
 332
 333   /* Clear the cache.  */
 334   be_le_arch = NULL;
 335 }
 336
/* This is the sfunc for the 'set charset' command.  It copies the
   host charset setting into the target charset setting and then
   validates the resulting combination for the current architecture.
   CHARSET, FROM_TTY and C are unused.  */
static void
set_charset_sfunc (char *charset, int from_tty,
                   struct cmd_list_element *c)
{
  /* CAREFUL: set the target charset here as well.  */
  target_charset_name = host_charset_name;
  validate (get_current_arch ());
}
 346
/* 'set host-charset' command sfunc.  We need a wrapper here because
   the function needs to have a specific signature.  It only validates
   the current charset settings against the current architecture;
   CHARSET, FROM_TTY and C are unused.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  validate (get_current_arch ());
}
 355
/* Wrapper for the 'set target-charset' command.  It only validates
   the current charset settings against the current architecture;
   CHARSET, FROM_TTY and C are unused.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  validate (get_current_arch ());
}
 363
/* Wrapper for the 'set target-wide-charset' command.  It only
   validates the current charset settings against the current
   architecture; CHARSET, FROM_TTY and C are unused.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  validate (get_current_arch ());
}
 371
/* sfunc for the 'show charset' command.  Prints the host, target and
   target wide character set settings to FILE, in that order.  NAME is
   unused; the current values of the settings are printed instead.  */
static void
show_charset (struct ui_file *file, int from_tty,
              struct cmd_list_element *c,
              const char *name)
{
  show_host_charset_name (file, from_tty, c, host_charset_name);
  show_target_charset_name (file, from_tty, c, target_charset_name);
  show_target_wide_charset_name (file, from_tty, c,
                                 target_wide_charset_name);
}
 383
 384 \f
 385 /* Accessor functions.  */
 386
 387 const char *
 388 host_charset (void)
 389 {
 390   if (!strcmp (host_charset_name, "auto"))
 391     return auto_host_charset_name;
 392   return host_charset_name;
 393 }
 394
 395 const char *
 396 target_charset (struct gdbarch *gdbarch)
 397 {
 398   if (!strcmp (target_charset_name, "auto"))
 399     return gdbarch_auto_charset (gdbarch);
 400   return target_charset_name;
 401 }
 402
 403 const char *
 404 target_wide_charset (struct gdbarch *gdbarch)
 405 {
 406   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
 407
 408   set_be_le_names (gdbarch);
 409   if (byte_order == BFD_ENDIAN_BIG)
 410     {
 411       if (target_wide_charset_be_name)
 412         return target_wide_charset_be_name;
 413     }
 414   else
 415     {
 416       if (target_wide_charset_le_name)
 417         return target_wide_charset_le_name;
 418     }
 419
 420   if (!strcmp (target_wide_charset_name, "auto"))
 421     return gdbarch_auto_wide_charset (gdbarch);
 422
 423   return target_wide_charset_name;
 424 }
 425
 426 \f
 427 /* Host character set management.  For the time being, we assume that
 428    the host character set is some superset of ASCII.  */
 429
 430 char
 431 host_letter_to_control_character (char c)
 432 {
 433   if (c == '?')
 434     return 0177;
 435   return c & 0237;
 436 }
 437
 438 /* Convert a host character, C, to its hex value.  C must already have
 439    been validated using isxdigit.  */
 440
 441 int
 442 host_hex_value (char c)
 443 {
 444   if (isdigit (c))
 445     return c - '0';
 446   if (c >= 'a' && c <= 'f')
 447     return 10 + c - 'a';
 448   gdb_assert (c >= 'A' && c <= 'F');
 449   return 10 + c - 'A';
 450 }
 451
 452 \f
 453 /* Public character management functions.  */
 454
 455 /* A cleanup function which is run to close an iconv descriptor.  */
 456
 457 static void
 458 cleanup_iconv (void *p)
 459 {
 460   iconv_t *descp = p;
 461   iconv_close (*descp);
 462 }
 463
 464 void
 465 convert_between_encodings (const char *from, const char *to,
 466                            const gdb_byte *bytes, unsigned int num_bytes,
 467                            int width, struct obstack *output,
 468                            enum transliterations translit)
 469 {
 470   iconv_t desc;
 471   struct cleanup *cleanups;
 472   size_t inleft;
 473   ICONV_CONST char *inp;
 474   unsigned int space_request;
 475
 476   /* Often, the host and target charsets will be the same.  */
 477   if (!strcmp (from, to))
 478     {
 479       obstack_grow (output, bytes, num_bytes);
 480       return;
 481     }
 482
 483   desc = iconv_open (to, from);
 484   if (desc == (iconv_t) -1)
 485     perror_with_name (_("Converting character sets"));
 486   cleanups = make_cleanup (cleanup_iconv, &desc);
 487
 488   inleft = num_bytes;
 489   inp = (ICONV_CONST char *) bytes;
 490
 491   space_request = num_bytes;
 492
 493   while (inleft > 0)
 494     {
 495       char *outp;
 496       size_t outleft, r;
 497       int old_size;
 498
 499       old_size = obstack_object_size (output);
 500       obstack_blank (output, space_request);
 501
 502       outp = (char *) obstack_base (output) + old_size;
 503       outleft = space_request;
 504
 505       r = iconv (desc, &inp, &inleft, &outp, &outleft);
 506
 507       /* Now make sure that the object on the obstack only includes
 508          bytes we have converted.  */
 509       obstack_blank (output, - (int) outleft);
 510
 511       if (r == (size_t) -1)
 512         {
 513           switch (errno)
 514             {
 515             case EILSEQ:
 516               {
 517                 int i;
 518
 519                 /* Invalid input sequence.  */
 520                 if (translit == translit_none)
 521                   error (_("Could not convert character "
 522                            "to `%s' character set"), to);
 523
 524                 /* We emit escape sequence for the bytes, skip them,
 525                    and try again.  */
 526                 for (i = 0; i < width; ++i)
 527                   {
 528                     char octal[5];
 529
 530                     xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
 531                     obstack_grow_str (output, octal);
 532
 533                     ++inp;
 534                     --inleft;
 535                   }
 536               }
 537               break;
 538
 539             case E2BIG:
 540               /* We ran out of space in the output buffer.  Make it
 541                  bigger next time around.  */
 542               space_request *= 2;
 543               break;
 544
 545             case EINVAL:
 546               /* Incomplete input sequence.  FIXME: ought to report this
 547                  to the caller somehow.  */
 548               inleft = 0;
 549               break;
 550
 551             default:
 552               perror_with_name (_("Internal error while "
 553                                   "converting character sets"));
 554             }
 555         }
 556     }
 557
 558   do_cleanups (cleanups);
 559 }
 560
 561 \f
 562
/* An iterator that returns host wchar_t's from a target string.  */
struct wchar_iterator
{
  /* The underlying iconv descriptor.  */
  iconv_t desc;

  /* The input string.  This is updated as we convert characters.  */
  const gdb_byte *input;
  /* The number of bytes remaining in the input.  */
  size_t bytes;

  /* The width of an input character.  */
  size_t width;

  /* The output buffer and its size.  The size is counted in
     gdb_wchar_t elements, not bytes.  */
  gdb_wchar_t *out;
  size_t out_size;
};
 581
 582 /* Create a new iterator.  */
 583 struct wchar_iterator *
 584 make_wchar_iterator (const gdb_byte *input, size_t bytes,
 585                      const char *charset, size_t width)
 586 {
 587   struct wchar_iterator *result;
 588   iconv_t desc;
 589
 590   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
 591   if (desc == (iconv_t) -1)
 592     perror_with_name (_("Converting character sets"));
 593
 594   result = XNEW (struct wchar_iterator);
 595   result->desc = desc;
 596   result->input = input;
 597   result->bytes = bytes;
 598   result->width = width;
 599
 600   result->out = XNEW (gdb_wchar_t);
 601   result->out_size = 1;
 602
 603   return result;
 604 }
 605
 606 static void
 607 do_cleanup_iterator (void *p)
 608 {
 609   struct wchar_iterator *iter = p;
 610
 611   iconv_close (iter->desc);
 612   xfree (iter->out);
 613   xfree (iter);
 614 }
 615
/* Register a cleanup that destroys ITER (see do_cleanup_iterator)
   and return it.  */

struct cleanup *
make_cleanup_wchar_iterator (struct wchar_iterator *iter)
{
  return make_cleanup (do_cleanup_iterator, iter);
}
 621
 622 int
 623 wchar_iterate (struct wchar_iterator *iter,
 624                enum wchar_iterate_result *out_result,
 625                gdb_wchar_t **out_chars,
 626                const gdb_byte **ptr,
 627                size_t *len)
 628 {
 629   size_t out_request;
 630
 631   /* Try to convert some characters.  At first we try to convert just
 632      a single character.  The reason for this is that iconv does not
 633      necessarily update its outgoing arguments when it encounters an
 634      invalid input sequence -- but we want to reliably report this to
 635      our caller so it can emit an escape sequence.  */
 636   out_request = 1;
 637   while (iter->bytes > 0)
 638     {
 639       ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
 640       char *outptr = (char *) &iter->out[0];
 641       const gdb_byte *orig_inptr = iter->input;
 642       size_t orig_in = iter->bytes;
 643       size_t out_avail = out_request * sizeof (gdb_wchar_t);
 644       size_t num;
 645       size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
 646
 647       iter->input = (gdb_byte *) inptr;
 648
 649       if (r == (size_t) -1)
 650         {
 651           switch (errno)
 652             {
 653             case EILSEQ:
 654               /* Invalid input sequence.  We still might have
 655                  converted a character; if so, return it.  */
 656               if (out_avail < out_request * sizeof (gdb_wchar_t))
 657                 break;
 658
 659               /* Otherwise skip the first invalid character, and let
 660                  the caller know about it.  */
 661               *out_result = wchar_iterate_invalid;
 662               *ptr = iter->input;
 663               *len = iter->width;
 664               iter->input += iter->width;
 665               iter->bytes -= iter->width;
 666               return 0;
 667
 668             case E2BIG:
 669               /* We ran out of space.  We still might have converted a
 670                  character; if so, return it.  Otherwise, grow the
 671                  buffer and try again.  */
 672               if (out_avail < out_request * sizeof (gdb_wchar_t))
 673                 break;
 674
 675               ++out_request;
 676               if (out_request > iter->out_size)
 677                 {
 678                   iter->out_size = out_request;
 679                   iter->out = xrealloc (iter->out,
 680                                         out_request * sizeof (gdb_wchar_t));
 681                 }
 682               continue;
 683
 684             case EINVAL:
 685               /* Incomplete input sequence.  Let the caller know, and
 686                  arrange for future calls to see EOF.  */
 687               *out_result = wchar_iterate_incomplete;
 688               *ptr = iter->input;
 689               *len = iter->bytes;
 690               iter->bytes = 0;
 691               return 0;
 692
 693             default:
 694               perror_with_name (_("Internal error while "
 695                                   "converting character sets"));
 696             }
 697         }
 698
 699       /* We converted something.  */
 700       num = out_request - out_avail / sizeof (gdb_wchar_t);
 701       *out_result = wchar_iterate_ok;
 702       *out_chars = iter->out;
 703       *ptr = orig_inptr;
 704       *len = orig_in - iter->bytes;
 705       return num;
 706     }
 707
 708   /* Really done.  */
 709   *out_result = wchar_iterate_eof;
 710   return -1;
 711 }
 712
 713 \f
 714 /* The charset.c module initialization function.  */
 715
 716 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
 717
 718 static VEC (char_ptr) *charsets;
 719
 720 #ifdef PHONY_ICONV
 721
/* Fill CHARSETS for the phony iconv: the only name added is the
   default host charset, followed by the terminating NULL entry.  */

static void
find_charset_names (void)
{
  VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
  VEC_safe_push (char_ptr, charsets, NULL);
}
 728
 729 #else /* PHONY_ICONV */
 730
 731 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
 732    provides different symbols in the static and dynamic libraries.
 733    So, configure may see libiconvlist but not iconvlist.  But, calling
 734    iconvlist is the right thing to do and will work.  Hence we do a
 735    check here but unconditionally call iconvlist below.  */
 736 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
 737
 738 /* A helper function that adds some character sets to the vector of
 739    all character sets.  This is a callback function for iconvlist.  */
 740
 741 static int
 742 add_one (unsigned int count, const char *const *names, void *data)
 743 {
 744   unsigned int i;
 745
 746   for (i = 0; i < count; ++i)
 747     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
 748
 749   return 0;
 750 }
 751
/* Fill CHARSETS with every name iconvlist reports (collected by
   add_one), followed by the terminating NULL entry.  */

static void
find_charset_names (void)
{
  iconvlist (add_one, NULL);
  VEC_safe_push (char_ptr, charsets, NULL);
}
 758
 759 #else
 760
 761 /* Return non-zero if LINE (output from iconv) should be ignored.
 762    Older iconv programs (e.g. 2.2.2) include the human readable
 763    introduction even when stdout is not a tty.  Newer versions omit
 764    the intro if stdout is not a tty.  */
 765
 766 static int
 767 ignore_line_p (const char *line)
 768 {
 769   /* This table is used to filter the output.  If this text appears
 770      anywhere in the line, it is ignored (strstr is used).  */
 771   static const char * const ignore_lines[] =
 772     {
 773       "The following",
 774       "not necessarily",
 775       "the FROM and TO",
 776       "listed with several",
 777       NULL
 778     };
 779   int i;
 780
 781   for (i = 0; ignore_lines[i] != NULL; ++i)
 782     {
 783       if (strstr (line, ignore_lines[i]) != NULL)
 784         return 1;
 785     }
 786
 787   return 0;
 788 }
 789
 790 static void
 791 find_charset_names (void)
 792 {
 793   struct pex_obj *child;
 794   char *args[3];
 795   int err, status;
 796   int fail = 1;
 797   int flags;
 798   struct gdb_environ *iconv_env;
 799   char *iconv_program;
 800
 801   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
 802      not a tty.  We need to recognize it and ignore it.  This text is
 803      subject to translation, so force LANGUAGE=C.  */
 804   iconv_env = make_environ ();
 805   init_environ (iconv_env);
 806   set_in_environ (iconv_env, "LANGUAGE", "C");
 807   set_in_environ (iconv_env, "LC_ALL", "C");
 808
 809   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
 810
 811 #ifdef ICONV_BIN
 812   {
 813     char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
 814                                               ICONV_BIN_RELOCATABLE);
 815     iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
 816     xfree (iconv_dir);
 817   }
 818 #else
 819   iconv_program = xstrdup ("iconv");
 820 #endif
 821   args[0] = iconv_program;
 822   args[1] = "-l";
 823   args[2] = NULL;
 824   flags = PEX_STDERR_TO_STDOUT;
 825 #ifndef ICONV_BIN
 826   flags |= PEX_SEARCH;
 827 #endif
 828   /* Note that we simply ignore errors here.  */
 829   if (!pex_run_in_environment (child, flags,
 830                                args[0], args, environ_vector (iconv_env),
 831                                NULL, NULL, &err))
 832     {
 833       FILE *in = pex_read_output (child, 0);
 834
 835       /* POSIX says that iconv -l uses an unspecified format.  We
 836          parse the glibc and libiconv formats; feel free to add others
 837          as needed.  */
 838
 839       while (in != NULL && !feof (in))
 840         {
 841           /* The size of buf is chosen arbitrarily.  */
 842           char buf[1024];
 843           char *start, *r;
 844           int len;
 845
 846           r = fgets (buf, sizeof (buf), in);
 847           if (!r)
 848             break;
 849           len = strlen (r);
 850           if (len <= 3)
 851             continue;
 852           if (ignore_line_p (r))
 853             continue;
 854
 855           /* Strip off the newline.  */
 856           --len;
 857           /* Strip off one or two '/'s.  glibc will print lines like
 858              "8859_7//", but also "10646-1:1993/UCS4/".  */
 859           if (buf[len - 1] == '/')
 860             --len;
 861           if (buf[len - 1] == '/')
 862             --len;
 863           buf[len] = '\0';
 864
 865           /* libiconv will print multiple entries per line, separated
 866              by spaces.  Older iconvs will print multiple entries per
 867              line, indented by two spaces, and separated by ", "
 868              (i.e. the human readable form).  */
 869           start = buf;
 870           while (1)
 871             {
 872               int keep_going;
 873               char *p;
 874
 875               /* Skip leading blanks.  */
 876               for (p = start; *p && *p == ' '; ++p)
 877                 ;
 878               start = p;
 879               /* Find the next space, comma, or end-of-line.  */
 880               for ( ; *p && *p != ' ' && *p != ','; ++p)
 881                 ;
 882               /* Ignore an empty result.  */
 883               if (p == start)
 884                 break;
 885               keep_going = *p;
 886               *p = '\0';
 887               VEC_safe_push (char_ptr, charsets, xstrdup (start));
 888               if (!keep_going)
 889                 break;
 890               /* Skip any extra spaces.  */
 891               for (start = p + 1; *start && *start == ' '; ++start)
 892                 ;
 893             }
 894         }
 895
 896       if (pex_get_status (child, 1, &status)
 897           && WIFEXITED (status) && !WEXITSTATUS (status))
 898         fail = 0;
 899
 900     }
 901
 902   xfree (iconv_program);
 903   pex_free (child);
 904   free_environ (iconv_env);
 905
 906   if (fail)
 907     {
 908       /* Some error occurred, so drop the vector.  */
 909       free_char_ptr_vec (charsets);
 910       charsets = NULL;
 911     }
 912   else
 913     VEC_safe_push (char_ptr, charsets, NULL);
 914 }
 915
 916 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
 917 #endif /* PHONY_ICONV */
 918
/* The "auto" target charset used by default_auto_charset.  It starts
   out as the compile-time default target charset.  */
static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

/* Return the name of the default "auto" target character set.  */

const char *
default_auto_charset (void)
{
  return auto_target_charset_name;
}
 927
/* Return the name of the default "auto" target wide character set,
   which is the compile-time constant
   GDB_DEFAULT_TARGET_WIDE_CHARSET.  */

const char *
default_auto_wide_charset (void)
{
  return GDB_DEFAULT_TARGET_WIDE_CHARSET;
}
 933
 934
 935 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
 936 /* Macro used for UTF or UCS endianness suffix.  */
 937 #if WORDS_BIGENDIAN
 938 #define ENDIAN_SUFFIX "BE"
 939 #else
 940 #define ENDIAN_SUFFIX "LE"
 941 #endif
 942
 943 /* The code below serves to generate a compile time error if
 944    gdb_wchar_t type is not of size 2 nor 4, despite the fact that
 945    macro __STDC_ISO_10646__ is defined.
 946    This is better than a gdb_assert call, because GDB cannot handle
 947    strings correctly if this size is different.  */
 948
 949 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
 950                                        || sizeof (gdb_wchar_t) == 4)
 951                                       ? 1 : -1];
 952
 953 /* intermediate_encoding returns the charset used internally by
 954    GDB to convert between target and host encodings. As the test above
 955    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
 956    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
 957    otherwise an error is generated.  */
 958
 959 const char *
 960 intermediate_encoding (void)
 961 {
 962   iconv_t desc;
 963   static const char *stored_result = NULL;
 964   char *result;
 965
 966   if (stored_result)
 967     return stored_result;
 968   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
 969                        ENDIAN_SUFFIX);
 970   /* Check that the name is supported by iconv_open.  */
 971   desc = iconv_open (result, host_charset ());
 972   if (desc != (iconv_t) -1)
 973     {
 974       iconv_close (desc);
 975       stored_result = result;
 976       return result;
 977     }
 978   /* Not valid, free the allocated memory.  */
 979   xfree (result);
 980   /* Second try, with UCS-2 type.  */
 981   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
 982                        ENDIAN_SUFFIX);
 983   /* Check that the name is supported by iconv_open.  */
 984   desc = iconv_open (result, host_charset ());
 985   if (desc != (iconv_t) -1)
 986     {
 987       iconv_close (desc);
 988       stored_result = result;
 989       return result;
 990     }
 991   /* Not valid, free the allocated memory.  */
 992   xfree (result);
 993   /* No valid charset found, generate error here.  */
 994   error (_("Unable to find a vaild charset for string conversions"));
 995 }
 996
 997 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
 998
 999 void
1000 _initialize_charset (void)
1001 {
1002   /* The first element is always "auto".  */
1003   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
1004   find_charset_names ();
1005
1006   if (VEC_length (char_ptr, charsets) > 1)
1007     charset_enum = (const char **) VEC_address (char_ptr, charsets);
1008   else
1009     charset_enum = default_charset_names;
1010
1011 #ifndef PHONY_ICONV
1012 #ifdef HAVE_LANGINFO_CODESET
1013   /* The result of nl_langinfo may be overwritten later.  This may
1014      leak a little memory, if the user later changes the host charset,
1015      but that doesn't matter much.  */
1016   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1017   /* Solaris will return `646' here -- but the Solaris iconv then does
1018      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
1019      which GNU libiconv doesn't like (infinite loop).  */
1020   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
1021     auto_host_charset_name = "ASCII";
1022   auto_target_charset_name = auto_host_charset_name;
1023 #elif defined (USE_WIN32API)
1024   {
1025     /* "CP" + x<=5 digits + paranoia.  */
1026     static char w32_host_default_charset[16];
1027
1028     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1029               "CP%d", GetACP());
1030     auto_host_charset_name = w32_host_default_charset;
1031     auto_target_charset_name = auto_host_charset_name;
1032   }
1033 #endif
1034 #endif
1035
1036   add_setshow_enum_cmd ("charset", class_support,
1037                         charset_enum, &host_charset_name, _("\
1038 Set the host and target character sets."), _("\
1039 Show the host and target character sets."), _("\
1040 The `host character set' is the one used by the system GDB is running on.\n\
1041 The `target character set' is the one used by the program being debugged.\n\
1042 You may only use supersets of ASCII for your host character set; GDB does\n\
1043 not support any others.\n\
1044 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
1045                         /* Note that the sfunc below needs to set
1046                            target_charset_name, because the 'set
1047                            charset' command sets two variables.  */
1048                         set_charset_sfunc,
1049                         show_charset,
1050                         &setlist, &showlist);
1051
1052   add_setshow_enum_cmd ("host-charset", class_support,
1053                         charset_enum, &host_charset_name, _("\
1054 Set the host character set."), _("\
1055 Show the host character set."), _("\
1056 The `host character set' is the one used by the system GDB is running on.\n\
1057 You may only use supersets of ASCII for your host character set; GDB does\n\
1058 not support any others.\n\
1059 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
1060                         set_host_charset_sfunc,
1061                         show_host_charset_name,
1062                         &setlist, &showlist);
1063
1064   add_setshow_enum_cmd ("target-charset", class_support,
1065                         charset_enum, &target_charset_name, _("\
1066 Set the target character set."), _("\
1067 Show the target character set."), _("\
1068 The `target character set' is the one used by the program being debugged.\n\
1069 GDB translates characters and strings between the host and target\n\
1070 character sets as needed.\n\
1071 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
1072                         set_target_charset_sfunc,
1073                         show_target_charset_name,
1074                         &setlist, &showlist);
1075
1076   add_setshow_enum_cmd ("target-wide-charset", class_support,
1077                         charset_enum, &target_wide_charset_name,
1078                         _("\
1079 Set the target wide character set."), _("\
1080 Show the target wide character set."), _("\
1081 The `target wide character set' is the one used by the program being debugged.\
1082 \nIn particular it is the encoding used by `wchar_t'.\n\
1083 GDB translates characters and strings between the host and target\n\
1084 character sets as needed.\n\
1085 To see a list of the character sets GDB supports, type\n\
1086 `set target-wide-charset'<TAB>"),
1087                         set_target_wide_charset_sfunc,
1088                         show_target_wide_charset_name,
1089                         &setlist, &showlist);
1090 }