gdb/charset.c

   1 /* Character set conversion support for GDB.
   2
   3    Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
   4    Free Software Foundation, Inc.
   5
   6    This file is part of GDB.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 #include "defs.h"
  22 #include "charset.h"
  23 #include "gdbcmd.h"
  24 #include "gdb_assert.h"
  25 #include "gdb_obstack.h"
  26 #include "gdb_wait.h"
  27 #include "charset-list.h"
  28 #include "vec.h"
  29 #include "environ.h"
  30
  31 #include <stddef.h>
  32 #include "gdb_string.h"
  33 #include <ctype.h>
  34
  35 \f
  36 /* How GDB's character set support works
  37
  38    GDB has three global settings:
  39
  40    - The `current host character set' is the character set GDB should
  41      use in talking to the user, and which (hopefully) the user's
  42      terminal knows how to display properly.  Most users should not
  43      change this.
  44
  45    - The `current target character set' is the character set the
  46      program being debugged uses.
  47
  48    - The `current target wide character set' is the wide character set
  49      the program being debugged uses, that is, the encoding used for
  50      wchar_t.
  51
  52    There are commands to set each of these, and mechanisms for
  53    choosing reasonable default values.  GDB has a global list of
  54    character sets that it can use as its host or target character
  55    sets.
  56
  57    The header file `charset.h' declares various functions that
  58    different pieces of GDB need to perform tasks like:
  59
  60    - printing target strings and characters to the user's terminal
  61      (mostly target->host conversions),
  62
  63    - building target-appropriate representations of strings and
  64      characters the user enters in expressions (mostly host->target
  65      conversions),
  66
  67      and so on.
  68
  69    To avoid excessive code duplication and maintenance efforts,
  70    GDB simply requires a capable iconv function.  Users on platforms
  71    without a suitable iconv can use the GNU iconv library.  */
  72
  73 \f
  74 #ifdef PHONY_ICONV
  75
  76 /* Provide a phony iconv that does as little as possible.  Also,
  77    arrange for there to be a single available character set.  */
  78
  79 #undef GDB_DEFAULT_HOST_CHARSET
  80 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
  81 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
  82 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
  83 #undef DEFAULT_CHARSET_NAMES
  84 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
  85
  86 #undef iconv_t
  87 #define iconv_t int
  88 #undef iconv_open
  89 #undef iconv
  90 #undef iconv_close
  91
  92 #undef ICONV_CONST
  93 #define ICONV_CONST const
  94
  95 /* Some systems don't have EILSEQ, so we define it here, but not as
  96    EINVAL, because callers of `iconv' want to distinguish EINVAL and
  97    EILSEQ.  This is what iconv.h from libiconv does as well.  Note
  98    that wchar.h may also define EILSEQ, so this needs to be after we
  99    include wchar.h, which happens in defs.h through gdb_wchar.h.  */
 100 #ifndef EILSEQ
 101 #define EILSEQ ENOENT
 102 #endif
 103
 104 iconv_t
 105 iconv_open (const char *to, const char *from)
 106 {
 107   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
 108      We allow conversions to wchar_t and the host charset.  */
 109   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
 110       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
 111     return -1;
 112   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
 113     return -1;
 114
 115   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
 116      used as a flag in calls to iconv.  */
 117   return !strcmp (from, "UTF-32BE");
 118 }
 119
 120 int
 121 iconv_close (iconv_t arg)
 122 {
 123   return 0;
 124 }
 125
 126 size_t
 127 iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 128        char **outbuf, size_t *outbytesleft)
 129 {
 130   if (utf_flag)
 131     {
 132       while (*inbytesleft >= 4)
 133         {
 134           size_t j;
 135           unsigned long c = 0;
 136
 137           for (j = 0; j < 4; ++j)
 138             {
 139               c <<= 8;
 140               c += (*inbuf)[j] & 0xff;
 141             }
 142
 143           if (c >= 256)
 144             {
 145               errno = EILSEQ;
 146               return -1;
 147             }
 148           **outbuf = c & 0xff;
 149           ++*outbuf;
 150           --*outbytesleft;
 151
 152           ++*inbuf;
 153           *inbytesleft -= 4;
 154         }
 155       if (*inbytesleft < 4)
 156         {
 157           errno = EINVAL;
 158           return -1;
 159         }
 160     }
 161   else
 162     {
 163       /* In all other cases we simply copy input bytes to the
 164          output.  */
 165       size_t amt = *inbytesleft;
 166       if (amt > *outbytesleft)
 167         amt = *outbytesleft;
 168       memcpy (*outbuf, *inbuf, amt);
 169       *inbuf += amt;
 170       *outbuf += amt;
 171       *inbytesleft -= amt;
 172       *outbytesleft -= amt;
 173     }
 174
 175   if (*inbytesleft)
 176     {
 177       errno = E2BIG;
 178       return -1;
 179     }
 180
 181   /* The number of non-reversible conversions -- but they were all
 182      reversible.  */
 183   return 0;
 184 }
 185
 186 #endif
 187
 188
 189 \f
 190 /* The global lists of character sets and translations.  */
 191
 192
 193 #ifndef GDB_DEFAULT_TARGET_CHARSET
 194 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
 195 #endif
 196
 197 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
 198 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
 199 #endif
 200
 201 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
 202 static const char *host_charset_name = "auto";
 203 static void
 204 show_host_charset_name (struct ui_file *file, int from_tty,
 205                         struct cmd_list_element *c,
 206                         const char *value)
 207 {
 208   if (!strcmp (value, "auto"))
 209     fprintf_filtered (file,
 210                       _("The host character set is \"auto; currently %s\".\n"),
 211                       auto_host_charset_name);
 212   else
 213     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
 214 }
 215
 216 static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
 217 static void
 218 show_target_charset_name (struct ui_file *file, int from_tty,
 219                           struct cmd_list_element *c, const char *value)
 220 {
 221   fprintf_filtered (file, _("The target character set is \"%s\".\n"),
 222                     value);
 223 }
 224
 225 static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
 226 static void
 227 show_target_wide_charset_name (struct ui_file *file, int from_tty,
 228                                struct cmd_list_element *c, const char *value)
 229 {
 230   fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
 231                     value);
 232 }
 233
 234 static const char *default_charset_names[] =
 235 {
 236   DEFAULT_CHARSET_NAMES
 237   0
 238 };
 239
 240 static const char **charset_enum;
 241
 242 \f
 243 /* If the target wide character set has big- or little-endian
 244    variants, these are the corresponding names.  */
 245 static const char *target_wide_charset_be_name;
 246 static const char *target_wide_charset_le_name;
 247
 248 /* A helper function for validate which sets the target wide big- and
 249    little-endian character set names, if possible.  */
 250
 251 static void
 252 set_be_le_names (void)
 253 {
 254   int i, len;
 255
 256   target_wide_charset_le_name = NULL;
 257   target_wide_charset_be_name = NULL;
 258
 259   len = strlen (target_wide_charset_name);
 260   for (i = 0; charset_enum[i]; ++i)
 261     {
 262       if (strncmp (target_wide_charset_name, charset_enum[i], len))
 263         continue;
 264       if ((charset_enum[i][len] == 'B'
 265            || charset_enum[i][len] == 'L')
 266           && charset_enum[i][len + 1] == 'E'
 267           && charset_enum[i][len + 2] == '\0')
 268         {
 269           if (charset_enum[i][len] == 'B')
 270             target_wide_charset_be_name = charset_enum[i];
 271           else
 272             target_wide_charset_le_name = charset_enum[i];
 273         }
 274     }
 275 }
 276
 277 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
 278    target-wide-charset', 'set charset' sfunc's.  */
 279
 280 static void
 281 validate (void)
 282 {
 283   iconv_t desc;
 284   const char *host_cset = host_charset ();
 285
 286   desc = iconv_open (target_wide_charset_name, host_cset);
 287   if (desc == (iconv_t) -1)
 288     error ("Cannot convert between character sets `%s' and `%s'",
 289            target_wide_charset_name, host_cset);
 290   iconv_close (desc);
 291
 292   desc = iconv_open (target_charset_name, host_cset);
 293   if (desc == (iconv_t) -1)
 294     error ("Cannot convert between character sets `%s' and `%s'",
 295            target_charset_name, host_cset);
 296   iconv_close (desc);
 297
 298   set_be_le_names ();
 299 }
 300
 301 /* This is the sfunc for the 'set charset' command.  */
 302 static void
 303 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
 304 {
 305   /* CAREFUL: set the target charset here as well. */
 306   target_charset_name = host_charset_name;
 307   validate ();
 308 }
 309
 310 /* 'set host-charset' command sfunc.  We need a wrapper here because
 311    the function needs to have a specific signature.  */
 312 static void
 313 set_host_charset_sfunc (char *charset, int from_tty,
 314                         struct cmd_list_element *c)
 315 {
 316   validate ();
 317 }
 318
 319 /* Wrapper for the 'set target-charset' command.  */
 320 static void
 321 set_target_charset_sfunc (char *charset, int from_tty,
 322                           struct cmd_list_element *c)
 323 {
 324   validate ();
 325 }
 326
 327 /* Wrapper for the 'set target-wide-charset' command.  */
 328 static void
 329 set_target_wide_charset_sfunc (char *charset, int from_tty,
 330                                struct cmd_list_element *c)
 331 {
 332   validate ();
 333 }
 334
 335 /* sfunc for the 'show charset' command.  */
 336 static void
 337 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
 338               const char *name)
 339 {
 340   show_host_charset_name (file, from_tty, c, host_charset_name);
 341   show_target_charset_name (file, from_tty, c, target_charset_name);
 342   show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
 343 }
 344
 345 \f
 346 /* Accessor functions.  */
 347
 348 const char *
 349 host_charset (void)
 350 {
 351   if (!strcmp (host_charset_name, "auto"))
 352     return auto_host_charset_name;
 353   return host_charset_name;
 354 }
 355
 356 const char *
 357 target_charset (void)
 358 {
 359   return target_charset_name;
 360 }
 361
 362 const char *
 363 target_wide_charset (enum bfd_endian byte_order)
 364 {
 365   if (byte_order == BFD_ENDIAN_BIG)
 366     {
 367       if (target_wide_charset_be_name)
 368         return target_wide_charset_be_name;
 369     }
 370   else
 371     {
 372       if (target_wide_charset_le_name)
 373         return target_wide_charset_le_name;
 374     }
 375
 376   return target_wide_charset_name;
 377 }
 378
 379 \f
 380 /* Host character set management.  For the time being, we assume that
 381    the host character set is some superset of ASCII.  */
 382
 383 char
 384 host_letter_to_control_character (char c)
 385 {
 386   if (c == '?')
 387     return 0177;
 388   return c & 0237;
 389 }
 390
 391 /* Convert a host character, C, to its hex value.  C must already have
 392    been validated using isxdigit.  */
 393
 394 int
 395 host_hex_value (char c)
 396 {
 397   if (isdigit (c))
 398     return c - '0';
 399   if (c >= 'a' && c <= 'f')
 400     return 10 + c - 'a';
 401   gdb_assert (c >= 'A' && c <= 'F');
 402   return 10 + c - 'A';
 403 }
 404
 405 \f
 406 /* Public character management functions.  */
 407
 408 /* A cleanup function which is run to close an iconv descriptor.  */
 409
 410 static void
 411 cleanup_iconv (void *p)
 412 {
 413   iconv_t *descp = p;
 414   iconv_close (*descp);
 415 }
 416
 417 void
 418 convert_between_encodings (const char *from, const char *to,
 419                            const gdb_byte *bytes, unsigned int num_bytes,
 420                            int width, struct obstack *output,
 421                            enum transliterations translit)
 422 {
 423   iconv_t desc;
 424   struct cleanup *cleanups;
 425   size_t inleft;
 426   char *inp;
 427   unsigned int space_request;
 428
 429   /* Often, the host and target charsets will be the same.  */
 430   if (!strcmp (from, to))
 431     {
 432       obstack_grow (output, bytes, num_bytes);
 433       return;
 434     }
 435
 436   desc = iconv_open (to, from);
 437   if (desc == (iconv_t) -1)
 438     perror_with_name ("Converting character sets");
 439   cleanups = make_cleanup (cleanup_iconv, &desc);
 440
 441   inleft = num_bytes;
 442   inp = (char *) bytes;
 443
 444   space_request = num_bytes;
 445
 446   while (inleft > 0)
 447     {
 448       char *outp;
 449       size_t outleft, r;
 450       int old_size;
 451
 452       old_size = obstack_object_size (output);
 453       obstack_blank (output, space_request);
 454
 455       outp = obstack_base (output) + old_size;
 456       outleft = space_request;
 457
 458       r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
 459
 460       /* Now make sure that the object on the obstack only includes
 461          bytes we have converted.  */
 462       obstack_blank (output, - (int) outleft);
 463
 464       if (r == (size_t) -1)
 465         {
 466           switch (errno)
 467             {
 468             case EILSEQ:
 469               {
 470                 int i;
 471
 472                 /* Invalid input sequence.  */
 473                 if (translit == translit_none)
 474                   error (_("Could not convert character to `%s' character set"),
 475                          to);
 476
 477                 /* We emit escape sequence for the bytes, skip them,
 478                    and try again.  */
 479                 for (i = 0; i < width; ++i)
 480                   {
 481                     char octal[5];
 482
 483                     sprintf (octal, "\\%.3o", *inp & 0xff);
 484                     obstack_grow_str (output, octal);
 485
 486                     ++inp;
 487                     --inleft;
 488                   }
 489               }
 490               break;
 491
 492             case E2BIG:
 493               /* We ran out of space in the output buffer.  Make it
 494                  bigger next time around.  */
 495               space_request *= 2;
 496               break;
 497
 498             case EINVAL:
 499               /* Incomplete input sequence.  FIXME: ought to report this
 500                  to the caller somehow.  */
 501               inleft = 0;
 502               break;
 503
 504             default:
 505               perror_with_name ("Internal error while converting character sets");
 506             }
 507         }
 508     }
 509
 510   do_cleanups (cleanups);
 511 }
 512
 513 \f
 514
 515 /* An iterator that returns host wchar_t's from a target string.  */
 516 struct wchar_iterator
 517 {
 518   /* The underlying iconv descriptor.  */
 519   iconv_t desc;
 520
 521   /* The input string.  This is updated as convert characters.  */
 522   char *input;
 523   /* The number of bytes remaining in the input.  */
 524   size_t bytes;
 525
 526   /* The width of an input character.  */
 527   size_t width;
 528
 529   /* The output buffer and its size.  */
 530   gdb_wchar_t *out;
 531   size_t out_size;
 532 };
 533
 534 /* Create a new iterator.  */
 535 struct wchar_iterator *
 536 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
 537                      size_t width)
 538 {
 539   struct wchar_iterator *result;
 540   iconv_t desc;
 541
 542   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
 543   if (desc == (iconv_t) -1)
 544     perror_with_name ("Converting character sets");
 545
 546   result = XNEW (struct wchar_iterator);
 547   result->desc = desc;
 548   result->input = (char *) input;
 549   result->bytes = bytes;
 550   result->width = width;
 551
 552   result->out = XNEW (gdb_wchar_t);
 553   result->out_size = 1;
 554
 555   return result;
 556 }
 557
 558 static void
 559 do_cleanup_iterator (void *p)
 560 {
 561   struct wchar_iterator *iter = p;
 562
 563   iconv_close (iter->desc);
 564   xfree (iter->out);
 565   xfree (iter);
 566 }
 567
 568 struct cleanup *
 569 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
 570 {
 571   return make_cleanup (do_cleanup_iterator, iter);
 572 }
 573
 574 int
 575 wchar_iterate (struct wchar_iterator *iter,
 576                enum wchar_iterate_result *out_result,
 577                gdb_wchar_t **out_chars,
 578                const gdb_byte **ptr,
 579                size_t *len)
 580 {
 581   size_t out_request;
 582
 583   /* Try to convert some characters.  At first we try to convert just
 584      a single character.  The reason for this is that iconv does not
 585      necessarily update its outgoing arguments when it encounters an
 586      invalid input sequence -- but we want to reliably report this to
 587      our caller so it can emit an escape sequence.  */
 588   out_request = 1;
 589   while (iter->bytes > 0)
 590     {
 591       char *outptr = (char *) &iter->out[0];
 592       char *orig_inptr = iter->input;
 593       size_t orig_in = iter->bytes;
 594       size_t out_avail = out_request * sizeof (gdb_wchar_t);
 595       size_t num;
 596       gdb_wchar_t result;
 597
 598       size_t r = iconv (iter->desc,
 599                         (ICONV_CONST char **) &iter->input, &iter->bytes,
 600                         &outptr, &out_avail);
 601       if (r == (size_t) -1)
 602         {
 603           switch (errno)
 604             {
 605             case EILSEQ:
 606               /* Invalid input sequence.  Skip it, and let the caller
 607                  know about it.  */
 608               *out_result = wchar_iterate_invalid;
 609               *ptr = iter->input;
 610               *len = iter->width;
 611               iter->input += iter->width;
 612               iter->bytes -= iter->width;
 613               return 0;
 614
 615             case E2BIG:
 616               /* We ran out of space.  We still might have converted a
 617                  character; if so, return it.  Otherwise, grow the
 618                  buffer and try again.  */
 619               if (out_avail < out_request * sizeof (gdb_wchar_t))
 620                 break;
 621
 622               ++out_request;
 623               if (out_request > iter->out_size)
 624                 {
 625                   iter->out_size = out_request;
 626                   iter->out = xrealloc (iter->out,
 627                                         out_request * sizeof (gdb_wchar_t));
 628                 }
 629               continue;
 630
 631             case EINVAL:
 632               /* Incomplete input sequence.  Let the caller know, and
 633                  arrange for future calls to see EOF.  */
 634               *out_result = wchar_iterate_incomplete;
 635               *ptr = iter->input;
 636               *len = iter->bytes;
 637               iter->bytes = 0;
 638               return 0;
 639
 640             default:
 641               perror_with_name ("Internal error while converting character sets");
 642             }
 643         }
 644
 645       /* We converted something.  */
 646       num = out_request - out_avail / sizeof (gdb_wchar_t);
 647       *out_result = wchar_iterate_ok;
 648       *out_chars = iter->out;
 649       *ptr = orig_inptr;
 650       *len = orig_in - iter->bytes;
 651       return num;
 652     }
 653
 654   /* Really done.  */
 655   *out_result = wchar_iterate_eof;
 656   return -1;
 657 }
 658
 659 \f
 660 /* The charset.c module initialization function.  */
 661
 662 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
 663
 664 typedef char *char_ptr;
 665 DEF_VEC_P (char_ptr);
 666
 667 static VEC (char_ptr) *charsets;
 668
 669 #ifdef PHONY_ICONV
 670
 671 static void
 672 find_charset_names (void)
 673 {
 674   VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
 675   VEC_safe_push (char_ptr, charsets, NULL);
 676 }
 677
 678 #else /* PHONY_ICONV */
 679
 680 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
 681    provides different symbols in the static and dynamic libraries.
 682    So, configure may see libiconvlist but not iconvlist.  But, calling
 683    iconvlist is the right thing to do and will work.  Hence we do a
 684    check here but unconditionally call iconvlist below.  */
 685 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
 686
 687 /* A helper function that adds some character sets to the vector of
 688    all character sets.  This is a callback function for iconvlist.  */
 689
 690 static int
 691 add_one (unsigned int count, const char *const *names, void *data)
 692 {
 693   unsigned int i;
 694
 695   for (i = 0; i < count; ++i)
 696     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
 697
 698   return 0;
 699 }
 700
 701 static void
 702 find_charset_names (void)
 703 {
 704   iconvlist (add_one, NULL);
 705   VEC_safe_push (char_ptr, charsets, NULL);
 706 }
 707
 708 #else
 709
 710 /* Return non-zero if LINE (output from iconv) should be ignored.
 711    Older iconv programs (e.g. 2.2.2) include the human readable
 712    introduction even when stdout is not a tty.  Newer versions omit
 713    the intro if stdout is not a tty.  */
 714
 715 static int
 716 ignore_line_p (const char *line)
 717 {
 718   /* This table is used to filter the output.  If this text appears
 719      anywhere in the line, it is ignored (strstr is used).  */
 720   static const char * const ignore_lines[] =
 721     {
 722       "The following",
 723       "not necessarily",
 724       "the FROM and TO",
 725       "listed with several",
 726       NULL
 727     };
 728   int i;
 729
 730   for (i = 0; ignore_lines[i] != NULL; ++i)
 731     {
 732       if (strstr (line, ignore_lines[i]) != NULL)
 733         return 1;
 734     }
 735
 736   return 0;
 737 }
 738
 739 static void
 740 find_charset_names (void)
 741 {
 742   struct pex_obj *child;
 743   char *args[3];
 744   int err, status;
 745   int fail = 1;
 746   struct gdb_environ *iconv_env;
 747
 748   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
 749      a tty.  We need to recognize it and ignore it.  This text is subject
 750      to translation, so force LANGUAGE=C.  */
 751   iconv_env = make_environ ();
 752   init_environ (iconv_env);
 753   set_in_environ (iconv_env, "LANGUAGE", "C");
 754   set_in_environ (iconv_env, "LC_ALL", "C");
 755
 756   child = pex_init (0, "iconv", NULL);
 757
 758   args[0] = "iconv";
 759   args[1] = "-l";
 760   args[2] = NULL;
 761   /* Note that we simply ignore errors here.  */
 762   if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
 763                                "iconv", args, environ_vector (iconv_env),
 764                                NULL, NULL, &err))
 765     {
 766       FILE *in = pex_read_output (child, 0);
 767
 768       /* POSIX says that iconv -l uses an unspecified format.  We
 769          parse the glibc and libiconv formats; feel free to add others
 770          as needed.  */
 771
 772       while (!feof (in))
 773         {
 774           /* The size of buf is chosen arbitrarily.  */
 775           char buf[1024];
 776           char *start, *r;
 777           int len, keep_going;
 778
 779           r = fgets (buf, sizeof (buf), in);
 780           if (!r)
 781             break;
 782           len = strlen (r);
 783           if (len <= 3)
 784             continue;
 785           if (ignore_line_p (r))
 786             continue;
 787
 788           /* Strip off the newline.  */
 789           --len;
 790           /* Strip off one or two '/'s.  glibc will print lines like
 791              "8859_7//", but also "10646-1:1993/UCS4/".  */
 792           if (buf[len - 1] == '/')
 793             --len;
 794           if (buf[len - 1] == '/')
 795             --len;
 796           buf[len] = '\0';
 797
 798           /* libiconv will print multiple entries per line, separated
 799              by spaces.  Older iconvs will print multiple entries per line,
 800              indented by two spaces, and separated by ", "
 801              (i.e. the human readable form).  */
 802           start = buf;
 803           while (1)
 804             {
 805               int keep_going;
 806               char *p;
 807
 808               /* Skip leading blanks.  */
 809               for (p = start; *p && *p == ' '; ++p)
 810                 ;
 811               start = p;
 812               /* Find the next space, comma, or end-of-line.  */
 813               for ( ; *p && *p != ' ' && *p != ','; ++p)
 814                 ;
 815               /* Ignore an empty result.  */
 816               if (p == start)
 817                 break;
 818               keep_going = *p;
 819               *p = '\0';
 820               VEC_safe_push (char_ptr, charsets, xstrdup (start));
 821               if (!keep_going)
 822                 break;
 823               /* Skip any extra spaces.  */
 824               for (start = p + 1; *start && *start == ' '; ++start)
 825                 ;
 826             }
 827         }
 828
 829       if (pex_get_status (child, 1, &status)
 830           && WIFEXITED (status) && !WEXITSTATUS (status))
 831         fail = 0;
 832
 833     }
 834
 835   pex_free (child);
 836   free_environ (iconv_env);
 837
 838   if (fail)
 839     {
 840       /* Some error occurred, so drop the vector.  */
 841       int ix;
 842       char *elt;
 843       for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
 844         xfree (elt);
 845       VEC_truncate (char_ptr, charsets, 0);
 846     }
 847   else
 848     VEC_safe_push (char_ptr, charsets, NULL);
 849 }
 850
 851 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
 852 #endif /* PHONY_ICONV */
 853
 854 void
 855 _initialize_charset (void)
 856 {
 857   struct cmd_list_element *new_cmd;
 858
 859   /* The first element is always "auto"; then we skip it for the
 860      commands where it is not allowed.  */
 861   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
 862   find_charset_names ();
 863
 864   if (VEC_length (char_ptr, charsets) > 1)
 865     charset_enum = (const char **) VEC_address (char_ptr, charsets);
 866   else
 867     charset_enum = default_charset_names;
 868
 869 #ifndef PHONY_ICONV
 870 #ifdef HAVE_LANGINFO_CODESET
 871   auto_host_charset_name = nl_langinfo (CODESET);
 872   /* Solaris will return `646' here -- but the Solaris iconv then
 873      does not accept this.  Darwin (and maybe FreeBSD) may return "" here,
 874      which GNU libiconv doesn't like (infinite loop).  */
 875   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
 876     auto_host_charset_name = "ASCII";
 877   target_charset_name = auto_host_charset_name;
 878
 879   set_be_le_names ();
 880 #endif
 881 #endif
 882
 883   add_setshow_enum_cmd ("charset", class_support,
 884                         &charset_enum[1], &host_charset_name, _("\
 885 Set the host and target character sets."), _("\
 886 Show the host and target character sets."), _("\
 887 The `host character set' is the one used by the system GDB is running on.\n\
 888 The `target character set' is the one used by the program being debugged.\n\
 889 You may only use supersets of ASCII for your host character set; GDB does\n\
 890 not support any others.\n\
 891 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
 892                         /* Note that the sfunc below needs to set
 893                            target_charset_name, because the 'set
 894                            charset' command sets two variables.  */
 895                         set_charset_sfunc,
 896                         show_charset,
 897                         &setlist, &showlist);
 898
 899   add_setshow_enum_cmd ("host-charset", class_support,
 900                         charset_enum, &host_charset_name, _("\
 901 Set the host character set."), _("\
 902 Show the host character set."), _("\
 903 The `host character set' is the one used by the system GDB is running on.\n\
 904 You may only use supersets of ASCII for your host character set; GDB does\n\
 905 not support any others.\n\
 906 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
 907                         set_host_charset_sfunc,
 908                         show_host_charset_name,
 909                         &setlist, &showlist);
 910
 911   add_setshow_enum_cmd ("target-charset", class_support,
 912                         &charset_enum[1], &target_charset_name, _("\
 913 Set the target character set."), _("\
 914 Show the target character set."), _("\
 915 The `target character set' is the one used by the program being debugged.\n\
 916 GDB translates characters and strings between the host and target\n\
 917 character sets as needed.\n\
 918 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
 919                         set_target_charset_sfunc,
 920                         show_target_charset_name,
 921                         &setlist, &showlist);
 922
 923   add_setshow_enum_cmd ("target-wide-charset", class_support,
 924                         &charset_enum[1], &target_wide_charset_name,
 925                         _("\
 926 Set the target wide character set."), _("\
 927 Show the target wide character set."), _("\
 928 The `target wide character set' is the one used by the program being debugged.\n\
 929 In particular it is the encoding used by `wchar_t'.\n\
 930 GDB translates characters and strings between the host and target\n\
 931 character sets as needed.\n\
 932 To see a list of the character sets GDB supports, type\n\
 933 `set target-wide-charset'<TAB>"),
 934                         set_target_wide_charset_sfunc,
 935                         show_target_wide_charset_name,
 936                         &setlist, &showlist);
 937 }