From: James Youngman Date: Sat, 26 May 2007 05:08:18 +0000 (+0200) Subject: wc: ignore multibyte-character decoding errors X-Git-Tag: v6.9.89~281 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7;p=platform%2Fupstream%2Fcoreutils.git wc: ignore multibyte-character decoding errors * src/wc.c (wc): Don't issue an error message when mbrtowc indicates that we have seen an invalid byte sequence. This makes "wc /bin/sh" bearable (though the word and line counts are likely not to be useful). * NEWS: Mention the change. --- diff --git a/ChangeLog b/ChangeLog index 34304f1..ab44638 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2007-05-25 James Youngman + + wc: ignore multibyte-character decoding errors + * src/wc.c (wc): Don't issue an error message when mbrtowc + indicates that we have seen an invalid byte sequence. This + makes "wc /bin/sh" bearable (though the word and line counts + are likely not to be useful). + * NEWS: Mention the change. + 2007-05-22 Jim Meyering Check for an up-to-date copyright year in coreutils.texi. diff --git a/NEWS b/NEWS index ea08e0a..715b0d6 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,11 @@ GNU coreutils NEWS -*- outline -*- option of the same name, this makes uniq consume and produce NUL-terminated lines rather than newline-terminated lines. + wc no longer warns about character decoding errors in multibyte locales. + This means for example that "wc /bin/sh" now produces normal output + (though the word count will have no real meaning) rather than many + error messages. + ** Bug fixes cut now diagnoses a range starting with zero (e.g., -f 0-2) as invalid; diff --git a/src/wc.c b/src/wc.c index 85f7d33..b4464d2 100644 --- a/src/wc.c +++ b/src/wc.c @@ -1,5 +1,5 @@ /* wc - print the number of lines, words, and bytes in files - Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc. + Copyright (C) 85, 91, 1995-2007 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -274,8 +274,6 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) bool in_word = false; uintmax_t linepos = 0; mbstate_t state = { 0, }; - uintmax_t last_error_line = 0; - int last_error_errno = 0; # if SUPPORT_OLD_MBRTOWC /* Back-up the state before each multibyte character conversion and move the last incomplete character of the buffer to the front @@ -323,17 +321,10 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) } if (n == (size_t) -1) { - /* Signal repeated errors only once per line. */ - if (!(lines + 1 == last_error_line - && errno == last_error_errno)) - { - char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)]; - last_error_line = lines + 1; - last_error_errno = errno; - error (0, errno, "%s:%s", file, - umaxtostr (last_error_line, line_number_buf)); - ok = false; - } + /* Remember that we read a byte, but don't complain + about the error. Because of the decoding error, + this is considered to be a byte but not a + character (that is, chars is not incremented). */ p++; bytes_read--; }