From 6efd10462d8103208f4575f0b5edddf841c7d87c Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 5 Jan 2008 23:55:01 +0100 Subject: [PATCH] Avoid tr case-conversion failure in some locales. * src/tr.c (skip_construct): New function. (main): When processing a pair of case-converting classes, don't iterate through the elements of each [:upper:] or [:lower:] class. Reported by Gerald Pfeifer in . * tests/tr/Test.pm [tolower-F]: New test for the above fix. [upcase-xtra, dncase-xtra]: New tests, for a related code path. * NEWS: Mention the tr bug fix. --- ChangeLog | 12 ++++++++++++ NEWS | 6 ++++++ THANKS | 1 + src/tr.c | 24 +++++++++++++++++++++++- tests/tr/Test.pm | 12 ++++++++++++ 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 942f97d..f076ad4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2008-01-05 Jim Meyering + + Avoid tr case-conversion failure in some locales. + * src/tr.c (skip_construct): New function. + (main): When processing a pair of case-converting classes, don't + iterate through the elements of each [:upper:] or [:lower:] class. + Reported by Gerald Pfeifer in + . + * tests/tr/Test.pm [tolower-F]: New test for the above fix. + [upcase-xtra, dncase-xtra]: New tests, for a related code path. + * NEWS: Mention the tr bug fix. + 2008-01-02 Jim Meyering * .gitignore: Ignore lzma-compressed files, too. diff --git a/NEWS b/NEWS index 5285d51..5b1b366 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,12 @@ GNU coreutils NEWS -*- outline -*- * Noteworthy changes in release 6.? (????-??-??) [?] +** Bug fixes + + tr's case conversion would fail in a locale with differing numbers + of lower case and upper case characters. E.g., this would fail: + env LC_CTYPE=en_US.iso88591 tr '[:upper:]' '[:lower:]' + [bug introduced in coreutils-6.9.90] * Noteworthy changes in release 6.9.91 (2007-12-15) [beta] diff --git a/THANKS b/THANKS index e4658a0..5121495 100644 --- a/THANKS +++ b/THANKS @@ -179,6 +179,7 @@ Geoff Collyer geoff at collyer.net Geoff Kuenning geoff@cs.hmc.edu Geoff Odhner geoff@franklin.com Geoff Whale geoffw@cse.unsw.EDU.AU +Gerald Pfeifer gerald@pfeifer.com Gerhard Poul gpoul@gnu.org Germano Leichsenring germano@jedi.cs.kobe-u.ac.jp Göran Uddeborg goeran@uddeborg.pp.se diff --git a/src/tr.c b/src/tr.c index dff602e..a7565f8 100644 --- a/src/tr.c +++ b/src/tr.c @@ -1,5 +1,5 @@ /* tr -- a filter to translate characters - Copyright (C) 91, 1995-2007 Free Software Foundation, Inc. + Copyright (C) 91, 1995-2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1019,6 +1019,15 @@ build_spec_list (const struct E_string *es, struct Spec_list *result) return true; } +/* Advance past the current construct. + S->tail must be non-NULL. */ +static void +skip_construct (struct Spec_list *s) +{ + s->tail = s->tail->next; + s->state = NEW_ELEMENT; +} + /* Given a Spec_list S (with its saved state implicit in the values of its members `tail' and `state'), return the next single character in the expansion of S's constructs. If the last character of S was @@ -1809,6 +1818,7 @@ main (int argc, char **argv) { int c1, c2; int i; + bool case_convert = false; enum Upper_Lower_class class_s1; enum Upper_Lower_class class_s2; @@ -1818,6 +1828,16 @@ main (int argc, char **argv) s2->state = BEGIN_STATE; for (;;) { + /* When the previous pair identified case-converting classes, + advance S1 and S2 so that each points to the following + construct. */ + if (case_convert) + { + skip_construct (s1); + skip_construct (s2); + case_convert = false; + } + c1 = get_next (s1, &class_s1); c2 = get_next (s2, &class_s2); @@ -1831,12 +1851,14 @@ main (int argc, char **argv) if (class_s1 == UL_LOWER && class_s2 == UL_UPPER) { + case_convert = true; for (i = 0; i < N_CHARS; i++) if (islower (i)) xlate[i] = toupper (i); } else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER) { + case_convert = true; for (i = 0; i < N_CHARS; i++) if (isupper (i)) xlate[i] = tolower (i); diff --git a/tests/tr/Test.pm b/tests/tr/Test.pm index 7b25a15..cf70213 100644 --- a/tests/tr/Test.pm +++ b/tests/tr/Test.pm @@ -139,8 +139,20 @@ my @tv = ( # Up to coreutils-6.9, tr rejected an unmatched [:lower:] or [:upper:] in SET1. ['s1-lower', q|'[:lower:]' '[.*]'|, '#$%123abcABC', '#$%123...ABC', 0], ['s1-upper', q|'[:upper:]' '[.*]'|, '#$%123abcABC', '#$%123abc...', 0], + +# Up to coreutils-6.9.91, this would fail with the diagnostic: +# tr: misaligned [:upper:] and/or [:lower:] construct +# with LC_CTYPE=en_US.iso88591. +['tolower-F',q|'[:upper:]' '[:lower:]'|, 'A', 'a', 0], + +# When doing a case-converting translation with something after the +# [:upper:] and [:lower:] elements, ensure that tr honors the following byte. +['upcase-xtra',q|'[:lower:].' '[:upper:]x'|, 'abc.', 'ABCx', 0], +['dncase-xtra',q|'[:upper:].' '[:lower:]x'|, 'ABC.', 'abcx', 0], ); +$Test::env{'tolower-F'} = ['LC_CTYPE=en_US.iso88591']; + sub test_vector { my $t; -- 2.7.4