sort: Fix two bugs with determining the end of field

author Pádraig Brady <P@draigBrady.com>

Tue, 24 Feb 2009 08:37:18 +0000 (08:37 +0000)

committer Pádraig Brady <P@draigBrady.com>

Thu, 26 Feb 2009 14:40:28 +0000 (14:40 +0000)
author Pádraig Brady <P@draigBrady.com>
Tue, 24 Feb 2009 08:37:18 +0000 (08:37 +0000)
committer Pádraig Brady <P@draigBrady.com>
Thu, 26 Feb 2009 14:40:28 +0000 (14:40 +0000)
diff --git a/NEWS b/NEWS

index 82ded9d..05d22cb 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,12 @@ GNU coreutils NEWS                                    -*- outline -*-
  
  * Noteworthy changes in release ?.? (????-??-??) [?]
  
+** Bug fixes
+
+  sort now handles specified key ends correctly.
+  Previously -k1,1b would have caused leading space from field 2 to be
+  included in the sort while -k2,3.0 would have not included field 3.
+
  
  * Noteworthy changes in release 7.1 (2009-02-21) [stable]
  
diff --git a/THANKS b/THANKS

index 81911da..724a9e3 100644 (file)
--- a/THANKS
+++ b/THANKS
@@ -137,6 +137,7 @@ David Godfrey                       dave@delta.demon.co.uk
  David Luyer                         david_luyer@pacific.net.au
  David Madore                        david.madore@ens.fr
  David Malone                        dwmalone@cnri.dit.ie
+Davide Canova                       kc.canova@gmail.com
  Dawson Engler                       engler@stanford.edu
  Dean Gaudet                         dean-savannah@arctic.org
  Deepak Goel                         deego@gnufans.org
diff --git a/src/sort.c b/src/sort.c

index f438563..7b0b064 100644 (file)
--- a/src/sort.c
+++ b/src/sort.c
@@ -1366,7 +1366,6 @@ begfield (const struct line *line, const struct keyfield *key)
    char *ptr = line->text, *lim = ptr + line->length - 1;
    size_t sword = key->sword;
    size_t schar = key->schar;
-  size_t remaining_bytes;
  
    /* The leading field separator itself is included in a field when -t
       is absent.  */
@@ -1388,16 +1387,14 @@ begfield (const struct line *line, const struct keyfield *key)
           ++ptr;
        }
  
+  /* If we're ignoring leading blanks when computing the Start
+     of the field, skip past them here.  */
    if (key->skipsblanks)
      while (ptr < lim && blanks[to_uchar (*ptr)])
        ++ptr;
  
    /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
-  remaining_bytes = lim - ptr;
-  if (schar < remaining_bytes)
-    ptr += schar;
-  else
-    ptr = lim;
+  ptr = MIN (lim, ptr + schar);
  
    return ptr;
  }
@@ -1410,7 +1407,9 @@ limfield (const struct line *line, const struct keyfield *key)
  {
    char *ptr = line->text, *lim = ptr + line->length - 1;
    size_t eword = key->eword, echar = key->echar;
-  size_t remaining_bytes;
+
+  if (echar == 0)
+    eword++; /* Skip all of end field.  */
  
    /* Move PTR past EWORD fields or to one past the last byte on LINE,
       whichever comes first.  If there are more than EWORD fields, leave
@@ -1487,19 +1486,17 @@ limfield (const struct line *line, const struct keyfield *key)
      }
  #endif
  
-  /* If we're ignoring leading blanks when computing the End
-     of the field, don't start counting bytes until after skipping
-     past any leading blanks. */
-  if (key->skipeblanks)
-    while (ptr < lim && blanks[to_uchar (*ptr)])
-      ++ptr;
+  if (echar != 0) /* We need to skip over a portion of the end field.  */
+    {
+      /* If we're ignoring leading blanks when computing the End
+        of the field, skip past them here.  */
+      if (key->skipeblanks)
+       while (ptr < lim && blanks[to_uchar (*ptr)])
+         ++ptr;
  
-  /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
-  remaining_bytes = lim - ptr;
-  if (echar < remaining_bytes)
-    ptr += echar;
-  else
-    ptr = lim;
+      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
+      ptr = MIN (lim, ptr + echar);
+    }
  
    return ptr;
  }
@@ -3152,12 +3149,9 @@ main (int argc, char **argv)
                   badfieldspec (optarg, N_("field number is zero"));
                 }
               if (*s == '.')
-               s = parse_field_count (s + 1, &key->echar,
-                                      N_("invalid number after `.'"));
-             else
                 {
-                 /* `-k 2,3' is equivalent to `+1 -3'.  */
-                 key->eword++;
+                 s = parse_field_count (s + 1, &key->echar,
+                                        N_("invalid number after `.'"));
                 }
               s = set_ordering (s, key, bl_end);
             }
diff --git a/tests/misc/sort b/tests/misc/sort

index 3e8eda6..3af2388 100755 (executable)
--- a/tests/misc/sort
+++ b/tests/misc/sort
@@ -110,6 +110,8 @@ my @Tests =
  ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
  ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
  ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
+# ensure a character position of 0 includes whole field
+["07e", '-k 2,3.0', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
  #
  # report an error for `.' without following char spec
  ["08a", '-k 2.,3', {EXIT=>2},
@@ -210,6 +212,10 @@ my @Tests =
  # key start and key end.
  ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
  
+# When ignoring leading blanks for end position, ensure blanks from
+# next field are not included in the sort. I.E. order should not change here.
+["18f", '-k1,1b', {IN=>"a  y\na z\n"}, {OUT=>"a  y\na z\n"}],
+
  # This looks odd, but works properly -- 2nd keyspec is never
  # used because all lines are different.
  ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],
author	Pádraig Brady <P@draigBrady.com>
	Tue, 24 Feb 2009 08:37:18 +0000 (08:37 +0000)
committer	Pádraig Brady <P@draigBrady.com>
	Thu, 26 Feb 2009 14:40:28 +0000 (14:40 +0000)
NEWS		patch \| blob \| history
THANKS		patch \| blob \| history
src/sort.c		patch \| blob \| history
tests/misc/sort		patch \| blob \| history