join: add --header option to always output the first line

author Assaf Gordon <assafgordon@gmail.com>

Fri, 20 Nov 2009 15:24:07 +0000 (15:24 +0000)

committer Pádraig Brady <P@draigBrady.com>

Mon, 1 Feb 2010 13:57:42 +0000 (13:57 +0000)
author Assaf Gordon <assafgordon@gmail.com>
Fri, 20 Nov 2009 15:24:07 +0000 (15:24 +0000)
committer Pádraig Brady <P@draigBrady.com>
Mon, 1 Feb 2010 13:57:42 +0000 (13:57 +0000)
diff --git a/NEWS b/NEWS

index aa37d69..3fe52e7 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,9 @@ GNU coreutils NEWS                                    -*- outline -*-
  
  ** New features
  
+  join now accepts the --header option, to treat the first line of each
+  file as a header line to be joined and printed unconditionally.
+
    who: the "+/-" --mesg (-T) indicator of whether a user/tty is accepting
    messages could be incorrectly listed as "+", when in fact, the user was
    not accepting messages (mesg no).  Before, who would examine only the
diff --git a/doc/coreutils.texi b/doc/coreutils.texi

index 184b55a..2b3d32b 100644 (file)
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5515,6 +5515,15 @@ Do not check that both input files are in sorted order.  This is the default.
  Replace those output fields that are missing in the input with
  @var{string}.
  
+@item --header
+@opindex --header
+Treat the first line of each input file as a header line.  The header lines
+will be joined and printed as the first output line.  If @option{-o} is used to
+specify output format, the header line will be printed according to the
+specified format.  The header lines will not be checked for ordering even if
+@option{--check-order} is specified.  Also, if the header lines from each file
+do not match, the heading fields from the first file will be used.
+
  @item -i
  @itemx --ignore-case
  @opindex -i
diff --git a/src/join.c b/src/join.c

index d86e62c..6030a01 100644 (file)
--- a/src/join.c
+++ b/src/join.c
@@ -137,7 +137,8 @@ static enum
  enum
  {
    CHECK_ORDER_OPTION = CHAR_MAX + 1,
-  NOCHECK_ORDER_OPTION
+  NOCHECK_ORDER_OPTION,
+  HEADER_LINE_OPTION
  };
  
  
@@ -146,6 +147,7 @@ static struct option const longopts[] =
    {"ignore-case", no_argument, NULL, 'i'},
    {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
    {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
+  {"header", no_argument, NULL, HEADER_LINE_OPTION},
    {GETOPT_HELP_OPTION_DECL},
    {GETOPT_VERSION_OPTION_DECL},
    {NULL, 0, NULL, 0}
@@ -157,6 +159,10 @@ static struct line uni_blank;
  /* If nonzero, ignore case when comparing join fields.  */
  static bool ignore_case;
  
+/* If nonzero, treat the first line of each file as column headers --
+   join them without checking for ordering.  */
+static bool join_header_lines;
+
  void
  usage (int status)
  {
@@ -191,6 +197,8 @@ by whitespace.  When FILE1 or FILE2 (not both) is -, read standard input.\n\
    --check-order     check that the input is correctly sorted, even\n\
                        if all input lines are pairable\n\
    --nocheck-order   do not check that the input is correctly sorted\n\
+  --header          treat first line in each file as field header line,\n\
+                      print them without trying to pair them.\n\
  "), stdout);
        fputs (HELP_OPTION_DESCRIPTION, stdout);
        fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -614,6 +622,15 @@ join (FILE *fp1, FILE *fp2)
    initseq (&seq2);
    getseq (fp2, &seq2, 2);
  
+  if (join_header_lines && seq1.count && seq2.count)
+    {
+      prjoin(seq1.lines[0], seq2.lines[0]);
+      prevline[0] = NULL;
+      prevline[1] = NULL;
+      advance_seq (fp1, &seq1, true, 1);
+      advance_seq (fp2, &seq2, true, 2);
+    }
+
    while (seq1.count && seq2.count)
      {
        size_t i;
@@ -1051,6 +1068,10 @@ main (int argc, char **argv)
                           &nfiles, &prev_optc_status, &optc_status);
            break;
  
+        case HEADER_LINE_OPTION:
+          join_header_lines = true;
+          break;
+
          case_GETOPT_HELP_CHAR;
  
          case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
diff --git a/tests/misc/join b/tests/misc/join

index 49194e0..4e7798f 100755 (executable)
--- a/tests/misc/join
+++ b/tests/misc/join
@@ -185,6 +185,34 @@ my @tv = (
  # Before 6.10.143, this would mistakenly fail with the diagnostic:
  # join: File 1 is not in sorted order
  ['chkodr-7', '-12', ["2 a\n1 b\n", ""], "", 0],
+
+# Test '--header' feature
+['header-1', '--header',
+ [ "ID Name\n1 A\n2 B\n", "ID Color\n1 red\n"], "ID Name Color\n1 A red\n", 0],
+
+# '--header' with '--check-order' : The header line is out-of-order but the
+# actual data is in order. This join should succeed.
+['header-2', '--header --check-order',
+ ["ID Name\n1 A\n2 B\n", "ID Color\n2 green\n"],
+ "ID Name Color\n2 B green\n", 0],
+
+# '--header' with '--check-order' : The header line is out-of-order AND the
+# actual data is out-of-order. This join should fail.
+['header-3', '--header --check-order',
+ ["ID Name\n2 B\n1 A\n", "ID Color\n2 blue\n"], "ID Name Color\n", 1,
+ "$prog: file 1 is not in sorted order\n"],
+
+# '--header' with specific output format '-o'.
+# output header line should respect the requested format
+['header-4', '--header -o "0,1.3,2.2"',
+ ["ID Group Name\n1 Foo A\n2 Bar B\n", "ID Color\n2 blue\n"],
+ "ID Name Color\n2 B blue\n", 0],
+
+# '--header' always outputs headers from the first file
+# even if the headers from the second file don't match
+['header-5', '--header',
+ [ "ID1 Name\n1 A\n2 B\n", "ID2 Color\n1 red\n"], "ID1 Name Color\n1 A red\n", 0],
+
  );
  
  # Convert the above old-style test vectors to the newer
author	Assaf Gordon <assafgordon@gmail.com>
	Fri, 20 Nov 2009 15:24:07 +0000 (15:24 +0000)
committer	Pádraig Brady <P@draigBrady.com>
	Mon, 1 Feb 2010 13:57:42 +0000 (13:57 +0000)
NEWS		patch \| blob \| history
doc/coreutils.texi		patch \| blob \| history
src/join.c		patch \| blob \| history
tests/misc/join		patch \| blob \| history