From: Pádraig Brady
Date: Sun, 1 Jul 2012 00:14:42 +0000 (+0100)
Subject: sort: avoid redundant processing with inaccessible inputs or output
X-Git-Tag: v8.18~39
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=44fbd3fd862e34d42006f8b74cb11c9c56346417;p=platform%2Fupstream%2Fcoreutils.git
sort: avoid redundant processing with inaccessible inputs or output
* src/sort.c (check_inputs): A new function to verify all inputs
are accessible before further processing.
(check_output): A new function to open or create a specified
output file, before futher processing.
(stream_open): Adjust to truncating the previously opened
output file rather than opening directly.
(avoid_trashing_input): Optimize to stat the output file
descriptor, rather than the file name.
(main): Call the new functions to check accessibility of
inputs and output, before processing starts.
* tests/misc/sort: Adjust to the changed error message.
* tests/misc/sort-merge-fdlimit: Account for the earlier opened
file descriptor of the specified output file.
* tests/misc/sort-exit-early: A new test to exercise the improvements.
* tests/Makefile.am: Reference the new test.
* NEWS: Mention the improvement.
Suggested-by: Bernhard Voelker
---
diff --git a/NEWS b/NEWS
index be98796..03c600a 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,12 @@ GNU coreutils NEWS -*- outline -*-
patches as well as enough support to build on the Hurd, we no longer
have any reason to include it here.
+** Improvements
+
+ sort avoids redundant processing in the presence of inaccessible inputs,
+ or unwritable output. Sort now diagnoses certain errors at start-up,
+ rather than after potentially expensive processing.
+
* Noteworthy changes in release 8.17 (2012-05-10) [stable]
diff --git a/src/sort.c b/src/sort.c
index 5a48ce6..e4067c9 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -357,6 +357,9 @@ static bool unique;
/* Nonzero if any of the input files are the standard input. */
static bool have_read_stdin;
+/* File descriptor associated with -o. */
+static int outfd = -1;
+
/* List of key field comparisons to be tried. */
static struct keyfield *keylist;
@@ -911,11 +914,12 @@ create_temp_file (int *pfd, bool survive_fd_exhaustion)
static FILE *
stream_open (char const *file, char const *how)
{
+ FILE *fp;
+
if (!file)
return stdout;
if (*how == 'r')
{
- FILE *fp;
if (STREQ (file, "-"))
{
have_read_stdin = true;
@@ -924,9 +928,18 @@ stream_open (char const *file, char const *how)
else
fp = fopen (file, how);
fadvise (fp, FADVISE_SEQUENTIAL);
- return fp;
}
- return fopen (file, how);
+ else if (*how == 'w')
+ {
+ assert (outfd != -1);
+ if (ftruncate (outfd, 0) != 0)
+ error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (file));
+ fp = fdopen (outfd, how);
+ }
+ else
+ assert (!"unexpected mode passed to stream_open");
+
+ return fp;
}
/* Same as stream_open, except always return a non-null value; die on
@@ -3637,7 +3650,7 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
if (! got_outstat)
{
if ((outfile
- ? stat (outfile, &outstat)
+ ? fstat (outfd, &outstat)
: fstat (STDOUT_FILENO, &outstat))
!= 0)
break;
@@ -3666,6 +3679,44 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
}
}
+/* Scan the input files to ensure all are accessible.
+ Otherwise exit with a diagnostic.
+
+ Note this will catch common issues with permissions etc.
+ but will fail to notice issues where you can open() but not read(),
+ like when a directory is specified on some systems.
+ Catching these obscure cases could slow down performance in
+ common cases. */
+
+static void
+check_inputs (char *const *files, size_t nfiles)
+{
+ size_t i;
+ for (i = 0; i < nfiles; i++)
+ {
+ if (STREQ (files[i], "-"))
+ continue;
+
+ if (euidaccess (files[i], R_OK) != 0)
+ die (_("cannot read"), files[i]);
+ }
+}
+
+/* Ensure a specified output file can be created or written to,
+ and cache the returned descriptor in the global OUTFD variable.
+ Otherwise exit with a diagnostic. */
+
+static void
+check_output (char const *outfile)
+{
+ if (outfile)
+ {
+ outfd = open (outfile, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO);
+ if (outfd < 0)
+ die (_("open failed"), outfile);
+ }
+}
+
/* Merge the input FILES. NTEMPS is the number of files at the
start of FILES that are temporary; it is zero at the top level.
NFILES is the total number of files. Put the output in
@@ -4620,6 +4671,12 @@ main (int argc, char **argv)
exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
}
+ /* Check all inputs are accessible, or exit immediately. */
+ check_inputs (files, nfiles);
+
+ /* Check output is writable, or exit immediately. */
+ check_output (outfile);
+
if (mergeonly)
{
struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2155cee..dfac9a0 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -251,6 +251,7 @@ TESTS = \
misc/sort-merge \
misc/sort-merge-fdlimit \
misc/sort-month \
+ misc/sort-exit-early \
misc/sort-rand \
misc/sort-spinlock-abuse \
misc/sort-stale-thread-mem \
diff --git a/tests/misc/sort b/tests/misc/sort
index 5be00a0..5d15d75 100755
--- a/tests/misc/sort
+++ b/tests/misc/sort
@@ -33,7 +33,7 @@ my $mb_locale = $ENV{LOCALE_FR_UTF8};
# Normalize each diagnostic to use '-'.
my $normalize_filename = {ERR_SUBST => 's/^$prog: .*?:/$prog: -:/'};
-my $no_file = "$prog: open failed: no-file: No such file or directory\n";
+my $no_file = "$prog: cannot read: no-file: No such file or directory\n";
my @Tests =
(
diff --git a/tests/misc/sort-exit-early b/tests/misc/sort-exit-early
new file mode 100755
index 0000000..7470aad
--- /dev/null
+++ b/tests/misc/sort-exit-early
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Test 'sort' exits early on inaccessible inputs or output
+
+# Copyright (C) 2012 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see