#include "long-options.h"
#include "error.h"
#include "hard-locale.h"
+#include "human.h"
#include "memcoll.h"
#include "xalloc.h"
+#include "xstrtol.h"
/* The official name of this program (e.g., no `g' prefix). */
#define PROGRAM_NAME "sort"
struct line
{
char *text; /* Text of the line. */
- int length; /* Length including final newline. */
+ size_t length; /* Length including final newline. */
char *keybeg; /* Start of first key. */
char *keylim; /* Limit of first key. */
};
struct lines
{
struct line *lines; /* Dynamically allocated array of lines. */
- int used; /* Number of slots used. */
- int alloc; /* Number of slots allocated. */
- int limit; /* Max number of slots to allocate. */
+ size_t used; /* Number of slots used. */
+ size_t alloc; /* Number of slots allocated. */
+ size_t limit; /* Max number of slots to allocate. */
};
/* Input buffers. */
struct buffer
{
char *buf; /* Dynamically allocated buffer. */
- int used; /* Number of bytes used. */
- int alloc; /* Number of bytes allocated. */
- int left; /* Number of bytes left from previous reads. */
- int newline_free; /* Number of left bytes that are known
+ size_t used; /* Number of bytes used. */
+ size_t alloc; /* Number of bytes allocated. */
+ size_t left; /* Number of bytes left from previous reads. */
+ size_t newline_free; /* Number of left bytes that are known
to be newline-free. */
};
struct keyfield
{
- int sword; /* Zero-origin 'word' to start at. */
- int schar; /* Additional characters to skip. */
+ size_t sword; /* Zero-origin 'word' to start at. */
+ size_t schar; /* Additional characters to skip. */
int skipsblanks; /* Skip leading white space at start. */
- int eword; /* Zero-origin first word after field. */
- int echar; /* Additional characters in field. */
+ size_t eword; /* Zero-origin first word after field. */
+ size_t echar; /* Additional characters in field. */
int skipeblanks; /* Skip trailing white space at finish. */
int *ignore; /* Boolean array of characters to ignore. */
char *translate; /* Translation applied to characters. */
/* Initial buffer size for in-core sorting. The buffer will grow only
if a line longer than this is seen. */
#define SORTALLOC (8 * 1024 * 1024)
-static int const sortalloc = SORTALLOC;
+static size_t const sortalloc = SORTALLOC;
/* Initial buffer size for in core merge buffers. Bear in mind that
up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
-static int const mergealloc = SORTALLOC / NMERGE / 2;
+static size_t const mergealloc = SORTALLOC / NMERGE / 2;
/* Guess of average line length. */
-static int const linelength = 30;
+static size_t const linelength = 30;
/* Maximum number of elements for the array(s) of struct line's, in bytes. */
#define LINEALLOC (SORTALLOC / 2)
/* Initialize BUF, allocating ALLOC bytes initially. */
static void
-initbuf (struct buffer *buf, int alloc)
+initbuf (struct buffer *buf, size_t alloc)
{
buf->alloc = alloc;
buf->buf = xmalloc (buf->alloc);
file wasn't terminated by a newline, supply one. Return a count
of bytes buffered. */
-static int
+static size_t
fillbuf (struct buffer *buf, FILE *fp)
{
if (buf->used != buf->left)
while (!feof (fp))
{
- int cc;
+ size_t cc;
if (buf->used == buf->alloc)
{
buf->alloc *= 2;
+ if (buf->alloc < buf->used)
+ xalloc_die ();
buf->buf = xrealloc (buf->buf, buf->alloc);
}
cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
if (buf->newline_free != buf->used)
return buf->used;
buf->alloc *= 2;
+ if (buf->alloc < buf->used)
+ xalloc_die ();
buf->buf = xrealloc (buf->buf, buf->alloc);
}
buf->buf[buf->used++] = eolchar;
for, ever. */
static void
-initlines (struct lines *lines, int alloc, int limit)
+initlines (struct lines *lines, size_t alloc, size_t limit)
{
lines->alloc = alloc;
+ if (SIZE_T_MAX / sizeof (struct line) < alloc)
+ xalloc_die ();
lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
lines->used = 0;
lines->limit = limit;
begfield (const struct line *line, const struct keyfield *key)
{
register char *ptr = line->text, *lim = ptr + line->length - 1;
- register int sword = key->sword, schar = key->schar;
+ register size_t sword = key->sword;
+ register size_t schar = key->schar;
if (tab)
while (ptr < lim && sword--)
while (ptr < lim && blanks[UCHAR (*ptr)])
++ptr;
- if (ptr + schar <= lim)
+ if (schar < lim - ptr)
ptr += schar;
else
ptr = lim;
limfield (const struct line *line, const struct keyfield *key)
{
register char *ptr = line->text, *lim = ptr + line->length - 1;
- register int eword = key->eword, echar = key->echar;
+ register size_t eword = key->eword, echar = key->echar;
/* Note: from the POSIX spec:
The leading field separator itself is included in
{
while (ptr < lim && *ptr != tab)
++ptr;
- if (ptr < lim && (eword || echar > 0))
+ if (ptr < lim && (eword | echar))
++ptr;
}
else
++ptr;
/* Advance PTR by ECHAR (if possible), but no further than LIM. */
- if (ptr + echar <= lim)
+ if (echar < lim - ptr)
ptr += echar;
else
ptr = lim;
if (lines->used == lines->alloc)
{
lines->alloc *= 2;
+ if (SIZE_T_MAX / sizeof (struct line) < lines->alloc)
+ xalloc_die ();
lines->lines = (struct line *)
xrealloc ((char *) lines->lines,
lines->alloc * sizeof (struct line));
if (key)
{
/* Precompute the position of the first key for efficiency. */
- line->keylim = 0 <= key->eword ? limfield (line, key) : ptr;
+ line->keylim = (key->eword == -1 ? ptr : limfield (line, key));
- if (key->sword >= 0)
+ if (key->sword != -1)
line->keybeg = begfield (line, key);
else
{
static int
numcompare (register const char *a, register const char *b)
{
- register int tmpa, tmpb, loga, logb, tmp;
+ register int tmpa, tmpb, tmp;
+ register size_t loga, logb;
tmpa = *a;
tmpb = *b;
tmpb = *++b;
while (IS_THOUSANDS_SEP (tmpb));
- if (logb - loga != 0)
- return logb - loga;
+ if (loga != logb)
+ return loga < logb ? 1 : -1;
if (!loga)
return 0;
tmpb = *++b;
while (IS_THOUSANDS_SEP (tmpb));
- if (loga - logb != 0)
- return loga - logb;
+ if (loga != logb)
+ return loga < logb ? -1 : 1;
if (!loga)
return 0;
Return 0 if the name in S is not recognized. */
static int
-getmonth (const char *s, int len)
+getmonth (const char *s, size_t len)
{
char *month;
- register int i, lo = 0, hi = MONTHS_PER_YEAR, result;
+ register size_t i;
+ register int lo = 0, hi = MONTHS_PER_YEAR, result;
while (len > 0 && blanks[UCHAR (*s)])
{
register char *lima = a->keylim;
register char *limb = b->keylim;
- int diff, lena, lenb;
+ int diff;
+ size_t lena, lenb;
for (;;)
{
register int *ignore = key->ignore;
/* Find the lengths. */
- lena = lima - texta, lenb = limb - textb;
- if (lena < 0)
- lena = 0;
- if (lenb < 0)
- lenb = 0;
+ lena = lima <= texta ? 0 : lima - texta;
+ lenb = limb <= textb ? 0 : limb - textb;
if (key->skipeblanks)
{
{
char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
char *copy_b = copy_a + lena + 1;
- int new_len_a, new_len_b, i;
+ size_t new_len_a, new_len_b, i;
/* Ignore and/or translate chars before comparing. */
for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++)
diff = memcoll (copy_a, new_len_a, copy_b, new_len_b);
}
else if (lena == 0)
- diff = -lenb;
+ diff = - (lenb != 0);
else if (lenb == 0)
goto greater;
else
CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
}
else if (lena == 0)
- diff = -lenb;
+ diff = - (lenb != 0);
else if (lenb == 0)
goto greater;
else
if (diff)
goto not_equal;
}
- diff = lena - lenb;
+ diff = lena < lenb ? -1 : lena != lenb;
}
if (diff)
break;
/* Find the beginning and limit of the next field. */
- if (key->eword >= 0)
+ if (key->eword != -1)
lima = limfield (a, key), limb = limfield (b, key);
else
lima = a->text + a->length - 1, limb = b->text + b->length - 1;
- if (key->sword >= 0)
+ if (key->sword != -1)
texta = begfield (a, key), textb = begfield (b, key);
else
{
static int
compare (register const struct line *a, register const struct line *b)
{
- int diff, alen, blen;
+ int diff;
+ size_t alen, blen;
/* First try to compare on the specified keys (if any).
The only two cases with no key at all are unadorned sort,
alen = a->length - 1, blen = b->length - 1;
if (alen == 0)
- diff = - blen;
+ diff = - (blen != 0);
else if (blen == 0)
- diff = alen;
+ diff = alen != 0;
#ifdef ENABLE_NLS
else if (hard_LC_COLLATE)
diff = memcoll (a->text, alen, b->text, blen);
#endif
else if (! (diff = memcmp (a->text, b->text, min (alen, blen))))
- diff = alen - blen;
+ diff = alen < blen ? -1 : alen != blen;
return reverse ? -diff : diff;
}
/* Check that the lines read from the given FP come in order. Print a
diagnostic (FILE_NAME, line number, contents of line) to stderr and return
- the line number of the first out-of-order line (counting from 1) if they
- are not in order. Otherwise, print no diagnostic and return zero. */
+ one if they are not in order. Otherwise, print no diagnostic
+ and return zero. */
static int
checkfp (FILE *fp, const char *file_name)
struct buffer buf; /* Input buffer. */
struct lines lines; /* Lines scanned from the buffer. */
struct line temp; /* Copy of previous line. */
- int cc; /* Character count. */
- int alloc;
- int line_number = 1;
+ size_t cc; /* Character count. */
+ size_t alloc;
+ uintmax_t line_number = 1;
struct line *disorder_line IF_LINT (= NULL);
- int disorder_line_number = 0;
+ uintmax_t disorder_line_number = 0;
struct keyfield *key = keylist;
initbuf (&buf, mergealloc);
{
struct line *prev_line; /* Pointer to previous line. */
int cmp; /* Result of calling compare. */
- int i;
+ size_t i;
/* Compare each line in the buffer with its successor. */
- for (i = 0; i < lines.used - 1; ++i)
+ for (i = 1; i < lines.used; ++i)
{
- cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
+ cmp = compare (&lines.lines[i - 1], &lines.lines[i]);
if ((unique && cmp >= 0) || (cmp > 0))
{
- disorder_line = &lines.lines[i + 1];
- disorder_line_number = line_number + i + 1;
+ disorder_line = &lines.lines[i];
+ disorder_line_number = line_number + i;
goto finish;
}
}
prev_line = lines.lines + (lines.used - 1);
if (alloc < prev_line->length)
{
- while ((alloc *= 2) < prev_line->length)
- continue;
+ do
+ {
+ if (alloc * 2 < alloc)
+ xalloc_die ();
+ alloc *= 2;
+ }
+ while (alloc < prev_line->length);
+
temp.text = xrealloc (temp.text, alloc);
}
memcpy (temp.text, prev_line->text, prev_line->length);
if (disorder_line_number)
{
- fprintf (stderr, _("%s: %s:%d: disorder: "), program_name, file_name,
- disorder_line_number);
+ char buf[LONGEST_HUMAN_READABLE + 1];
+ fprintf (stderr, _("%s: %s:%s: disorder: "), program_name, file_name,
+ human_readable (disorder_line_number, buf, 1, 1));
write_bytes (disorder_line->text, disorder_line->length, stderr,
_("standard error"));
}
free (buf.buf);
free ((char *) lines.lines);
free (temp.text);
- return disorder_line_number;
+ return disorder_line_number != 0;
}
/* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
struct line saved; /* Saved line storage for unique check. */
struct line const *savedline IF_LINT (= NULL);
/* &saved if there is a saved line. */
- int savealloc IF_LINT (= 0); /* Size allocated for the saved line. */
- int cur[NMERGE]; /* Current line in each line table. */
+ size_t savealloc IF_LINT (= 0); /* Size allocated for the saved line. */
+ size_t cur[NMERGE]; /* Current line in each line table. */
int ord[NMERGE]; /* Table representing a permutation of fps,
such that lines[ord[0]].lines[cur[ord[0]]]
is the smallest line and will be next
/* Sort the array LINES with NLINES members, using TEMP for temporary space. */
static void
-sortlines (struct line *lines, int nlines, struct line *temp)
+sortlines (struct line *lines, size_t nlines, struct line *temp)
{
register struct line *lo, *hi, *t;
- register int nlo, nhi;
+ register size_t nlo, nhi;
if (nlines == 2)
{
for (i = 0; i < nfiles; ++i)
{
fp = xfopen (files[i], "r");
- if (checkfp (fp, files[i]))
- {
- ++disorders;
- }
+ disorders += checkfp (fp, files[i]);
}
return disorders;
}
{
struct buffer buf;
struct lines lines;
- struct line *tmp;
- int i, ntmp;
+ struct line *tmp = NULL;
+ size_t ntmp = 0;
FILE *fp, *tfp;
struct tempnode *node;
int n_temp_files = 0;
initbuf (&buf, sortalloc);
initlines (&lines, sortalloc / linelength + 1,
LINEALLOC / sizeof (struct line));
- ntmp = lines.alloc;
- tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
while (nfiles--)
{
fp = xfopen (*files++, "r");
while (fillbuf (&buf, fp))
{
+ size_t i;
+
if (nfiles && buf.used != buf.alloc && feof (fp))
{
/* End of file, but there is more input and buffer room.
findlines (&buf, &lines);
if (ntmp < lines.used)
{
- while ((ntmp *= 2) < lines.used)
- continue;
+ if (ntmp == 0)
+ ntmp = lines.used;
+ else
+ while ((ntmp *= 2) < lines.used)
+ continue;
+ if (SIZE_T_MAX / sizeof (struct line) < ntmp)
+ xalloc_die ();
tmp = (struct line *)
xrealloc ((char *) tmp, ntmp * sizeof (struct line));
}
if (n_temp_files)
{
+ int i = n_temp_files;
tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
- i = n_temp_files;
for (node = temphead.next; i > 0; node = node->next)
tempfiles[--i] = node->name;
merge (tempfiles, n_temp_files, ofp, output_file);
error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
}
+/* Parse the leading integer in STRING and store the resulting value
+ (which must fit into size_t) into *VAL. Return the address of the
+ suffix after the integer. */
+static char const *
+parse_field_count (char const *string, size_t *val)
+{
+ /* '@' can't possibly be a valid suffix; return &bad_suffix so that
+ the caller will eventually invoke badfieldspec. */
+ static char const invalid_suffix = '@';
+ char *suffix;
+ uintmax_t n;
+
+ switch (xstrtoumax (string, &suffix, 10, &n, ""))
+ {
+ case LONGINT_OK:
+ case LONGINT_INVALID_SUFFIX_CHAR:
+ *val = n;
+ if (*val == n)
+ break;
+ /* Fall through. */
+ case LONGINT_OVERFLOW:
+ error (0, 0, _("count `%.*s' too large"),
+ (int) (suffix - string), string);
+ return &invalid_suffix;
+
+ case LONGINT_INVALID:
+ error (0, 0, _("invalid count at start of `%s'"), string);
+ return &invalid_suffix;
+ }
+
+ return suffix;
+}
+
/* Handle interrupts and hangups. */
static void
main (int argc, char **argv)
{
struct keyfield *key = NULL, gkey;
- char *s;
- int i, t, t2;
+ char const *s;
+ int i;
int checkonly = 0, mergeonly = 0, nfiles = 0;
- char *minus = "-", *outfile = minus, **files, *tmp;
+ char *minus = "-", **files, *tmp;
+ char const *outfile = minus;
FILE *ofp;
#ifdef SA_NOCLDSTOP
struct sigaction oldact, newact;
s = argv[i] + 1;
if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
badfieldspec (argv[i]);
- for (t = 0; ISDIGIT (*s); ++s)
- t = 10 * t + *s - '0';
- t2 = 0;
+ s = parse_field_count (s, &key->sword);
if (*s == '.')
- for (++s; ISDIGIT (*s); ++s)
- t2 = 10 * t2 + *s - '0';
- if (t2 || t)
- {
- key->sword = t;
- key->schar = t2;
- }
- else
+ s = parse_field_count (s + 1, &key->schar);
+ if (! (key->sword | key->schar))
key->sword = -1;
s = set_ordering (s, key, bl_start);
if (*s)
key specifiers,\nthe +POS specifier must come first"));
usage (SORT_FAILURE);
}
- for (t = 0; ISDIGIT (*s); ++s)
- t = t * 10 + *s - '0';
- t2 = 0;
+ s = parse_field_count (s, &key->eword);
if (*s == '.')
- for (++s; ISDIGIT (*s); ++s)
- t2 = t2 * 10 + *s - '0';
- key->eword = t;
- key->echar = t2;
+ s = parse_field_count (s + 1, &key->echar);
s = set_ordering (s, key, bl_end);
if (*s)
badfieldspec (argv[i]);
/* Get POS1. */
if (!ISDIGIT (*s))
badfieldspec (argv[i]);
- for (t = 0; ISDIGIT (*s); ++s)
- t = 10 * t + *s - '0';
- if (t == 0)
+ s = parse_field_count (s, &key->sword);
+ if (! key->sword--)
{
/* Provoke with `sort -k0' */
error (0, 0, _("the starting field number argument \
to the `-k' option must be positive"));
badfieldspec (argv[i]);
}
- --t;
- t2 = 0;
if (*s == '.')
{
if (!ISDIGIT (s[1]))
lacks following character offset"));
badfieldspec (argv[i]);
}
- for (++s; ISDIGIT (*s); ++s)
- t2 = 10 * t2 + *s - '0';
- if (t2 == 0)
+ s = parse_field_count (s + 1, &key->schar);
+ if (! key->schar--)
{
/* Provoke with `sort -k1.0' */
error (0, 0, _("starting field character offset \
argument to the `-k' option\nmust be positive"));
badfieldspec (argv[i]);
}
- --t2;
}
- if (t2 || t)
- {
- key->sword = t;
- key->schar = t2;
- }
- else
+ if (! (key->sword | key->schar))
key->sword = -1;
s = set_ordering (s, key, bl_start);
if (*s == 0)
badfieldspec (argv[i]);
}
/* Get POS2. */
- for (t = 0; ISDIGIT (*s); ++s)
- t = t * 10 + *s - '0';
- if (t == 0)
+ s = parse_field_count (s, &key->eword);
+ if (! key->eword--)
{
/* Provoke with `sort -k1,0' */
error (0, 0, _("ending field number argument \
to the `-k' option must be positive"));
badfieldspec (argv[i]);
}
- --t;
- t2 = 0;
if (*s == '.')
{
if (!ISDIGIT (s[1]))
but lacks following character offset"));
badfieldspec (argv[i]);
}
- for (++s; ISDIGIT (*s); ++s)
- t2 = t2 * 10 + *s - '0';
+ s = parse_field_count (s + 1, &key->echar);
}
else
{
/* `-k 2,3' is equivalent to `+1 -3'. */
- ++t;
+ key->eword++;
}
- key->eword = t;
- key->echar = t2;
s = set_ordering (s, key, bl_end);
if (*s)
badfieldspec (argv[i]);