src/fmt.c

   1 /* GNU fmt -- simple text formatter.
   2    Copyright (C) 1994-2003 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
  19
  20 #include <config.h>
  21 #include <stdio.h>
  22 #include <sys/types.h>
  23 #include <getopt.h>
  24
  25 /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
  26    it to be a type get syntax errors for the variable declaration below.  */
  27 #define word unused_word_type
  28
  29 #include "system.h"
  30 #include "error.h"
  31 #include "quote.h"
  32 #include "xstrtol.h"
  33
  34 /* The official name of this program (e.g., no `g' prefix).  */
  35 #define PROGRAM_NAME "fmt"
  36
  37 #define AUTHORS "Ross Paterson"
  38
  39 /* The following parameters represent the program's idea of what is
  40    "best".  Adjust to taste, subject to the caveats given.  */
  41
  42 /* Default longest permitted line length (max_width).  */
  43 #define WIDTH   75
  44
  45 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
  46    room for optimization.  */
  47 #define LEEWAY  7
  48
  49 /* The default secondary indent of tagged paragraph used for unindented
  50    one-line paragraphs not preceded by any multi-line paragraphs.  */
  51 #define DEF_INDENT 3
  52
  53 /* Costs and bonuses are expressed as the equivalent departure from the
  54    optimal line length, multiplied by 10.  e.g. assigning something a
  55    cost of 50 means that it is as bad as a line 5 characters too short
  56    or too long.  The definition of SHORT_COST(n) should not be changed.
  57    However, EQUIV(n) may need tuning.  */
  58
  59 typedef long COST;
  60
  61 #define MAXCOST TYPE_MAXIMUM (COST)
  62
  63 #define SQR(n)          ((n) * (n))
  64 #define EQUIV(n)        SQR ((COST) (n))
  65
  66 /* Cost of a filled line n chars longer or shorter than best_width.  */
  67 #define SHORT_COST(n)   EQUIV ((n) * 10)
  68
  69 /* Cost of the difference between adjacent filled lines.  */
  70 #define RAGGED_COST(n)  (SHORT_COST (n) / 2)
  71
  72 /* Basic cost per line.  */
  73 #define LINE_COST       EQUIV (70)
  74
  75 /* Cost of breaking a line after the first word of a sentence, where
  76    the length of the word is N.  */
  77 #define WIDOW_COST(n)   (EQUIV (200) / ((n) + 2))
  78
  79 /* Cost of breaking a line before the last word of a sentence, where
  80    the length of the word is N.  */
  81 #define ORPHAN_COST(n)  (EQUIV (150) / ((n) + 2))
  82
  83 /* Bonus for breaking a line at the end of a sentence.  */
  84 #define SENTENCE_BONUS  EQUIV (50)
  85
  86 /* Cost of breaking a line after a period not marking end of a sentence.
  87    With the definition of sentence we are using (borrowed from emacs, see
  88    get_line()) such a break would then look like a sentence break.  Hence
  89    we assign a very high cost -- it should be avoided unless things are
  90    really bad.  */
  91 #define NOBREAK_COST    EQUIV (600)
  92
  93 /* Bonus for breaking a line before open parenthesis.  */
  94 #define PAREN_BONUS     EQUIV (40)
  95
  96 /* Bonus for breaking a line after other punctuation.  */
  97 #define PUNCT_BONUS     EQUIV(40)
  98
  99 /* Credit for breaking a long paragraph one line later.  */
 100 #define LINE_CREDIT     EQUIV(3)
 101
 102 /* Size of paragraph buffer, in words and characters.  Longer paragraphs
 103    are handled neatly (cf. flush_paragraph()), so there's little to gain
 104    by making these larger.  */
 105 #define MAXWORDS        1000
 106 #define MAXCHARS        5000
 107
 108 /* Extra ctype(3)-style macros.  */
 109
 110 #define isopen(c)       (strchr ("([`'\"", c) != NULL)
 111 #define isclose(c)      (strchr (")]'\"", c) != NULL)
 112 #define isperiod(c)     (strchr (".?!", c) != NULL)
 113
 114 /* Size of a tab stop, for expansion on input and re-introduction on
 115    output.  */
 116 #define TABWIDTH        8
 117
 118 /* Miscellaneous definitions.  */
 119
 120 #undef TRUE
 121 #define TRUE true
 122 #undef FALSE
 123 #define FALSE false
 124
 125 /* Word descriptor structure.  */
 126
 127 typedef struct Word WORD;
 128
 129 struct Word
 130   {
 131
 132     /* Static attributes determined during input.  */
 133
 134     const char *text;           /* the text of the word */
 135     int length;                 /* length of this word */
 136     int space;                  /* the size of the following space */
 137     unsigned int paren:1;       /* starts with open paren */
 138     unsigned int period:1;      /* ends in [.?!])* */
 139     unsigned int punct:1;       /* ends in punctuation */
 140     unsigned int final:1;       /* end of sentence */
 141
 142     /* The remaining fields are computed during the optimization.  */
 143
 144     int line_length;            /* length of the best line starting here */
 145     COST best_cost;             /* cost of best paragraph starting here */
 146     WORD *next_break;           /* break which achieves best_cost */
 147   };
 148
 149 /* Forward declarations.  */
 150
 151 static void set_prefix (char *p);
 152 static void fmt (FILE *f);
 153 static bool get_paragraph (FILE *f);
 154 static int get_line (FILE *f, int c);
 155 static int get_prefix (FILE *f);
 156 static int get_space (FILE *f, int c);
 157 static int copy_rest (FILE *f, int c);
 158 static bool same_para (int c);
 159 static void flush_paragraph (void);
 160 static void fmt_paragraph (void);
 161 static void check_punctuation (WORD *w);
 162 static COST base_cost (WORD *this);
 163 static COST line_cost (WORD *next, int len);
 164 static void put_paragraph (WORD *finish);
 165 static void put_line (WORD *w, int indent);
 166 static void put_word (WORD *w);
 167 static void put_space (int space);
 168
 169 /* The name this program was run with.  */
 170 const char *program_name;
 171
 172 /* Option values.  */
 173
 174 /* If TRUE, first 2 lines may have different indent (default FALSE).  */
 175 static bool crown;
 176
 177 /* If TRUE, first 2 lines _must_ have different indent (default FALSE).  */
 178 static bool tagged;
 179
 180 /* If TRUE, each line is a paragraph on its own (default FALSE).  */
 181 static bool split;
 182
 183 /* If TRUE, don't preserve inter-word spacing (default FALSE).  */
 184 static bool uniform;
 185
 186 /* Prefix minus leading and trailing spaces (default "").  */
 187 static const char *prefix;
 188
 189 /* User-supplied maximum line width (default WIDTH).  The only output
 190    lines longer than this will each comprise a single word.  */
 191 static int max_width;
 192
 193 /* Values derived from the option values.  */
 194
 195 /* The length of prefix minus leading space.  */
 196 static int prefix_full_length;
 197
 198 /* The length of the leading space trimmed from the prefix.  */
 199 static int prefix_lead_space;
 200
 201 /* The length of prefix minus leading and trailing space.  */
 202 static int prefix_length;
 203
 204 /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
 205 static int best_width;
 206
 207 /* Dynamic variables.  */
 208
 209 /* Start column of the character most recently read from the input file.  */
 210 static int in_column;
 211
 212 /* Start column of the next character to be written to stdout.  */
 213 static int out_column;
 214
 215 /* Space for the paragraph text -- longer paragraphs are handled neatly
 216    (cf. flush_paragraph()).  */
 217 static char parabuf[MAXCHARS];
 218
 219 /* A pointer into parabuf, indicating the first unused character position.  */
 220 static char *wptr;
 221
 222 /* The words of a paragraph -- longer paragraphs are handled neatly
 223    (cf. flush_paragraph()).  */
 224 static WORD word[MAXWORDS];
 225
 226 /* A pointer into the above word array, indicating the first position
 227    after the last complete word.  Sometimes it will point at an incomplete
 228    word.  */
 229 static WORD *word_limit;
 230
 231 /* If TRUE, current input file contains tab characters, and so tabs can be
 232    used for white space on output.  */
 233 static bool tabs;
 234
 235 /* Space before trimmed prefix on each line of the current paragraph.  */
 236 static int prefix_indent;
 237
 238 /* Indentation of the first line of the current paragraph.  */
 239 static int first_indent;
 240
 241 /* Indentation of other lines of the current paragraph */
 242 static int other_indent;
 243
 244 /* To detect the end of a paragraph, we need to look ahead to the first
 245    non-blank character after the prefix on the next line, or the first
 246    character on the following line that failed to match the prefix.
 247    We can reconstruct the lookahead from that character (next_char), its
 248    position on the line (in_column) and the amount of space before the
 249    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
 250
 251 /* The last character read from the input file.  */
 252 static int next_char;
 253
 254 /* The space before the trimmed prefix (or part of it) on the next line
 255    after the current paragraph.  */
 256 static int next_prefix_indent;
 257
 258 /* If nonzero, the length of the last line output in the current
 259    paragraph, used to charge for raggedness at the split point for long
 260    paragraphs chosen by fmt_paragraph().  */
 261 static int last_line_length;
 262
 263 void
 264 usage (int status)
 265 {
 266   if (status != 0)
 267     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 268              program_name);
 269   else
 270     {
 271       printf (_("Usage: %s [-DIGITS] [OPTION]... [FILE]...\n"), program_name);
 272       fputs (_("\
 273 Reformat each paragraph in the FILE(s), writing to standard output.\n\
 274 If no FILE or if FILE is `-', read standard input.\n\
 275 \n\
 276 "), stdout);
 277       fputs (_("\
 278 Mandatory arguments to long options are mandatory for short options too.\n\
 279 "), stdout);
 280       fputs (_("\
 281   -c, --crown-margin        preserve indentation of first two lines\n\
 282   -p, --prefix=STRING       combine only lines having STRING as prefix\n\
 283   -s, --split-only          split long lines, but do not refill\n\
 284 "),
 285              stdout);
 286       fputs (_("\
 287   -t, --tagged-paragraph    indentation of first line different from second\n\
 288   -u, --uniform-spacing     one space between words, two after sentences\n\
 289   -w, --width=WIDTH         maximum line width (default of 75 columns)\n\
 290 "), stdout);
 291       fputs (HELP_OPTION_DESCRIPTION, stdout);
 292       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 293       fputs (_("\
 294 \n\
 295 With no FILE, or when FILE is -, read standard input.\n"),
 296              stdout);
 297       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 298     }
 299   exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
 300 }
 301
 302 /* Decode options and launch execution.  */
 303
 304 static const struct option long_options[] =
 305 {
 306   {"crown-margin", no_argument, NULL, 'c'},
 307   {"prefix", required_argument, NULL, 'p'},
 308   {"split-only", no_argument, NULL, 's'},
 309   {"tagged-paragraph", no_argument, NULL, 't'},
 310   {"uniform-spacing", no_argument, NULL, 'u'},
 311   {"width", required_argument, NULL, 'w'},
 312   {GETOPT_HELP_OPTION_DECL},
 313   {GETOPT_VERSION_OPTION_DECL},
 314   {0, 0, 0, 0},
 315 };
 316
 317 int
 318 main (register int argc, register char **argv)
 319 {
 320   int optchar;
 321   int fail;
 322
 323   initialize_main (&argc, &argv);
 324   program_name = argv[0];
 325   setlocale (LC_ALL, "");
 326   bindtextdomain (PACKAGE, LOCALEDIR);
 327   textdomain (PACKAGE);
 328
 329   atexit (close_stdout);
 330
 331   crown = tagged = split = uniform = FALSE;
 332   max_width = WIDTH;
 333   prefix = "";
 334   prefix_length = prefix_lead_space = prefix_full_length = 0;
 335
 336   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
 337     {
 338       const char *s = argv[1] + 1;
 339       max_width = 0;
 340       /* Old option syntax; a dash followed by one or more digits.
 341          Move past the number. */
 342       for (; ISDIGIT (*s); ++s)
 343         {
 344           int old_max = max_width;
 345           max_width = max_width * 10 + *s - '0';
 346           if (INT_MAX / 10 < old_max || max_width < old_max)
 347             break;
 348         }
 349
 350       if (*s)
 351         error (EXIT_FAILURE, 0, _("invalid width option: %s"),
 352                quote (argv[1]));
 353
 354       /* Make the options we just parsed invisible to getopt. */
 355       argv[1] = argv[0];
 356       argv++;
 357       argc--;
 358     }
 359
 360   while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
 361                                  long_options, NULL))
 362          != -1)
 363     switch (optchar)
 364       {
 365       default:
 366         if (ISDIGIT (optchar))
 367           error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
 368  only when it is the first\noption; use -w N instead"),
 369                  optchar);
 370         usage (EXIT_FAILURE);
 371
 372       case 'c':
 373         crown = TRUE;
 374         break;
 375
 376       case 's':
 377         split = TRUE;
 378         break;
 379
 380       case 't':
 381         tagged = TRUE;
 382         break;
 383
 384       case 'u':
 385         uniform = TRUE;
 386         break;
 387
 388       case 'w':
 389         {
 390           long int tmp_long;
 391           if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
 392               || tmp_long <= 0 || tmp_long > INT_MAX)
 393             error (EXIT_FAILURE, 0, _("invalid width: %s"), quote (optarg));
 394           max_width = (int) tmp_long;
 395         }
 396         break;
 397
 398       case 'p':
 399         set_prefix (optarg);
 400         break;
 401
 402       case_GETOPT_HELP_CHAR;
 403
 404       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 405
 406       }
 407
 408   best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
 409
 410   fail = 0;
 411   if (optind == argc)
 412     fmt (stdin);
 413   else
 414     {
 415       for (; optind < argc; optind++)
 416         {
 417           char *file = argv[optind];
 418           if (STREQ (file, "-"))
 419             fmt (stdin);
 420           else
 421             {
 422               FILE *in_stream;
 423               in_stream = fopen (file, "r");
 424               if (in_stream != NULL)
 425                 {
 426                   fmt (in_stream);
 427                   if (fclose (in_stream) == EOF)
 428                     {
 429                       error (0, errno, "%s", file);
 430                       fail = 1;
 431                     }
 432                 }
 433               else
 434                 {
 435                   error (0, errno, _("cannot open %s for reading"),
 436                          quote (file));
 437                   fail = 1;
 438                 }
 439             }
 440         }
 441     }
 442
 443   exit (fail ? EXIT_FAILURE : EXIT_SUCCESS);
 444 }
 445
 446 /* Trim space from the front and back of the string P, yielding the prefix,
 447    and record the lengths of the prefix and the space trimmed.  */
 448
 449 static void
 450 set_prefix (register char *p)
 451 {
 452   register char *s;
 453
 454   prefix_lead_space = 0;
 455   while (*p == ' ')
 456     {
 457       prefix_lead_space++;
 458       p++;
 459     }
 460   prefix = p;
 461   prefix_full_length = strlen (p);
 462   s = p + prefix_full_length;
 463   while (s > p && s[-1] == ' ')
 464     s--;
 465   *s = '\0';
 466   prefix_length = s - p;
 467 }
 468
 469 /* read file F and send formatted output to stdout.  */
 470
 471 static void
 472 fmt (FILE *f)
 473 {
 474   tabs = FALSE;
 475   other_indent = 0;
 476   next_char = get_prefix (f);
 477   while (get_paragraph (f))
 478     {
 479       fmt_paragraph ();
 480       put_paragraph (word_limit);
 481     }
 482 }
 483
 484 /* Set the global variable `other_indent' according to SAME_PARAGRAPH
 485    and other global variables.  */
 486
 487 static void
 488 set_other_indent (bool same_paragraph)
 489 {
 490   if (split)
 491     other_indent = first_indent;
 492   else if (crown)
 493     {
 494       other_indent = (same_paragraph ? in_column : first_indent);
 495     }
 496   else if (tagged)
 497     {
 498       if (same_paragraph && in_column != first_indent)
 499         {
 500           other_indent = in_column;
 501         }
 502
 503       /* Only one line: use the secondary indent from last time if it
 504          splits, or 0 if there have been no multi-line paragraphs in the
 505          input so far.  But if these rules make the two indents the same,
 506          pick a new secondary indent.  */
 507
 508       else if (other_indent == first_indent)
 509         other_indent = first_indent == 0 ? DEF_INDENT : 0;
 510     }
 511   else
 512     {
 513       other_indent = first_indent;
 514     }
 515 }
 516
 517 /* Read a paragraph from input file F.  A paragraph consists of a
 518    maximal number of non-blank (excluding any prefix) lines subject to:
 519    * In split mode, a paragraph is a single non-blank line.
 520    * In crown mode, the second and subsequent lines must have the
 521    same indentation, but possibly different from the indent of the
 522    first line.
 523    * Tagged mode is similar, but the first and second lines must have
 524    different indentations.
 525    * Otherwise, all lines of a paragraph must have the same indent.
 526    If a prefix is in effect, it must be present at the same indent for
 527    each line in the paragraph.
 528
 529    Return FALSE if end-of-file was encountered before the start of a
 530    paragraph, else TRUE.  */
 531
 532 static bool
 533 get_paragraph (FILE *f)
 534 {
 535   register int c;
 536
 537   last_line_length = 0;
 538   c = next_char;
 539
 540   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
 541
 542   while (c == '\n' || c == EOF
 543          || next_prefix_indent < prefix_lead_space
 544          || in_column < next_prefix_indent + prefix_full_length)
 545     {
 546       c = copy_rest (f, c);
 547       if (c == EOF)
 548         {
 549           next_char = EOF;
 550           return FALSE;
 551         }
 552       putchar ('\n');
 553       c = get_prefix (f);
 554     }
 555
 556   /* Got a suitable first line for a paragraph.  */
 557
 558   prefix_indent = next_prefix_indent;
 559   first_indent = in_column;
 560   wptr = parabuf;
 561   word_limit = word;
 562   c = get_line (f, c);
 563   set_other_indent (same_para (c));
 564
 565   /* Read rest of paragraph (unless split is specified).  */
 566
 567   if (split)
 568     {
 569       /* empty */
 570     }
 571   else if (crown)
 572     {
 573       if (same_para (c))
 574         {
 575           do
 576             {                   /* for each line till the end of the para */
 577               c = get_line (f, c);
 578             }
 579           while (same_para (c) && in_column == other_indent);
 580         }
 581     }
 582   else if (tagged)
 583     {
 584       if (same_para (c) && in_column != first_indent)
 585         {
 586           do
 587             {                   /* for each line till the end of the para */
 588               c = get_line (f, c);
 589             }
 590           while (same_para (c) && in_column == other_indent);
 591         }
 592     }
 593   else
 594     {
 595       while (same_para (c) && in_column == other_indent)
 596         c = get_line (f, c);
 597     }
 598   (word_limit - 1)->period = (word_limit - 1)->final = TRUE;
 599   next_char = c;
 600   return TRUE;
 601 }
 602
 603 /* Copy to the output a line that failed to match the prefix, or that
 604    was blank after the prefix.  In the former case, C is the character
 605    that failed to match the prefix.  In the latter, C is \n or EOF.
 606    Return the character (\n or EOF) ending the line.  */
 607
 608 static int
 609 copy_rest (FILE *f, register int c)
 610 {
 611   register const char *s;
 612
 613   out_column = 0;
 614   if (in_column > next_prefix_indent && c != '\n' && c != EOF)
 615     {
 616       put_space (next_prefix_indent);
 617       for (s = prefix; out_column != in_column && *s; out_column++)
 618         putchar (*(unsigned char *)s++);
 619       put_space (in_column - out_column);
 620     }
 621   while (c != '\n' && c != EOF)
 622     {
 623       putchar (c);
 624       c = getc (f);
 625     }
 626   return c;
 627 }
 628
 629 /* Return TRUE if a line whose first non-blank character after the
 630    prefix (if any) is C could belong to the current paragraph,
 631    otherwise FALSE.  */
 632
 633 static bool
 634 same_para (register int c)
 635 {
 636   return (next_prefix_indent == prefix_indent
 637           && in_column >= next_prefix_indent + prefix_full_length
 638           && c != '\n' && c != EOF);
 639 }
 640
 641 /* Read a line from input file F, given first non-blank character C
 642    after the prefix, and the following indent, and break it into words.
 643    A word is a maximal non-empty string of non-white characters.  A word
 644    ending in [.?!]["')\]]* and followed by end-of-line or at least two
 645    spaces ends a sentence, as in emacs.
 646
 647    Return the first non-blank character of the next line.  */
 648
 649 static int
 650 get_line (FILE *f, register int c)
 651 {
 652   int start;
 653   register char *end_of_parabuf;
 654   register WORD *end_of_word;
 655
 656   end_of_parabuf = &parabuf[MAXCHARS];
 657   end_of_word = &word[MAXWORDS - 2];
 658
 659   do
 660     {                           /* for each word in a line */
 661
 662       /* Scan word.  */
 663
 664       word_limit->text = wptr;
 665       do
 666         {
 667           if (wptr == end_of_parabuf)
 668             {
 669               set_other_indent (true);
 670               flush_paragraph ();
 671             }
 672           *wptr++ = c;
 673           c = getc (f);
 674         }
 675       while (c != EOF && !ISSPACE (c));
 676       in_column += word_limit->length = wptr - word_limit->text;
 677       check_punctuation (word_limit);
 678
 679       /* Scan inter-word space.  */
 680
 681       start = in_column;
 682       c = get_space (f, c);
 683       word_limit->space = in_column - start;
 684       word_limit->final = (c == EOF
 685                            || (word_limit->period
 686                                && (c == '\n' || word_limit->space > 1)));
 687       if (c == '\n' || c == EOF || uniform)
 688         word_limit->space = word_limit->final ? 2 : 1;
 689       if (word_limit == end_of_word)
 690         {
 691           set_other_indent (true);
 692           flush_paragraph ();
 693         }
 694       word_limit++;
 695       if (c == EOF)
 696         return EOF;
 697     }
 698   while (c != '\n');
 699   return get_prefix (f);
 700 }
 701
 702 /* Read a prefix from input file F.  Return either first non-matching
 703    character, or first non-blank character after the prefix.  */
 704
 705 static int
 706 get_prefix (FILE *f)
 707 {
 708   register int c;
 709
 710   in_column = 0;
 711   c = get_space (f, getc (f));
 712   if (prefix_length == 0)
 713     next_prefix_indent = prefix_lead_space < in_column ?
 714       prefix_lead_space : in_column;
 715   else
 716     {
 717       const char *p;
 718       next_prefix_indent = in_column;
 719       for (p = prefix; *p != '\0'; p++)
 720         {
 721           if (c != *(unsigned char *)p)
 722             return c;
 723           in_column++;
 724           c = getc (f);
 725         }
 726       c = get_space (f, c);
 727     }
 728   return c;
 729 }
 730
 731 /* Read blank characters from input file F, starting with C, and keeping
 732    in_column up-to-date.  Return first non-blank character.  */
 733
 734 static int
 735 get_space (FILE *f, register int c)
 736 {
 737   for (;;)
 738     {
 739       if (c == ' ')
 740         in_column++;
 741       else if (c == '\t')
 742         {
 743           tabs = TRUE;
 744           in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
 745         }
 746       else
 747         return c;
 748       c = getc (f);
 749     }
 750 }
 751
 752 /* Set extra fields in word W describing any attached punctuation.  */
 753
 754 static void
 755 check_punctuation (register WORD *w)
 756 {
 757   const unsigned char *start, *finish;
 758
 759   start = (unsigned char *) w->text;
 760   finish = start + (w->length - 1);
 761   w->paren = isopen (*start);
 762   w->punct = ISPUNCT (*finish);
 763   while (isclose (*finish) && finish > start)
 764     finish--;
 765   w->period = isperiod (*finish);
 766 }
 767
 768 /* Flush part of the paragraph to make room.  This function is called on
 769    hitting the limit on the number of words or characters.  */
 770
 771 static void
 772 flush_paragraph (void)
 773 {
 774   WORD *split_point;
 775   register WORD *w;
 776   int shift;
 777   COST best_break;
 778
 779   /* In the special case where it's all one word, just flush it.  */
 780
 781   if (word_limit == word)
 782     {
 783       printf ("%*s", (int) (wptr - parabuf), parabuf);
 784       wptr = parabuf;
 785       return;
 786     }
 787
 788   /* Otherwise:
 789      - format what you have so far as a paragraph,
 790      - find a low-cost line break near the end,
 791      - output to there,
 792      - make that the start of the paragraph.  */
 793
 794   fmt_paragraph ();
 795
 796   /* Choose a good split point.  */
 797
 798   split_point = word_limit;
 799   best_break = MAXCOST;
 800   for (w = word->next_break; w != word_limit; w = w->next_break)
 801     {
 802       if (w->best_cost - w->next_break->best_cost < best_break)
 803         {
 804           split_point = w;
 805           best_break = w->best_cost - w->next_break->best_cost;
 806         }
 807       if (best_break <= MAXCOST - LINE_CREDIT)
 808         best_break += LINE_CREDIT;
 809     }
 810   put_paragraph (split_point);
 811
 812   /* Copy text of words down to start of parabuf -- we use memmove because
 813      the source and target may overlap.  */
 814
 815   memmove (parabuf, split_point->text, (size_t) (wptr - split_point->text));
 816   shift = split_point->text - parabuf;
 817   wptr -= shift;
 818
 819   /* Adjust text pointers.  */
 820
 821   for (w = split_point; w <= word_limit; w++)
 822     w->text -= shift;
 823
 824   /* Copy words from split_point down to word -- we use memmove because
 825      the source and target may overlap.  */
 826
 827   memmove ((char *) word, (char *) split_point,
 828          (word_limit - split_point + 1) * sizeof (WORD));
 829   word_limit -= split_point - word;
 830 }
 831
 832 /* Compute the optimal formatting for the whole paragraph by computing
 833    and remembering the optimal formatting for each suffix from the empty
 834    one to the whole paragraph.  */
 835
 836 static void
 837 fmt_paragraph (void)
 838 {
 839   register WORD *start, *w;
 840   register int len;
 841   register COST wcost, best;
 842   int saved_length;
 843
 844   word_limit->best_cost = 0;
 845   saved_length = word_limit->length;
 846   word_limit->length = max_width;       /* sentinel */
 847
 848   for (start = word_limit - 1; start >= word; start--)
 849     {
 850       best = MAXCOST;
 851       len = start == word ? first_indent : other_indent;
 852
 853       /* At least one word, however long, in the line.  */
 854
 855       w = start;
 856       len += w->length;
 857       do
 858         {
 859           w++;
 860
 861           /* Consider breaking before w.  */
 862
 863           wcost = line_cost (w, len) + w->best_cost;
 864           if (start == word && last_line_length > 0)
 865             wcost += RAGGED_COST (len - last_line_length);
 866           if (wcost < best)
 867             {
 868               best = wcost;
 869               start->next_break = w;
 870               start->line_length = len;
 871             }
 872
 873           /* This is a kludge to keep us from computing `len' as the
 874              sum of the sentinel length and some non-zero number.
 875              Since the sentinel w->length may be INT_MAX, adding
 876              to that would give a negative result.  */
 877           if (w == word_limit)
 878             break;
 879
 880           len += (w - 1)->space + w->length;    /* w > start >= word */
 881         }
 882       while (len < max_width);
 883       start->best_cost = best + base_cost (start);
 884     }
 885
 886   word_limit->length = saved_length;
 887 }
 888
 889 /* Return the constant component of the cost of breaking before the
 890    word THIS.  */
 891
 892 static COST
 893 base_cost (register WORD *this)
 894 {
 895   register COST cost;
 896
 897   cost = LINE_COST;
 898
 899   if (this > word)
 900     {
 901       if ((this - 1)->period)
 902         {
 903           if ((this - 1)->final)
 904             cost -= SENTENCE_BONUS;
 905           else
 906             cost += NOBREAK_COST;
 907         }
 908       else if ((this - 1)->punct)
 909         cost -= PUNCT_BONUS;
 910       else if (this > word + 1 && (this - 2)->final)
 911         cost += WIDOW_COST ((this - 1)->length);
 912     }
 913
 914   if (this->paren)
 915     cost -= PAREN_BONUS;
 916   else if (this->final)
 917     cost += ORPHAN_COST (this->length);
 918
 919   return cost;
 920 }
 921
 922 /* Return the component of the cost of breaking before word NEXT that
 923    depends on LEN, the length of the line beginning there.  */
 924
 925 static COST
 926 line_cost (register WORD *next, register int len)
 927 {
 928   register int n;
 929   register COST cost;
 930
 931   if (next == word_limit)
 932     return 0;
 933   n = best_width - len;
 934   cost = SHORT_COST (n);
 935   if (next->next_break != word_limit)
 936     {
 937       n = len - next->line_length;
 938       cost += RAGGED_COST (n);
 939     }
 940   return cost;
 941 }
 942
 943 /* Output to stdout a paragraph from word up to (but not including)
 944    FINISH, which must be in the next_break chain from word.  */
 945
 946 static void
 947 put_paragraph (register WORD *finish)
 948 {
 949   register WORD *w;
 950
 951   put_line (word, first_indent);
 952   for (w = word->next_break; w != finish; w = w->next_break)
 953     put_line (w, other_indent);
 954 }
 955
 956 /* Output to stdout the line beginning with word W, beginning in column
 957    INDENT, including the prefix (if any).  */
 958
 959 static void
 960 put_line (register WORD *w, int indent)
 961 {
 962   register WORD *endline;
 963
 964   out_column = 0;
 965   put_space (prefix_indent);
 966   fputs (prefix, stdout);
 967   out_column += prefix_length;
 968   put_space (indent - out_column);
 969
 970   endline = w->next_break - 1;
 971   for (; w != endline; w++)
 972     {
 973       put_word (w);
 974       put_space (w->space);
 975     }
 976   put_word (w);
 977   last_line_length = out_column;
 978   putchar ('\n');
 979 }
 980
 981 /* Output to stdout the word W.  */
 982
 983 static void
 984 put_word (register WORD *w)
 985 {
 986   register const char *s;
 987   register int n;
 988
 989   s = w->text;
 990   for (n = w->length; n != 0; n--)
 991     putchar (*s++);
 992   out_column += w->length;
 993 }
 994
 995 /* Output to stdout SPACE spaces, or equivalent tabs.  */
 996
 997 static void
 998 put_space (int space)
 999 {
1000   register int space_target, tab_target;
1001
1002   space_target = out_column + space;
1003   if (tabs)
1004     {
1005       tab_target = space_target / TABWIDTH * TABWIDTH;
1006       if (out_column + 1 < tab_target)
1007         while (out_column < tab_target)
1008           {
1009             putchar ('\t');
1010             out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
1011           }
1012     }
1013   while (out_column < space_target)
1014     {
1015       putchar (' ');
1016       out_column++;
1017     }
1018 }