src/fmt.c

   1 /* GNU fmt -- simple text formatter.
   2    Copyright (C) 1994, 1995 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17  */
  18
  19 /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
  20
  21 #include <config.h>
  22 #include <stdio.h>
  23 #include <sys/types.h>
  24 #include <getopt.h>
  25
  26 #include "system.h"
  27 #include "version.h"
  28 #include "error.h"
  29
  30 /* The following parameters represent the program's idea of what is
  31    "best".  Adjust to taste, subject to the caveats given.  */
  32
  33 /* Default longest permitted line length (max_width).  */
  34 #define WIDTH   75
  35
  36 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
  37    room for optimization.  */
  38 #define LEEWAY  7
  39
  40 /* The default secondary indent of tagged paragraph used for unindented
  41    one-line paragraphs not preceded by any multi-line paragraphs.  */
  42 #define DEF_INDENT 3
  43
  44 /* Costs and bonuses are expressed as the equivalent departure from the
  45    optimal line length, multiplied by 10.  e.g. assigning something a
  46    cost of 50 means that it is as bad as a line 5 characters too short
  47    or too long.  The definition of SHORT_COST(n) should not be changed.
  48    However, EQUIV(n) may need tuning.  */
  49
  50 typedef long COST;
  51
  52 #define MAXCOST (~(((COST) 1) << (8 * sizeof (COST) -1)))
  53
  54 #define SQR(n)          ((n) * (n))
  55 #define EQUIV(n)        SQR ((COST) (n))
  56
  57 /* Cost of a filled line n chars longer or shorter than best_width.  */
  58 #define SHORT_COST(n)   EQUIV ((n) * 10)
  59
  60 /* Cost of the difference between adjacent filled lines.  */
  61 #define RAGGED_COST(n)  (SHORT_COST (n) / 2)
  62
  63 /* Basic cost per line.  */
  64 #define LINE_COST       EQUIV (70)
  65
  66 /* Cost of breaking a line after the first word of a sentence, where
  67    the length of the word is N.  */
  68 #define WIDOW_COST(n)   (EQUIV (200) / ((n) + 2))
  69
  70 /* Cost of breaking a line before the last word of a sentence, where
  71    the length of the word is N.  */
  72 #define ORPHAN_COST(n)  (EQUIV (150) / ((n) + 2))
  73
  74 /* Bonus for breaking a line at the end of a sentence.  */
  75 #define SENTENCE_BONUS  EQUIV (50)
  76
  77 /* Cost of breaking a line after a period not marking end of a sentence.
  78    With the definition of sentence we are using (borrowed from emacs, see
  79    get_line()) such a break would then look like a sentence break.  Hence
  80    we assign a very high cost -- it should be avoided unless things are
  81    really bad.  */
  82 #define NOBREAK_COST    EQUIV (600)
  83
  84 /* Bonus for breaking a line before open parenthesis.  */
  85 #define PAREN_BONUS     EQUIV (40)
  86
  87 /* Bonus for breaking a line after other punctuation.  */
  88 #define PUNCT_BONUS     EQUIV(40)
  89
  90 /* Credit for breaking a long paragraph one line later.  */
  91 #define LINE_CREDIT     EQUIV(3)
  92
  93 /* Size of paragraph buffer, in words and characters.  Longer paragraphs
  94    are handled neatly (cf. flush_paragraph()), so there's little to gain
  95    by making these larger.  */
  96 #define MAXWORDS        1000
  97 #define MAXCHARS        5000
  98
  99 /* Extra ctype(3)-style macros.  */
 100
 101 #define isopen(c)       (strchr ("([`'\"", c) != NULL)
 102 #define isclose(c)      (strchr (")]'\"", c) != NULL)
 103 #define isperiod(c)     (strchr (".?!", c) != NULL)
 104
 105 /* Size of a tab stop, for expansion on input and re-introduction on
 106    output.  */
 107 #define TABWIDTH        8
 108
 109 /* Miscellaneous definitions.  */
 110
 111 typedef unsigned int bool;
 112 #define TRUE    1
 113 #define FALSE   0
 114
 115 /* Word descriptor structure.  */
 116
 117 typedef struct Word WORD;
 118
 119 struct Word
 120   {
 121
 122     /* Static attributes determined during input.  */
 123
 124     const char *text;           /* the text of the word */
 125     short length;               /* length of this word */
 126     short space;                /* the size of the following space */
 127     bool paren:1;               /* starts with open paren */
 128     bool period:1;              /* ends in [.?!])* */
 129     bool punct:1;               /* ends in punctuation */
 130     bool final:1;               /* end of sentence */
 131
 132     /* The remaining fields are computed during the optimization.  */
 133
 134     short line_length;          /* length of the best line starting here */
 135     COST best_cost;             /* cost of best paragraph starting here */
 136     WORD *next_break;           /* break which achieves best_cost */
 137   };
 138
 139 /* Forward declarations.  */
 140
 141 /* My AC_PROTOTYPES would be better than __STDC__.  FIXME :-).  */
 142 #if __STDC__
 143 #define _(x) x
 144 #else
 145 #define _(x) ()
 146 #endif
 147
 148 static void set_prefix _ ((char *p));
 149 static void fmt _ ((FILE *f));
 150 static bool get_paragraph _ ((FILE *f));
 151 static int get_line _ ((FILE *f, int c));
 152 static int get_prefix _ ((FILE *f));
 153 static int get_space _ ((FILE *f, int c));
 154 static int copy_rest _ ((FILE *f, int c));
 155 static bool same_para _ ((int c));
 156 static void flush_paragraph _ ((void));
 157 static void fmt_paragraph _ ((void));
 158 static void check_punctuation _ ((WORD *w));
 159 static COST base_cost _ ((WORD *this));
 160 static COST line_cost _ ((WORD *next, int len));
 161 static void put_paragraph _ ((WORD *finish));
 162 static void put_line _ ((WORD *w, int indent));
 163 static void put_word _ ((WORD *w));
 164 static void put_space _ ((int space));
 165
 166 /* The name this program was run with.  */
 167 const char *program_name;
 168
 169 /* If non-zero, display usage information and exit.  */
 170 static int show_help = 0;
 171
 172 /* If non-zero, print the version on standard output and exit.  */
 173 static int show_version = 0;
 174
 175 /* Option values.  */
 176
 177 /* If TRUE, first 2 lines may have different indent (default FALSE).  */
 178 static bool crown;
 179
 180 /* If TRUE, first 2 lines _must_ have different indent (default FALSE).  */
 181 static bool tagged;
 182
 183 /* If TRUE, each line is a paragraph on its own (default FALSE).  */
 184 static bool split;
 185
 186 /* If TRUE, don't preserve inter-word spacing (default FALSE).  */
 187 static bool uniform;
 188
 189 /* Prefix minus leading and trailing spaces (default "").  */
 190 static const char *prefix;
 191
 192 /* User-supplied maximum line width (default WIDTH).  The only output
 193    lines
 194    longer than this will each comprise a single word.  */
 195 static int max_width;
 196
 197 /* Values derived from the option values.  */
 198
 199 /* The length of prefix minus leading space.  */
 200 static int prefix_full_length;
 201
 202 /* The length of the leading space trimmed from the prefix.  */
 203 static int prefix_lead_space;
 204
 205 /* The length of prefix minus leading and trailing space.  */
 206 static int prefix_length;
 207
 208 /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
 209 static int best_width;
 210
 211 /* Dynamic variables.  */
 212
 213 /* Start column of the character most recently read from the input file.  */
 214 static int in_column;
 215
 216 /* Start column of the next character to be written to stdout.  */
 217 static int out_column;
 218
 219 /* Space for the paragraph text -- longer paragraphs are handled neatly
 220    (cf. flush_paragraph()).  */
 221 static char parabuf[MAXCHARS];
 222
 223 /* A pointer into parabuf, indicating the first unused character position.  */
 224 static char *wptr;
 225
 226 /* The words of a paragraph -- longer paragraphs are handled neatly
 227    (cf. flush_paragraph()).  */
 228 static WORD word[MAXWORDS];
 229
 230 /* A pointer into the above word array, indicating the first position
 231    after the last complete word.  Sometimes it will point at an incomplete
 232    word.  */
 233 static WORD *word_limit;
 234
 235 /* If TRUE, current input file contains tab characters, and so tabs can be
 236    used for white space on output.  */
 237 static bool tabs;
 238
 239 /* Space before trimmed prefix on each line of the current paragraph.  */
 240 static int prefix_indent;
 241
 242 /* Indentation of the first line of the current paragraph.  */
 243 static int first_indent;
 244
 245 /* Indentation of other lines of the current paragraph */
 246 static int other_indent;
 247
 248 /* To detect the end of a paragraph, we need to look ahead to the first
 249    non-blank character after the prefix on the next line, or the first
 250    character on the following line that failed to match the prefix.
 251    We can reconstruct the lookahead from that character (next_char), its
 252    position on the line (in_column) and the amount of space before the
 253    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
 254
 255 /* The last character read from the input file.  */
 256 static int next_char;
 257
 258 /* The space before the trimmed prefix (or part of it) on the next line
 259    after the current paragraph.  */
 260 static int next_prefix_indent;
 261
 262 /* If non-zero, the length of the last line output in the current
 263    paragraph, used to charge for raggedness at the split point for long
 264    paragraphs chosen by fmt_paragraph().  */
 265 static int last_line_length;
 266
 267 static void
 268 usage (status)
 269      int status;
 270 {
 271   if (status != 0)
 272     fprintf (stderr, "Try `%s --help' for more information.\n", program_name);
 273   else
 274     {
 275       printf ("Usage: %s [-DIGITS] [OPTION]... [FILE]...\n", program_name);
 276       fputs ("\
 277 Mandatory arguments to long options are mandatory for short options too.\n\
 278 \n\
 279   -c, --crown-margin        preserve indentation of first two lines\n\
 280   -s, --split-only          split long lines, but do not refill\n\
 281   -t, --tagged-paragraph    indentation of first line different from second\n\
 282   -u, --uniform-spacing     one space between words, two after sentences\n\
 283   -w, --width=NUMBER        maximum line width (default of 75 columns)\n\
 284   -p, --prefix=STRING       combine only lines having STRING as prefix\n\
 285       --help                display this help and exit\n\
 286       --version             output version information and exit\n\
 287 \n\
 288 In -wNUMBER, the letter `w' may be omitted.  Input FILEs are concatenated.\n\
 289 If no FILE or if FILE is `-', standard input is read.\n",
 290              stdout);
 291     }
 292   exit (status);
 293 }
 294
 295 /* Decode options and launch execution.  */
 296
 297 static const struct option long_options[] =
 298 {
 299   {"crown-margin", no_argument, NULL, 'c'},
 300   {"help", no_argument, &show_help, 1},
 301   {"prefix", required_argument, NULL, 'p'},
 302   {"split-only", no_argument, NULL, 's'},
 303   {"tagged-paragraph", no_argument, NULL, 't'},
 304   {"uniform-spacing", no_argument, NULL, 'u'},
 305   {"version", no_argument, &show_version, 1},
 306   {"width", required_argument, NULL, 'w'},
 307   {0, 0, 0, 0},
 308 };
 309
 310 int
 311 main (argc, argv)
 312      register int argc;
 313      register char *argv[];
 314 {
 315   int optchar;
 316   FILE *infile;
 317
 318   program_name = argv[0];
 319
 320   crown = tagged = split = uniform = FALSE;
 321   max_width = WIDTH;
 322   prefix = "";
 323   prefix_length = prefix_lead_space = prefix_full_length = 0;
 324
 325   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
 326     {
 327       max_width = 0;
 328       /* Old option syntax; a dash followed by one or more digits.
 329          Move past the number. */
 330       for (++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
 331         {
 332           /* FIXME: use strtol to detect overflow.  */
 333           max_width = max_width * 10 + *argv[1] - '0';
 334         }
 335       /* Make the options we just parsed invisible to getopt. */
 336       argv[1] = argv[0];
 337       argv++;
 338       argc--;
 339     }
 340
 341   while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
 342                                  long_options, NULL))
 343          != EOF)
 344     switch (optchar)
 345       {
 346       default:
 347         usage (1);
 348
 349       case 0:
 350         break;
 351
 352       case 'c':
 353         crown = TRUE;
 354         break;
 355
 356       case 's':
 357         split = TRUE;
 358         break;
 359
 360       case 't':
 361         tagged = TRUE;
 362         break;
 363
 364       case 'u':
 365         uniform = TRUE;
 366         break;
 367
 368       case 'w':
 369         /* FIXME: use strtol.  */
 370         max_width = atoi (optarg);
 371         break;
 372
 373       case 'p':
 374         set_prefix (optarg);
 375         break;
 376
 377       }
 378
 379   if (show_version)
 380     {
 381       printf ("fmt - %s\n", version_string);
 382       exit (0);
 383     }
 384
 385   if (show_help)
 386     usage (0);
 387
 388   best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
 389
 390   if (optind == argc)
 391     fmt (stdin);
 392   else
 393     for (; optind < argc; optind++)
 394       if (strcmp (argv[optind], "-") == 0)
 395         fmt (stdin);
 396       else
 397         {
 398           infile = fopen (argv[optind], "r");
 399           if (infile != NULL)
 400             {
 401               fmt (infile);
 402               fclose (infile);
 403             }
 404           else
 405             error (0, errno, argv[optind]);
 406         }
 407
 408   exit (0);
 409 }
 410
 411 /* Trim space from the front and back of the string P, yielding the prefix,
 412    and record the lengths of the prefix and the space trimmed.  */
 413
 414 static void
 415 set_prefix (p)
 416      register char *p;
 417 {
 418   register char *s;
 419
 420   prefix_lead_space = 0;
 421   while (*p == ' ')
 422     {
 423       prefix_lead_space++;
 424       p++;
 425     }
 426   prefix = p;
 427   prefix_full_length = strlen (p);
 428   s = p + prefix_full_length;
 429   while (s > p && s[-1] == ' ')
 430     s--;
 431   *s = '\0';
 432   prefix_length = s - p;
 433 }
 434
 435 /* read file F and send formatted output to stdout.  */
 436
 437 static void
 438 fmt (f)
 439      FILE *f;
 440 {
 441   tabs = FALSE;
 442   other_indent = 0;
 443   next_char = get_prefix (f);
 444   while (get_paragraph (f))
 445     {
 446       fmt_paragraph ();
 447       put_paragraph (word_limit);
 448     }
 449 }
 450
 451 /* Read a paragraph from input file F.  A paragraph consists of a
 452    maximal number of non-blank (excluding any prefix) lines subject to:
 453    * In split mode, a paragraph is a single non-blank line.
 454    * In crown mode, the second and subsequent lines must have the
 455    same indentation, but possibly different from the indent of the
 456    first line.
 457    * Tagged mode is similar, but the first and second lines must have
 458    different indentations.
 459    * Otherwise, all lines of a paragraph must have the same indent.
 460    If a prefix is in effect, it must be present at the same indent for
 461    each line in the paragraph.
 462
 463    Return FALSE if end-of-file was encountered before the start of a
 464    paragraph, else TRUE.  */
 465
 466 static bool
 467 get_paragraph (f)
 468      FILE *f;
 469 {
 470   register int c;
 471
 472   last_line_length = 0;
 473   c = next_char;
 474
 475   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
 476
 477   while (c == '\n' || c == EOF
 478          || next_prefix_indent < prefix_lead_space
 479          || in_column < next_prefix_indent + prefix_full_length)
 480     {
 481       c = copy_rest (f, c);
 482       if (c == EOF)
 483         {
 484           next_char = EOF;
 485           return FALSE;
 486         }
 487       putchar ('\n');
 488       c = get_prefix (f);
 489     }
 490
 491   /* Got a suitable first line for a paragraph.  */
 492
 493   prefix_indent = next_prefix_indent;
 494   first_indent = in_column;
 495   wptr = parabuf;
 496   word_limit = word;
 497   c = get_line (f, c);
 498
 499   /* Read rest of paragraph (unless split is specified).  */
 500
 501   if (split)
 502     other_indent = first_indent;
 503   else if (crown)
 504     {
 505       if (same_para (c))
 506         {
 507           other_indent = in_column;
 508           do
 509             {                   /* for each line till the end of the para */
 510               c = get_line (f, c);
 511             }
 512           while (same_para (c) && in_column == other_indent);
 513         }
 514       else
 515         other_indent = first_indent;
 516     }
 517   else if (tagged)
 518     {
 519       if (same_para (c) && in_column != first_indent)
 520         {
 521           other_indent = in_column;
 522           do
 523             {                   /* for each line till the end of the para */
 524               c = get_line (f, c);
 525             }
 526           while (same_para (c) && in_column == other_indent);
 527         }
 528
 529       /* Only one line: use the secondary indent from last time if it
 530          splits, or 0 if there have been no multi-line paragraphs in the
 531          input so far.  But if these rules make the two indents the same,
 532          pick a new secondary indent.  */
 533
 534       else if (other_indent == first_indent)
 535         other_indent = first_indent == 0 ? DEF_INDENT : 0;
 536     }
 537   else
 538     {
 539       other_indent = first_indent;
 540       while (same_para (c) && in_column == other_indent)
 541         c = get_line (f, c);
 542     }
 543   (word_limit - 1)->period = (word_limit - 1)->final = TRUE;
 544   next_char = c;
 545   return TRUE;
 546 }
 547
 548 /* Copy to the output a line that failed to match the prefix, or that
 549    was blank after the prefix.  In the former case, C is the character
 550    that failed to match the prefix.  In the latter, C is \n or EOF.
 551    Return the character (\n or EOF) ending the line.  */
 552
 553 static int
 554 copy_rest (f, c)
 555      FILE *f;
 556      register int c;
 557 {
 558   register const char *s;
 559
 560   out_column = 0;
 561   if (in_column > next_prefix_indent && c != '\n' && c != EOF)
 562     {
 563       put_space (next_prefix_indent);
 564       for (s = prefix; out_column != in_column; out_column++)
 565         putchar (*s++);
 566     }
 567   while (c != '\n' && c != EOF)
 568     {
 569       putchar (c);
 570       c = getc (f);
 571     }
 572   return c;
 573 }
 574
 575 /* Return TRUE if a line whose first non-blank character after the
 576    prefix (if any) is C could belong to the current paragraph,
 577    otherwise FALSE.  */
 578
 579 static bool
 580 same_para (c)
 581      register int c;
 582 {
 583   return (next_prefix_indent == prefix_indent
 584           && in_column >= next_prefix_indent + prefix_full_length
 585           && c != '\n' && c != EOF);
 586 }
 587
 588 /* Read a line from input file F, given first non-blank character C
 589    after the prefix, and the following indent, and break it into words.
 590    A word is a maximal non-empty string of non-white characters.  A word
 591    ending in [.?!]["')\]]* and followed by end-of-line or at least two
 592    spaces ends a sentence, as in emacs.
 593
 594    Return the first non-blank character of the next line.  */
 595
 596 static int
 597 get_line (f, c)
 598      FILE *f;
 599      register int c;
 600 {
 601   int start;
 602   register char *end_of_parabuf;
 603   register WORD *end_of_word;
 604
 605   end_of_parabuf = &parabuf[MAXCHARS];
 606   end_of_word = &word[MAXWORDS - 2];
 607
 608   do
 609     {                           /* for each word in a line */
 610
 611       /* Scan word.  */
 612
 613       word_limit->text = wptr;
 614       do
 615         {
 616           if (wptr == end_of_parabuf)
 617             flush_paragraph ();
 618           *wptr++ = c;
 619           c = getc (f);
 620         }
 621       while (c != EOF && !isspace (c));
 622       in_column += word_limit->length = wptr - word_limit->text;
 623       check_punctuation (word_limit);
 624
 625       /* Scan inter-word space.  */
 626
 627       start = in_column;
 628       c = get_space (f, c);
 629       word_limit->space = in_column - start;
 630       word_limit->final = (c == EOF
 631                            || (word_limit->period
 632                                && (c == '\n' || word_limit->space > 1)));
 633       if (c == '\n' || c == EOF || uniform)
 634         word_limit->space = word_limit->final ? 2 : 1;
 635       if (word_limit == end_of_word)
 636         flush_paragraph ();
 637       word_limit++;
 638       if (c == EOF)
 639         return EOF;
 640     }
 641   while (c != '\n');
 642   return get_prefix (f);
 643 }
 644
 645 /* Read a prefix from input file F.  Return either first non-matching
 646    character, or first non-blank character after the prefix.  */
 647
 648 static int
 649 get_prefix (f)
 650      FILE *f;
 651 {
 652   register int c;
 653   register const char *p;
 654
 655   in_column = 0;
 656   c = get_space (f, getc (f));
 657   if (prefix_length == 0)
 658     next_prefix_indent = prefix_lead_space < in_column ?
 659       prefix_lead_space : in_column;
 660   else
 661     {
 662       next_prefix_indent = in_column;
 663       for (p = prefix; *p != '\0'; p++)
 664         {
 665           if (c != *p)
 666             return c;
 667           in_column++;
 668           c = getc (f);
 669         }
 670       c = get_space (f, c);
 671     }
 672   return c;
 673 }
 674
 675 /* Read blank characters from input file F, starting with C, and keeping
 676    in_column up-to-date.  Return first non-blank character.  */
 677
 678 static int
 679 get_space (f, c)
 680      FILE *f;
 681      register int c;
 682 {
 683   for (;;)
 684     {
 685       if (c == ' ')
 686         in_column++;
 687       else if (c == '\t')
 688         {
 689           tabs = TRUE;
 690           in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
 691         }
 692       else
 693         return c;
 694       c = getc (f);
 695     }
 696 }
 697
 698 /* Set extra fields in word W describing any attached punctuation.  */
 699
 700 static void
 701 check_punctuation (w)
 702      register WORD *w;
 703 {
 704   register const char *start, *finish;
 705
 706   start = w->text;
 707   finish = start + (w->length - 1);
 708   w->paren = isopen (*start);
 709   w->punct = ispunct (*finish);
 710   while (isclose (*finish) && finish > start)
 711     finish--;
 712   w->period = isperiod (*finish);
 713 }
 714
 715 /* Flush part of the paragraph to make room.  This function is called on
 716    hitting the limit on the number of words or characters.  */
 717
 718 static void
 719 flush_paragraph ()
 720 {
 721   WORD *split_point;
 722   register WORD *w;
 723   int shift;
 724   COST best_break;
 725
 726   /* In the special case where it's all one word, just flush it.  */
 727
 728   if (word_limit == word)
 729     {
 730       printf ("%*s", wptr - parabuf, parabuf);
 731       wptr = parabuf;
 732       return;
 733     }
 734
 735   /* Otherwise:
 736      - format what you have so far as a paragraph,
 737      - find a low-cost line break near the end,
 738      - output to there,
 739      - make that the start of the paragraph.  */
 740
 741   fmt_paragraph ();
 742
 743   /* Choose a good split point.  */
 744
 745   split_point = word_limit;
 746   best_break = MAXCOST;
 747   for (w = word->next_break; w != word_limit; w = w->next_break)
 748     {
 749       if (w->best_cost - w->next_break->best_cost < best_break)
 750         {
 751           split_point = w;
 752           best_break = w->best_cost - w->next_break->best_cost;
 753         }
 754       if (best_break <= MAXCOST - LINE_CREDIT)
 755         best_break += LINE_CREDIT;
 756     }
 757   put_paragraph (split_point);
 758
 759   /* Copy text of words down to start of parabuf -- we use memmove because
 760      the source and target may overlap.  */
 761
 762   memmove (parabuf, split_point->text, (size_t) (wptr - split_point->text));
 763   shift = split_point->text - parabuf;
 764   wptr -= shift;
 765
 766   /* Adjust text pointers.  */
 767
 768   for (w = split_point; w <= word_limit; w++)
 769     w->text -= shift;
 770
 771   /* Copy words from split_point down to word -- we use memmove because
 772      the source and target may overlap.  */
 773
 774   memmove ((char *) word, (char *) split_point,
 775          (word_limit - split_point + 1) * sizeof (WORD));
 776   word_limit -= split_point - word;
 777 }
 778
 779 /* Compute the optimal formatting for the whole paragraph by computing
 780    and remembering the optimal formatting for each suffix from the empty
 781    one to the whole paragraph.  */
 782
 783 static void
 784 fmt_paragraph ()
 785 {
 786   register WORD *start, *w;
 787   register int len;
 788   register COST wcost, best;
 789   int saved_length;
 790
 791   word_limit->best_cost = 0;
 792   saved_length = word_limit->length;
 793   word_limit->length = max_width;       /* sentinel */
 794
 795   for (start = word_limit - 1; start >= word; start--)
 796     {
 797       best = MAXCOST;
 798       len = start == word ? first_indent : other_indent;
 799
 800       /* At least one word, however long, in the line.  */
 801
 802       w = start;
 803       len += w->length;
 804       do
 805         {
 806           w++;
 807
 808           /* Consider breaking before w.  */
 809
 810           wcost = line_cost (w, len) + w->best_cost;
 811           if (start == word && last_line_length > 0)
 812             wcost += RAGGED_COST (len - last_line_length);
 813           if (wcost < best)
 814             {
 815               best = wcost;
 816               start->next_break = w;
 817               start->line_length = len;
 818             }
 819           len += (w - 1)->space + w->length;    /* w > start >= word */
 820         }
 821       while (len < max_width);
 822       start->best_cost = best + base_cost (start);
 823     }
 824
 825   word_limit->length = saved_length;
 826 }
 827
 828 /* Return the constant component of the cost of breaking before the
 829    word THIS.  */
 830
 831 static COST
 832 base_cost (this)
 833      register WORD *this;
 834 {
 835   register COST cost;
 836
 837   cost = LINE_COST;
 838
 839   if (this > word)
 840     {
 841       if ((this - 1)->period)
 842         {
 843           if ((this - 1)->final)
 844             cost -= SENTENCE_BONUS;
 845           else
 846             cost += NOBREAK_COST;
 847         }
 848       else if ((this - 1)->punct)
 849         cost -= PUNCT_BONUS;
 850       else if (this > word + 1 && (this - 2)->final)
 851         cost += WIDOW_COST ((this - 1)->length);
 852     }
 853
 854   if (this->paren)
 855     cost -= PAREN_BONUS;
 856   else if (this->final)
 857     cost += ORPHAN_COST (this->length);
 858
 859   return cost;
 860 }
 861
 862 /* Return the component of the cost of breaking before word NEXT that
 863    depends on LEN, the length of the line beginning there.  */
 864
 865 static COST
 866 line_cost (next, len)
 867      register WORD *next;
 868      register int len;
 869 {
 870   register int n;
 871   register COST cost;
 872
 873   if (next == word_limit)
 874     return 0;
 875   n = best_width - len;
 876   cost = SHORT_COST (n);
 877   if (next->next_break != word_limit)
 878     {
 879       n = len - next->line_length;
 880       cost += RAGGED_COST (n);
 881     }
 882   return cost;
 883 }
 884
 885 /* Output to stdout a paragraph from word up to (but not including)
 886    FINISH, which must be in the next_break chain from word.  */
 887
 888 static void
 889 put_paragraph (finish)
 890      register WORD *finish;
 891 {
 892   register WORD *w;
 893
 894   put_line (word, first_indent);
 895   for (w = word->next_break; w != finish; w = w->next_break)
 896     put_line (w, other_indent);
 897 }
 898
 899 /* Output to stdout the line beginning with word W, beginning in column
 900    INDENT, including the prefix (if any).  */
 901
 902 static void
 903 put_line (w, indent)
 904      register WORD *w;
 905      int indent;
 906 {
 907   register WORD *endline;
 908
 909   out_column = 0;
 910   put_space (prefix_indent);
 911   fputs (prefix, stdout);
 912   out_column += prefix_length;
 913   put_space (indent - out_column);
 914
 915   endline = w->next_break - 1;
 916   for (; w != endline; w++)
 917     {
 918       put_word (w);
 919       put_space (w->space);
 920     }
 921   put_word (w);
 922   last_line_length = out_column;
 923   putchar ('\n');
 924 }
 925
 926 /* Output to stdout the word W.  */
 927
 928 static void
 929 put_word (w)
 930      register WORD *w;
 931 {
 932   register const char *s;
 933   register int n;
 934
 935   s = w->text;
 936   for (n = w->length; n != 0; n--)
 937     putchar (*s++);
 938   out_column += w->length;
 939 }
 940
 941 /* Output to stdout SPACE spaces, or equivalent tabs.  */
 942
 943 static void
 944 put_space (space)
 945      int space;
 946 {
 947   register int space_target, tab_target;
 948
 949   space_target = out_column + space;
 950   if (tabs)
 951     {
 952       tab_target = space_target / TABWIDTH * TABWIDTH;
 953       if (out_column + 1 < tab_target)
 954         while (out_column < tab_target)
 955           {
 956             putchar ('\t');
 957             out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
 958           }
 959     }
 960   while (out_column < space_target)
 961     {
 962       putchar (' ');
 963       out_column++;
 964     }
 965 }