src/fmt.c

   1 /* GNU fmt -- simple text formatter.
   2    Copyright (C) 1994, 1995 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software
  16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17  */
  18
  19 /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
  20
  21 #include <config.h>
  22 #include <stdio.h>
  23 #include <sys/types.h>
  24 #include <getopt.h>
  25
  26 #include "system.h"
  27 #include "version.h"
  28 #include "error.h"
  29
  30 /* The following parameters represent the program's idea of what is
  31    "best".  Adjust to taste, subject to the caveats given.  */
  32
  33 /* Default longest permitted line length (max_width).  */
  34 #define WIDTH   75
  35
  36 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
  37    room for optimization.  */
  38 #define LEEWAY  7
  39
  40 /* The default secondary indent of tagged paragraph used for unindented
  41    one-line paragraphs not preceded by any multi-line paragraphs.  */
  42 #define DEF_INDENT 3
  43
  44 /* Costs and bonuses are expressed as the equivalent departure from the
  45    optimal line length, multiplied by 10.  e.g. assigning something a
  46    cost of 50 means that it is as bad as a line 5 characters too short
  47    or too long.  The definition of SHORT_COST(n) should not be changed.
  48    However, EQUIV(n) may need tuning.  */
  49
  50 typedef long COST;
  51
  52 #define MAXCOST (~(((COST) 1) << (8 * sizeof (COST) -1)))
  53
  54 #define SQR(n)          ((n) * (n))
  55 #define EQUIV(n)        SQR ((COST) (n))
  56
  57 /* Cost of a filled line n chars longer or shorter than best_width.  */
  58 #define SHORT_COST(n)   EQUIV ((n) * 10)
  59
  60 /* Cost of the difference between adjacent filled lines.  */
  61 #define RAGGED_COST(n)  (SHORT_COST (n) / 2)
  62
  63 /* Basic cost per line.  */
  64 #define LINE_COST       EQUIV (70)
  65
  66 /* Cost of breaking a line after the first word of a sentence, where
  67    the length of the word is N.  */
  68 #define WIDOW_COST(n)   (EQUIV (200) / ((n) + 2))
  69
  70 /* Cost of breaking a line before the last word of a sentence, where
  71    the length of the word is N.  */
  72 #define ORPHAN_COST(n)  (EQUIV (150) / ((n) + 2))
  73
  74 /* Bonus for breaking a line at the end of a sentence.  */
  75 #define SENTENCE_BONUS  EQUIV (50)
  76
  77 /* Cost of breaking a line after a period not marking end of a sentence.
  78    With the definition of sentence we are using (borrowed from emacs, see
  79    get_line()) such a break would then look like a sentence break.  Hence
  80    we assign a very high cost -- it should be avoided unless things are
  81    really bad.  */
  82 #define NOBREAK_COST    EQUIV (600)
  83
  84 /* Bonus for breaking a line before open parenthesis.  */
  85 #define PAREN_BONUS     EQUIV (40)
  86
  87 /* Bonus for breaking a line after other punctuation.  */
  88 #define PUNCT_BONUS     EQUIV(40)
  89
  90 /* Credit for breaking a long paragraph one line later.  */
  91 #define LINE_CREDIT     EQUIV(3)
  92
  93 /* Size of paragraph buffer, in words and characters.  Longer paragraphs
  94    are handled neatly (cf. flush_paragraph()), so there's little to gain
  95    by making these larger.  */
  96 #define MAXWORDS        1000
  97 #define MAXCHARS        5000
  98
  99 /* Extra ctype(3)-style macros.  */
 100
 101 #define isopen(c)       (strchr ("([`'\"", c) != NULL)
 102 #define isclose(c)      (strchr (")]'\"", c) != NULL)
 103 #define isperiod(c)     (strchr (".?!", c) != NULL)
 104
 105 /* Size of a tab stop, for expansion on input and re-introduction on
 106    output.  */
 107 #define TABWIDTH        8
 108
 109 /* Miscellaneous definitions.  */
 110
 111 typedef unsigned int bool;
 112 #define TRUE    1
 113 #define FALSE   0
 114
 115 /* Word descriptor structure.  */
 116
 117 typedef struct Word WORD;
 118
 119 struct Word
 120   {
 121
 122     /* Static attributes determined during input.  */
 123
 124     const char *text;           /* the text of the word */
 125     short length;               /* length of this word */
 126     short space;                /* the size of the following space */
 127     bool paren:1;               /* starts with open paren */
 128     bool period:1;              /* ends in [.?!])* */
 129     bool punct:1;               /* ends in punctuation */
 130     bool final:1;               /* end of sentence */
 131
 132     /* The remaining fields are computed during the optimization.  */
 133
 134     short line_length;          /* length of the best line starting here */
 135     COST best_cost;             /* cost of best paragraph starting here */
 136     WORD *next_break;           /* break which achieves best_cost */
 137   };
 138
 139 /* Forward declarations.  */
 140
 141 /* My AC_PROTOTYPES would be better than __STDC__.  FIXME :-).  */
 142 #if __STDC__
 143 #define _(x) x
 144 #else
 145 #define _(x) ()
 146 #endif
 147
 148 static void set_prefix _ ((char *p));
 149 static void fmt _ ((FILE *f));
 150 static bool get_paragraph _ ((FILE *f));
 151 static int get_line _ ((FILE *f, int c));
 152 static int get_prefix _ ((FILE *f));
 153 static int get_space _ ((FILE *f, int c));
 154 static int copy_rest _ ((FILE *f, int c));
 155 static bool same_para _ ((int c));
 156 static void flush_paragraph _ ((void));
 157 static void fmt_paragraph _ ((void));
 158 static void check_punctuation _ ((WORD *w));
 159 static COST base_cost _ ((WORD *this));
 160 static COST line_cost _ ((WORD *next, int len));
 161 static void put_paragraph _ ((WORD *finish));
 162 static void put_line _ ((WORD *w, int indent));
 163 static void put_word _ ((WORD *w));
 164 static void put_space _ ((int space));
 165
 166 /* The name this program was run with.  */
 167 const char *program_name;
 168
 169 /* If non-zero, display usage information and exit.  */
 170 static int show_help = 0;
 171
 172 /* If non-zero, print the version on standard output and exit.  */
 173 static int show_version = 0;
 174
 175 /* Option values.  */
 176
 177 /* If TRUE, first 2 lines may have different indent (default FALSE).  */
 178 static bool crown;
 179
 180 /* If TRUE, first 2 lines _must_ have different indent (default FALSE).  */
 181 static bool tagged;
 182
 183 /* If TRUE, each line is a paragraph on its own (default FALSE).  */
 184 static bool split;
 185
 186 /* If TRUE, don't preserve inter-word spacing (default FALSE).  */
 187 static bool uniform;
 188
 189 /* Prefix minus leading and trailing spaces (default "").  */
 190 static const char *prefix;
 191
 192 /* User-supplied maximum line width (default WIDTH).  The only output
 193    lines
 194    longer than this will each comprise a single word.  */
 195 static int max_width;
 196
 197 /* Values derived from the option values.  */
 198
 199 /* The length of prefix minus leading space.  */
 200 static int prefix_full_length;
 201
 202 /* The length of the leading space trimmed from the prefix.  */
 203 static int prefix_lead_space;
 204
 205 /* The length of prefix minus leading and trailing space.  */
 206 static int prefix_length;
 207
 208 /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
 209 static int best_width;
 210
 211 /* Dynamic variables.  */
 212
 213 /* Start column of the character most recently read from the input file.  */
 214 static int in_column;
 215
 216 /* Start column of the next character to be written to stdout.  */
 217 static int out_column;
 218
 219 /* Space for the paragraph text -- longer paragraphs are handled neatly
 220    (cf. flush_paragraph()).  */
 221 static char parabuf[MAXCHARS];
 222
 223 /* A pointer into parabuf, indicating the first unused character position.  */
 224 static char *wptr;
 225
 226 /* The words of a paragraph -- longer paragraphs are handled neatly
 227    (cf. flush_paragraph()).  */
 228 static WORD word[MAXWORDS];
 229
 230 /* A pointer into the above word array, indicating the first position
 231    after the last complete word.  Sometimes it will point at an incomplete
 232    word.  */
 233 static WORD *word_limit;
 234
 235 /* If TRUE, current input file contains tab characters, and so tabs can be
 236    used for white space on output.  */
 237 static bool tabs;
 238
 239 /* Space before trimmed prefix on each line of the current paragraph.  */
 240 static int prefix_indent;
 241
 242 /* Indentation of the first line of the current paragraph.  */
 243 static int first_indent;
 244
 245 /* Indentation of other lines of the current paragraph */
 246 static int other_indent;
 247
 248 /* To detect the end of a paragraph, we need to look ahead to the first
 249    non-blank character after the prefix on the next line, or the first
 250    character on the following line that failed to match the prefix.
 251    We can reconstruct the lookahead from that character (next_char), its
 252    position on the line (in_column) and the amount of space before the
 253    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
 254
 255 /* The last character read from the input file.  */
 256 static int next_char;
 257
 258 /* The space before the trimmed prefix (or part of it) on the next line
 259    after the current paragraph.  */
 260 static int next_prefix_indent;
 261
 262 /* If non-zero, the length of the last line output in the current
 263    paragraph, used to charge for raggedness at the split point for long
 264    paragraphs chosen by fmt_paragraph().  */
 265 static int last_line_length;
 266
 267 static void
 268 usage (status)
 269      int status;
 270 {
 271   if (status != 0)
 272     fprintf (stderr, "Try `%s --help' for more information.\n", program_name);
 273   else
 274     {
 275       printf ("Usage: %s [-DIGITS] [OPTION]... [FILE]...\n", program_name);
 276       fputs ("\
 277 Reformat each paragraph in the FILE(s), writing to standard output.\n\
 278 If no FILE or if FILE is `-', standard input is read.\n\
 279 \n\
 280 Mandatory arguments to long options are mandatory for short options too.\n\
 281   -c, --crown-margin        preserve indentation of first two lines\n\
 282   -s, --split-only          split long lines, but do not refill\n\
 283   -t, --tagged-paragraph    indentation of first line different from second\n\
 284   -u, --uniform-spacing     one space between words, two after sentences\n\
 285   -w, --width=NUMBER        maximum line width (default of 75 columns)\n\
 286   -p, --prefix=STRING       combine only lines having STRING as prefix\n\
 287       --help                display this help and exit\n\
 288       --version             output version information and exit\n\
 289 \n\
 290 In -wNUMBER, the letter `w' may be omitted.\n",
 291              stdout);
 292     }
 293   exit (status);
 294 }
 295
 296 /* Decode options and launch execution.  */
 297
 298 static const struct option long_options[] =
 299 {
 300   {"crown-margin", no_argument, NULL, 'c'},
 301   {"help", no_argument, &show_help, 1},
 302   {"prefix", required_argument, NULL, 'p'},
 303   {"split-only", no_argument, NULL, 's'},
 304   {"tagged-paragraph", no_argument, NULL, 't'},
 305   {"uniform-spacing", no_argument, NULL, 'u'},
 306   {"version", no_argument, &show_version, 1},
 307   {"width", required_argument, NULL, 'w'},
 308   {0, 0, 0, 0},
 309 };
 310
 311 int
 312 main (argc, argv)
 313      register int argc;
 314      register char *argv[];
 315 {
 316   int optchar;
 317   FILE *infile;
 318
 319   program_name = argv[0];
 320
 321   crown = tagged = split = uniform = FALSE;
 322   max_width = WIDTH;
 323   prefix = "";
 324   prefix_length = prefix_lead_space = prefix_full_length = 0;
 325
 326   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
 327     {
 328       max_width = 0;
 329       /* Old option syntax; a dash followed by one or more digits.
 330          Move past the number. */
 331       for (++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
 332         {
 333           /* FIXME: use strtol to detect overflow.  */
 334           max_width = max_width * 10 + *argv[1] - '0';
 335         }
 336       /* Make the options we just parsed invisible to getopt. */
 337       argv[1] = argv[0];
 338       argv++;
 339       argc--;
 340     }
 341
 342   while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
 343                                  long_options, NULL))
 344          != EOF)
 345     switch (optchar)
 346       {
 347       default:
 348         usage (1);
 349
 350       case 0:
 351         break;
 352
 353       case 'c':
 354         crown = TRUE;
 355         break;
 356
 357       case 's':
 358         split = TRUE;
 359         break;
 360
 361       case 't':
 362         tagged = TRUE;
 363         break;
 364
 365       case 'u':
 366         uniform = TRUE;
 367         break;
 368
 369       case 'w':
 370         /* FIXME: use strtol.  */
 371         max_width = atoi (optarg);
 372         break;
 373
 374       case 'p':
 375         set_prefix (optarg);
 376         break;
 377
 378       }
 379
 380   if (show_version)
 381     {
 382       printf ("fmt - %s\n", version_string);
 383       exit (0);
 384     }
 385
 386   if (show_help)
 387     usage (0);
 388
 389   best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
 390
 391   if (optind == argc)
 392     fmt (stdin);
 393   else
 394     for (; optind < argc; optind++)
 395       if (strcmp (argv[optind], "-") == 0)
 396         fmt (stdin);
 397       else
 398         {
 399           infile = fopen (argv[optind], "r");
 400           if (infile != NULL)
 401             {
 402               fmt (infile);
 403               fclose (infile);
 404             }
 405           else
 406             error (0, errno, argv[optind]);
 407         }
 408
 409   exit (0);
 410 }
 411
 412 /* Trim space from the front and back of the string P, yielding the prefix,
 413    and record the lengths of the prefix and the space trimmed.  */
 414
 415 static void
 416 set_prefix (p)
 417      register char *p;
 418 {
 419   register char *s;
 420
 421   prefix_lead_space = 0;
 422   while (*p == ' ')
 423     {
 424       prefix_lead_space++;
 425       p++;
 426     }
 427   prefix = p;
 428   prefix_full_length = strlen (p);
 429   s = p + prefix_full_length;
 430   while (s > p && s[-1] == ' ')
 431     s--;
 432   *s = '\0';
 433   prefix_length = s - p;
 434 }
 435
 436 /* read file F and send formatted output to stdout.  */
 437
 438 static void
 439 fmt (f)
 440      FILE *f;
 441 {
 442   tabs = FALSE;
 443   other_indent = 0;
 444   next_char = get_prefix (f);
 445   while (get_paragraph (f))
 446     {
 447       fmt_paragraph ();
 448       put_paragraph (word_limit);
 449     }
 450 }
 451
 452 /* Read a paragraph from input file F.  A paragraph consists of a
 453    maximal number of non-blank (excluding any prefix) lines subject to:
 454    * In split mode, a paragraph is a single non-blank line.
 455    * In crown mode, the second and subsequent lines must have the
 456    same indentation, but possibly different from the indent of the
 457    first line.
 458    * Tagged mode is similar, but the first and second lines must have
 459    different indentations.
 460    * Otherwise, all lines of a paragraph must have the same indent.
 461    If a prefix is in effect, it must be present at the same indent for
 462    each line in the paragraph.
 463
 464    Return FALSE if end-of-file was encountered before the start of a
 465    paragraph, else TRUE.  */
 466
 467 static bool
 468 get_paragraph (f)
 469      FILE *f;
 470 {
 471   register int c;
 472
 473   last_line_length = 0;
 474   c = next_char;
 475
 476   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
 477
 478   while (c == '\n' || c == EOF
 479          || next_prefix_indent < prefix_lead_space
 480          || in_column < next_prefix_indent + prefix_full_length)
 481     {
 482       c = copy_rest (f, c);
 483       if (c == EOF)
 484         {
 485           next_char = EOF;
 486           return FALSE;
 487         }
 488       putchar ('\n');
 489       c = get_prefix (f);
 490     }
 491
 492   /* Got a suitable first line for a paragraph.  */
 493
 494   prefix_indent = next_prefix_indent;
 495   first_indent = in_column;
 496   wptr = parabuf;
 497   word_limit = word;
 498   c = get_line (f, c);
 499
 500   /* Read rest of paragraph (unless split is specified).  */
 501
 502   if (split)
 503     other_indent = first_indent;
 504   else if (crown)
 505     {
 506       if (same_para (c))
 507         {
 508           other_indent = in_column;
 509           do
 510             {                   /* for each line till the end of the para */
 511               c = get_line (f, c);
 512             }
 513           while (same_para (c) && in_column == other_indent);
 514         }
 515       else
 516         other_indent = first_indent;
 517     }
 518   else if (tagged)
 519     {
 520       if (same_para (c) && in_column != first_indent)
 521         {
 522           other_indent = in_column;
 523           do
 524             {                   /* for each line till the end of the para */
 525               c = get_line (f, c);
 526             }
 527           while (same_para (c) && in_column == other_indent);
 528         }
 529
 530       /* Only one line: use the secondary indent from last time if it
 531          splits, or 0 if there have been no multi-line paragraphs in the
 532          input so far.  But if these rules make the two indents the same,
 533          pick a new secondary indent.  */
 534
 535       else if (other_indent == first_indent)
 536         other_indent = first_indent == 0 ? DEF_INDENT : 0;
 537     }
 538   else
 539     {
 540       other_indent = first_indent;
 541       while (same_para (c) && in_column == other_indent)
 542         c = get_line (f, c);
 543     }
 544   (word_limit - 1)->period = (word_limit - 1)->final = TRUE;
 545   next_char = c;
 546   return TRUE;
 547 }
 548
 549 /* Copy to the output a line that failed to match the prefix, or that
 550    was blank after the prefix.  In the former case, C is the character
 551    that failed to match the prefix.  In the latter, C is \n or EOF.
 552    Return the character (\n or EOF) ending the line.  */
 553
 554 static int
 555 copy_rest (f, c)
 556      FILE *f;
 557      register int c;
 558 {
 559   register const char *s;
 560
 561   out_column = 0;
 562   if (in_column > next_prefix_indent && c != '\n' && c != EOF)
 563     {
 564       put_space (next_prefix_indent);
 565       for (s = prefix; out_column != in_column; out_column++)
 566         putchar (*s++);
 567     }
 568   while (c != '\n' && c != EOF)
 569     {
 570       putchar (c);
 571       c = getc (f);
 572     }
 573   return c;
 574 }
 575
 576 /* Return TRUE if a line whose first non-blank character after the
 577    prefix (if any) is C could belong to the current paragraph,
 578    otherwise FALSE.  */
 579
 580 static bool
 581 same_para (c)
 582      register int c;
 583 {
 584   return (next_prefix_indent == prefix_indent
 585           && in_column >= next_prefix_indent + prefix_full_length
 586           && c != '\n' && c != EOF);
 587 }
 588
 589 /* Read a line from input file F, given first non-blank character C
 590    after the prefix, and the following indent, and break it into words.
 591    A word is a maximal non-empty string of non-white characters.  A word
 592    ending in [.?!]["')\]]* and followed by end-of-line or at least two
 593    spaces ends a sentence, as in emacs.
 594
 595    Return the first non-blank character of the next line.  */
 596
 597 static int
 598 get_line (f, c)
 599      FILE *f;
 600      register int c;
 601 {
 602   int start;
 603   register char *end_of_parabuf;
 604   register WORD *end_of_word;
 605
 606   end_of_parabuf = &parabuf[MAXCHARS];
 607   end_of_word = &word[MAXWORDS - 2];
 608
 609   do
 610     {                           /* for each word in a line */
 611
 612       /* Scan word.  */
 613
 614       word_limit->text = wptr;
 615       do
 616         {
 617           if (wptr == end_of_parabuf)
 618             flush_paragraph ();
 619           *wptr++ = c;
 620           c = getc (f);
 621         }
 622       while (c != EOF && !isspace (c));
 623       in_column += word_limit->length = wptr - word_limit->text;
 624       check_punctuation (word_limit);
 625
 626       /* Scan inter-word space.  */
 627
 628       start = in_column;
 629       c = get_space (f, c);
 630       word_limit->space = in_column - start;
 631       word_limit->final = (c == EOF
 632                            || (word_limit->period
 633                                && (c == '\n' || word_limit->space > 1)));
 634       if (c == '\n' || c == EOF || uniform)
 635         word_limit->space = word_limit->final ? 2 : 1;
 636       if (word_limit == end_of_word)
 637         flush_paragraph ();
 638       word_limit++;
 639       if (c == EOF)
 640         return EOF;
 641     }
 642   while (c != '\n');
 643   return get_prefix (f);
 644 }
 645
 646 /* Read a prefix from input file F.  Return either first non-matching
 647    character, or first non-blank character after the prefix.  */
 648
 649 static int
 650 get_prefix (f)
 651      FILE *f;
 652 {
 653   register int c;
 654   register const char *p;
 655
 656   in_column = 0;
 657   c = get_space (f, getc (f));
 658   if (prefix_length == 0)
 659     next_prefix_indent = prefix_lead_space < in_column ?
 660       prefix_lead_space : in_column;
 661   else
 662     {
 663       next_prefix_indent = in_column;
 664       for (p = prefix; *p != '\0'; p++)
 665         {
 666           if (c != *p)
 667             return c;
 668           in_column++;
 669           c = getc (f);
 670         }
 671       c = get_space (f, c);
 672     }
 673   return c;
 674 }
 675
 676 /* Read blank characters from input file F, starting with C, and keeping
 677    in_column up-to-date.  Return first non-blank character.  */
 678
 679 static int
 680 get_space (f, c)
 681      FILE *f;
 682      register int c;
 683 {
 684   for (;;)
 685     {
 686       if (c == ' ')
 687         in_column++;
 688       else if (c == '\t')
 689         {
 690           tabs = TRUE;
 691           in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
 692         }
 693       else
 694         return c;
 695       c = getc (f);
 696     }
 697 }
 698
 699 /* Set extra fields in word W describing any attached punctuation.  */
 700
 701 static void
 702 check_punctuation (w)
 703      register WORD *w;
 704 {
 705   register const char *start, *finish;
 706
 707   start = w->text;
 708   finish = start + (w->length - 1);
 709   w->paren = isopen (*start);
 710   w->punct = ispunct (*finish);
 711   while (isclose (*finish) && finish > start)
 712     finish--;
 713   w->period = isperiod (*finish);
 714 }
 715
 716 /* Flush part of the paragraph to make room.  This function is called on
 717    hitting the limit on the number of words or characters.  */
 718
 719 static void
 720 flush_paragraph ()
 721 {
 722   WORD *split_point;
 723   register WORD *w;
 724   int shift;
 725   COST best_break;
 726
 727   /* In the special case where it's all one word, just flush it.  */
 728
 729   if (word_limit == word)
 730     {
 731       printf ("%*s", wptr - parabuf, parabuf);
 732       wptr = parabuf;
 733       return;
 734     }
 735
 736   /* Otherwise:
 737      - format what you have so far as a paragraph,
 738      - find a low-cost line break near the end,
 739      - output to there,
 740      - make that the start of the paragraph.  */
 741
 742   fmt_paragraph ();
 743
 744   /* Choose a good split point.  */
 745
 746   split_point = word_limit;
 747   best_break = MAXCOST;
 748   for (w = word->next_break; w != word_limit; w = w->next_break)
 749     {
 750       if (w->best_cost - w->next_break->best_cost < best_break)
 751         {
 752           split_point = w;
 753           best_break = w->best_cost - w->next_break->best_cost;
 754         }
 755       if (best_break <= MAXCOST - LINE_CREDIT)
 756         best_break += LINE_CREDIT;
 757     }
 758   put_paragraph (split_point);
 759
 760   /* Copy text of words down to start of parabuf -- we use memmove because
 761      the source and target may overlap.  */
 762
 763   memmove (parabuf, split_point->text, (size_t) (wptr - split_point->text));
 764   shift = split_point->text - parabuf;
 765   wptr -= shift;
 766
 767   /* Adjust text pointers.  */
 768
 769   for (w = split_point; w <= word_limit; w++)
 770     w->text -= shift;
 771
 772   /* Copy words from split_point down to word -- we use memmove because
 773      the source and target may overlap.  */
 774
 775   memmove ((char *) word, (char *) split_point,
 776          (word_limit - split_point + 1) * sizeof (WORD));
 777   word_limit -= split_point - word;
 778 }
 779
 780 /* Compute the optimal formatting for the whole paragraph by computing
 781    and remembering the optimal formatting for each suffix from the empty
 782    one to the whole paragraph.  */
 783
 784 static void
 785 fmt_paragraph ()
 786 {
 787   register WORD *start, *w;
 788   register int len;
 789   register COST wcost, best;
 790   int saved_length;
 791
 792   word_limit->best_cost = 0;
 793   saved_length = word_limit->length;
 794   word_limit->length = max_width;       /* sentinel */
 795
 796   for (start = word_limit - 1; start >= word; start--)
 797     {
 798       best = MAXCOST;
 799       len = start == word ? first_indent : other_indent;
 800
 801       /* At least one word, however long, in the line.  */
 802
 803       w = start;
 804       len += w->length;
 805       do
 806         {
 807           w++;
 808
 809           /* Consider breaking before w.  */
 810
 811           wcost = line_cost (w, len) + w->best_cost;
 812           if (start == word && last_line_length > 0)
 813             wcost += RAGGED_COST (len - last_line_length);
 814           if (wcost < best)
 815             {
 816               best = wcost;
 817               start->next_break = w;
 818               start->line_length = len;
 819             }
 820           len += (w - 1)->space + w->length;    /* w > start >= word */
 821         }
 822       while (len < max_width);
 823       start->best_cost = best + base_cost (start);
 824     }
 825
 826   word_limit->length = saved_length;
 827 }
 828
 829 /* Return the constant component of the cost of breaking before the
 830    word THIS.  */
 831
 832 static COST
 833 base_cost (this)
 834      register WORD *this;
 835 {
 836   register COST cost;
 837
 838   cost = LINE_COST;
 839
 840   if (this > word)
 841     {
 842       if ((this - 1)->period)
 843         {
 844           if ((this - 1)->final)
 845             cost -= SENTENCE_BONUS;
 846           else
 847             cost += NOBREAK_COST;
 848         }
 849       else if ((this - 1)->punct)
 850         cost -= PUNCT_BONUS;
 851       else if (this > word + 1 && (this - 2)->final)
 852         cost += WIDOW_COST ((this - 1)->length);
 853     }
 854
 855   if (this->paren)
 856     cost -= PAREN_BONUS;
 857   else if (this->final)
 858     cost += ORPHAN_COST (this->length);
 859
 860   return cost;
 861 }
 862
 863 /* Return the component of the cost of breaking before word NEXT that
 864    depends on LEN, the length of the line beginning there.  */
 865
 866 static COST
 867 line_cost (next, len)
 868      register WORD *next;
 869      register int len;
 870 {
 871   register int n;
 872   register COST cost;
 873
 874   if (next == word_limit)
 875     return 0;
 876   n = best_width - len;
 877   cost = SHORT_COST (n);
 878   if (next->next_break != word_limit)
 879     {
 880       n = len - next->line_length;
 881       cost += RAGGED_COST (n);
 882     }
 883   return cost;
 884 }
 885
 886 /* Output to stdout a paragraph from word up to (but not including)
 887    FINISH, which must be in the next_break chain from word.  */
 888
 889 static void
 890 put_paragraph (finish)
 891      register WORD *finish;
 892 {
 893   register WORD *w;
 894
 895   put_line (word, first_indent);
 896   for (w = word->next_break; w != finish; w = w->next_break)
 897     put_line (w, other_indent);
 898 }
 899
 900 /* Output to stdout the line beginning with word W, beginning in column
 901    INDENT, including the prefix (if any).  */
 902
 903 static void
 904 put_line (w, indent)
 905      register WORD *w;
 906      int indent;
 907 {
 908   register WORD *endline;
 909
 910   out_column = 0;
 911   put_space (prefix_indent);
 912   fputs (prefix, stdout);
 913   out_column += prefix_length;
 914   put_space (indent - out_column);
 915
 916   endline = w->next_break - 1;
 917   for (; w != endline; w++)
 918     {
 919       put_word (w);
 920       put_space (w->space);
 921     }
 922   put_word (w);
 923   last_line_length = out_column;
 924   putchar ('\n');
 925 }
 926
 927 /* Output to stdout the word W.  */
 928
 929 static void
 930 put_word (w)
 931      register WORD *w;
 932 {
 933   register const char *s;
 934   register int n;
 935
 936   s = w->text;
 937   for (n = w->length; n != 0; n--)
 938     putchar (*s++);
 939   out_column += w->length;
 940 }
 941
 942 /* Output to stdout SPACE spaces, or equivalent tabs.  */
 943
 944 static void
 945 put_space (space)
 946      int space;
 947 {
 948   register int space_target, tab_target;
 949
 950   space_target = out_column + space;
 951   if (tabs)
 952     {
 953       tab_target = space_target / TABWIDTH * TABWIDTH;
 954       if (out_column + 1 < tab_target)
 955         while (out_column < tab_target)
 956           {
 957             putchar ('\t');
 958             out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
 959           }
 960     }
 961   while (out_column < space_target)
 962     {
 963       putchar (' ');
 964       out_column++;
 965     }
 966 }