lib/git-merge-changelog.c

   1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
   2    Copyright (C) 2008-2010 Bruno Haible <bruno@clisp.org>
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* README:
  18    The default merge driver of 'git' *always* produces conflicts when
  19    pulling public modifications into a privately modified ChangeLog file.
  20    This is because ChangeLog files are always modified at the top; the
  21    default merge driver has no clue how to deal with this. Furthermore
  22    the conflicts are presented with more <<<< ==== >>>> markers than
  23    necessary; this is because the default merge driver makes pointless
  24    efforts to look at the individual line changes inside a ChangeLog entry.
  25
  26    This program serves as a 'git' merge driver that avoids these problems.
  27    1. It produces no conflict when ChangeLog entries have been inserted
  28       at the top both in the public and in the private modification. It
  29       puts the privately added entries above the publicly added entries.
  30    2. It respects the structure of ChangeLog files: entries are not split
  31       into lines but kept together.
  32    3. It also handles the case of small modifications of past ChangeLog
  33       entries, or of removed ChangeLog entries: they are merged as one
  34       would expect it.
  35    4. Conflicts are presented at the top of the file, rather than where
  36       they occurred, so that the user will see them immediately. (Unlike
  37       for source code written in some programming language, conflict markers
  38       that are located several hundreds lines from the top will not cause
  39       any syntax error and therefore would be likely to remain unnoticed.)
  40  */
  41
  42 /* Installation:
  43
  44    $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
  45    $ cd /tmp/testdir123
  46    $ ./configure
  47    $ make
  48    $ make install
  49
  50    Additionally, for git users:
  51      - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
  52        lines
  53
  54           [merge "merge-changelog"]
  55                   name = GNU-style ChangeLog merge driver
  56                   driver = /usr/local/bin/git-merge-changelog %O %A %B
  57
  58      - In every directory that contains a ChangeLog file, add a file
  59        '.gitattributes' with this line:
  60
  61           ChangeLog    merge=merge-changelog
  62
  63        (See "man 5 gitattributes" for more info.)
  64
  65    Additionally, for bzr users:
  66      - Install the 'extmerge' bzr plug-in listed at
  67          <http://doc.bazaar.canonical.com/plugins/en/index.html>
  68          <http://wiki.bazaar.canonical.com/BzrPlugins>
  69      - Add to your $HOME/.bazaar/bazaar.conf the line
  70
  71           external_merge = git-merge-changelog %b %T %o
  72
  73      - Then, to merge a conflict in a ChangeLog file, use
  74
  75           $ bzr extmerge ChangeLog
  76
  77    Additionally, for hg users:
  78      - Add to your $HOME/.hgrc the lines
  79
  80         [merge-patterns]
  81         ChangeLog = git-merge-changelog
  82
  83         [merge-tools]
  84         git-merge-changelog.executable = /usr/local/bin/git-merge-changelog
  85         git-merge-changelog.args = $base $local $other
  86
  87        See <http://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
  88        for reference.
  89  */
  90
  91 /* Use as an alternative to 'diff3':
  92    git-merge-changelog performs the same role as "diff3 -m", just with
  93    reordered arguments:
  94      $ git-merge-changelog %O %A %B
  95    is comparable to
  96      $ diff3 -m %A %O %B
  97  */
  98
  99 /* Calling convention:
 100    A merge driver is called with three filename arguments:
 101      1. %O = The common ancestor of %A and %B.
 102      2. %A = The file's contents from the "current branch".
 103      3. %B = The file's contents from the "other branch"; this is the contents
 104         being merged in.
 105
 106    In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
 107    maintainer to a central maintainer) or of a downstream pull with --rebase:
 108      2. %A = The file's newest pulled contents; modified by other committers.
 109      3. %B = The user's newest copy of the file; modified by the user.
 110    In case of a downstream pull (e.g. from a central repository to the user)
 111    or of an upstream pull with --rebase:
 112      2. %A = The user's newest copy of the file; modified by the user.
 113      3. %B = The file's newest pulled contents; modified by other committers.
 114
 115    It should write its merged output into file %A. It can also echo some
 116    remarks to stdout.  It should exit with return code 0 if the merge could
 117    be resolved cleanly, or with non-zero return code if there were conflicts.
 118  */
 119
 120 /* How it works:
 121    The structure of a ChangeLog file: It consists of ChangeLog entries. A
 122    ChangeLog entry starts at a line following a blank line and that starts with
 123    a non-whitespace character, or at the beginning of a file.
 124    The merge driver works as follows: It reads the three files into memory and
 125    dissects them into ChangeLog entries. It then finds the differences between
 126    %O and %B. They are classified as:
 127      - removals (some consecutive entries removed),
 128      - changes (some consecutive entries removed, some consecutive entries
 129        added),
 130      - additions (some consecutive entries added).
 131    The driver then attempts to apply the changes to %A.
 132    To this effect, it first computes a correspondence between the entries in %O
 133    and the entries in %A, using fuzzy string matching to still identify changed
 134    entries.
 135      - Removals are applied one by one. If the entry is present in %A, at any
 136        position, it is removed. If not, the removal is marked as a conflict.
 137      - Additions at the top of %B are applied at the top of %A.
 138      - Additions between entry x and entry y (y may be the file end) in %B are
 139        applied between entry x and entry y in %A (if they still exist and are
 140        still consecutive in %A), otherwise the additions are marked as a
 141        conflict.
 142      - Changes are categorized into "simple changes":
 143          entry1 ... entryn
 144        are mapped to
 145          added_entry ... added_entry modified_entry1 ... modified_entryn,
 146        where the correspondence between entry_i and modified_entry_i is still
 147        clear; and "big changes": these are all the rest. Simple changes at the
 148        top of %B are applied by putting the added entries at the top of %A. The
 149        changes in simple changes are applied one by one; possibly leading to
 150        single-entry conflicts. Big changes are applied en bloc, possibly
 151        leading to conflicts spanning multiple entries.
 152      - Conflicts are output at the top of the file and cause an exit status of
 153        1.
 154  */
 155
 156 #include <config.h>
 157
 158 #include <getopt.h>
 159 #include <limits.h>
 160 #include <stdbool.h>
 161 #include <stdio.h>
 162 #include <stdlib.h>
 163 #include <string.h>
 164 #include <sys/types.h>
 165 #include <unistd.h>
 166
 167 #include "error.h"
 168 #include "read-file.h"
 169 #include "gl_xlist.h"
 170 #include "gl_array_list.h"
 171 #include "gl_linkedhash_list.h"
 172 #include "gl_rbtreehash_list.h"
 173 #include "gl_linked_list.h"
 174 #include "xalloc.h"
 175 #include "xmalloca.h"
 176 #include "fstrcmp.h"
 177 #include "minmax.h"
 178 #include "c-strstr.h"
 179 #include "fwriteerror.h"
 180 #include "getprogname.h"
 181
 182 #define ASSERT(expr) \
 183   do                                                                         \
 184     {                                                                        \
 185       if (!(expr))                                                           \
 186         abort ();                                                            \
 187     }                                                                        \
 188   while (0)
 189
 190 #define FSTRCMP_THRESHOLD 0.6
 191 #define FSTRCMP_STRICTER_THRESHOLD 0.8
 192
 193 /* Representation of a ChangeLog entry.
 194    The string may contain NUL bytes; therefore it is represented as a plain
 195    opaque memory region.  */
 196 struct entry
 197 {
 198   char *string;
 199   size_t length;
 200   /* Cache for the hash code.  */
 201   bool hashcode_cached;
 202   size_t hashcode;
 203 };
 204
 205 /* Create an entry.
 206    The memory region passed by the caller must of indefinite extent.  It is
 207    *not* copied here.  */
 208 static struct entry *
 209 entry_create (char *string, size_t length)
 210 {
 211   struct entry *result = XMALLOC (struct entry);
 212   result->string = string;
 213   result->length = length;
 214   result->hashcode_cached = false;
 215   return result;
 216 }
 217
 218 /* Compare two entries for equality.  */
 219 static bool
 220 entry_equals (const void *elt1, const void *elt2)
 221 {
 222   const struct entry *entry1 = (const struct entry *) elt1;
 223   const struct entry *entry2 = (const struct entry *) elt2;
 224   return entry1->length == entry2->length
 225          && memcmp (entry1->string, entry2->string, entry1->length) == 0;
 226 }
 227
 228 /* Return a hash code of the contents of a ChangeLog entry.  */
 229 static size_t
 230 entry_hashcode (const void *elt)
 231 {
 232   struct entry *entry = (struct entry *) elt;
 233   if (!entry->hashcode_cached)
 234     {
 235       /* See http://www.haible.de/bruno/hashfunc.html.  */
 236       const char *s;
 237       size_t n;
 238       size_t h = 0;
 239
 240       for (s = entry->string, n = entry->length; n > 0; s++, n--)
 241         h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
 242
 243       entry->hashcode = h;
 244       entry->hashcode_cached = true;
 245     }
 246   return entry->hashcode;
 247 }
 248
 249 /* Perform a fuzzy comparison of two ChangeLog entries.
 250    Return a similarity measure of the two entries, a value between 0 and 1.
 251    0 stands for very distinct, 1 for identical.
 252    If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
 253    be returned.  */
 254 static double
 255 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
 256                double lower_bound)
 257 {
 258   /* fstrcmp works only on NUL terminated strings.  */
 259   char *memory;
 260   double similarity;
 261
 262   if (memchr (entry1->string, '\0', entry1->length) != NULL)
 263     return 0.0;
 264   if (memchr (entry2->string, '\0', entry2->length) != NULL)
 265     return 0.0;
 266   memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
 267   {
 268     char *p = memory;
 269     memcpy (p, entry1->string, entry1->length);
 270     p += entry1->length;
 271     *p++ = '\0';
 272     memcpy (p, entry2->string, entry2->length);
 273     p += entry2->length;
 274     *p++ = '\0';
 275   }
 276   similarity =
 277     fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
 278   freea (memory);
 279   return similarity;
 280 }
 281
 282 /* This structure represents an entire ChangeLog file, after it was read
 283    into memory.  */
 284 struct changelog_file
 285 {
 286   /* The entries, as a list.  */
 287   gl_list_t /* <struct entry *> */ entries_list;
 288   /* The entries, as a list in opposite direction.  */
 289   gl_list_t /* <struct entry *> */ entries_reversed;
 290   /* The entries, as an array.  */
 291   size_t num_entries;
 292   struct entry **entries;
 293 };
 294
 295 /* Read a ChangeLog file into memory.
 296    Return the contents in *RESULT.  */
 297 static void
 298 read_changelog_file (const char *filename, struct changelog_file *result)
 299 {
 300   /* Read the file in text mode, otherwise it's hard to recognize empty
 301      lines.  */
 302   size_t length;
 303   char *contents = read_file (filename, &length);
 304   if (contents == NULL)
 305     {
 306       fprintf (stderr, "could not read file '%s'\n", filename);
 307       exit (EXIT_FAILURE);
 308     }
 309
 310   result->entries_list =
 311     gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
 312                           NULL, true);
 313   result->entries_reversed =
 314     gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
 315                           NULL, true);
 316   /* A ChangeLog file consists of ChangeLog entries.  A ChangeLog entry starts
 317      at a line following a blank line and that starts with a non-whitespace
 318      character, or at the beginning of a file.
 319      Split the file contents into entries.  */
 320   {
 321     char *contents_end = contents + length;
 322     char *start = contents;
 323     while (start < contents_end)
 324       {
 325         /* Search the end of the current entry.  */
 326         char *ptr = start;
 327         struct entry *curr;
 328
 329         while (ptr < contents_end)
 330           {
 331             ptr = memchr (ptr, '\n', contents_end - ptr);
 332             if (ptr == NULL)
 333               {
 334                 ptr = contents_end;
 335                 break;
 336               }
 337             ptr++;
 338             if (contents_end - ptr >= 2
 339                 && ptr[0] == '\n'
 340                 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
 341               {
 342                 ptr++;
 343                 break;
 344               }
 345           }
 346
 347         curr = entry_create (start, ptr - start);
 348         gl_list_add_last (result->entries_list, curr);
 349         gl_list_add_first (result->entries_reversed, curr);
 350
 351         start = ptr;
 352       }
 353   }
 354
 355   result->num_entries = gl_list_size (result->entries_list);
 356   result->entries = XNMALLOC (result->num_entries, struct entry *);
 357   {
 358     size_t index = 0;
 359     gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
 360     const void *elt;
 361     gl_list_node_t node;
 362     while (gl_list_iterator_next (&iter, &elt, &node))
 363       result->entries[index++] = (struct entry *) elt;
 364     gl_list_iterator_free (&iter);
 365     ASSERT (index == result->num_entries);
 366   }
 367 }
 368
 369 /* A mapping (correspondence) between entries of FILE1 and of FILE2.  */
 370 struct entries_mapping
 371 {
 372   struct changelog_file *file1;
 373   struct changelog_file *file2;
 374   /* Mapping from indices in FILE1 to indices in FILE2.
 375      A value -1 means that the entry from FILE1 is not found in FILE2.
 376      A value -2 means that it has not yet been computed.  */
 377   ssize_t *index_mapping;
 378   /* Mapping from indices in FILE2 to indices in FILE1.
 379      A value -1 means that the entry from FILE2 is not found in FILE1.
 380      A value -2 means that it has not yet been computed.  */
 381   ssize_t *index_mapping_reverse;
 382 };
 383
 384 /* Look up (or lazily compute) the mapping of an entry in FILE1.
 385    i is the index in FILE1.
 386    Return the index in FILE2, or -1 when the entry is not found in FILE2.  */
 387 static ssize_t
 388 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
 389 {
 390   if (mapping->index_mapping[i] < -1)
 391     {
 392       struct changelog_file *file1 = mapping->file1;
 393       struct changelog_file *file2 = mapping->file2;
 394       size_t n1 = file1->num_entries;
 395       size_t n2 = file2->num_entries;
 396       struct entry *entry_i = file1->entries[i];
 397       ssize_t j;
 398
 399       /* Search whether it approximately occurs in file2.  */
 400       ssize_t best_j = -1;
 401       double best_j_similarity = 0.0;
 402       for (j = n2 - 1; j >= 0; j--)
 403         if (mapping->index_mapping_reverse[j] < 0)
 404           {
 405             double similarity =
 406               entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
 407             if (similarity > best_j_similarity)
 408               {
 409                 best_j = j;
 410                 best_j_similarity = similarity;
 411               }
 412           }
 413       if (best_j_similarity >= FSTRCMP_THRESHOLD)
 414         {
 415           /* Found a similar entry in file2.  */
 416           struct entry *entry_j = file2->entries[best_j];
 417           /* Search whether it approximately occurs in file1 at index i.  */
 418           ssize_t best_i = -1;
 419           double best_i_similarity = 0.0;
 420           ssize_t ii;
 421           for (ii = n1 - 1; ii >= 0; ii--)
 422             if (mapping->index_mapping[ii] < 0)
 423               {
 424                 double similarity =
 425                   entry_fstrcmp (file1->entries[ii], entry_j,
 426                                  best_i_similarity);
 427                 if (similarity > best_i_similarity)
 428                   {
 429                     best_i = ii;
 430                     best_i_similarity = similarity;
 431                   }
 432               }
 433           if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
 434             {
 435               mapping->index_mapping[i] = best_j;
 436               mapping->index_mapping_reverse[best_j] = i;
 437             }
 438         }
 439       if (mapping->index_mapping[i] < -1)
 440         /* It does not approximately occur in FILE2.
 441            Remember it, for next time.  */
 442         mapping->index_mapping[i] = -1;
 443     }
 444   return mapping->index_mapping[i];
 445 }
 446
 447 /* Look up (or lazily compute) the mapping of an entry in FILE2.
 448    j is the index in FILE2.
 449    Return the index in FILE1, or -1 when the entry is not found in FILE1.  */
 450 static ssize_t
 451 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
 452 {
 453   if (mapping->index_mapping_reverse[j] < -1)
 454     {
 455       struct changelog_file *file1 = mapping->file1;
 456       struct changelog_file *file2 = mapping->file2;
 457       size_t n1 = file1->num_entries;
 458       size_t n2 = file2->num_entries;
 459       struct entry *entry_j = file2->entries[j];
 460       ssize_t i;
 461
 462       /* Search whether it approximately occurs in file1.  */
 463       ssize_t best_i = -1;
 464       double best_i_similarity = 0.0;
 465       for (i = n1 - 1; i >= 0; i--)
 466         if (mapping->index_mapping[i] < 0)
 467           {
 468             double similarity =
 469               entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
 470             if (similarity > best_i_similarity)
 471               {
 472                 best_i = i;
 473                 best_i_similarity = similarity;
 474               }
 475           }
 476       if (best_i_similarity >= FSTRCMP_THRESHOLD)
 477         {
 478           /* Found a similar entry in file1.  */
 479           struct entry *entry_i = file1->entries[best_i];
 480           /* Search whether it approximately occurs in file2 at index j.  */
 481           ssize_t best_j = -1;
 482           double best_j_similarity = 0.0;
 483           ssize_t jj;
 484           for (jj = n2 - 1; jj >= 0; jj--)
 485             if (mapping->index_mapping_reverse[jj] < 0)
 486               {
 487                 double similarity =
 488                   entry_fstrcmp (entry_i, file2->entries[jj],
 489                                  best_j_similarity);
 490                 if (similarity > best_j_similarity)
 491                   {
 492                     best_j = jj;
 493                     best_j_similarity = similarity;
 494                   }
 495               }
 496           if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
 497             {
 498               mapping->index_mapping_reverse[j] = best_i;
 499               mapping->index_mapping[best_i] = j;
 500             }
 501         }
 502       if (mapping->index_mapping_reverse[j] < -1)
 503         /* It does not approximately occur in FILE1.
 504            Remember it, for next time.  */
 505         mapping->index_mapping_reverse[j] = -1;
 506     }
 507   return mapping->index_mapping_reverse[j];
 508 }
 509
 510 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
 511    The correspondence also takes into account small modifications; i.e. the
 512    indicated relation is not equality of entries but best-match similarity
 513    of entries.
 514    If FULL is true, the maximum of matching is done up-front.  If it is false,
 515    it is done in a lazy way through the functions entries_mapping_get and
 516    entries_mapping_reverse_get.
 517    Return the result in *RESULT.  */
 518 static void
 519 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
 520                  bool full,
 521                  struct entries_mapping *result)
 522 {
 523   /* Mapping from indices in file1 to indices in file2.  */
 524   ssize_t *index_mapping;
 525   /* Mapping from indices in file2 to indices in file1.  */
 526   ssize_t *index_mapping_reverse;
 527   size_t n1 = file1->num_entries;
 528   size_t n2 = file2->num_entries;
 529   ssize_t i, j;
 530
 531   index_mapping = XNMALLOC (n1, ssize_t);
 532   for (i = 0; i < n1; i++)
 533     index_mapping[i] = -2;
 534
 535   index_mapping_reverse = XNMALLOC (n2, ssize_t);
 536   for (j = 0; j < n2; j++)
 537     index_mapping_reverse[j] = -2;
 538
 539   for (i = n1 - 1; i >= 0; i--)
 540     /* Take an entry from file1.  */
 541     if (index_mapping[i] < -1)
 542       {
 543         struct entry *entry = file1->entries[i];
 544         /* Search whether it occurs in file2.  */
 545         j = gl_list_indexof (file2->entries_reversed, entry);
 546         if (j >= 0)
 547           {
 548             j = n2 - 1 - j;
 549             /* Found an exact correspondence.  */
 550             /* If index_mapping_reverse[j] >= 0, we have already seen other
 551                copies of this entry, and there were more occurrences of it in
 552                file1 than in file2.  In this case, do nothing.  */
 553             if (index_mapping_reverse[j] < 0)
 554               {
 555                 index_mapping[i] = j;
 556                 index_mapping_reverse[j] = i;
 557                 /* Look for more occurrences of the same entry.  Match them
 558                    as long as they pair up.  Unpaired occurrences of the same
 559                    entry are left without mapping.  */
 560                 {
 561                   ssize_t curr_i = i;
 562                   ssize_t curr_j = j;
 563
 564                   for (;;)
 565                     {
 566                       ssize_t next_i;
 567                       ssize_t next_j;
 568
 569                       next_i =
 570                         gl_list_indexof_from (file1->entries_reversed,
 571                                               n1 - curr_i, entry);
 572                       if (next_i < 0)
 573                         break;
 574                       next_j =
 575                         gl_list_indexof_from (file2->entries_reversed,
 576                                               n2 - curr_j, entry);
 577                       if (next_j < 0)
 578                         break;
 579                       curr_i = n1 - 1 - next_i;
 580                       curr_j = n2 - 1 - next_j;
 581                       ASSERT (index_mapping[curr_i] < 0);
 582                       ASSERT (index_mapping_reverse[curr_j] < 0);
 583                       index_mapping[curr_i] = curr_j;
 584                       index_mapping_reverse[curr_j] = curr_i;
 585                     }
 586                 }
 587               }
 588           }
 589       }
 590
 591   result->file1 = file1;
 592   result->file2 = file2;
 593   result->index_mapping = index_mapping;
 594   result->index_mapping_reverse = index_mapping_reverse;
 595
 596   if (full)
 597     for (i = n1 - 1; i >= 0; i--)
 598       entries_mapping_get (result, i);
 599 }
 600
 601 /* An "edit" is a textual modification performed by the user, that needs to
 602    be applied to the other file.  */
 603 enum edit_type
 604 {
 605   /* Some consecutive entries were added.  */
 606   ADDITION,
 607   /* Some consecutive entries were removed; some other consecutive entries
 608      were added at the same position.  (Not necessarily the same number of
 609      entries.)  */
 610   CHANGE,
 611   /* Some consecutive entries were removed.  */
 612   REMOVAL
 613 };
 614
 615 /* This structure represents an edit.  */
 616 struct edit
 617 {
 618   enum edit_type type;
 619   /* Range of indices into the entries of FILE1.  */
 620   ssize_t i1, i2;       /* first, last index; only used for CHANGE, REMOVAL */
 621   /* Range of indices into the entries of FILE2.  */
 622   ssize_t j1, j2;       /* first, last index; only used for ADDITION, CHANGE */
 623 };
 624
 625 /* This structure represents the differences from one file, FILE1, to another
 626    file, FILE2.  */
 627 struct differences
 628 {
 629   /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
 630      from FILE1 is not found in FILE2).  */
 631   ssize_t *index_mapping;
 632   /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
 633      from FILE2 is not found in FILE1).  */
 634   ssize_t *index_mapping_reverse;
 635   /* The edits that transform FILE1 into FILE2.  */
 636   size_t num_edits;
 637   struct edit **edits;
 638 };
 639
 640 /* Import the difference detection algorithm from GNU diff.  */
 641 #define ELEMENT struct entry *
 642 #define EQUAL entry_equals
 643 #define OFFSET ssize_t
 644 #define EXTRA_CONTEXT_FIELDS \
 645   ssize_t *index_mapping; \
 646   ssize_t *index_mapping_reverse;
 647 #define NOTE_DELETE(ctxt, xoff) \
 648   ctxt->index_mapping[xoff] = -1
 649 #define NOTE_INSERT(ctxt, yoff) \
 650   ctxt->index_mapping_reverse[yoff] = -1
 651 #include "diffseq.h"
 652
 653 /* Compute the differences between the entries of FILE1 and the entries of
 654    FILE2.  */
 655 static void
 656 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
 657                      struct differences *result)
 658 {
 659   /* Unlike compute_mapping, which mostly ignores the order of the entries and
 660      therefore works well when some entries are permuted, here we use the order.
 661      I think this is needed in order to distinguish changes from
 662      additions+removals; I don't know how to say what is a "change" if the
 663      files are considered as unordered sets of entries.  */
 664   struct context ctxt;
 665   size_t n1 = file1->num_entries;
 666   size_t n2 = file2->num_entries;
 667   ssize_t i;
 668   ssize_t j;
 669   gl_list_t /* <struct edit *> */ edits;
 670
 671   ctxt.xvec = file1->entries;
 672   ctxt.yvec = file2->entries;
 673   ctxt.index_mapping = XNMALLOC (n1, ssize_t);
 674   for (i = 0; i < n1; i++)
 675     ctxt.index_mapping[i] = 0;
 676   ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
 677   for (j = 0; j < n2; j++)
 678     ctxt.index_mapping_reverse[j] = 0;
 679   ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
 680   ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
 681
 682   /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
 683      each removed or added entry.  */
 684   compareseq (0, n1, 0, n2, &ctxt);
 685
 686   /* Complete the index_mapping and index_mapping_reverse arrays.  */
 687   i = 0;
 688   j = 0;
 689   while (i < n1 || j < n2)
 690     {
 691       while (i < n1 && ctxt.index_mapping[i] < 0)
 692         i++;
 693       while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
 694         j++;
 695       ASSERT ((i < n1) == (j < n2));
 696       if (i == n1 && j == n2)
 697         break;
 698       ctxt.index_mapping[i] = j;
 699       ctxt.index_mapping_reverse[j] = i;
 700       i++;
 701       j++;
 702     }
 703
 704   /* Create the edits.  */
 705   edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
 706   i = 0;
 707   j = 0;
 708   while (i < n1 || j < n2)
 709     {
 710       if (i == n1)
 711         {
 712           struct edit *e;
 713           ASSERT (j < n2);
 714           e = XMALLOC (struct edit);
 715           e->type = ADDITION;
 716           e->j1 = j;
 717           e->j2 = n2 - 1;
 718           gl_list_add_last (edits, e);
 719           break;
 720         }
 721       if (j == n2)
 722         {
 723           struct edit *e;
 724           ASSERT (i < n1);
 725           e = XMALLOC (struct edit);
 726           e->type = REMOVAL;
 727           e->i1 = i;
 728           e->i2 = n1 - 1;
 729           gl_list_add_last (edits, e);
 730           break;
 731         }
 732       if (ctxt.index_mapping[i] >= 0)
 733         {
 734           if (ctxt.index_mapping_reverse[j] >= 0)
 735             {
 736               ASSERT (ctxt.index_mapping[i] == j);
 737               ASSERT (ctxt.index_mapping_reverse[j] == i);
 738               i++;
 739               j++;
 740             }
 741           else
 742             {
 743               struct edit *e;
 744               ASSERT (ctxt.index_mapping_reverse[j] < 0);
 745               e = XMALLOC (struct edit);
 746               e->type = ADDITION;
 747               e->j1 = j;
 748               do
 749                 j++;
 750               while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
 751               e->j2 = j - 1;
 752               gl_list_add_last (edits, e);
 753             }
 754         }
 755       else
 756         {
 757           if (ctxt.index_mapping_reverse[j] >= 0)
 758             {
 759               struct edit *e;
 760               ASSERT (ctxt.index_mapping[i] < 0);
 761               e = XMALLOC (struct edit);
 762               e->type = REMOVAL;
 763               e->i1 = i;
 764               do
 765                 i++;
 766               while (i < n1 && ctxt.index_mapping[i] < 0);
 767               e->i2 = i - 1;
 768               gl_list_add_last (edits, e);
 769             }
 770           else
 771             {
 772               struct edit *e;
 773               ASSERT (ctxt.index_mapping[i] < 0);
 774               ASSERT (ctxt.index_mapping_reverse[j] < 0);
 775               e = XMALLOC (struct edit);
 776               e->type = CHANGE;
 777               e->i1 = i;
 778               do
 779                 i++;
 780               while (i < n1 && ctxt.index_mapping[i] < 0);
 781               e->i2 = i - 1;
 782               e->j1 = j;
 783               do
 784                 j++;
 785               while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
 786               e->j2 = j - 1;
 787               gl_list_add_last (edits, e);
 788             }
 789         }
 790     }
 791
 792   result->index_mapping = ctxt.index_mapping;
 793   result->index_mapping_reverse = ctxt.index_mapping_reverse;
 794   result->num_edits = gl_list_size (edits);
 795   result->edits = XNMALLOC (result->num_edits, struct edit *);
 796   {
 797     size_t index = 0;
 798     gl_list_iterator_t iter = gl_list_iterator (edits);
 799     const void *elt;
 800     gl_list_node_t node;
 801     while (gl_list_iterator_next (&iter, &elt, &node))
 802       result->edits[index++] = (struct edit *) elt;
 803     gl_list_iterator_free (&iter);
 804     ASSERT (index == result->num_edits);
 805   }
 806 }
 807
 808 /* An empty entry.  */
 809 static struct entry empty_entry = { NULL, 0 };
 810
 811 /* Return the end a paragraph.
 812    ENTRY is an entry.
 813    OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
 814    Return the offset of the end of paragraph, as an offset <= ENTRY->length;
 815    it is the start of a blank line or the end of the entry.  */
 816 static size_t
 817 find_paragraph_end (const struct entry *entry, size_t offset)
 818 {
 819   const char *string = entry->string;
 820   size_t length = entry->length;
 821
 822   for (;;)
 823     {
 824       const char *nl = memchr (string + offset, '\n', length - offset);
 825       if (nl == NULL)
 826         return length;
 827       offset = (nl - string) + 1;
 828       if (offset < length && string[offset] == '\n')
 829         return offset;
 830     }
 831 }
 832
 833 /* Split a merged entry.
 834    Given an old entry of the form
 835        TITLE
 836        BODY
 837    and a new entry of the form
 838        TITLE
 839        BODY1
 840        BODY'
 841    where the two titles are the same and BODY and BODY' are very similar,
 842    this computes two new entries
 843        TITLE
 844        BODY1
 845    and
 846        TITLE
 847        BODY'
 848    and returns true.
 849    If the entries don't have this form, it returns false.  */
 850 static bool
 851 try_split_merged_entry (const struct entry *old_entry,
 852                         const struct entry *new_entry,
 853                         struct entry *new_split[2])
 854 {
 855   size_t old_title_len = find_paragraph_end (old_entry, 0);
 856   size_t new_title_len = find_paragraph_end (new_entry, 0);
 857   struct entry old_body;
 858   struct entry new_body;
 859   size_t best_split_offset;
 860   double best_similarity;
 861   size_t split_offset;
 862
 863   /* Same title? */
 864   if (!(old_title_len == new_title_len
 865         && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
 866     return false;
 867
 868   old_body.string = old_entry->string + old_title_len;
 869   old_body.length = old_entry->length - old_title_len;
 870
 871   /* Determine where to split the new entry.
 872      This is done by maximizing the similarity between BODY and BODY'.  */
 873   best_split_offset = split_offset = new_title_len;
 874   best_similarity = 0.0;
 875   for (;;)
 876     {
 877       double similarity;
 878
 879       new_body.string = new_entry->string + split_offset;
 880       new_body.length = new_entry->length - split_offset;
 881       similarity =
 882         entry_fstrcmp (&old_body, &new_body, best_similarity);
 883       if (similarity > best_similarity)
 884         {
 885           best_split_offset = split_offset;
 886           best_similarity = similarity;
 887         }
 888       if (best_similarity == 1.0)
 889         /* It cannot get better.  */
 890         break;
 891
 892       if (split_offset < new_entry->length)
 893         split_offset = find_paragraph_end (new_entry, split_offset + 1);
 894       else
 895         break;
 896     }
 897
 898   /* BODY' should not be empty.  */
 899   if (best_split_offset == new_entry->length)
 900     return false;
 901   ASSERT (new_entry->string[best_split_offset] == '\n');
 902
 903   /* A certain similarity between BODY and BODY' is required.  */
 904   if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
 905     return false;
 906
 907   new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
 908
 909   {
 910     size_t len1 = new_title_len;
 911     size_t len2 = new_entry->length - best_split_offset;
 912     char *combined = XNMALLOC (len1 + len2, char);
 913     memcpy (combined, new_entry->string, len1);
 914     memcpy (combined + len1, new_entry->string + best_split_offset, len2);
 915     new_split[1] = entry_create (combined, len1 + len2);
 916   }
 917
 918   return true;
 919 }
 920
 921 /* Write the contents of an entry to the output stream FP.  */
 922 static void
 923 entry_write (FILE *fp, struct entry *entry)
 924 {
 925   if (entry->length > 0)
 926     fwrite (entry->string, 1, entry->length, fp);
 927 }
 928
 929 /* This structure represents a conflict.
 930    A conflict can occur for various reasons.  */
 931 struct conflict
 932 {
 933   /* Parts from the ancestor file.  */
 934   size_t num_old_entries;
 935   struct entry **old_entries;
 936   /* Parts of the modified file.  */
 937   size_t num_modified_entries;
 938   struct entry **modified_entries;
 939 };
 940
 941 /* Write a conflict to the output stream FP, including markers.  */
 942 static void
 943 conflict_write (FILE *fp, struct conflict *c)
 944 {
 945   size_t i;
 946
 947   /* Use the same syntax as git's default merge driver.
 948      Don't indent the contents of the entries (with things like ">" or "-"),
 949      otherwise the user needs more textual editing to resolve the conflict.  */
 950   fputs ("<<<<<<<\n", fp);
 951   for (i = 0; i < c->num_old_entries; i++)
 952     entry_write (fp, c->old_entries[i]);
 953   fputs ("=======\n", fp);
 954   for (i = 0; i < c->num_modified_entries; i++)
 955     entry_write (fp, c->modified_entries[i]);
 956   fputs (">>>>>>>\n", fp);
 957 }
 958
 959 /* Long options.  */
 960 static const struct option long_options[] =
 961 {
 962   { "help", no_argument, NULL, 'h' },
 963   { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
 964   { "version", no_argument, NULL, 'V' },
 965   { NULL, 0, NULL, 0 }
 966 };
 967
 968 /* Print a usage message and exit.  */
 969 static void
 970 usage (int status)
 971 {
 972   if (status != EXIT_SUCCESS)
 973     fprintf (stderr, "Try '%s --help' for more information.\n",
 974              getprogname ());
 975   else
 976     {
 977       printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
 978               getprogname ());
 979       printf ("\n");
 980       printf ("Merges independent modifications of a ChangeLog style file.\n");
 981       printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
 982       printf ("A-FILE-NAME names the publicly modified file.\n");
 983       printf ("B-FILE-NAME names the user-modified file.\n");
 984       printf ("Writes the merged file into A-FILE-NAME.\n");
 985       printf ("\n");
 986       #if 0 /* --split-merged-entry is now on by default.  */
 987       printf ("Operation modifiers:\n");
 988       printf ("\
 989       --split-merged-entry    Possibly split a merged entry between paragraphs.\n\
 990                               Use this if you have the habit to merge unrelated\n\
 991                               entries into a single one, separated only by a\n\
 992                               newline, just because they happened on the same\n\
 993                               date.\n");
 994       printf ("\n");
 995       #endif
 996       printf ("Informative output:\n");
 997       printf ("  -h, --help                  display this help and exit\n");
 998       printf ("  -V, --version               output version information and exit\n");
 999       printf ("\n");
1000       fputs ("Report bugs to <bug-gnulib@gnu.org>.\n",
1001              stdout);
1002     }
1003
1004   exit (status);
1005 }
1006
1007 int
1008 main (int argc, char *argv[])
1009 {
1010   int optchar;
1011   bool do_help;
1012   bool do_version;
1013   bool split_merged_entry;
1014
1015   /* Set default values for variables.  */
1016   do_help = false;
1017   do_version = false;
1018   split_merged_entry = true;
1019
1020   /* Parse command line options.  */
1021   while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1022     switch (optchar)
1023     {
1024     case '\0':          /* Long option.  */
1025       break;
1026     case 'h':
1027       do_help = true;
1028       break;
1029     case 'V':
1030       do_version = true;
1031       break;
1032     case CHAR_MAX + 1:  /* --split-merged-entry */
1033       break;
1034     default:
1035       usage (EXIT_FAILURE);
1036     }
1037
1038   if (do_version)
1039     {
1040       /* Version information is requested.  */
1041       printf ("%s\n", getprogname ());
1042       printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1043 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1044 This is free software: you are free to change and redistribute it.\n\
1045 There is NO WARRANTY, to the extent permitted by law.\n\
1046 ",
1047               "2008");
1048       printf ("Written by %s.\n", "Bruno Haible");
1049       exit (EXIT_SUCCESS);
1050     }
1051
1052   if (do_help)
1053     {
1054       /* Help is requested.  */
1055       usage (EXIT_SUCCESS);
1056     }
1057
1058   /* Test argument count.  */
1059   if (optind + 3 != argc)
1060     error (EXIT_FAILURE, 0, "expected three arguments");
1061
1062   {
1063     const char *ancestor_file_name; /* O-FILE-NAME */
1064     const char *destination_file_name; /* A-FILE-NAME */
1065     bool downstream;
1066     const char *other_file_name; /* B-FILE-NAME */
1067     const char *mainstream_file_name;
1068     const char *modified_file_name;
1069     struct changelog_file ancestor_file;
1070     struct changelog_file mainstream_file;
1071     struct changelog_file modified_file;
1072     /* Mapping from indices in ancestor_file to indices in mainstream_file.  */
1073     struct entries_mapping mapping;
1074     struct differences diffs;
1075     gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1076     gl_list_t /* <struct entry *> */ result_entries;
1077     gl_list_t /* <struct conflict *> */ result_conflicts;
1078
1079     ancestor_file_name = argv[optind];
1080     destination_file_name = argv[optind + 1];
1081     other_file_name = argv[optind + 2];
1082
1083     /* Heuristic to determine whether it's a pull in downstream direction
1084        (e.g. pull from a centralized server) or a pull in upstream direction
1085        (e.g. "git stash apply").
1086
1087        For ChangeLog this distinction is important. The difference between
1088        an "upstream" and a "downstream" repository is that more people are
1089        looking at the "upstream" repository.  They want to be informed about
1090        changes and expect them to be shown at the top of the ChangeLog.
1091        When a user pulls downstream, on the other hand, he has two options:
1092          a) He gets the change entries from the central repository also at the
1093             top of his ChangeLog, and his own changes come after them.
1094          b) He gets the change entries from the central repository after those
1095             he has collected for his branch.  His own change entries stay at
1096             the top of the ChangeLog file.
1097        In the case a) he has to reorder the ChangeLog before he can commit.
1098        No one does that.  So most people want b).
1099        In other words, the order of entries in a ChangeLog should represent
1100        the order in which they have flown (or will flow) into the *central*
1101        repository.
1102
1103        But in git this is fundamentally indistinguishable, because when Linus
1104        pulls patches from akpm and akpm pulls patches from Linus, it's not
1105        clear which of the two is more "upstream".  Also, when you have many
1106        branches in a repository and pull from one to another, "git" has no way
1107        to know which branch is more "upstream" than the other.  The git-tag(1)
1108        manual page also says:
1109          "One important aspect of git is it is distributed, and being
1110           distributed largely means there is no inherent "upstream" or
1111           "downstream" in the system."
1112        Therefore anyone who attempts to produce a ChangeLog from the merge
1113        history will fail.
1114
1115        Here we allow the user to specify the pull direction through an
1116        environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM).  If these two
1117        environment variables are not set, we assume a "simple single user"
1118        usage pattern: He manages local changes through stashes and uses
1119        "git pull" only to pull downstream.
1120
1121        How to distinguish these situation? There are several hints:
1122          - During a "git stash apply", GIT_REFLOG_ACTION is not set.  During
1123            a "git pull", it is set to 'pull '. During a "git pull --rebase",
1124            it is set to 'pull --rebase'.  During a "git cherry-pick", it is
1125            set to 'cherry-pick'.
1126          - During a "git stash apply", there is an environment variable of
1127            the form GITHEAD_<40_hex_digits>='Stashed changes'.  */
1128     {
1129       const char *var;
1130
1131       var = getenv ("GIT_DOWNSTREAM");
1132       if (var != NULL && var[0] != '\0')
1133         downstream = true;
1134       else
1135         {
1136           var = getenv ("GIT_UPSTREAM");
1137           if (var != NULL && var[0] != '\0')
1138             downstream = false;
1139           else
1140             {
1141               var = getenv ("GIT_REFLOG_ACTION");
1142               #if 0 /* Debugging code */
1143               printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1144               #endif
1145               if (var != NULL
1146                   && ((strncmp (var, "pull", 4) == 0
1147                        && c_strstr (var, " --rebase") == NULL)
1148                       || strncmp (var, "merge origin", 12) == 0))
1149                 downstream = true;
1150               else
1151                 {
1152                   /* "git stash apply", "git rebase", "git cherry-pick" and
1153                      similar.  */
1154                   downstream = false;
1155                 }
1156             }
1157         }
1158     }
1159
1160     #if 0 /* Debugging code */
1161     {
1162       char buf[1000];
1163       printf ("First line of %%A:\n");
1164       sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1165       printf ("First line of %%B:\n");
1166       sprintf (buf, "head -1 %s", other_file_name); system (buf);
1167       printf ("Guessing calling convention: %s\n",
1168               downstream
1169               ? "%A = modified by user, %B = upstream"
1170               : "%A = upstream, %B = modified by user");
1171     }
1172     #endif
1173
1174     if (downstream)
1175       {
1176         mainstream_file_name = other_file_name;
1177         modified_file_name = destination_file_name;
1178       }
1179     else
1180       {
1181         mainstream_file_name = destination_file_name;
1182         modified_file_name = other_file_name;
1183       }
1184
1185     /* Read the three files into memory.  */
1186     read_changelog_file (ancestor_file_name, &ancestor_file);
1187     read_changelog_file (mainstream_file_name, &mainstream_file);
1188     read_changelog_file (modified_file_name, &modified_file);
1189
1190     /* Compute correspondence between the entries of ancestor_file and of
1191        mainstream_file.  */
1192     compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1193     (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1194
1195     /* Compute differences between the entries of ancestor_file and of
1196        modified_file.  */
1197     compute_differences (&ancestor_file, &modified_file, &diffs);
1198
1199     /* Compute the result.  */
1200     result_entries_pointers =
1201       XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1202     result_entries =
1203       gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1204                             NULL, true);
1205     {
1206       size_t k;
1207       for (k = 0; k < mainstream_file.num_entries; k++)
1208         result_entries_pointers[k] =
1209           gl_list_add_last (result_entries, mainstream_file.entries[k]);
1210     }
1211     result_conflicts =
1212       gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1213     {
1214       size_t e;
1215       for (e = 0; e < diffs.num_edits; e++)
1216         {
1217           struct edit *edit = diffs.edits[e];
1218           switch (edit->type)
1219             {
1220             case ADDITION:
1221               if (edit->j1 == 0)
1222                 {
1223                   /* An addition to the top of modified_file.
1224                      Apply it to the top of mainstream_file.  */
1225                   ssize_t j;
1226                   for (j = edit->j2; j >= edit->j1; j--)
1227                     {
1228                       struct entry *added_entry = modified_file.entries[j];
1229                       gl_list_add_first (result_entries, added_entry);
1230                     }
1231                 }
1232               else
1233                 {
1234                   ssize_t i_before;
1235                   ssize_t i_after;
1236                   ssize_t k_before;
1237                   ssize_t k_after;
1238                   i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1239                   ASSERT (i_before >= 0);
1240                   i_after = (edit->j2 + 1 == modified_file.num_entries
1241                              ? ancestor_file.num_entries
1242                              : diffs.index_mapping_reverse[edit->j2 + 1]);
1243                   ASSERT (i_after >= 0);
1244                   ASSERT (i_after == i_before + 1);
1245                   /* An addition between ancestor_file.entries[i_before] and
1246                      ancestor_file.entries[i_after].  See whether these two
1247                      entries still exist in mainstream_file and are still
1248                      consecutive.  */
1249                   k_before = entries_mapping_get (&mapping, i_before);
1250                   k_after = (i_after == ancestor_file.num_entries
1251                              ? mainstream_file.num_entries
1252                              : entries_mapping_get (&mapping, i_after));
1253                   if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1254                     {
1255                       /* Yes, the entry before and after are still neighbours
1256                          in mainstream_file.  Apply the addition between
1257                          them.  */
1258                       if (k_after == mainstream_file.num_entries)
1259                         {
1260                           size_t j;
1261                           for (j = edit->j1; j <= edit->j2; j++)
1262                             {
1263                               struct entry *added_entry = modified_file.entries[j];
1264                               gl_list_add_last (result_entries, added_entry);
1265                             }
1266                         }
1267                       else
1268                         {
1269                           gl_list_node_t node_k_after = result_entries_pointers[k_after];
1270                           size_t j;
1271                           for (j = edit->j1; j <= edit->j2; j++)
1272                             {
1273                               struct entry *added_entry = modified_file.entries[j];
1274                               gl_list_add_before (result_entries, node_k_after, added_entry);
1275                             }
1276                         }
1277                     }
1278                   else
1279                     {
1280                       /* It's not clear where the additions should be applied.
1281                          Let the user decide.  */
1282                       struct conflict *c = XMALLOC (struct conflict);
1283                       size_t j;
1284                       c->num_old_entries = 0;
1285                       c->old_entries = NULL;
1286                       c->num_modified_entries = edit->j2 - edit->j1 + 1;
1287                       c->modified_entries =
1288                         XNMALLOC (c->num_modified_entries, struct entry *);
1289                       for (j = edit->j1; j <= edit->j2; j++)
1290                         c->modified_entries[j - edit->j1] = modified_file.entries[j];
1291                       gl_list_add_last (result_conflicts, c);
1292                     }
1293                 }
1294               break;
1295             case REMOVAL:
1296               {
1297                 /* Apply the removals one by one.  */
1298                 size_t i;
1299                 for (i = edit->i1; i <= edit->i2; i++)
1300                   {
1301                     struct entry *removed_entry = ancestor_file.entries[i];
1302                     ssize_t k = entries_mapping_get (&mapping, i);
1303                     if (k >= 0
1304                         && entry_equals (removed_entry,
1305                                          mainstream_file.entries[k]))
1306                       {
1307                         /* The entry to be removed still exists in
1308                            mainstream_file.  Remove it.  */
1309                         gl_list_node_set_value (result_entries,
1310                                                 result_entries_pointers[k],
1311                                                 &empty_entry);
1312                       }
1313                     else
1314                       {
1315                         /* The entry to be removed was already removed or was
1316                            modified.  This is a conflict.  */
1317                         struct conflict *c = XMALLOC (struct conflict);
1318                         c->num_old_entries = 1;
1319                         c->old_entries =
1320                           XNMALLOC (c->num_old_entries, struct entry *);
1321                         c->old_entries[0] = removed_entry;
1322                         c->num_modified_entries = 0;
1323                         c->modified_entries = NULL;
1324                         gl_list_add_last (result_conflicts, c);
1325                       }
1326                   }
1327               }
1328               break;
1329             case CHANGE:
1330               {
1331                 bool done = false;
1332                 /* When the user usually merges entries from the same day,
1333                    and this edit is at the top of the file:  */
1334                 if (split_merged_entry && edit->j1 == 0)
1335                   {
1336                     /* Test whether the change is "simple merged", i.e. whether
1337                        it consists of additions, followed by an augmentation of
1338                        the first changed entry, followed by small changes of the
1339                        remaining entries:
1340                          entry_1
1341                          entry_2
1342                          ...
1343                          entry_n
1344                        are mapped to
1345                          added_entry
1346                          ...
1347                          added_entry
1348                          augmented_entry_1
1349                          modified_entry_2
1350                          ...
1351                          modified_entry_n.  */
1352                     if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1353                       {
1354                         struct entry *split[2];
1355                         bool simple_merged =
1356                           try_split_merged_entry (ancestor_file.entries[edit->i1],
1357                                                   modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1358                                                   split);
1359                         if (simple_merged)
1360                           {
1361                             size_t i;
1362                             for (i = edit->i1 + 1; i <= edit->i2; i++)
1363                               if (entry_fstrcmp (ancestor_file.entries[i],
1364                                                  modified_file.entries[i + edit->j2 - edit->i2],
1365                                                  FSTRCMP_THRESHOLD)
1366                                   < FSTRCMP_THRESHOLD)
1367                                 {
1368                                   simple_merged = false;
1369                                   break;
1370                                 }
1371                           }
1372                         if (simple_merged)
1373                           {
1374                             /* Apply the additions at the top of modified_file.
1375                                Apply each of the single-entry changes
1376                                separately.  */
1377                             size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1378                             size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1379                             ssize_t j;
1380                             /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]:  */
1381                             gl_list_add_first (result_entries, split[0]);
1382                             /* The additions.  */
1383                             for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1384                               {
1385                                 struct entry *added_entry = modified_file.entries[j];
1386                                 gl_list_add_first (result_entries, added_entry);
1387                               }
1388                             /* Now the single-entry changes.  */
1389                             for (j = edit->j1 + num_added; j <= edit->j2; j++)
1390                               {
1391                                 struct entry *changed_entry =
1392                                   (j == edit->j1 + num_added
1393                                    ? split[1]
1394                                    : modified_file.entries[j]);
1395                                 size_t i = j + edit->i2 - edit->j2;
1396                                 ssize_t k = entries_mapping_get (&mapping, i);
1397                                 if (k >= 0
1398                                     && entry_equals (ancestor_file.entries[i],
1399                                                      mainstream_file.entries[k]))
1400                                   {
1401                                     gl_list_node_set_value (result_entries,
1402                                                             result_entries_pointers[k],
1403                                                             changed_entry);
1404                                   }
1405                                 else if (!entry_equals (ancestor_file.entries[i],
1406                                                         changed_entry))
1407                                   {
1408                                     struct conflict *c = XMALLOC (struct conflict);
1409                                     c->num_old_entries = 1;
1410                                     c->old_entries =
1411                                       XNMALLOC (c->num_old_entries, struct entry *);
1412                                     c->old_entries[0] = ancestor_file.entries[i];
1413                                     c->num_modified_entries = 1;
1414                                     c->modified_entries =
1415                                       XNMALLOC (c->num_modified_entries, struct entry *);
1416                                     c->modified_entries[0] = changed_entry;
1417                                     gl_list_add_last (result_conflicts, c);
1418                                   }
1419                               }
1420                             done = true;
1421                           }
1422                       }
1423                   }
1424                 if (!done)
1425                   {
1426                     bool simple;
1427                     /* Test whether the change is "simple", i.e. whether it
1428                        consists of small changes to the old ChangeLog entries
1429                        and additions before them:
1430                          entry_1
1431                          ...
1432                          entry_n
1433                        are mapped to
1434                          added_entry
1435                          ...
1436                          added_entry
1437                          modified_entry_1
1438                          ...
1439                          modified_entry_n.  */
1440                     if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1441                       {
1442                         size_t i;
1443                         simple = true;
1444                         for (i = edit->i1; i <= edit->i2; i++)
1445                           if (entry_fstrcmp (ancestor_file.entries[i],
1446                                              modified_file.entries[i + edit->j2 - edit->i2],
1447                                              FSTRCMP_THRESHOLD)
1448                               < FSTRCMP_THRESHOLD)
1449                             {
1450                               simple = false;
1451                               break;
1452                             }
1453                       }
1454                     else
1455                       simple = false;
1456                     if (simple)
1457                       {
1458                         /* Apply the additions and each of the single-entry
1459                            changes separately.  */
1460                         size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1461                         size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1462                         if (edit->j1 == 0)
1463                           {
1464                             /* A simple change at the top of modified_file.
1465                                Apply it to the top of mainstream_file.  */
1466                             ssize_t j;
1467                             for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1468                               {
1469                                 struct entry *added_entry = modified_file.entries[j];
1470                                 gl_list_add_first (result_entries, added_entry);
1471                               }
1472                             for (j = edit->j1 + num_added; j <= edit->j2; j++)
1473                               {
1474                                 struct entry *changed_entry = modified_file.entries[j];
1475                                 size_t i = j + edit->i2 - edit->j2;
1476                                 ssize_t k = entries_mapping_get (&mapping, i);
1477                                 if (k >= 0
1478                                     && entry_equals (ancestor_file.entries[i],
1479                                                      mainstream_file.entries[k]))
1480                                   {
1481                                     gl_list_node_set_value (result_entries,
1482                                                             result_entries_pointers[k],
1483                                                             changed_entry);
1484                                   }
1485                                 else
1486                                   {
1487                                     struct conflict *c;
1488                                     ASSERT (!entry_equals (ancestor_file.entries[i],
1489                                                            changed_entry));
1490                                     c = XMALLOC (struct conflict);
1491                                     c->num_old_entries = 1;
1492                                     c->old_entries =
1493                                       XNMALLOC (c->num_old_entries, struct entry *);
1494                                     c->old_entries[0] = ancestor_file.entries[i];
1495                                     c->num_modified_entries = 1;
1496                                     c->modified_entries =
1497                                       XNMALLOC (c->num_modified_entries, struct entry *);
1498                                     c->modified_entries[0] = changed_entry;
1499                                     gl_list_add_last (result_conflicts, c);
1500                                   }
1501                               }
1502                             done = true;
1503                           }
1504                         else
1505                           {
1506                             ssize_t i_before;
1507                             ssize_t k_before;
1508                             bool linear;
1509                             i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1510                             ASSERT (i_before >= 0);
1511                             /* A simple change after ancestor_file.entries[i_before].
1512                                See whether this entry and the following num_changed
1513                                entries still exist in mainstream_file and are still
1514                                consecutive.  */
1515                             k_before = entries_mapping_get (&mapping, i_before);
1516                             linear = (k_before >= 0);
1517                             if (linear)
1518                               {
1519                                 size_t i;
1520                                 for (i = i_before + 1; i <= i_before + num_changed; i++)
1521                                   if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1522                                     {
1523                                       linear = false;
1524                                       break;
1525                                     }
1526                               }
1527                             if (linear)
1528                               {
1529                                 gl_list_node_t node_for_insert =
1530                                   result_entries_pointers[k_before + 1];
1531                                 ssize_t j;
1532                                 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1533                                   {
1534                                     struct entry *added_entry = modified_file.entries[j];
1535                                     gl_list_add_before (result_entries, node_for_insert, added_entry);
1536                                   }
1537                                 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1538                                   {
1539                                     struct entry *changed_entry = modified_file.entries[j];
1540                                     size_t i = j + edit->i2 - edit->j2;
1541                                     ssize_t k = entries_mapping_get (&mapping, i);
1542                                     ASSERT (k >= 0);
1543                                     if (entry_equals (ancestor_file.entries[i],
1544                                                       mainstream_file.entries[k]))
1545                                       {
1546                                         gl_list_node_set_value (result_entries,
1547                                                                 result_entries_pointers[k],
1548                                                                 changed_entry);
1549                                       }
1550                                     else
1551                                       {
1552                                         struct conflict *c;
1553                                         ASSERT (!entry_equals (ancestor_file.entries[i],
1554                                                                changed_entry));
1555                                         c = XMALLOC (struct conflict);
1556                                         c->num_old_entries = 1;
1557                                         c->old_entries =
1558                                           XNMALLOC (c->num_old_entries, struct entry *);
1559                                         c->old_entries[0] = ancestor_file.entries[i];
1560                                         c->num_modified_entries = 1;
1561                                         c->modified_entries =
1562                                           XNMALLOC (c->num_modified_entries, struct entry *);
1563                                         c->modified_entries[0] = changed_entry;
1564                                         gl_list_add_last (result_conflicts, c);
1565                                       }
1566                                   }
1567                                 done = true;
1568                               }
1569                           }
1570                       }
1571                     else
1572                       {
1573                         /* A big change.
1574                            See whether the num_changed entries still exist
1575                            unchanged in mainstream_file and are still
1576                            consecutive.  */
1577                         ssize_t i_first;
1578                         ssize_t k_first;
1579                         bool linear_unchanged;
1580                         i_first = edit->i1;
1581                         k_first = entries_mapping_get (&mapping, i_first);
1582                         linear_unchanged =
1583                           (k_first >= 0
1584                            && entry_equals (ancestor_file.entries[i_first],
1585                                             mainstream_file.entries[k_first]));
1586                         if (linear_unchanged)
1587                           {
1588                             size_t i;
1589                             for (i = i_first + 1; i <= edit->i2; i++)
1590                               if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1591                                     && entry_equals (ancestor_file.entries[i],
1592                                                      mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1593                                 {
1594                                   linear_unchanged = false;
1595                                   break;
1596                                 }
1597                           }
1598                         if (linear_unchanged)
1599                           {
1600                             gl_list_node_t node_for_insert =
1601                               result_entries_pointers[k_first];
1602                             ssize_t j;
1603                             size_t i;
1604                             for (j = edit->j2; j >= edit->j1; j--)
1605                               {
1606                                 struct entry *new_entry = modified_file.entries[j];
1607                                 gl_list_add_before (result_entries, node_for_insert, new_entry);
1608                               }
1609                             for (i = edit->i1; i <= edit->i2; i++)
1610                               {
1611                                 ssize_t k = entries_mapping_get (&mapping, i);
1612                                 ASSERT (k >= 0);
1613                                 ASSERT (entry_equals (ancestor_file.entries[i],
1614                                                       mainstream_file.entries[k]));
1615                                 gl_list_node_set_value (result_entries,
1616                                                         result_entries_pointers[k],
1617                                                         &empty_entry);
1618                               }
1619                             done = true;
1620                           }
1621                       }
1622                   }
1623                 if (!done)
1624                   {
1625                     struct conflict *c = XMALLOC (struct conflict);
1626                     size_t i, j;
1627                     c->num_old_entries = edit->i2 - edit->i1 + 1;
1628                     c->old_entries =
1629                       XNMALLOC (c->num_old_entries, struct entry *);
1630                     for (i = edit->i1; i <= edit->i2; i++)
1631                       c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1632                     c->num_modified_entries = edit->j2 - edit->j1 + 1;
1633                     c->modified_entries =
1634                       XNMALLOC (c->num_modified_entries, struct entry *);
1635                     for (j = edit->j1; j <= edit->j2; j++)
1636                       c->modified_entries[j - edit->j1] = modified_file.entries[j];
1637                     gl_list_add_last (result_conflicts, c);
1638                   }
1639               }
1640               break;
1641             }
1642         }
1643     }
1644
1645     /* Output the result.  */
1646     {
1647       FILE *fp = fopen (destination_file_name, "w");
1648       if (fp == NULL)
1649         {
1650           fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1651           exit (EXIT_FAILURE);
1652         }
1653
1654       /* Output the conflicts at the top.  */
1655       {
1656         size_t n = gl_list_size (result_conflicts);
1657         size_t i;
1658         for (i = 0; i < n; i++)
1659           conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1660       }
1661       /* Output the modified and unmodified entries, in order.  */
1662       {
1663         gl_list_iterator_t iter = gl_list_iterator (result_entries);
1664         const void *elt;
1665         gl_list_node_t node;
1666         while (gl_list_iterator_next (&iter, &elt, &node))
1667           entry_write (fp, (struct entry *) elt);
1668         gl_list_iterator_free (&iter);
1669       }
1670
1671       if (fwriteerror (fp))
1672         {
1673           fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1674           exit (EXIT_FAILURE);
1675         }
1676     }
1677
1678     exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
1679   }
1680 }