1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2010 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundred lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
44 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
50 Additionally, for git users:
51 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
54 [merge "merge-changelog"]
55 name = GNU-style ChangeLog merge driver
56 driver = /usr/local/bin/git-merge-changelog %O %A %B
58 - In every directory that contains a ChangeLog file, add a file
59 '.gitattributes' with this line:
61 ChangeLog merge=merge-changelog
63 (See "man 5 gitattributes" for more info.)
65 Additionally, for bzr users:
66 - Install the 'extmerge' bzr plug-in listed at
67 <http://doc.bazaar.canonical.com/plugins/en/index.html>
68 <http://wiki.bazaar.canonical.com/BzrPlugins>
69 - Add to your $HOME/.bazaar/bazaar.conf the line
71 external_merge = git-merge-changelog %b %T %o
73 - Then, to merge a conflict in a ChangeLog file, use
75 $ bzr extmerge ChangeLog
77 Additionally, for hg users:
78 - Add to your $HOME/.hgrc the lines
81 ChangeLog = git-merge-changelog
84 git-merge-changelog.executable = /usr/local/bin/git-merge-changelog
85 git-merge-changelog.args = $base $local $other
87 See <http://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
91 /* Use as an alternative to 'diff3':
92 git-merge-changelog performs the same role as "diff3 -m", just with
94 $ git-merge-changelog %O %A %B
99 /* Calling convention:
100 A merge driver is called with three filename arguments:
101 1. %O = The common ancestor of %A and %B.
102 2. %A = The file's contents from the "current branch".
103 3. %B = The file's contents from the "other branch"; this is the contents
106 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
107 maintainer to a central maintainer) or of a downstream pull with --rebase:
108 2. %A = The file's newest pulled contents; modified by other committers.
109 3. %B = The user's newest copy of the file; modified by the user.
110 In case of a downstream pull (e.g. from a central repository to the user)
111 or of an upstream pull with --rebase:
112 2. %A = The user's newest copy of the file; modified by the user.
113 3. %B = The file's newest pulled contents; modified by other committers.
115 It should write its merged output into file %A. It can also echo some
116 remarks to stdout. It should exit with return code 0 if the merge could
117 be resolved cleanly, or with non-zero return code if there were conflicts.
121 The structure of a ChangeLog file: It consists of ChangeLog entries. A
122 ChangeLog entry starts at a line following a blank line and that starts with
123 a non-whitespace character, or at the beginning of a file.
124 The merge driver works as follows: It reads the three files into memory and
125 dissects them into ChangeLog entries. It then finds the differences between
126 %O and %B. They are classified as:
127 - removals (some consecutive entries removed),
128 - changes (some consecutive entries removed, some consecutive entries
130 - additions (some consecutive entries added).
131 The driver then attempts to apply the changes to %A.
132 To this effect, it first computes a correspondence between the entries in %O
133 and the entries in %A, using fuzzy string matching to still identify changed
135 - Removals are applied one by one. If the entry is present in %A, at any
136 position, it is removed. If not, the removal is marked as a conflict.
137 - Additions at the top of %B are applied at the top of %A.
138 - Additions between entry x and entry y (y may be the file end) in %B are
139 applied between entry x and entry y in %A (if they still exist and are
140 still consecutive in %A), otherwise the additions are marked as a
142 - Changes are categorized into "simple changes":
145 added_entry ... added_entry modified_entry1 ... modified_entryn,
146 where the correspondence between entry_i and modified_entry_i is still
147 clear; and "big changes": these are all the rest. Simple changes at the
148 top of %B are applied by putting the added entries at the top of %A. The
149 changes in simple changes are applied one by one; possibly leading to
150 single-entry conflicts. Big changes are applied en bloc, possibly
151 leading to conflicts spanning multiple entries.
152 - Conflicts are output at the top of the file and cause an exit status of
164 #include <sys/types.h>
168 #include "read-file.h"
169 #include "gl_xlist.h"
170 #include "gl_array_list.h"
171 #include "gl_linkedhash_list.h"
172 #include "gl_rbtreehash_list.h"
173 #include "gl_linked_list.h"
175 #include "xmalloca.h"
178 #include "c-strstr.h"
179 #include "fwriteerror.h"
180 #include "getprogname.h"
/* Assertion macro and the similarity cut-offs used for fuzzy entry matching.
   NOTE(review): the continuation lines of ASSERT are not present in this
   listing, so its expansion cannot be documented here.
   FSTRCMP_THRESHOLD is compared with >= against the best similarity found in
   entries_mapping_get and entries_mapping_reverse_get.
   FSTRCMP_STRICTER_THRESHOLD is the bound that try_split_merged_entry compares
   its best body similarity against.  */
182 #define ASSERT(expr) \
190 #define FSTRCMP_THRESHOLD 0.6
191 #define FSTRCMP_STRICTER_THRESHOLD 0.8
/* NOTE(review): only the hashcode_cached member is visible in this listing.
   Other code in this file also accesses ->string, ->length and ->hashcode
   (see entry_create and entry_hashcode), so those members are presumably
   declared on lines that are not shown here.  */
193 /* Representation of a ChangeLog entry.
194 The string may contain NUL bytes; therefore it is represented as a plain
195 opaque memory region. */
200 /* Cache for the hash code. */
201 bool hashcode_cached;
206 The memory region passed by the caller must be of indefinite extent. It is
207 *not* copied here. */
/* Allocate a new struct entry (via XMALLOC) that refers to the LENGTH bytes
   starting at STRING, with its hash-code cache marked as not yet computed.
   The entry stores the STRING pointer itself rather than a copy of the bytes.
   NOTE(review): the braces and the return statement are not present in this
   listing; presumably the function returns RESULT.  */
208 static struct entry *
209 entry_create (char *string, size_t length)
211 struct entry *result = XMALLOC (struct entry);
212 result->string = string;
213 result->length = length;
214 result->hashcode_cached = false;
/* Equality predicate on entries, taking untyped pointers; it is passed as the
   equals callback to gl_list_create_empty in read_changelog_file.
   ELT1 and ELT2 are both treated as struct entry pointers.
   Two entries are equal when their lengths match and their bytes are
   identical under memcmp, so embedded NUL bytes take part in the
   comparison.  */
218 /* Compare two entries for equality. */
220 entry_equals (const void *elt1, const void *elt2)
222 const struct entry *entry1 = (const struct entry *) elt1;
223 const struct entry *entry2 = (const struct entry *) elt2;
224 return entry1->length == entry2->length
225 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
/* Hash callback for the entry lists.  ELT is treated as a struct entry.
   The hash is computed only while hashcode_cached is false, by folding every
   byte of the entry's string into H; the flag is then set so that later
   calls return entry->hashcode without recomputing it.
   NOTE(review): the declarations of H, S and N and the store into
   entry->hashcode are on lines not present in this listing.  */
228 /* Return a hash code of the contents of a ChangeLog entry. */
230 entry_hashcode (const void *elt)
232 struct entry *entry = (struct entry *) elt;
233 if (!entry->hashcode_cached)
235 /* See http://www.haible.de/bruno/hashfunc.html. */
/* Per byte: rotate H left by 9 bits (assuming H has the width of size_t)
   and add the byte value, read as unsigned char.  */
240 for (s = entry->string, n = entry->length; n > 0; s++, n--)
241 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
244 entry->hashcode_cached = true;
246 return entry->hashcode;
/* NOTE(review): several lines of this function are not present in this
   listing: the end of the comment below, the third parameter, the local
   declarations, the statements between the two memcpy calls, and the return
   statements.
   What is visible: each entry is first checked for an embedded NUL byte
   (fstrcmp needs NUL-terminated strings), and what is done in that case is
   not shown.  Both entries are copied into a single xmalloca buffer of
   entry1->length + 1 + entry2->length + 1 bytes, and fstrcmp_bounded is
   called on the two copies with lower_bound.  */
249 /* Perform a fuzzy comparison of two ChangeLog entries.
250 Return a similarity measure of the two entries, a value between 0 and 1.
251 0 stands for very distinct, 1 for identical.
252 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
255 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
258 /* fstrcmp works only on NUL terminated strings. */
262 if (memchr (entry1->string, '\0', entry1->length) != NULL)
264 if (memchr (entry2->string, '\0', entry2->length) != NULL)
266 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
269 memcpy (p, entry1->string, entry1->length);
272 memcpy (p, entry2->string, entry2->length);
277 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
/* In-memory form of one ChangeLog file, filled in by read_changelog_file.
   The same entries are reachable three ways: entries_list (appended in file
   order), entries_reversed (each entry is inserted at the front, giving the
   opposite order), and the entries array, which read_changelog_file fills by
   iterating over entries_list.
   NOTE(review): the num_entries member used elsewhere in this file is
   declared on a line not present in this listing.  */
282 /* This structure represents an entire ChangeLog file, after it was read
284 struct changelog_file
286 /* The entries, as a list. */
287 gl_list_t /* <struct entry *> */ entries_list;
288 /* The entries, as a list in opposite direction. */
289 gl_list_t /* <struct entry *> */ entries_reversed;
290 /* The entries, as an array. */
292 struct entry **entries;
/* Load FILENAME and split it into ChangeLog entries.
   FILENAME  path of the file to read (passed to read_file).
   RESULT    receives the two entry lists, the entry count and the entry
             array.
   If read_file returns NULL, a "could not read file" message is printed to
   stderr.  NOTE(review): what happens after that message is on a line not
   present in this listing.
   Each entry is created with entry_create on a pointer into the buffer
   returned by read_file; entry_create stores the pointer without copying, so
   that buffer has to stay allocated for as long as the entries are used.
   The visible part of the entry-boundary test requires that at least two
   bytes remain at PTR and that PTR[1] is none of newline, tab or space; a
   further condition of that test is on a line not present in this listing.
   After the split, num_entries is taken from the size of entries_list and
   the entries array is filled by iterating over that list.  */
295 /* Read a ChangeLog file into memory.
296 Return the contents in *RESULT. */
298 read_changelog_file (const char *filename, struct changelog_file *result)
300 /* Read the file in text mode, otherwise it's hard to recognize empty
303 char *contents = read_file (filename, &length);
304 if (contents == NULL)
306 fprintf (stderr, "could not read file '%s'\n", filename);
310 result->entries_list =
311 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
313 result->entries_reversed =
314 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
316 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
317 at a line following a blank line and that starts with a non-whitespace
318 character, or at the beginning of a file.
319 Split the file contents into entries. */
321 char *contents_end = contents + length;
322 char *start = contents;
323 while (start < contents_end)
325 /* Search the end of the current entry. */
329 while (ptr < contents_end)
331 ptr = memchr (ptr, '\n', contents_end - ptr);
338 if (contents_end - ptr >= 2
340 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
347 curr = entry_create (start, ptr - start);
348 gl_list_add_last (result->entries_list, curr);
349 gl_list_add_first (result->entries_reversed, curr);
355 result->num_entries = gl_list_size (result->entries_list);
356 result->entries = XNMALLOC (result->num_entries, struct entry *);
359 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
362 while (gl_list_iterator_next (&iter, &elt, &node))
363 result->entries[index++] = (struct entry *) elt;
364 gl_list_iterator_free (&iter);
365 ASSERT (index == result->num_entries);
/* Both index arrays are allocated and pre-filled with -2 by compute_mapping.
   They are updated in place by compute_mapping, entries_mapping_get and
   entries_mapping_reverse_get as matches are found or ruled out.  */
369 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
370 struct entries_mapping
372 struct changelog_file *file1;
373 struct changelog_file *file2;
374 /* Mapping from indices in FILE1 to indices in FILE2.
375 A value -1 means that the entry from FILE1 is not found in FILE2.
376 A value -2 means that it has not yet been computed. */
377 ssize_t *index_mapping;
378 /* Mapping from indices in FILE2 to indices in FILE1.
379 A value -1 means that the entry from FILE2 is not found in FILE1.
380 A value -2 means that it has not yet been computed. */
381 ssize_t *index_mapping_reverse;
/* Memoised lookup.  When index_mapping[I] is still below -1 (not yet
   computed), entry I of FILE1 is compared with entry_fstrcmp against every
   entry of FILE2 whose reverse mapping is still negative, keeping the most
   similar candidate.  If that candidate reaches FSTRCMP_THRESHOLD, the search
   is repeated in the other direction, from the candidate over the entries of
   FILE1 whose mapping is still negative.  The pair is recorded in both
   arrays only when that second search also reaches the threshold and lands
   on I again.  Otherwise index_mapping[I] is set to -1 so the search is not
   repeated on the next call.
   NOTE(review): the return type, several declarations and the assignments of
   best_j / best_i are on lines not present in this listing.  */
384 /* Look up (or lazily compute) the mapping of an entry in FILE1.
385 i is the index in FILE1.
386 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
388 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
390 if (mapping->index_mapping[i] < -1)
392 struct changelog_file *file1 = mapping->file1;
393 struct changelog_file *file2 = mapping->file2;
394 size_t n1 = file1->num_entries;
395 size_t n2 = file2->num_entries;
396 struct entry *entry_i = file1->entries[i];
399 /* Search whether it approximately occurs in file2. */
401 double best_j_similarity = 0.0;
/* Candidates are scanned from the last entry of FILE2 down to the first; the
   best similarity so far is handed to entry_fstrcmp as its lower bound.  */
402 for (j = n2 - 1; j >= 0; j--)
403 if (mapping->index_mapping_reverse[j] < 0)
406 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
407 if (similarity > best_j_similarity)
410 best_j_similarity = similarity;
413 if (best_j_similarity >= FSTRCMP_THRESHOLD)
415 /* Found a similar entry in file2. */
416 struct entry *entry_j = file2->entries[best_j];
417 /* Search whether it approximately occurs in file1 at index i. */
419 double best_i_similarity = 0.0;
421 for (ii = n1 - 1; ii >= 0; ii--)
422 if (mapping->index_mapping[ii] < 0)
425 entry_fstrcmp (file1->entries[ii], entry_j,
427 if (similarity > best_i_similarity)
430 best_i_similarity = similarity;
/* Accept the pair only when the match is mutual: the reverse search must
   pick I itself.  */
433 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
435 mapping->index_mapping[i] = best_j;
436 mapping->index_mapping_reverse[best_j] = i;
439 if (mapping->index_mapping[i] < -1)
440 /* It does not approximately occur in FILE2.
441 Remember it, for next time. */
442 mapping->index_mapping[i] = -1;
444 return mapping->index_mapping[i];
/* Mirror image of entries_mapping_get, starting from an entry of FILE2.
   When index_mapping_reverse[J] is still below -1, entry J is compared
   against every entry of FILE1 whose mapping is still negative.  The best
   candidate is accepted only if it reaches FSTRCMP_THRESHOLD and the search
   back from that candidate over the still-unmapped entries of FILE2 also
   reaches the threshold and lands on J again.  On success both arrays are
   updated; otherwise index_mapping_reverse[J] is set to -1.
   In the visible part of main this function is only named in a void
   expression, to silence an unused-function warning.
   NOTE(review): the return type, several declarations and the assignments of
   best_i / best_j are on lines not present in this listing.  */
447 /* Look up (or lazily compute) the mapping of an entry in FILE2.
448 j is the index in FILE2.
449 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
451 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
453 if (mapping->index_mapping_reverse[j] < -1)
455 struct changelog_file *file1 = mapping->file1;
456 struct changelog_file *file2 = mapping->file2;
457 size_t n1 = file1->num_entries;
458 size_t n2 = file2->num_entries;
459 struct entry *entry_j = file2->entries[j];
462 /* Search whether it approximately occurs in file1. */
464 double best_i_similarity = 0.0;
465 for (i = n1 - 1; i >= 0; i--)
466 if (mapping->index_mapping[i] < 0)
469 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
470 if (similarity > best_i_similarity)
473 best_i_similarity = similarity;
476 if (best_i_similarity >= FSTRCMP_THRESHOLD)
478 /* Found a similar entry in file1. */
479 struct entry *entry_i = file1->entries[best_i];
480 /* Search whether it approximately occurs in file2 at index j. */
482 double best_j_similarity = 0.0;
484 for (jj = n2 - 1; jj >= 0; jj--)
485 if (mapping->index_mapping_reverse[jj] < 0)
488 entry_fstrcmp (entry_i, file2->entries[jj],
490 if (similarity > best_j_similarity)
493 best_j_similarity = similarity;
/* Accept the pair only when the match is mutual: the reverse search must
   pick J itself.  */
496 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
498 mapping->index_mapping_reverse[j] = best_i;
499 mapping->index_mapping[best_i] = j;
502 if (mapping->index_mapping_reverse[j] < -1)
503 /* It does not approximately occur in FILE1.
504 Remember it, for next time. */
505 mapping->index_mapping_reverse[j] = -1;
507 return mapping->index_mapping_reverse[j];
/* Build the correspondence in two stages.
   Stage 1: both index arrays are allocated and filled with -2, then FILE1 is
   walked from its last entry to its first.  Each entry that has no mapping
   yet is looked up by exact equality (gl_list_indexof) in FILE2's
   entries_reversed list; a hit is recorded in both arrays unless that FILE2
   entry already has a partner.  Further copies of the same entry are then
   paired off with gl_list_indexof_from on both reversed lists.
   The arrays and the two files are stored in *RESULT.
   Stage 2: entries_mapping_get is called for every FILE1 index, from last to
   first, which adds the fuzzy matches.
   NOTE(review): the FULL parameter declaration and the line guarding the
   stage 2 loop are not present in this listing; per the comment below, stage
   2 presumably runs only when FULL is true.  */
510 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
511 The correspondence also takes into account small modifications; i.e. the
512 indicated relation is not equality of entries but best-match similarity
514 If FULL is true, the maximum of matching is done up-front. If it is false,
515 it is done in a lazy way through the functions entries_mapping_get and
516 entries_mapping_reverse_get.
517 Return the result in *RESULT. */
519 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
521 struct entries_mapping *result)
523 /* Mapping from indices in file1 to indices in file2. */
524 ssize_t *index_mapping;
525 /* Mapping from indices in file2 to indices in file1. */
526 ssize_t *index_mapping_reverse;
527 size_t n1 = file1->num_entries;
528 size_t n2 = file2->num_entries;
531 index_mapping = XNMALLOC (n1, ssize_t);
532 for (i = 0; i < n1; i++)
533 index_mapping[i] = -2;
535 index_mapping_reverse = XNMALLOC (n2, ssize_t);
536 for (j = 0; j < n2; j++)
537 index_mapping_reverse[j] = -2;
539 for (i = n1 - 1; i >= 0; i--)
540 /* Take an entry from file1. */
541 if (index_mapping[i] < -1)
543 struct entry *entry = file1->entries[i];
544 /* Search whether it occurs in file2. */
545 j = gl_list_indexof (file2->entries_reversed, entry);
549 /* Found an exact correspondence. */
550 /* If index_mapping_reverse[j] >= 0, we have already seen other
551 copies of this entry, and there were more occurrences of it in
552 file1 than in file2. In this case, do nothing. */
553 if (index_mapping_reverse[j] < 0)
555 index_mapping[i] = j;
556 index_mapping_reverse[j] = i;
557 /* Look for more occurrences of the same entry. Match them
558 as long as they pair up. Unpaired occurrences of the same
559 entry are left without mapping. */
570 gl_list_indexof_from (file1->entries_reversed,
575 gl_list_indexof_from (file2->entries_reversed,
/* Positions found in the reversed lists are converted back to indices into
   the entries arrays.  */
579 curr_i = n1 - 1 - next_i;
580 curr_j = n2 - 1 - next_j;
581 ASSERT (index_mapping[curr_i] < 0);
582 ASSERT (index_mapping_reverse[curr_j] < 0);
583 index_mapping[curr_i] = curr_j;
584 index_mapping_reverse[curr_j] = curr_i;
591 result->file1 = file1;
592 result->file2 = file2;
593 result->index_mapping = index_mapping;
594 result->index_mapping_reverse = index_mapping_reverse;
597 for (i = n1 - 1; i >= 0; i--)
598 entries_mapping_get (result, i);
/* Edit kinds and the edit record built by compute_differences.
   NOTE(review): the declaration keywords, the enumerator names and the member
   holding the edit kind are on lines not present in this listing; the member
   comments below mention the kinds ADDITION, CHANGE and REMOVAL.
   Both index ranges are inclusive (first index, last index).  */
601 /* An "edit" is a textual modification performed by the user, that needs to
602 be applied to the other file. */
605 /* Some consecutive entries were added. */
607 /* Some consecutive entries were removed; some other consecutive entries
608 were added at the same position. (Not necessarily the same number of
611 /* Some consecutive entries were removed. */
615 /* This structure represents an edit. */
619 /* Range of indices into the entries of FILE1. */
620 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
621 /* Range of indices into the entries of FILE2. */
622 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
/* Result record filled in by compute_differences.
   NOTE(review): the struct header and the members that hold the edits are on
   lines not present in this listing; compute_differences stores them as
   result->num_edits and result->edits.  */
625 /* This structure represents the differences from one file, FILE1, to another
629 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
630 from FILE1 is not found in FILE2). */
631 ssize_t *index_mapping;
632 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
633 from FILE2 is not found in FILE1). */
634 ssize_t *index_mapping_reverse;
635 /* The edits that transform FILE1 into FILE2. */
/* Parameters for the generic sequence-comparison code: sequence elements are
   struct entry pointers compared with entry_equals, offsets are ssize_t, and
   the comparison context gains the two index arrays.  NOTE_DELETE stores -1
   at a position of index_mapping and NOTE_INSERT stores -1 at a position of
   index_mapping_reverse.
   NOTE(review): the #include that consumes these macros is not present in
   this listing.  */
640 /* Import the difference detection algorithm from GNU diff. */
641 #define ELEMENT struct entry *
642 #define EQUAL entry_equals
643 #define OFFSET ssize_t
644 #define EXTRA_CONTEXT_FIELDS \
645 ssize_t *index_mapping; \
646 ssize_t *index_mapping_reverse;
647 #define NOTE_DELETE(ctxt, xoff) \
648 ctxt->index_mapping[xoff] = -1
649 #define NOTE_INSERT(ctxt, yoff) \
650 ctxt->index_mapping_reverse[yoff] = -1
/* Order-sensitive diff of FILE1 against FILE2; the outcome is stored in
   *RESULT.
   1. index_mapping and index_mapping_reverse are allocated and filled with
      0, and the fdiag / bdiag work arrays are set up.
   2. compareseq is run over the two entry arrays; per the comment below it
      leaves a -1 in the arrays for each removed or added entry.
   3. The positions that are still non-negative are paired up in order, so
      that each array holds the partner index of every retained entry.
   4. Both files are walked in parallel; struct edit records are allocated
      and appended to a list.
   5. The two arrays are handed over to *RESULT, and the edits are copied
      from the list into a newly allocated array.
   NOTE(review): the lines that fill in the fields of each edit, and most of
   the branch structure of step 4, are not present in this listing.  */
653 /* Compute the differences between the entries of FILE1 and the entries of
656 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
657 struct differences *result)
659 /* Unlike compute_mapping, which mostly ignores the order of the entries and
660 therefore works well when some entries are permuted, here we use the order.
661 I think this is needed in order to distinguish changes from
662 additions+removals; I don't know how to say what is a "change" if the
663 files are considered as unordered sets of entries. */
665 size_t n1 = file1->num_entries;
666 size_t n2 = file2->num_entries;
669 gl_list_t /* <struct edit *> */ edits;
671 ctxt.xvec = file1->entries;
672 ctxt.yvec = file2->entries;
673 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
674 for (i = 0; i < n1; i++)
675 ctxt.index_mapping[i] = 0;
676 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
677 for (j = 0; j < n2; j++)
678 ctxt.index_mapping_reverse[j] = 0;
/* fdiag and bdiag share one allocation of 2 * (n1 + n2 + 3) elements: fdiag
   points n2 + 1 elements into it and bdiag points n1 + n2 + 3 elements
   past fdiag.  */
679 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
680 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
682 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
683 each removed or added entry. */
684 compareseq (0, n1, 0, n2, &ctxt);
686 /* Complete the index_mapping and index_mapping_reverse arrays. */
689 while (i < n1 || j < n2)
691 while (i < n1 && ctxt.index_mapping[i] < 0)
693 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
695 ASSERT ((i < n1) == (j < n2));
696 if (i == n1 && j == n2)
698 ctxt.index_mapping[i] = j;
699 ctxt.index_mapping_reverse[j] = i;
704 /* Create the edits. */
705 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
708 while (i < n1 || j < n2)
714 e = XMALLOC (struct edit);
718 gl_list_add_last (edits, e);
725 e = XMALLOC (struct edit);
729 gl_list_add_last (edits, e);
732 if (ctxt.index_mapping[i] >= 0)
734 if (ctxt.index_mapping_reverse[j] >= 0)
736 ASSERT (ctxt.index_mapping[i] == j);
737 ASSERT (ctxt.index_mapping_reverse[j] == i);
744 ASSERT (ctxt.index_mapping_reverse[j] < 0);
745 e = XMALLOC (struct edit);
750 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
752 gl_list_add_last (edits, e);
757 if (ctxt.index_mapping_reverse[j] >= 0)
760 ASSERT (ctxt.index_mapping[i] < 0);
761 e = XMALLOC (struct edit);
766 while (i < n1 && ctxt.index_mapping[i] < 0);
768 gl_list_add_last (edits, e);
773 ASSERT (ctxt.index_mapping[i] < 0);
774 ASSERT (ctxt.index_mapping_reverse[j] < 0);
775 e = XMALLOC (struct edit);
780 while (i < n1 && ctxt.index_mapping[i] < 0);
785 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
787 gl_list_add_last (edits, e);
792 result->index_mapping = ctxt.index_mapping;
793 result->index_mapping_reverse = ctxt.index_mapping_reverse;
794 result->num_edits = gl_list_size (edits);
795 result->edits = XNMALLOC (result->num_edits, struct edit *);
798 gl_list_iterator_t iter = gl_list_iterator (edits);
801 while (gl_list_iterator_next (&iter, &elt, &node))
802 result->edits[index++] = (struct edit *) elt;
803 gl_list_iterator_free (&iter);
804 ASSERT (index == result->num_edits);
/* File-scope entry whose first two members are initialised to NULL and 0
   (presumably string and length -- the member order is not visible in this
   listing); any remaining members are implicitly zero-initialised.
   entry_write writes nothing for an entry whose length is 0.  */
808 /* An empty entry. */
809 static struct entry empty_entry = { NULL, 0 };
811 /* Return the end of a paragraph.
813 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
814 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
815 it is the start of a blank line or the end of the entry. */
/* The search looks for the next newline at or after OFFSET and moves OFFSET
   to the byte following it; it then tests whether that byte exists and is
   itself a newline, i.e. whether a blank line starts there.
   NOTE(review): the return type, the loop header, the handling of a missing
   newline and the return statements are on lines not present in this
   listing.  */
817 find_paragraph_end (const struct entry *entry, size_t offset)
819 const char *string = entry->string;
820 size_t length = entry->length;
824 const char *nl = memchr (string + offset, '\n', length - offset);
827 offset = (nl - string) + 1;
828 if (offset < length && string[offset] == '\n')
/* Try to interpret NEW_ENTRY as OLD_ENTRY with extra paragraphs inserted
   between its title (the first paragraph) and its body.
   OLD_ENTRY, NEW_ENTRY  entries to compare; they are not modified.
   NEW_SPLIT             receives two entries made with entry_create:
                         [0] covers the first best_split_offset + 1 bytes of
                         NEW_ENTRY and shares its storage;
                         [1] refers to a newly allocated buffer holding the
                         title followed by the bytes of NEW_ENTRY from the
                         split offset to its end.
   The split offset starts at the end of the title and advances from one
   paragraph end of NEW_ENTRY to the next (find_paragraph_end); the offset
   giving the highest entry_fstrcmp similarity between the old body and the
   remainder of NEW_ENTRY is kept.
   Three conditions are checked before the split is made: the titles must be
   byte-identical, the chosen offset must not be the end of NEW_ENTRY, and
   the best similarity must not be below FSTRCMP_STRICTER_THRESHOLD.
   NOTE(review): the return type, the loop structure and the return
   statements are on lines not present in this listing; the comment below
   states that false is returned when the entries do not have the expected
   form.  */
833 /* Split a merged entry.
834 Given an old entry of the form
837 and a new entry of the form
841 where the two titles are the same and BODY and BODY' are very similar,
842 this computes two new entries
849 If the entries don't have this form, it returns false. */
851 try_split_merged_entry (const struct entry *old_entry,
852 const struct entry *new_entry,
853 struct entry *new_split[2])
855 size_t old_title_len = find_paragraph_end (old_entry, 0);
856 size_t new_title_len = find_paragraph_end (new_entry, 0);
857 struct entry old_body;
858 struct entry new_body;
859 size_t best_split_offset;
860 double best_similarity;
864 if (!(old_title_len == new_title_len
865 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
868 old_body.string = old_entry->string + old_title_len;
869 old_body.length = old_entry->length - old_title_len;
871 /* Determine where to split the new entry.
872 This is done by maximizing the similarity between BODY and BODY'. */
873 best_split_offset = split_offset = new_title_len;
874 best_similarity = 0.0;
879 new_body.string = new_entry->string + split_offset;
880 new_body.length = new_entry->length - split_offset;
882 entry_fstrcmp (&old_body, &new_body, best_similarity);
883 if (similarity > best_similarity)
885 best_split_offset = split_offset;
886 best_similarity = similarity;
888 if (best_similarity == 1.0)
889 /* It cannot get better. */
892 if (split_offset < new_entry->length)
893 split_offset = find_paragraph_end (new_entry, split_offset + 1);
898 /* BODY' should not be empty. */
899 if (best_split_offset == new_entry->length)
901 ASSERT (new_entry->string[best_split_offset] == '\n');
903 /* A certain similarity between BODY and BODY' is required. */
904 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
907 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
910 size_t len1 = new_title_len;
911 size_t len2 = new_entry->length - best_split_offset;
912 char *combined = XNMALLOC (len1 + len2, char);
913 memcpy (combined, new_entry->string, len1);
914 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
915 new_split[1] = entry_create (combined, len1 + len2);
/* FP     output stream.
   ENTRY  entry to emit.  Its bytes are written verbatim with fwrite, so
          embedded NUL bytes are preserved.  Nothing is written for an entry
          of length 0.
   The result of fwrite is not checked in the visible lines.  */
921 /* Write the contents of an entry to the output stream FP. */
923 entry_write (FILE *fp, struct entry *entry)
925 if (entry->length > 0)
926 fwrite (entry->string, 1, entry->length, fp);
/* NOTE(review): the struct header is on a line not present in this listing.
   conflict_write takes a struct conflict pointer and reads the four members
   shown here: each array is paired with the count that precedes it.  */
929 /* This structure represents a conflict.
930 A conflict can occur for various reasons. */
933 /* Parts from the ancestor file. */
934 size_t num_old_entries;
935 struct entry **old_entries;
936 /* Parts of the modified file. */
937 size_t num_modified_entries;
938 struct entry **modified_entries;
/* FP  output stream.
   C   conflict to emit.
   Output layout: a "<<<<<<<" line, every old (ancestor) entry in order, a
   "=======" line, every modified entry in order, and a ">>>>>>>" line.
   Entries are emitted through entry_write, i.e. unindented and verbatim.  */
941 /* Write a conflict to the output stream FP, including markers. */
943 conflict_write (FILE *fp, struct conflict *c)
947 /* Use the same syntax as git's default merge driver.
948 Don't indent the contents of the entries (with things like ">" or "-"),
949 otherwise the user needs more textual editing to resolve the conflict. */
950 fputs ("<<<<<<<\n", fp);
951 for (i = 0; i < c->num_old_entries; i++)
952 entry_write (fp, c->old_entries[i]);
953 fputs ("=======\n", fp);
954 for (i = 0; i < c->num_modified_entries; i++)
955 entry_write (fp, c->modified_entries[i]);
956 fputs (">>>>>>>\n", fp);
/* Long options handed to getopt_long in main.  --help and --version report
   the same codes as the short options 'h' and 'V'; --split-merged-entry has
   no short form and is reported as CHAR_MAX + 1.  None of them takes an
   argument.
   NOTE(review): the remaining lines of the initializer are not present in
   this listing.  */
960 static const struct option long_options[] =
962 { "help", no_argument, NULL, 'h' },
963 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
964 { "version", no_argument, NULL, 'V' },
/* STATUS is tested against EXIT_SUCCESS: when it differs, a short hint
   pointing to --help is written to stderr.  The full help text is produced
   with printf (presumably in an else branch that is not shown).  The
   description of --split-merged-entry sits inside an #if 0 region because,
   per its comment, that behaviour is now on by default.
   NOTE(review): the function header, the branch structure, the arguments
   completing several calls, the matching #endif and the final exit are on
   lines not present in this listing.  */
968 /* Print a usage message and exit. */
972 if (status != EXIT_SUCCESS)
973 fprintf (stderr, "Try '%s --help' for more information.\n",
977 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
980 printf ("Merges independent modifications of a ChangeLog style file.\n");
981 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
982 printf ("A-FILE-NAME names the publicly modified file.\n");
983 printf ("B-FILE-NAME names the user-modified file.\n");
984 printf ("Writes the merged file into A-FILE-NAME.\n");
986 #if 0 /* --split-merged-entry is now on by default. */
987 printf ("Operation modifiers:\n");
989 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
990 Use this if you have the habit to merge unrelated\n\
991 entries into a single one, separated only by a\n\
992 newline, just because they happened on the same\n\
996 printf ("Informative output:\n");
997 printf (" -h, --help display this help and exit\n");
998 printf (" -V, --version output version information and exit\n");
1000 fputs ("Report bugs to <bug-gnulib@gnu.org>.\n",
1008 main (int argc, char *argv[])
1013 bool split_merged_entry;
1015 /* Set default values for variables. */
1018 split_merged_entry = true;
1020 /* Parse command line options. */
1021 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1024 case '\0': /* Long option. */
1032 case CHAR_MAX + 1: /* --split-merged-entry */
1035 usage (EXIT_FAILURE);
1040 /* Version information is requested. */
1041 printf ("%s\n", getprogname ());
1042 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1043 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1044 This is free software: you are free to change and redistribute it.\n\
1045 There is NO WARRANTY, to the extent permitted by law.\n\
1048 printf ("Written by %s.\n", "Bruno Haible");
1049 exit (EXIT_SUCCESS);
1054 /* Help is requested. */
1055 usage (EXIT_SUCCESS);
1058 /* Test argument count. */
1059 if (optind + 3 != argc)
1060 error (EXIT_FAILURE, 0, "expected three arguments");
1063 const char *ancestor_file_name; /* O-FILE-NAME */
1064 const char *destination_file_name; /* A-FILE-NAME */
1066 const char *other_file_name; /* B-FILE-NAME */
1067 const char *mainstream_file_name;
1068 const char *modified_file_name;
1069 struct changelog_file ancestor_file;
1070 struct changelog_file mainstream_file;
1071 struct changelog_file modified_file;
1072 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1073 struct entries_mapping mapping;
1074 struct differences diffs;
1075 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1076 gl_list_t /* <struct entry *> */ result_entries;
1077 gl_list_t /* <struct conflict *> */ result_conflicts;
1079 ancestor_file_name = argv[optind];
1080 destination_file_name = argv[optind + 1];
1081 other_file_name = argv[optind + 2];
1083 /* Heuristic to determine whether it's a pull in downstream direction
1084 (e.g. pull from a centralized server) or a pull in upstream direction
1085 (e.g. "git stash apply").
1087 For ChangeLog this distinction is important. The difference between
1088 an "upstream" and a "downstream" repository is that more people are
1089 looking at the "upstream" repository. They want to be informed about
1090 changes and expect them to be shown at the top of the ChangeLog.
1091 When a user pulls downstream, on the other hand, he has two options:
1092 a) He gets the change entries from the central repository also at the
1093 top of his ChangeLog, and his own changes come after them.
1094 b) He gets the change entries from the central repository after those
1095 he has collected for his branch. His own change entries stay at
1096 the top of the ChangeLog file.
1097 In the case a) he has to reorder the ChangeLog before he can commit.
1098 No one does that. So most people want b).
1099 In other words, the order of entries in a ChangeLog should represent
1099 the order in which they have flowed (or will flow) into the *central*
1103 But in git this is fundamentally indistinguishable, because when Linus
1104 pulls patches from akpm and akpm pulls patches from Linus, it's not
1105 clear which of the two is more "upstream". Also, when you have many
1106 branches in a repository and pull from one to another, "git" has no way
1107 to know which branch is more "upstream" than the other. The git-tag(1)
1108 manual page also says:
1109 "One important aspect of git is it is distributed, and being
1110 distributed largely means there is no inherent "upstream" or
1111 "downstream" in the system."
1112 Therefore anyone who attempts to produce a ChangeLog from the merge
1115 Here we allow the user to specify the pull direction through an
1116 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1117 environment variables are not set, we assume a "simple single user"
1118 usage pattern: He manages local changes through stashes and uses
1119 "git pull" only to pull downstream.
1121 How to distinguish these situations? There are several hints:
1122 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1123 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1124 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1125 set to 'cherry-pick'.
1126 - During a "git stash apply", there is an environment variable of
1127 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1131 var = getenv ("GIT_DOWNSTREAM");
1132 if (var != NULL && var[0] != '\0')
1136 var = getenv ("GIT_UPSTREAM");
1137 if (var != NULL && var[0] != '\0')
1141 var = getenv ("GIT_REFLOG_ACTION");
1142 #if 0 /* Debugging code */
1143 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1146 && ((strncmp (var, "pull", 4) == 0
1147 && c_strstr (var, " --rebase") == NULL)
1148 || strncmp (var, "merge origin", 12) == 0))
1152 /* "git stash apply", "git rebase", "git cherry-pick" and
1160 #if 0 /* Debugging code */
1163 printf ("First line of %%A:\n");
1164 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1165 printf ("First line of %%B:\n");
1166 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1167 printf ("Guessing calling convention: %s\n",
1169 ? "%A = modified by user, %B = upstream"
1170 : "%A = upstream, %B = modified by user");
1176 mainstream_file_name = other_file_name;
1177 modified_file_name = destination_file_name;
1181 mainstream_file_name = destination_file_name;
1182 modified_file_name = other_file_name;
1185 /* Read the three files into memory. */
1186 read_changelog_file (ancestor_file_name, &ancestor_file);
1187 read_changelog_file (mainstream_file_name, &mainstream_file);
1188 read_changelog_file (modified_file_name, &modified_file);
1190 /* Compute correspondence between the entries of ancestor_file and of
1192 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1193 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1195 /* Compute differences between the entries of ancestor_file and of
1197 compute_differences (&ancestor_file, &modified_file, &diffs);
1199 /* Compute the result. */
1200 result_entries_pointers =
1201 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1203 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1207 for (k = 0; k < mainstream_file.num_entries; k++)
1208 result_entries_pointers[k] =
1209 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1212 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1215 for (e = 0; e < diffs.num_edits; e++)
1217 struct edit *edit = diffs.edits[e];
1223 /* An addition to the top of modified_file.
1224 Apply it to the top of mainstream_file. */
1226 for (j = edit->j2; j >= edit->j1; j--)
1228 struct entry *added_entry = modified_file.entries[j];
1229 gl_list_add_first (result_entries, added_entry);
1238 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1239 ASSERT (i_before >= 0);
1240 i_after = (edit->j2 + 1 == modified_file.num_entries
1241 ? ancestor_file.num_entries
1242 : diffs.index_mapping_reverse[edit->j2 + 1]);
1243 ASSERT (i_after >= 0);
1244 ASSERT (i_after == i_before + 1);
1245 /* An addition between ancestor_file.entries[i_before] and
1246 ancestor_file.entries[i_after]. See whether these two
1247 entries still exist in mainstream_file and are still
1249 k_before = entries_mapping_get (&mapping, i_before);
1250 k_after = (i_after == ancestor_file.num_entries
1251 ? mainstream_file.num_entries
1252 : entries_mapping_get (&mapping, i_after));
1253 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1255 /* Yes, the entry before and after are still neighbours
1256 in mainstream_file. Apply the addition between
1258 if (k_after == mainstream_file.num_entries)
1261 for (j = edit->j1; j <= edit->j2; j++)
1263 struct entry *added_entry = modified_file.entries[j];
1264 gl_list_add_last (result_entries, added_entry);
1269 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1271 for (j = edit->j1; j <= edit->j2; j++)
1273 struct entry *added_entry = modified_file.entries[j];
1274 gl_list_add_before (result_entries, node_k_after, added_entry);
1280 /* It's not clear where the additions should be applied.
1281 Let the user decide. */
1282 struct conflict *c = XMALLOC (struct conflict);
1284 c->num_old_entries = 0;
1285 c->old_entries = NULL;
1286 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1287 c->modified_entries =
1288 XNMALLOC (c->num_modified_entries, struct entry *);
1289 for (j = edit->j1; j <= edit->j2; j++)
1290 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1291 gl_list_add_last (result_conflicts, c);
1297 /* Apply the removals one by one. */
1299 for (i = edit->i1; i <= edit->i2; i++)
1301 struct entry *removed_entry = ancestor_file.entries[i];
1302 ssize_t k = entries_mapping_get (&mapping, i);
1304 && entry_equals (removed_entry,
1305 mainstream_file.entries[k]))
1307 /* The entry to be removed still exists in
1308 mainstream_file. Remove it. */
1309 gl_list_node_set_value (result_entries,
1310 result_entries_pointers[k],
1315 /* The entry to be removed was already removed or was
1316 modified. This is a conflict. */
1317 struct conflict *c = XMALLOC (struct conflict);
1318 c->num_old_entries = 1;
1320 XNMALLOC (c->num_old_entries, struct entry *);
1321 c->old_entries[0] = removed_entry;
1322 c->num_modified_entries = 0;
1323 c->modified_entries = NULL;
1324 gl_list_add_last (result_conflicts, c);
1332 /* When the user usually merges entries from the same day,
1333 and this edit is at the top of the file: */
1334 if (split_merged_entry && edit->j1 == 0)
1336 /* Test whether the change is "simple merged", i.e. whether
1337 it consists of additions, followed by an augmentation of
1338 the first changed entry, followed by small changes of the
1351 modified_entry_n. */
1352 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1354 struct entry *split[2];
1355 bool simple_merged =
1356 try_split_merged_entry (ancestor_file.entries[edit->i1],
1357 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1362 for (i = edit->i1 + 1; i <= edit->i2; i++)
1363 if (entry_fstrcmp (ancestor_file.entries[i],
1364 modified_file.entries[i + edit->j2 - edit->i2],
1366 < FSTRCMP_THRESHOLD)
1368 simple_merged = false;
1374 /* Apply the additions at the top of modified_file.
1375 Apply each of the single-entry changes
1377 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1378 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1380 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1381 gl_list_add_first (result_entries, split[0]);
1382 /* The additions. */
1383 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1385 struct entry *added_entry = modified_file.entries[j];
1386 gl_list_add_first (result_entries, added_entry);
1388 /* Now the single-entry changes. */
1389 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1391 struct entry *changed_entry =
1392 (j == edit->j1 + num_added
1394 : modified_file.entries[j]);
1395 size_t i = j + edit->i2 - edit->j2;
1396 ssize_t k = entries_mapping_get (&mapping, i);
1398 && entry_equals (ancestor_file.entries[i],
1399 mainstream_file.entries[k]))
1401 gl_list_node_set_value (result_entries,
1402 result_entries_pointers[k],
1405 else if (!entry_equals (ancestor_file.entries[i],
1408 struct conflict *c = XMALLOC (struct conflict);
1409 c->num_old_entries = 1;
1411 XNMALLOC (c->num_old_entries, struct entry *);
1412 c->old_entries[0] = ancestor_file.entries[i];
1413 c->num_modified_entries = 1;
1414 c->modified_entries =
1415 XNMALLOC (c->num_modified_entries, struct entry *);
1416 c->modified_entries[0] = changed_entry;
1417 gl_list_add_last (result_conflicts, c);
1427 /* Test whether the change is "simple", i.e. whether it
1428 consists of small changes to the old ChangeLog entries
1429 and additions before them:
1439 modified_entry_n. */
1440 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1444 for (i = edit->i1; i <= edit->i2; i++)
1445 if (entry_fstrcmp (ancestor_file.entries[i],
1446 modified_file.entries[i + edit->j2 - edit->i2],
1448 < FSTRCMP_THRESHOLD)
1458 /* Apply the additions and each of the single-entry
1459 changes separately. */
1460 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1461 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1464 /* A simple change at the top of modified_file.
1465 Apply it to the top of mainstream_file. */
1467 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1469 struct entry *added_entry = modified_file.entries[j];
1470 gl_list_add_first (result_entries, added_entry);
1472 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1474 struct entry *changed_entry = modified_file.entries[j];
1475 size_t i = j + edit->i2 - edit->j2;
1476 ssize_t k = entries_mapping_get (&mapping, i);
1478 && entry_equals (ancestor_file.entries[i],
1479 mainstream_file.entries[k]))
1481 gl_list_node_set_value (result_entries,
1482 result_entries_pointers[k],
1488 ASSERT (!entry_equals (ancestor_file.entries[i],
1490 c = XMALLOC (struct conflict);
1491 c->num_old_entries = 1;
1493 XNMALLOC (c->num_old_entries, struct entry *);
1494 c->old_entries[0] = ancestor_file.entries[i];
1495 c->num_modified_entries = 1;
1496 c->modified_entries =
1497 XNMALLOC (c->num_modified_entries, struct entry *);
1498 c->modified_entries[0] = changed_entry;
1499 gl_list_add_last (result_conflicts, c);
1509 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1510 ASSERT (i_before >= 0);
1511 /* A simple change after ancestor_file.entries[i_before].
1512 See whether this entry and the following num_changed
1513 entries still exist in mainstream_file and are still
1515 k_before = entries_mapping_get (&mapping, i_before);
1516 linear = (k_before >= 0);
1520 for (i = i_before + 1; i <= i_before + num_changed; i++)
1521 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1529 gl_list_node_t node_for_insert =
1530 result_entries_pointers[k_before + 1];
1532 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1534 struct entry *added_entry = modified_file.entries[j];
1535 gl_list_add_before (result_entries, node_for_insert, added_entry);
1537 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1539 struct entry *changed_entry = modified_file.entries[j];
1540 size_t i = j + edit->i2 - edit->j2;
1541 ssize_t k = entries_mapping_get (&mapping, i);
1543 if (entry_equals (ancestor_file.entries[i],
1544 mainstream_file.entries[k]))
1546 gl_list_node_set_value (result_entries,
1547 result_entries_pointers[k],
1553 ASSERT (!entry_equals (ancestor_file.entries[i],
1555 c = XMALLOC (struct conflict);
1556 c->num_old_entries = 1;
1558 XNMALLOC (c->num_old_entries, struct entry *);
1559 c->old_entries[0] = ancestor_file.entries[i];
1560 c->num_modified_entries = 1;
1561 c->modified_entries =
1562 XNMALLOC (c->num_modified_entries, struct entry *);
1563 c->modified_entries[0] = changed_entry;
1564 gl_list_add_last (result_conflicts, c);
1574 See whether the num_changed entries still exist
1575 unchanged in mainstream_file and are still
1579 bool linear_unchanged;
1581 k_first = entries_mapping_get (&mapping, i_first);
1584 && entry_equals (ancestor_file.entries[i_first],
1585 mainstream_file.entries[k_first]));
1586 if (linear_unchanged)
1589 for (i = i_first + 1; i <= edit->i2; i++)
1590 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1591 && entry_equals (ancestor_file.entries[i],
1592 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1594 linear_unchanged = false;
1598 if (linear_unchanged)
1600 gl_list_node_t node_for_insert =
1601 result_entries_pointers[k_first];
1604 for (j = edit->j2; j >= edit->j1; j--)
1606 struct entry *new_entry = modified_file.entries[j];
1607 gl_list_add_before (result_entries, node_for_insert, new_entry);
1609 for (i = edit->i1; i <= edit->i2; i++)
1611 ssize_t k = entries_mapping_get (&mapping, i);
1613 ASSERT (entry_equals (ancestor_file.entries[i],
1614 mainstream_file.entries[k]));
1615 gl_list_node_set_value (result_entries,
1616 result_entries_pointers[k],
1625 struct conflict *c = XMALLOC (struct conflict);
1627 c->num_old_entries = edit->i2 - edit->i1 + 1;
1629 XNMALLOC (c->num_old_entries, struct entry *);
1630 for (i = edit->i1; i <= edit->i2; i++)
1631 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1632 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1633 c->modified_entries =
1634 XNMALLOC (c->num_modified_entries, struct entry *);
1635 for (j = edit->j1; j <= edit->j2; j++)
1636 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1637 gl_list_add_last (result_conflicts, c);
1645 /* Output the result. */
1647 FILE *fp = fopen (destination_file_name, "w");
1650 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1651 exit (EXIT_FAILURE);
1654 /* Output the conflicts at the top. */
1656 size_t n = gl_list_size (result_conflicts);
1658 for (i = 0; i < n; i++)
1659 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1661 /* Output the modified and unmodified entries, in order. */
1663 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1665 gl_list_node_t node;
1666 while (gl_list_iterator_next (&iter, &elt, &node))
1667 entry_write (fp, (struct entry *) elt);
1668 gl_list_iterator_free (&iter);
1671 if (fwriteerror (fp))
1673 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1674 exit (EXIT_FAILURE);
1678 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);