root/maint/gnulib/lib/git-merge-changelog.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. entry_create
  2. entry_equals
  3. entry_hashcode
  4. entry_fstrcmp
  5. read_changelog_file
  6. entries_mapping_get
  7. entries_mapping_reverse_get
  8. compute_mapping
  9. compute_differences
  10. find_paragraph_end
  11. try_split_merged_entry
  12. entry_write
  13. conflict_write
  14. usage
  15. main

   1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
   2    Copyright (C) 2008-2021 Bruno Haible <bruno@clisp.org>
   3 
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published
   6    by the Free Software Foundation; either version 3 of the License,
   7    or (at your option) any later version.
   8 
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* README:
  18    The default merge driver of 'git' *always* produces conflicts when
  19    pulling public modifications into a privately modified ChangeLog file.
  20    This is because ChangeLog files are always modified at the top; the
  21    default merge driver has no clue how to deal with this. Furthermore
  22    the conflicts are presented with more <<<< ==== >>>> markers than
  23    necessary; this is because the default merge driver makes pointless
  24    efforts to look at the individual line changes inside a ChangeLog entry.
  25 
  26    This program serves as a 'git' merge driver that avoids these problems.
  27    1. It produces no conflict when ChangeLog entries have been inserted
  28       at the top both in the public and in the private modification. It
  29       puts the privately added entries above the publicly added entries.
  30    2. It respects the structure of ChangeLog files: entries are not split
  31       into lines but kept together.
  32    3. It also handles the case of small modifications of past ChangeLog
  33       entries, or of removed ChangeLog entries: they are merged as one
  34       would expect it.
  35    4. Conflicts are presented at the top of the file, rather than where
  36       they occurred, so that the user will see them immediately. (Unlike
  37       for source code written in some programming language, conflict markers
  38       that are located several hundreds lines from the top will not cause
  39       any syntax error and therefore would be likely to remain unnoticed.)
  40  */
  41 
  42 /* Installation:
  43 
  44    $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
  45    $ cd /tmp/testdir123
  46    $ ./configure
  47    $ make
  48    $ make install
  49 
  50    Additionally, for git users:
  51      - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
  52        lines
  53 
  54           [merge "merge-changelog"]
  55                   name = GNU-style ChangeLog merge driver
  56                   driver = /usr/local/bin/git-merge-changelog %O %A %B
  57 
  58      - In every directory that contains a ChangeLog file, add a file
  59        '.gitattributes' with this line:
  60 
  61           ChangeLog    merge=merge-changelog
  62 
  63        (See "man 5 gitattributes" for more info.)
  64 
  65    Additionally, for bzr users:
  66      - Install the 'extmerge' bzr plug-in listed at
  67          <http://doc.bazaar.canonical.com/plugins/en/index.html>
  68          <http://wiki.bazaar.canonical.com/BzrPlugins>
  69      - Add to your $HOME/.bazaar/bazaar.conf the line
  70 
  71           external_merge = git-merge-changelog %b %T %o
  72 
  73      - Then, to merge a conflict in a ChangeLog file, use
  74 
  75           $ bzr extmerge ChangeLog
  76 
  77    Additionally, for hg users:
  78      - Add to your $HOME/.hgrc the lines
  79 
  80         [merge-patterns]
  81         ChangeLog = git-merge-changelog
  82 
  83         [merge-tools]
  84         git-merge-changelog.executable = /usr/local/bin/git-merge-changelog
  85         git-merge-changelog.args = $base $local $other
  86 
  87        See <https://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
  88        for reference.
  89  */
  90 
  91 /* Use as an alternative to 'diff3':
  92    git-merge-changelog performs the same role as "diff3 -m", just with
  93    reordered arguments:
  94      $ git-merge-changelog %O %A %B
  95    is comparable to
  96      $ diff3 -m %A %O %B
  97  */
  98 
  99 /* Calling convention:
 100    A merge driver is called with three filename arguments:
 101      1. %O = The common ancestor of %A and %B.
 102      2. %A = The file's contents from the "current branch".
 103      3. %B = The file's contents from the "other branch"; this is the contents
 104         being merged in.
 105 
 106    In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
 107    maintainer to a central maintainer) or of a downstream pull with --rebase:
 108      2. %A = The file's newest pulled contents; modified by other committers.
 109      3. %B = The user's newest copy of the file; modified by the user.
 110    In case of a downstream pull (e.g. from a central repository to the user)
 111    or of an upstream pull with --rebase:
 112      2. %A = The user's newest copy of the file; modified by the user.
 113      3. %B = The file's newest pulled contents; modified by other committers.
 114 
 115    It should write its merged output into file %A. It can also echo some
 116    remarks to stdout.  It should exit with return code 0 if the merge could
 117    be resolved cleanly, or with non-zero return code if there were conflicts.
 118  */
 119 
 120 /* How it works:
 121    The structure of a ChangeLog file: It consists of ChangeLog entries. A
 122    ChangeLog entry starts at a line following a blank line and that starts with
 123    a non-whitespace character, or at the beginning of a file.
 124    The merge driver works as follows: It reads the three files into memory and
 125    dissects them into ChangeLog entries. It then finds the differences between
 126    %O and %B. They are classified as:
 127      - removals (some consecutive entries removed),
 128      - changes (some consecutive entries removed, some consecutive entries
 129        added),
 130      - additions (some consecutive entries added).
 131    The driver then attempts to apply the changes to %A.
 132    To this effect, it first computes a correspondence between the entries in %O
 133    and the entries in %A, using fuzzy string matching to still identify changed
 134    entries.
 135      - Removals are applied one by one. If the entry is present in %A, at any
 136        position, it is removed. If not, the removal is marked as a conflict.
 137      - Additions at the top of %B are applied at the top of %A.
 138      - Additions between entry x and entry y (y may be the file end) in %B are
 139        applied between entry x and entry y in %A (if they still exist and are
 140        still consecutive in %A), otherwise the additions are marked as a
 141        conflict.
 142      - Changes are categorized into "simple changes":
 143          entry1 ... entryn
 144        are mapped to
 145          added_entry ... added_entry modified_entry1 ... modified_entryn,
 146        where the correspondence between entry_i and modified_entry_i is still
 147        clear; and "big changes": these are all the rest. Simple changes at the
 148        top of %B are applied by putting the added entries at the top of %A. The
 149        changes in simple changes are applied one by one; possibly leading to
 150        single-entry conflicts. Big changes are applied en bloc, possibly
 151        leading to conflicts spanning multiple entries.
 152      - Conflicts are output at the top of the file and cause an exit status of
 153        1.
 154  */
 155 
 156 #include <config.h>
 157 
 158 #include <getopt.h>
 159 #include <limits.h>
 160 #include <stdbool.h>
 161 #include <stdio.h>
 162 #include <stdlib.h>
 163 #include <string.h>
 164 #include <sys/types.h>
 165 #include <unistd.h>
 166 
 167 #include "error.h"
 168 #include "read-file.h"
 169 #include "gl_xlist.h"
 170 #include "gl_array_list.h"
 171 #include "gl_linkedhash_list.h"
 172 #include "gl_rbtreehash_list.h"
 173 #include "gl_linked_list.h"
 174 #include "xalloc.h"
 175 #include "xmalloca.h"
 176 #include "fstrcmp.h"
 177 #include "minmax.h"
 178 #include "c-strstr.h"
 179 #include "fwriteerror.h"
 180 #include "getprogname.h"
 181 
 182 #define ASSERT(expr) \
 183   do                                                                         \
 184     {                                                                        \
 185       if (!(expr))                                                           \
 186         abort ();                                                            \
 187     }                                                                        \
 188   while (0)
 189 
 190 #define FSTRCMP_THRESHOLD 0.6
 191 #define FSTRCMP_STRICTER_THRESHOLD 0.8
 192 
 193 /* Representation of a ChangeLog entry.
 194    The string may contain NUL bytes; therefore it is represented as a plain
 195    opaque memory region.  */
 196 struct entry
 197 {
 198   char *string;
 199   size_t length;
 200   /* Cache for the hash code.  */
 201   bool hashcode_cached;
 202   size_t hashcode;
 203 };
 204 
 205 /* Create an entry.
 206    The memory region passed by the caller must of indefinite extent.  It is
 207    *not* copied here.  */
 208 static struct entry *
 209 entry_create (char *string, size_t length)
     /* [previous][next][first][last][top][bottom][index][help] */
 210 {
 211   struct entry *result = XMALLOC (struct entry);
 212   result->string = string;
 213   result->length = length;
 214   result->hashcode_cached = false;
 215   return result;
 216 }
 217 
 218 /* Compare two entries for equality.  */
 219 static bool
 220 entry_equals (const void *elt1, const void *elt2)
     /* [previous][next][first][last][top][bottom][index][help] */
 221 {
 222   const struct entry *entry1 = (const struct entry *) elt1;
 223   const struct entry *entry2 = (const struct entry *) elt2;
 224   return entry1->length == entry2->length
 225          && memcmp (entry1->string, entry2->string, entry1->length) == 0;
 226 }
 227 
 228 /* Return a hash code of the contents of a ChangeLog entry.  */
 229 static size_t
 230 entry_hashcode (const void *elt)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232   struct entry *entry = (struct entry *) elt;
 233   if (!entry->hashcode_cached)
 234     {
 235       /* See https://www.haible.de/bruno/hashfunc.html.  */
 236       const char *s;
 237       size_t n;
 238       size_t h = 0;
 239 
 240       for (s = entry->string, n = entry->length; n > 0; s++, n--)
 241         h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
 242 
 243       entry->hashcode = h;
 244       entry->hashcode_cached = true;
 245     }
 246   return entry->hashcode;
 247 }
 248 
 249 /* Perform a fuzzy comparison of two ChangeLog entries.
 250    Return a similarity measure of the two entries, a value between 0 and 1.
 251    0 stands for very distinct, 1 for identical.
 252    If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
 253    be returned.  */
 254 static double
 255 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
     /* [previous][next][first][last][top][bottom][index][help] */
 256                double lower_bound)
 257 {
 258   /* fstrcmp works only on NUL terminated strings.  */
 259   char *memory;
 260   double similarity;
 261 
 262   if (memchr (entry1->string, '\0', entry1->length) != NULL)
 263     return 0.0;
 264   if (memchr (entry2->string, '\0', entry2->length) != NULL)
 265     return 0.0;
 266   memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
 267   {
 268     char *p = memory;
 269     memcpy (p, entry1->string, entry1->length);
 270     p += entry1->length;
 271     *p++ = '\0';
 272     memcpy (p, entry2->string, entry2->length);
 273     p += entry2->length;
 274     *p++ = '\0';
 275   }
 276   similarity =
 277     fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
 278   freea (memory);
 279   return similarity;
 280 }
 281 
 282 /* This structure represents an entire ChangeLog file, after it was read
 283    into memory.  */
 284 struct changelog_file
 285 {
 286   /* The entries, as a list.  */
 287   gl_list_t /* <struct entry *> */ entries_list;
 288   /* The entries, as a list in opposite direction.  */
 289   gl_list_t /* <struct entry *> */ entries_reversed;
 290   /* The entries, as an array.  */
 291   size_t num_entries;
 292   struct entry **entries;
 293 };
 294 
 295 /* Read a ChangeLog file into memory.
 296    Return the contents in *RESULT.  */
 297 static void
 298 read_changelog_file (const char *filename, struct changelog_file *result)
     /* [previous][next][first][last][top][bottom][index][help] */
 299 {
 300   /* Read the file in text mode, otherwise it's hard to recognize empty
 301      lines.  */
 302   size_t length;
 303   char *contents = read_file (filename, 0, &length);
 304   if (contents == NULL)
 305     {
 306       fprintf (stderr, "could not read file '%s'\n", filename);
 307       exit (EXIT_FAILURE);
 308     }
 309 
 310   result->entries_list =
 311     gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
 312                           NULL, true);
 313   result->entries_reversed =
 314     gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
 315                           NULL, true);
 316   /* A ChangeLog file consists of ChangeLog entries.  A ChangeLog entry starts
 317      at a line following a blank line and that starts with a non-whitespace
 318      character, or at the beginning of a file.
 319      Split the file contents into entries.  */
 320   {
 321     char *contents_end = contents + length;
 322     char *start = contents;
 323     while (start < contents_end)
 324       {
 325         /* Search the end of the current entry.  */
 326         char *ptr = start;
 327         struct entry *curr;
 328 
 329         while (ptr < contents_end)
 330           {
 331             ptr = memchr (ptr, '\n', contents_end - ptr);
 332             if (ptr == NULL)
 333               {
 334                 ptr = contents_end;
 335                 break;
 336               }
 337             ptr++;
 338             if (contents_end - ptr >= 2
 339                 && ptr[0] == '\n'
 340                 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
 341               {
 342                 ptr++;
 343                 break;
 344               }
 345           }
 346 
 347         curr = entry_create (start, ptr - start);
 348         gl_list_add_last (result->entries_list, curr);
 349         gl_list_add_first (result->entries_reversed, curr);
 350 
 351         start = ptr;
 352       }
 353   }
 354 
 355   result->num_entries = gl_list_size (result->entries_list);
 356   result->entries = XNMALLOC (result->num_entries, struct entry *);
 357   {
 358     size_t index = 0;
 359     gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
 360     const void *elt;
 361     gl_list_node_t node;
 362     while (gl_list_iterator_next (&iter, &elt, &node))
 363       result->entries[index++] = (struct entry *) elt;
 364     gl_list_iterator_free (&iter);
 365     ASSERT (index == result->num_entries);
 366   }
 367 }
 368 
 369 /* A mapping (correspondence) between entries of FILE1 and of FILE2.  */
 370 struct entries_mapping
 371 {
 372   struct changelog_file *file1;
 373   struct changelog_file *file2;
 374   /* Mapping from indices in FILE1 to indices in FILE2.
 375      A value -1 means that the entry from FILE1 is not found in FILE2.
 376      A value -2 means that it has not yet been computed.  */
 377   ssize_t *index_mapping;
 378   /* Mapping from indices in FILE2 to indices in FILE1.
 379      A value -1 means that the entry from FILE2 is not found in FILE1.
 380      A value -2 means that it has not yet been computed.  */
 381   ssize_t *index_mapping_reverse;
 382 };
 383 
 384 /* Look up (or lazily compute) the mapping of an entry in FILE1.
 385    i is the index in FILE1.
 386    Return the index in FILE2, or -1 when the entry is not found in FILE2.  */
 387 static ssize_t
 388 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
     /* [previous][next][first][last][top][bottom][index][help] */
 389 {
 390   if (mapping->index_mapping[i] < -1)
 391     {
 392       struct changelog_file *file1 = mapping->file1;
 393       struct changelog_file *file2 = mapping->file2;
 394       size_t n1 = file1->num_entries;
 395       size_t n2 = file2->num_entries;
 396       struct entry *entry_i = file1->entries[i];
 397       ssize_t j;
 398 
 399       /* Search whether it approximately occurs in file2.  */
 400       ssize_t best_j = -1;
 401       double best_j_similarity = 0.0;
 402       for (j = n2 - 1; j >= 0; j--)
 403         if (mapping->index_mapping_reverse[j] < 0)
 404           {
 405             double similarity =
 406               entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
 407             if (similarity > best_j_similarity)
 408               {
 409                 best_j = j;
 410                 best_j_similarity = similarity;
 411               }
 412           }
 413       if (best_j_similarity >= FSTRCMP_THRESHOLD)
 414         {
 415           /* Found a similar entry in file2.  */
 416           struct entry *entry_j = file2->entries[best_j];
 417           /* Search whether it approximately occurs in file1 at index i.  */
 418           ssize_t best_i = -1;
 419           double best_i_similarity = 0.0;
 420           ssize_t ii;
 421           for (ii = n1 - 1; ii >= 0; ii--)
 422             if (mapping->index_mapping[ii] < 0)
 423               {
 424                 double similarity =
 425                   entry_fstrcmp (file1->entries[ii], entry_j,
 426                                  best_i_similarity);
 427                 if (similarity > best_i_similarity)
 428                   {
 429                     best_i = ii;
 430                     best_i_similarity = similarity;
 431                   }
 432               }
 433           if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
 434             {
 435               mapping->index_mapping[i] = best_j;
 436               mapping->index_mapping_reverse[best_j] = i;
 437             }
 438         }
 439       if (mapping->index_mapping[i] < -1)
 440         /* It does not approximately occur in FILE2.
 441            Remember it, for next time.  */
 442         mapping->index_mapping[i] = -1;
 443     }
 444   return mapping->index_mapping[i];
 445 }
 446 
 447 /* Look up (or lazily compute) the mapping of an entry in FILE2.
 448    j is the index in FILE2.
 449    Return the index in FILE1, or -1 when the entry is not found in FILE1.  */
 450 static ssize_t
 451 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
     /* [previous][next][first][last][top][bottom][index][help] */
 452 {
 453   if (mapping->index_mapping_reverse[j] < -1)
 454     {
 455       struct changelog_file *file1 = mapping->file1;
 456       struct changelog_file *file2 = mapping->file2;
 457       size_t n1 = file1->num_entries;
 458       size_t n2 = file2->num_entries;
 459       struct entry *entry_j = file2->entries[j];
 460       ssize_t i;
 461 
 462       /* Search whether it approximately occurs in file1.  */
 463       ssize_t best_i = -1;
 464       double best_i_similarity = 0.0;
 465       for (i = n1 - 1; i >= 0; i--)
 466         if (mapping->index_mapping[i] < 0)
 467           {
 468             double similarity =
 469               entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
 470             if (similarity > best_i_similarity)
 471               {
 472                 best_i = i;
 473                 best_i_similarity = similarity;
 474               }
 475           }
 476       if (best_i_similarity >= FSTRCMP_THRESHOLD)
 477         {
 478           /* Found a similar entry in file1.  */
 479           struct entry *entry_i = file1->entries[best_i];
 480           /* Search whether it approximately occurs in file2 at index j.  */
 481           ssize_t best_j = -1;
 482           double best_j_similarity = 0.0;
 483           ssize_t jj;
 484           for (jj = n2 - 1; jj >= 0; jj--)
 485             if (mapping->index_mapping_reverse[jj] < 0)
 486               {
 487                 double similarity =
 488                   entry_fstrcmp (entry_i, file2->entries[jj],
 489                                  best_j_similarity);
 490                 if (similarity > best_j_similarity)
 491                   {
 492                     best_j = jj;
 493                     best_j_similarity = similarity;
 494                   }
 495               }
 496           if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
 497             {
 498               mapping->index_mapping_reverse[j] = best_i;
 499               mapping->index_mapping[best_i] = j;
 500             }
 501         }
 502       if (mapping->index_mapping_reverse[j] < -1)
 503         /* It does not approximately occur in FILE1.
 504            Remember it, for next time.  */
 505         mapping->index_mapping_reverse[j] = -1;
 506     }
 507   return mapping->index_mapping_reverse[j];
 508 }
 509 
 510 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
 511    The correspondence also takes into account small modifications; i.e. the
 512    indicated relation is not equality of entries but best-match similarity
 513    of entries.
 514    If FULL is true, the maximum of matching is done up-front.  If it is false,
 515    it is done in a lazy way through the functions entries_mapping_get and
 516    entries_mapping_reverse_get.
 517    Return the result in *RESULT.  */
 518 static void
 519 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
     /* [previous][next][first][last][top][bottom][index][help] */
 520                  bool full,
 521                  struct entries_mapping *result)
 522 {
 523   /* Mapping from indices in file1 to indices in file2.  */
 524   ssize_t *index_mapping;
 525   /* Mapping from indices in file2 to indices in file1.  */
 526   ssize_t *index_mapping_reverse;
 527   size_t n1 = file1->num_entries;
 528   size_t n2 = file2->num_entries;
 529   ssize_t i, j;
 530 
 531   index_mapping = XNMALLOC (n1, ssize_t);
 532   for (i = 0; i < n1; i++)
 533     index_mapping[i] = -2;
 534 
 535   index_mapping_reverse = XNMALLOC (n2, ssize_t);
 536   for (j = 0; j < n2; j++)
 537     index_mapping_reverse[j] = -2;
 538 
 539   for (i = n1 - 1; i >= 0; i--)
 540     /* Take an entry from file1.  */
 541     if (index_mapping[i] < -1)
 542       {
 543         struct entry *entry = file1->entries[i];
 544         /* Search whether it occurs in file2.  */
 545         j = gl_list_indexof (file2->entries_reversed, entry);
 546         if (j >= 0)
 547           {
 548             j = n2 - 1 - j;
 549             /* Found an exact correspondence.  */
 550             /* If index_mapping_reverse[j] >= 0, we have already seen other
 551                copies of this entry, and there were more occurrences of it in
 552                file1 than in file2.  In this case, do nothing.  */
 553             if (index_mapping_reverse[j] < 0)
 554               {
 555                 index_mapping[i] = j;
 556                 index_mapping_reverse[j] = i;
 557                 /* Look for more occurrences of the same entry.  Match them
 558                    as long as they pair up.  Unpaired occurrences of the same
 559                    entry are left without mapping.  */
 560                 {
 561                   ssize_t curr_i = i;
 562                   ssize_t curr_j = j;
 563 
 564                   for (;;)
 565                     {
 566                       ssize_t next_i;
 567                       ssize_t next_j;
 568 
 569                       next_i =
 570                         gl_list_indexof_from (file1->entries_reversed,
 571                                               n1 - curr_i, entry);
 572                       if (next_i < 0)
 573                         break;
 574                       next_j =
 575                         gl_list_indexof_from (file2->entries_reversed,
 576                                               n2 - curr_j, entry);
 577                       if (next_j < 0)
 578                         break;
 579                       curr_i = n1 - 1 - next_i;
 580                       curr_j = n2 - 1 - next_j;
 581                       ASSERT (index_mapping[curr_i] < 0);
 582                       ASSERT (index_mapping_reverse[curr_j] < 0);
 583                       index_mapping[curr_i] = curr_j;
 584                       index_mapping_reverse[curr_j] = curr_i;
 585                     }
 586                 }
 587               }
 588           }
 589       }
 590 
 591   result->file1 = file1;
 592   result->file2 = file2;
 593   result->index_mapping = index_mapping;
 594   result->index_mapping_reverse = index_mapping_reverse;
 595 
 596   if (full)
 597     for (i = n1 - 1; i >= 0; i--)
 598       entries_mapping_get (result, i);
 599 }
 600 
 601 /* An "edit" is a textual modification performed by the user, that needs to
 602    be applied to the other file.  */
 603 enum edit_type
 604 {
 605   /* Some consecutive entries were added.  */
 606   ADDITION,
 607   /* Some consecutive entries were removed; some other consecutive entries
 608      were added at the same position.  (Not necessarily the same number of
 609      entries.)  */
 610   CHANGE,
 611   /* Some consecutive entries were removed.  */
 612   REMOVAL
 613 };
 614 
 615 /* This structure represents an edit.  */
 616 struct edit
 617 {
 618   enum edit_type type;
 619   /* Range of indices into the entries of FILE1.  */
 620   ssize_t i1, i2;       /* first, last index; only used for CHANGE, REMOVAL */
 621   /* Range of indices into the entries of FILE2.  */
 622   ssize_t j1, j2;       /* first, last index; only used for ADDITION, CHANGE */
 623 };
 624 
 625 /* This structure represents the differences from one file, FILE1, to another
 626    file, FILE2.  */
 627 struct differences
 628 {
 629   /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
 630      from FILE1 is not found in FILE2).  */
 631   ssize_t *index_mapping;
 632   /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
 633      from FILE2 is not found in FILE1).  */
 634   ssize_t *index_mapping_reverse;
 635   /* The edits that transform FILE1 into FILE2.  */
 636   size_t num_edits;
 637   struct edit **edits;
 638 };
 639 
 640 /* Import the difference detection algorithm from GNU diff.  */
 641 #define ELEMENT struct entry *
 642 #define EQUAL entry_equals
 643 #define OFFSET ssize_t
 644 #define EXTRA_CONTEXT_FIELDS \
 645   ssize_t *index_mapping; \
 646   ssize_t *index_mapping_reverse;
 647 #define NOTE_DELETE(ctxt, xoff) \
 648   ctxt->index_mapping[xoff] = -1
 649 #define NOTE_INSERT(ctxt, yoff) \
 650   ctxt->index_mapping_reverse[yoff] = -1
 651 #include "diffseq.h"
 652 
 653 /* Compute the differences between the entries of FILE1 and the entries of
 654    FILE2.  */
 655 static void
 656 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
     /* [previous][next][first][last][top][bottom][index][help] */
 657                      struct differences *result)
 658 {
 659   /* Unlike compute_mapping, which mostly ignores the order of the entries and
 660      therefore works well when some entries are permuted, here we use the order.
 661      I think this is needed in order to distinguish changes from
 662      additions+removals; I don't know how to say what is a "change" if the
 663      files are considered as unordered sets of entries.  */
 664   struct context ctxt;
 665   size_t n1 = file1->num_entries;
 666   size_t n2 = file2->num_entries;
 667   ssize_t i;
 668   ssize_t j;
 669   gl_list_t /* <struct edit *> */ edits;
 670 
 671   ctxt.xvec = file1->entries;
 672   ctxt.yvec = file2->entries;
 673   ctxt.index_mapping = XNMALLOC (n1, ssize_t);
 674   for (i = 0; i < n1; i++)
 675     ctxt.index_mapping[i] = 0;
 676   ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
 677   for (j = 0; j < n2; j++)
 678     ctxt.index_mapping_reverse[j] = 0;
 679   ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
 680   ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
 681   ctxt.too_expensive = n1 + n2;
 682 
 683   /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
 684      each removed or added entry.  */
 685   compareseq (0, n1, 0, n2, 0, &ctxt);
 686 
 687   /* Complete the index_mapping and index_mapping_reverse arrays.  */
 688   i = 0;
 689   j = 0;
 690   while (i < n1 || j < n2)
 691     {
 692       while (i < n1 && ctxt.index_mapping[i] < 0)
 693         i++;
 694       while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
 695         j++;
 696       ASSERT ((i < n1) == (j < n2));
 697       if (i == n1 && j == n2)
 698         break;
 699       ctxt.index_mapping[i] = j;
 700       ctxt.index_mapping_reverse[j] = i;
 701       i++;
 702       j++;
 703     }
 704 
 705   /* Create the edits.  */
 706   edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
 707   i = 0;
 708   j = 0;
 709   while (i < n1 || j < n2)
 710     {
 711       if (i == n1)
 712         {
 713           struct edit *e;
 714           ASSERT (j < n2);
 715           e = XMALLOC (struct edit);
 716           e->type = ADDITION;
 717           e->j1 = j;
 718           e->j2 = n2 - 1;
 719           gl_list_add_last (edits, e);
 720           break;
 721         }
 722       if (j == n2)
 723         {
 724           struct edit *e;
 725           ASSERT (i < n1);
 726           e = XMALLOC (struct edit);
 727           e->type = REMOVAL;
 728           e->i1 = i;
 729           e->i2 = n1 - 1;
 730           gl_list_add_last (edits, e);
 731           break;
 732         }
 733       if (ctxt.index_mapping[i] >= 0)
 734         {
 735           if (ctxt.index_mapping_reverse[j] >= 0)
 736             {
 737               ASSERT (ctxt.index_mapping[i] == j);
 738               ASSERT (ctxt.index_mapping_reverse[j] == i);
 739               i++;
 740               j++;
 741             }
 742           else
 743             {
 744               struct edit *e;
 745               ASSERT (ctxt.index_mapping_reverse[j] < 0);
 746               e = XMALLOC (struct edit);
 747               e->type = ADDITION;
 748               e->j1 = j;
 749               do
 750                 j++;
 751               while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
 752               e->j2 = j - 1;
 753               gl_list_add_last (edits, e);
 754             }
 755         }
 756       else
 757         {
 758           if (ctxt.index_mapping_reverse[j] >= 0)
 759             {
 760               struct edit *e;
 761               ASSERT (ctxt.index_mapping[i] < 0);
 762               e = XMALLOC (struct edit);
 763               e->type = REMOVAL;
 764               e->i1 = i;
 765               do
 766                 i++;
 767               while (i < n1 && ctxt.index_mapping[i] < 0);
 768               e->i2 = i - 1;
 769               gl_list_add_last (edits, e);
 770             }
 771           else
 772             {
 773               struct edit *e;
 774               ASSERT (ctxt.index_mapping[i] < 0);
 775               ASSERT (ctxt.index_mapping_reverse[j] < 0);
 776               e = XMALLOC (struct edit);
 777               e->type = CHANGE;
 778               e->i1 = i;
 779               do
 780                 i++;
 781               while (i < n1 && ctxt.index_mapping[i] < 0);
 782               e->i2 = i - 1;
 783               e->j1 = j;
 784               do
 785                 j++;
 786               while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
 787               e->j2 = j - 1;
 788               gl_list_add_last (edits, e);
 789             }
 790         }
 791     }
 792 
 793   result->index_mapping = ctxt.index_mapping;
 794   result->index_mapping_reverse = ctxt.index_mapping_reverse;
 795   result->num_edits = gl_list_size (edits);
 796   result->edits = XNMALLOC (result->num_edits, struct edit *);
 797   {
 798     size_t index = 0;
 799     gl_list_iterator_t iter = gl_list_iterator (edits);
 800     const void *elt;
 801     gl_list_node_t node;
 802     while (gl_list_iterator_next (&iter, &elt, &node))
 803       result->edits[index++] = (struct edit *) elt;
 804     gl_list_iterator_free (&iter);
 805     ASSERT (index == result->num_edits);
 806   }
 807 }
 808 
 809 /* An empty entry.  */
 810 static struct entry empty_entry = { NULL, 0 };
 811 
 812 /* Return the end a paragraph.
 813    ENTRY is an entry.
 814    OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
 815    Return the offset of the end of paragraph, as an offset <= ENTRY->length;
 816    it is the start of a blank line or the end of the entry.  */
 817 static size_t
 818 find_paragraph_end (const struct entry *entry, size_t offset)
     /* [previous][next][first][last][top][bottom][index][help] */
 819 {
 820   const char *string = entry->string;
 821   size_t length = entry->length;
 822 
 823   for (;;)
 824     {
 825       const char *nl = memchr (string + offset, '\n', length - offset);
 826       if (nl == NULL)
 827         return length;
 828       offset = (nl - string) + 1;
 829       if (offset < length && string[offset] == '\n')
 830         return offset;
 831     }
 832 }
 833 
 834 /* Split a merged entry.
 835    Given an old entry of the form
 836        TITLE
 837        BODY
 838    and a new entry of the form
 839        TITLE
 840        BODY1
 841        BODY'
 842    where the two titles are the same and BODY and BODY' are very similar,
 843    this computes two new entries
 844        TITLE
 845        BODY1
 846    and
 847        TITLE
 848        BODY'
 849    and returns true.
 850    If the entries don't have this form, it returns false.  */
 851 static bool
 852 try_split_merged_entry (const struct entry *old_entry,
     /* [previous][next][first][last][top][bottom][index][help] */
 853                         const struct entry *new_entry,
 854                         struct entry *new_split[2])
 855 {
 856   size_t old_title_len = find_paragraph_end (old_entry, 0);
 857   size_t new_title_len = find_paragraph_end (new_entry, 0);
 858   struct entry old_body;
 859   struct entry new_body;
 860   size_t best_split_offset;
 861   double best_similarity;
 862   size_t split_offset;
 863 
 864   /* Same title? */
 865   if (!(old_title_len == new_title_len
 866         && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
 867     return false;
 868 
 869   old_body.string = old_entry->string + old_title_len;
 870   old_body.length = old_entry->length - old_title_len;
 871 
 872   /* Determine where to split the new entry.
 873      This is done by maximizing the similarity between BODY and BODY'.  */
 874   best_split_offset = split_offset = new_title_len;
 875   best_similarity = 0.0;
 876   for (;;)
 877     {
 878       double similarity;
 879 
 880       new_body.string = new_entry->string + split_offset;
 881       new_body.length = new_entry->length - split_offset;
 882       similarity =
 883         entry_fstrcmp (&old_body, &new_body, best_similarity);
 884       if (similarity > best_similarity)
 885         {
 886           best_split_offset = split_offset;
 887           best_similarity = similarity;
 888         }
 889       if (best_similarity == 1.0)
 890         /* It cannot get better.  */
 891         break;
 892 
 893       if (split_offset < new_entry->length)
 894         split_offset = find_paragraph_end (new_entry, split_offset + 1);
 895       else
 896         break;
 897     }
 898 
 899   /* BODY' should not be empty.  */
 900   if (best_split_offset == new_entry->length)
 901     return false;
 902   ASSERT (new_entry->string[best_split_offset] == '\n');
 903 
 904   /* A certain similarity between BODY and BODY' is required.  */
 905   if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
 906     return false;
 907 
 908   new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
 909 
 910   {
 911     size_t len1 = new_title_len;
 912     size_t len2 = new_entry->length - best_split_offset;
 913     char *combined = XNMALLOC (len1 + len2, char);
 914     memcpy (combined, new_entry->string, len1);
 915     memcpy (combined + len1, new_entry->string + best_split_offset, len2);
 916     new_split[1] = entry_create (combined, len1 + len2);
 917   }
 918 
 919   return true;
 920 }
 921 
 922 /* Write the contents of an entry to the output stream FP.  */
 923 static void
 924 entry_write (FILE *fp, struct entry *entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 925 {
 926   if (entry->length > 0)
 927     fwrite (entry->string, 1, entry->length, fp);
 928 }
 929 
 930 /* This structure represents a conflict.
 931    A conflict can occur for various reasons.  */
 932 struct conflict
 933 {
 934   /* Parts from the ancestor file.  */
 935   size_t num_old_entries;
 936   struct entry **old_entries;
 937   /* Parts of the modified file.  */
 938   size_t num_modified_entries;
 939   struct entry **modified_entries;
 940 };
 941 
 942 /* Write a conflict to the output stream FP, including markers.  */
 943 static void
 944 conflict_write (FILE *fp, struct conflict *c)
     /* [previous][next][first][last][top][bottom][index][help] */
 945 {
 946   size_t i;
 947 
 948   /* Use the same syntax as git's default merge driver.
 949      Don't indent the contents of the entries (with things like ">" or "-"),
 950      otherwise the user needs more textual editing to resolve the conflict.  */
 951   fputs ("<<<<<<<\n", fp);
 952   for (i = 0; i < c->num_old_entries; i++)
 953     entry_write (fp, c->old_entries[i]);
 954   fputs ("=======\n", fp);
 955   for (i = 0; i < c->num_modified_entries; i++)
 956     entry_write (fp, c->modified_entries[i]);
 957   fputs (">>>>>>>\n", fp);
 958 }
 959 
 960 /* Long options.  */
 961 static const struct option long_options[] =
 962 {
 963   { "help", no_argument, NULL, 'h' },
 964   { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
 965   { "version", no_argument, NULL, 'V' },
 966   { NULL, 0, NULL, 0 }
 967 };
 968 
 969 /* Print a usage message and exit.  */
 970 static void
 971 usage (int status)
     /* [previous][next][first][last][top][bottom][index][help] */
 972 {
 973   if (status != EXIT_SUCCESS)
 974     fprintf (stderr, "Try '%s --help' for more information.\n",
 975              getprogname ());
 976   else
 977     {
 978       printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
 979               getprogname ());
 980       printf ("\n");
 981       printf ("Merges independent modifications of a ChangeLog style file.\n");
 982       printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
 983       printf ("A-FILE-NAME names the publicly modified file.\n");
 984       printf ("B-FILE-NAME names the user-modified file.\n");
 985       printf ("Writes the merged file into A-FILE-NAME.\n");
 986       printf ("\n");
 987       #if 0 /* --split-merged-entry is now on by default.  */
 988       printf ("Operation modifiers:\n");
 989       printf ("\
 990       --split-merged-entry    Possibly split a merged entry between paragraphs.\n\
 991                               Use this if you have the habit to merge unrelated\n\
 992                               entries into a single one, separated only by a\n\
 993                               newline, just because they happened on the same\n\
 994                               date.\n");
 995       printf ("\n");
 996       #endif
 997       printf ("Informative output:\n");
 998       printf ("  -h, --help                  display this help and exit\n");
 999       printf ("  -V, --version               output version information and exit\n");
1000       printf ("\n");
1001       fputs ("Report bugs to <bug-gnulib@gnu.org>.\n",
1002              stdout);
1003     }
1004 
1005   exit (status);
1006 }
1007 
1008 int
1009 main (int argc, char *argv[])
     /* [previous][next][first][last][top][bottom][index][help] */
1010 {
1011   int optchar;
1012   bool do_help;
1013   bool do_version;
1014   bool split_merged_entry;
1015 
1016   /* Set default values for variables.  */
1017   do_help = false;
1018   do_version = false;
1019   split_merged_entry = true;
1020 
1021   /* Parse command line options.  */
1022   while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1023     switch (optchar)
1024     {
1025     case '\0':          /* Long option.  */
1026       break;
1027     case 'h':
1028       do_help = true;
1029       break;
1030     case 'V':
1031       do_version = true;
1032       break;
1033     case CHAR_MAX + 1:  /* --split-merged-entry */
1034       break;
1035     default:
1036       usage (EXIT_FAILURE);
1037     }
1038 
1039   if (do_version)
1040     {
1041       /* Version information is requested.  */
1042       printf ("%s\n", getprogname ());
1043       printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1044 License GPLv2+: GNU GPL version 2 or later <https://gnu.org/licenses/gpl.html>\n\
1045 This is free software: you are free to change and redistribute it.\n\
1046 There is NO WARRANTY, to the extent permitted by law.\n\
1047 ",
1048               "2020");
1049       printf ("Written by %s.\n", "Bruno Haible");
1050       exit (EXIT_SUCCESS);
1051     }
1052 
1053   if (do_help)
1054     {
1055       /* Help is requested.  */
1056       usage (EXIT_SUCCESS);
1057     }
1058 
1059   /* Test argument count.  */
1060   if (optind + 3 != argc)
1061     error (EXIT_FAILURE, 0, "expected three arguments");
1062 
1063   {
1064     const char *ancestor_file_name; /* O-FILE-NAME */
1065     const char *destination_file_name; /* A-FILE-NAME */
1066     bool downstream;
1067     const char *other_file_name; /* B-FILE-NAME */
1068     const char *mainstream_file_name;
1069     const char *modified_file_name;
1070     struct changelog_file ancestor_file;
1071     struct changelog_file mainstream_file;
1072     struct changelog_file modified_file;
1073     /* Mapping from indices in ancestor_file to indices in mainstream_file.  */
1074     struct entries_mapping mapping;
1075     struct differences diffs;
1076     gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1077     gl_list_t /* <struct entry *> */ result_entries;
1078     gl_list_t /* <struct conflict *> */ result_conflicts;
1079 
1080     ancestor_file_name = argv[optind];
1081     destination_file_name = argv[optind + 1];
1082     other_file_name = argv[optind + 2];
1083 
1084     /* Heuristic to determine whether it's a pull in downstream direction
1085        (e.g. pull from a centralized server) or a pull in upstream direction
1086        (e.g. "git stash apply").
1087 
1088        For ChangeLog this distinction is important. The difference between
1089        an "upstream" and a "downstream" repository is that more people are
1090        looking at the "upstream" repository.  They want to be informed about
1091        changes and expect them to be shown at the top of the ChangeLog.
1092        When a user pulls downstream, on the other hand, he has two options:
1093          a) He gets the change entries from the central repository also at the
1094             top of his ChangeLog, and his own changes come after them.
1095          b) He gets the change entries from the central repository after those
1096             he has collected for his branch.  His own change entries stay at
1097             the top of the ChangeLog file.
1098        In the case a) he has to reorder the ChangeLog before he can commit.
1099        No one does that.  So most people want b).
1100        In other words, the order of entries in a ChangeLog should represent
1101        the order in which they have flown (or will flow) into the *central*
1102        repository.
1103 
1104        But in git this is fundamentally indistinguishable, because when Linus
1105        pulls patches from akpm and akpm pulls patches from Linus, it's not
1106        clear which of the two is more "upstream".  Also, when you have many
1107        branches in a repository and pull from one to another, "git" has no way
1108        to know which branch is more "upstream" than the other.  The git-tag(1)
1109        manual page also says:
1110          "One important aspect of git is it is distributed, and being
1111           distributed largely means there is no inherent "upstream" or
1112           "downstream" in the system."
1113        Therefore anyone who attempts to produce a ChangeLog from the merge
1114        history will fail.
1115 
1116        Here we allow the user to specify the pull direction through an
1117        environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM).  If these two
1118        environment variables are not set, we assume a "simple single user"
1119        usage pattern: He manages local changes through stashes and uses
1120        "git pull" only to pull downstream.
1121 
1122        How to distinguish these situation? There are several hints:
1123          - During a "git stash apply", GIT_REFLOG_ACTION is not set.  During
1124            a "git pull", it is set to 'pull '. During a "git pull --rebase",
1125            it is set to 'pull --rebase'.  During a "git cherry-pick", it is
1126            set to 'cherry-pick'.
1127          - During a "git stash apply", there is an environment variable of
1128            the form GITHEAD_<40_hex_digits>='Stashed changes'.  */
1129     {
1130       const char *var;
1131 
1132       var = getenv ("GIT_DOWNSTREAM");
1133       if (var != NULL && var[0] != '\0')
1134         downstream = true;
1135       else
1136         {
1137           var = getenv ("GIT_UPSTREAM");
1138           if (var != NULL && var[0] != '\0')
1139             downstream = false;
1140           else
1141             {
1142               var = getenv ("GIT_REFLOG_ACTION");
1143               #if 0 /* Debugging code */
1144               printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1145               #endif
1146               if (var != NULL
1147                   && ((strncmp (var, "pull", 4) == 0
1148                        && c_strstr (var, " --rebase") == NULL)
1149                       || strncmp (var, "merge origin", 12) == 0))
1150                 downstream = true;
1151               else
1152                 {
1153                   /* "git stash apply", "git rebase", "git cherry-pick" and
1154                      similar.  */
1155                   downstream = false;
1156                 }
1157             }
1158         }
1159     }
1160 
1161     #if 0 /* Debugging code */
1162     {
1163       char buf[1000];
1164       printf ("First line of %%A:\n");
1165       sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1166       printf ("First line of %%B:\n");
1167       sprintf (buf, "head -1 %s", other_file_name); system (buf);
1168       printf ("Guessing calling convention: %s\n",
1169               downstream
1170               ? "%A = modified by user, %B = upstream"
1171               : "%A = upstream, %B = modified by user");
1172     }
1173     #endif
1174 
1175     if (downstream)
1176       {
1177         mainstream_file_name = other_file_name;
1178         modified_file_name = destination_file_name;
1179       }
1180     else
1181       {
1182         mainstream_file_name = destination_file_name;
1183         modified_file_name = other_file_name;
1184       }
1185 
1186     /* Read the three files into memory.  */
1187     read_changelog_file (ancestor_file_name, &ancestor_file);
1188     read_changelog_file (mainstream_file_name, &mainstream_file);
1189     read_changelog_file (modified_file_name, &modified_file);
1190 
1191     /* Compute correspondence between the entries of ancestor_file and of
1192        mainstream_file.  */
1193     compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1194     (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1195 
1196     /* Compute differences between the entries of ancestor_file and of
1197        modified_file.  */
1198     compute_differences (&ancestor_file, &modified_file, &diffs);
1199 
1200     /* Compute the result.  */
1201     result_entries_pointers =
1202       XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1203     result_entries =
1204       gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1205                             NULL, true);
1206     {
1207       size_t k;
1208       for (k = 0; k < mainstream_file.num_entries; k++)
1209         result_entries_pointers[k] =
1210           gl_list_add_last (result_entries, mainstream_file.entries[k]);
1211     }
1212     result_conflicts =
1213       gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1214     {
1215       size_t e;
1216       for (e = 0; e < diffs.num_edits; e++)
1217         {
1218           struct edit *edit = diffs.edits[e];
1219           switch (edit->type)
1220             {
1221             case ADDITION:
1222               if (edit->j1 == 0)
1223                 {
1224                   /* An addition to the top of modified_file.
1225                      Apply it to the top of mainstream_file.  */
1226                   ssize_t j;
1227                   for (j = edit->j2; j >= edit->j1; j--)
1228                     {
1229                       struct entry *added_entry = modified_file.entries[j];
1230                       gl_list_add_first (result_entries, added_entry);
1231                     }
1232                 }
1233               else
1234                 {
1235                   ssize_t i_before;
1236                   ssize_t i_after;
1237                   ssize_t k_before;
1238                   ssize_t k_after;
1239                   i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1240                   ASSERT (i_before >= 0);
1241                   i_after = (edit->j2 + 1 == modified_file.num_entries
1242                              ? ancestor_file.num_entries
1243                              : diffs.index_mapping_reverse[edit->j2 + 1]);
1244                   ASSERT (i_after >= 0);
1245                   ASSERT (i_after == i_before + 1);
1246                   /* An addition between ancestor_file.entries[i_before] and
1247                      ancestor_file.entries[i_after].  See whether these two
1248                      entries still exist in mainstream_file and are still
1249                      consecutive.  */
1250                   k_before = entries_mapping_get (&mapping, i_before);
1251                   k_after = (i_after == ancestor_file.num_entries
1252                              ? mainstream_file.num_entries
1253                              : entries_mapping_get (&mapping, i_after));
1254                   if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1255                     {
1256                       /* Yes, the entry before and after are still neighbours
1257                          in mainstream_file.  Apply the addition between
1258                          them.  */
1259                       if (k_after == mainstream_file.num_entries)
1260                         {
1261                           size_t j;
1262                           for (j = edit->j1; j <= edit->j2; j++)
1263                             {
1264                               struct entry *added_entry = modified_file.entries[j];
1265                               gl_list_add_last (result_entries, added_entry);
1266                             }
1267                         }
1268                       else
1269                         {
1270                           gl_list_node_t node_k_after = result_entries_pointers[k_after];
1271                           size_t j;
1272                           for (j = edit->j1; j <= edit->j2; j++)
1273                             {
1274                               struct entry *added_entry = modified_file.entries[j];
1275                               gl_list_add_before (result_entries, node_k_after, added_entry);
1276                             }
1277                         }
1278                     }
1279                   else
1280                     {
1281                       /* It's not clear where the additions should be applied.
1282                          Let the user decide.  */
1283                       struct conflict *c = XMALLOC (struct conflict);
1284                       size_t j;
1285                       c->num_old_entries = 0;
1286                       c->old_entries = NULL;
1287                       c->num_modified_entries = edit->j2 - edit->j1 + 1;
1288                       c->modified_entries =
1289                         XNMALLOC (c->num_modified_entries, struct entry *);
1290                       for (j = edit->j1; j <= edit->j2; j++)
1291                         c->modified_entries[j - edit->j1] = modified_file.entries[j];
1292                       gl_list_add_last (result_conflicts, c);
1293                     }
1294                 }
1295               break;
1296             case REMOVAL:
1297               {
1298                 /* Apply the removals one by one.  */
1299                 size_t i;
1300                 for (i = edit->i1; i <= edit->i2; i++)
1301                   {
1302                     struct entry *removed_entry = ancestor_file.entries[i];
1303                     ssize_t k = entries_mapping_get (&mapping, i);
1304                     if (k >= 0
1305                         && entry_equals (removed_entry,
1306                                          mainstream_file.entries[k]))
1307                       {
1308                         /* The entry to be removed still exists in
1309                            mainstream_file.  Remove it.  */
1310                         gl_list_node_set_value (result_entries,
1311                                                 result_entries_pointers[k],
1312                                                 &empty_entry);
1313                       }
1314                     else
1315                       {
1316                         /* The entry to be removed was already removed or was
1317                            modified.  This is a conflict.  */
1318                         struct conflict *c = XMALLOC (struct conflict);
1319                         c->num_old_entries = 1;
1320                         c->old_entries =
1321                           XNMALLOC (c->num_old_entries, struct entry *);
1322                         c->old_entries[0] = removed_entry;
1323                         c->num_modified_entries = 0;
1324                         c->modified_entries = NULL;
1325                         gl_list_add_last (result_conflicts, c);
1326                       }
1327                   }
1328               }
1329               break;
1330             case CHANGE:
1331               {
1332                 bool done = false;
1333                 /* When the user usually merges entries from the same day,
1334                    and this edit is at the top of the file:  */
1335                 if (split_merged_entry && edit->j1 == 0)
1336                   {
1337                     /* Test whether the change is "simple merged", i.e. whether
1338                        it consists of additions, followed by an augmentation of
1339                        the first changed entry, followed by small changes of the
1340                        remaining entries:
1341                          entry_1
1342                          entry_2
1343                          ...
1344                          entry_n
1345                        are mapped to
1346                          added_entry
1347                          ...
1348                          added_entry
1349                          augmented_entry_1
1350                          modified_entry_2
1351                          ...
1352                          modified_entry_n.  */
1353                     if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1354                       {
1355                         struct entry *split[2];
1356                         bool simple_merged =
1357                           try_split_merged_entry (ancestor_file.entries[edit->i1],
1358                                                   modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1359                                                   split);
1360                         if (simple_merged)
1361                           {
1362                             size_t i;
1363                             for (i = edit->i1 + 1; i <= edit->i2; i++)
1364                               if (entry_fstrcmp (ancestor_file.entries[i],
1365                                                  modified_file.entries[i + edit->j2 - edit->i2],
1366                                                  FSTRCMP_THRESHOLD)
1367                                   < FSTRCMP_THRESHOLD)
1368                                 {
1369                                   simple_merged = false;
1370                                   break;
1371                                 }
1372                           }
1373                         if (simple_merged)
1374                           {
1375                             /* Apply the additions at the top of modified_file.
1376                                Apply each of the single-entry changes
1377                                separately.  */
1378                             size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1379                             size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1380                             ssize_t j;
1381                             /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]:  */
1382                             gl_list_add_first (result_entries, split[0]);
1383                             /* The additions.  */
1384                             for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1385                               {
1386                                 struct entry *added_entry = modified_file.entries[j];
1387                                 gl_list_add_first (result_entries, added_entry);
1388                               }
1389                             /* Now the single-entry changes.  */
1390                             for (j = edit->j1 + num_added; j <= edit->j2; j++)
1391                               {
1392                                 struct entry *changed_entry =
1393                                   (j == edit->j1 + num_added
1394                                    ? split[1]
1395                                    : modified_file.entries[j]);
1396                                 size_t i = j + edit->i2 - edit->j2;
1397                                 ssize_t k = entries_mapping_get (&mapping, i);
1398                                 if (k >= 0
1399                                     && entry_equals (ancestor_file.entries[i],
1400                                                      mainstream_file.entries[k]))
1401                                   {
1402                                     gl_list_node_set_value (result_entries,
1403                                                             result_entries_pointers[k],
1404                                                             changed_entry);
1405                                   }
1406                                 else if (!entry_equals (ancestor_file.entries[i],
1407                                                         changed_entry))
1408                                   {
1409                                     struct conflict *c = XMALLOC (struct conflict);
1410                                     c->num_old_entries = 1;
1411                                     c->old_entries =
1412                                       XNMALLOC (c->num_old_entries, struct entry *);
1413                                     c->old_entries[0] = ancestor_file.entries[i];
1414                                     c->num_modified_entries = 1;
1415                                     c->modified_entries =
1416                                       XNMALLOC (c->num_modified_entries, struct entry *);
1417                                     c->modified_entries[0] = changed_entry;
1418                                     gl_list_add_last (result_conflicts, c);
1419                                   }
1420                               }
1421                             done = true;
1422                           }
1423                       }
1424                   }
1425                 if (!done)
1426                   {
1427                     bool simple;
1428                     /* Test whether the change is "simple", i.e. whether it
1429                        consists of small changes to the old ChangeLog entries
1430                        and additions before them:
1431                          entry_1
1432                          ...
1433                          entry_n
1434                        are mapped to
1435                          added_entry
1436                          ...
1437                          added_entry
1438                          modified_entry_1
1439                          ...
1440                          modified_entry_n.  */
1441                     if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1442                       {
1443                         size_t i;
1444                         simple = true;
1445                         for (i = edit->i1; i <= edit->i2; i++)
1446                           if (entry_fstrcmp (ancestor_file.entries[i],
1447                                              modified_file.entries[i + edit->j2 - edit->i2],
1448                                              FSTRCMP_THRESHOLD)
1449                               < FSTRCMP_THRESHOLD)
1450                             {
1451                               simple = false;
1452                               break;
1453                             }
1454                       }
1455                     else
1456                       simple = false;
1457                     if (simple)
1458                       {
1459                         /* Apply the additions and each of the single-entry
1460                            changes separately.  */
1461                         size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1462                         size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1463                         if (edit->j1 == 0)
1464                           {
1465                             /* A simple change at the top of modified_file.
1466                                Apply it to the top of mainstream_file.  */
1467                             ssize_t j;
1468                             for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1469                               {
1470                                 struct entry *added_entry = modified_file.entries[j];
1471                                 gl_list_add_first (result_entries, added_entry);
1472                               }
1473                             for (j = edit->j1 + num_added; j <= edit->j2; j++)
1474                               {
1475                                 struct entry *changed_entry = modified_file.entries[j];
1476                                 size_t i = j + edit->i2 - edit->j2;
1477                                 ssize_t k = entries_mapping_get (&mapping, i);
1478                                 if (k >= 0
1479                                     && entry_equals (ancestor_file.entries[i],
1480                                                      mainstream_file.entries[k]))
1481                                   {
1482                                     gl_list_node_set_value (result_entries,
1483                                                             result_entries_pointers[k],
1484                                                             changed_entry);
1485                                   }
1486                                 else
1487                                   {
1488                                     struct conflict *c;
1489                                     ASSERT (!entry_equals (ancestor_file.entries[i],
1490                                                            changed_entry));
1491                                     c = XMALLOC (struct conflict);
1492                                     c->num_old_entries = 1;
1493                                     c->old_entries =
1494                                       XNMALLOC (c->num_old_entries, struct entry *);
1495                                     c->old_entries[0] = ancestor_file.entries[i];
1496                                     c->num_modified_entries = 1;
1497                                     c->modified_entries =
1498                                       XNMALLOC (c->num_modified_entries, struct entry *);
1499                                     c->modified_entries[0] = changed_entry;
1500                                     gl_list_add_last (result_conflicts, c);
1501                                   }
1502                               }
1503                             done = true;
1504                           }
1505                         else
1506                           {
1507                             ssize_t i_before;
1508                             ssize_t k_before;
1509                             bool linear;
1510                             i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1511                             ASSERT (i_before >= 0);
1512                             /* A simple change after ancestor_file.entries[i_before].
1513                                See whether this entry and the following num_changed
1514                                entries still exist in mainstream_file and are still
1515                                consecutive.  */
1516                             k_before = entries_mapping_get (&mapping, i_before);
1517                             linear = (k_before >= 0);
1518                             if (linear)
1519                               {
1520                                 size_t i;
1521                                 for (i = i_before + 1; i <= i_before + num_changed; i++)
1522                                   if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1523                                     {
1524                                       linear = false;
1525                                       break;
1526                                     }
1527                               }
1528                             if (linear)
1529                               {
1530                                 gl_list_node_t node_for_insert =
1531                                   result_entries_pointers[k_before + 1];
1532                                 ssize_t j;
1533                                 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1534                                   {
1535                                     struct entry *added_entry = modified_file.entries[j];
1536                                     gl_list_add_before (result_entries, node_for_insert, added_entry);
1537                                   }
1538                                 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1539                                   {
1540                                     struct entry *changed_entry = modified_file.entries[j];
1541                                     size_t i = j + edit->i2 - edit->j2;
1542                                     ssize_t k = entries_mapping_get (&mapping, i);
1543                                     ASSERT (k >= 0);
1544                                     if (entry_equals (ancestor_file.entries[i],
1545                                                       mainstream_file.entries[k]))
1546                                       {
1547                                         gl_list_node_set_value (result_entries,
1548                                                                 result_entries_pointers[k],
1549                                                                 changed_entry);
1550                                       }
1551                                     else
1552                                       {
1553                                         struct conflict *c;
1554                                         ASSERT (!entry_equals (ancestor_file.entries[i],
1555                                                                changed_entry));
1556                                         c = XMALLOC (struct conflict);
1557                                         c->num_old_entries = 1;
1558                                         c->old_entries =
1559                                           XNMALLOC (c->num_old_entries, struct entry *);
1560                                         c->old_entries[0] = ancestor_file.entries[i];
1561                                         c->num_modified_entries = 1;
1562                                         c->modified_entries =
1563                                           XNMALLOC (c->num_modified_entries, struct entry *);
1564                                         c->modified_entries[0] = changed_entry;
1565                                         gl_list_add_last (result_conflicts, c);
1566                                       }
1567                                   }
1568                                 done = true;
1569                               }
1570                           }
1571                       }
1572                     else
1573                       {
1574                         /* A big change.
1575                            See whether the num_changed entries still exist
1576                            unchanged in mainstream_file and are still
1577                            consecutive.  */
1578                         ssize_t i_first;
1579                         ssize_t k_first;
1580                         bool linear_unchanged;
1581                         i_first = edit->i1;
1582                         k_first = entries_mapping_get (&mapping, i_first);
1583                         linear_unchanged =
1584                           (k_first >= 0
1585                            && entry_equals (ancestor_file.entries[i_first],
1586                                             mainstream_file.entries[k_first]));
1587                         if (linear_unchanged)
1588                           {
1589                             size_t i;
1590                             for (i = i_first + 1; i <= edit->i2; i++)
1591                               if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1592                                     && entry_equals (ancestor_file.entries[i],
1593                                                      mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1594                                 {
1595                                   linear_unchanged = false;
1596                                   break;
1597                                 }
1598                           }
1599                         if (linear_unchanged)
1600                           {
1601                             gl_list_node_t node_for_insert =
1602                               result_entries_pointers[k_first];
1603                             ssize_t j;
1604                             size_t i;
1605                             for (j = edit->j2; j >= edit->j1; j--)
1606                               {
1607                                 struct entry *new_entry = modified_file.entries[j];
1608                                 gl_list_add_before (result_entries, node_for_insert, new_entry);
1609                               }
1610                             for (i = edit->i1; i <= edit->i2; i++)
1611                               {
1612                                 ssize_t k = entries_mapping_get (&mapping, i);
1613                                 ASSERT (k >= 0);
1614                                 ASSERT (entry_equals (ancestor_file.entries[i],
1615                                                       mainstream_file.entries[k]));
1616                                 gl_list_node_set_value (result_entries,
1617                                                         result_entries_pointers[k],
1618                                                         &empty_entry);
1619                               }
1620                             done = true;
1621                           }
1622                       }
1623                   }
1624                 if (!done)
1625                   {
1626                     struct conflict *c = XMALLOC (struct conflict);
1627                     size_t i, j;
1628                     c->num_old_entries = edit->i2 - edit->i1 + 1;
1629                     c->old_entries =
1630                       XNMALLOC (c->num_old_entries, struct entry *);
1631                     for (i = edit->i1; i <= edit->i2; i++)
1632                       c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1633                     c->num_modified_entries = edit->j2 - edit->j1 + 1;
1634                     c->modified_entries =
1635                       XNMALLOC (c->num_modified_entries, struct entry *);
1636                     for (j = edit->j1; j <= edit->j2; j++)
1637                       c->modified_entries[j - edit->j1] = modified_file.entries[j];
1638                     gl_list_add_last (result_conflicts, c);
1639                   }
1640               }
1641               break;
1642             }
1643         }
1644     }
1645 
1646     /* Output the result.  */
1647     {
1648       FILE *fp = fopen (destination_file_name, "w");
1649       if (fp == NULL)
1650         {
1651           fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1652           exit (EXIT_FAILURE);
1653         }
1654 
1655       /* Output the conflicts at the top.  */
1656       {
1657         size_t n = gl_list_size (result_conflicts);
1658         size_t i;
1659         for (i = 0; i < n; i++)
1660           conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1661       }
1662       /* Output the modified and unmodified entries, in order.  */
1663       {
1664         gl_list_iterator_t iter = gl_list_iterator (result_entries);
1665         const void *elt;
1666         gl_list_node_t node;
1667         while (gl_list_iterator_next (&iter, &elt, &node))
1668           entry_write (fp, (struct entry *) elt);
1669         gl_list_iterator_free (&iter);
1670       }
1671 
1672       if (fwriteerror (fp))
1673         {
1674           fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1675           exit (EXIT_FAILURE);
1676         }
1677     }
1678 
1679     exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
1680   }
1681 }

/* [previous][next][first][last][top][bottom][index][help] */