Imported Upstream version 2.14.6
[platform/upstream/git.git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "unpack-trees.h"
4 #include "dir.h"
5 #include "tree.h"
6 #include "pathspec.h"
7
8 static const char *get_mode(const char *str, unsigned int *modep)
9 {
10         unsigned char c;
11         unsigned int mode = 0;
12
13         if (*str == ' ')
14                 return NULL;
15
16         while ((c = *str++) != ' ') {
17                 if (c < '0' || c > '7')
18                         return NULL;
19                 mode = (mode << 3) + (c - '0');
20         }
21         *modep = mode;
22         return str;
23 }
24
25 static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
26 {
27         const char *path;
28         unsigned int mode, len;
29
30         if (size < 23 || buf[size - 21]) {
31                 strbuf_addstr(err, _("too-short tree object"));
32                 return -1;
33         }
34
35         path = get_mode(buf, &mode);
36         if (!path) {
37                 strbuf_addstr(err, _("malformed mode in tree entry"));
38                 return -1;
39         }
40         if (!*path) {
41                 strbuf_addstr(err, _("empty filename in tree entry"));
42                 return -1;
43         }
44 #ifdef GIT_WINDOWS_NATIVE
45         if (protect_ntfs && strchr(path, '\\')) {
46                 strbuf_addf(err, _("filename in tree entry contains backslash: '%s'"), path);
47                 return -1;
48         }
49 #endif
50         len = strlen(path) + 1;
51
52         /* Initialize the descriptor entry */
53         desc->entry.path = path;
54         desc->entry.mode = canon_mode(mode);
55         desc->entry.oid  = (const struct object_id *)(path + len);
56
57         return 0;
58 }
59
60 static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
61 {
62         desc->buffer = buffer;
63         desc->size = size;
64         if (size)
65                 return decode_tree_entry(desc, buffer, size, err);
66         return 0;
67 }
68
69 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
70 {
71         struct strbuf err = STRBUF_INIT;
72         if (init_tree_desc_internal(desc, buffer, size, &err))
73                 die("%s", err.buf);
74         strbuf_release(&err);
75 }
76
77 int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
78 {
79         struct strbuf err = STRBUF_INIT;
80         int result = init_tree_desc_internal(desc, buffer, size, &err);
81         if (result)
82                 error("%s", err.buf);
83         strbuf_release(&err);
84         return result;
85 }
86
87 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
88 {
89         unsigned long size = 0;
90         void *buf = NULL;
91
92         if (sha1) {
93                 buf = read_object_with_reference(sha1, tree_type, &size, NULL);
94                 if (!buf)
95                         die("unable to read tree %s", sha1_to_hex(sha1));
96         }
97         init_tree_desc(desc, buf, size);
98         return buf;
99 }
100
101 static void entry_clear(struct name_entry *a)
102 {
103         memset(a, 0, sizeof(*a));
104 }
105
106 static void entry_extract(struct tree_desc *t, struct name_entry *a)
107 {
108         *a = t->entry;
109 }
110
111 static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
112 {
113         const void *buf = desc->buffer;
114         const unsigned char *end = desc->entry.oid->hash + 20;
115         unsigned long size = desc->size;
116         unsigned long len = end - (const unsigned char *)buf;
117
118         if (size < len)
119                 die(_("too-short tree file"));
120         buf = end;
121         size -= len;
122         desc->buffer = buf;
123         desc->size = size;
124         if (size)
125                 return decode_tree_entry(desc, buf, size, err);
126         return 0;
127 }
128
129 void update_tree_entry(struct tree_desc *desc)
130 {
131         struct strbuf err = STRBUF_INIT;
132         if (update_tree_entry_internal(desc, &err))
133                 die("%s", err.buf);
134         strbuf_release(&err);
135 }
136
137 int update_tree_entry_gently(struct tree_desc *desc)
138 {
139         struct strbuf err = STRBUF_INIT;
140         if (update_tree_entry_internal(desc, &err)) {
141                 error("%s", err.buf);
142                 strbuf_release(&err);
143                 /* Stop processing this tree after error */
144                 desc->size = 0;
145                 return -1;
146         }
147         strbuf_release(&err);
148         return 0;
149 }
150
151 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
152 {
153         if (!desc->size)
154                 return 0;
155
156         *entry = desc->entry;
157         update_tree_entry(desc);
158         return 1;
159 }
160
161 int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
162 {
163         if (!desc->size)
164                 return 0;
165
166         *entry = desc->entry;
167         if (update_tree_entry_gently(desc))
168                 return 0;
169         return 1;
170 }
171
172 void setup_traverse_info(struct traverse_info *info, const char *base)
173 {
174         int pathlen = strlen(base);
175         static struct traverse_info dummy;
176
177         memset(info, 0, sizeof(*info));
178         if (pathlen && base[pathlen-1] == '/')
179                 pathlen--;
180         info->pathlen = pathlen ? pathlen + 1 : 0;
181         info->name.path = base;
182         info->name.oid = (void *)(base + pathlen + 1);
183         if (pathlen)
184                 info->prev = &dummy;
185 }
186
187 char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
188 {
189         int len = tree_entry_len(n);
190         int pathlen = info->pathlen;
191
192         path[pathlen + len] = 0;
193         for (;;) {
194                 memcpy(path + pathlen, n->path, len);
195                 if (!pathlen)
196                         break;
197                 path[--pathlen] = '/';
198                 n = &info->name;
199                 len = tree_entry_len(n);
200                 info = info->prev;
201                 pathlen -= len;
202         }
203         return path;
204 }
205
206 struct tree_desc_skip {
207         struct tree_desc_skip *prev;
208         const void *ptr;
209 };
210
211 struct tree_desc_x {
212         struct tree_desc d;
213         struct tree_desc_skip *skip;
214 };
215
216 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
217 {
218         /*
219          * The caller wants to pick *a* from a tree or nothing.
220          * We are looking at *b* in a tree.
221          *
222          * (0) If a and b are the same name, we are trivially happy.
223          *
224          * There are three possibilities where *a* could be hiding
225          * behind *b*.
226          *
227          * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
228          *                                matter what.
229          * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
230          * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
231          *
232          * Otherwise we know *a* won't appear in the tree without
233          * scanning further.
234          */
235
236         int cmp = name_compare(a, a_len, b, b_len);
237
238         /* Most common case first -- reading sync'd trees */
239         if (!cmp)
240                 return cmp;
241
242         if (0 < cmp) {
243                 /* a comes after b; it does not matter if it is case (3)
244                 if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
245                         return 1;
246                 */
247                 return 1; /* keep looking */
248         }
249
250         /* b comes after a; are we looking at case (2)? */
251         if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
252                 return 1; /* keep looking */
253
254         return -1; /* a cannot appear in the tree */
255 }
256
257 /*
258  * From the extended tree_desc, extract the first name entry, while
259  * paying attention to the candidate "first" name.  Most importantly,
260  * when looking for an entry, if there are entries that sorts earlier
261  * in the tree object representation than that name, skip them and
262  * process the named entry first.  We will remember that we haven't
263  * processed the first entry yet, and in the later call skip the
264  * entry we processed early when update_extended_entry() is called.
265  *
266  * E.g. if the underlying tree object has these entries:
267  *
268  *    blob    "t-1"
269  *    blob    "t-2"
270  *    tree    "t"
271  *    blob    "t=1"
272  *
273  * and the "first" asks for "t", remember that we still need to
274  * process "t-1" and "t-2" but extract "t".  After processing the
275  * entry "t" from this call, the caller will let us know by calling
276  * update_extended_entry() that we can remember "t" has been processed
277  * already.
278  */
279
280 static void extended_entry_extract(struct tree_desc_x *t,
281                                    struct name_entry *a,
282                                    const char *first,
283                                    int first_len)
284 {
285         const char *path;
286         int len;
287         struct tree_desc probe;
288         struct tree_desc_skip *skip;
289
290         /*
291          * Extract the first entry from the tree_desc, but skip the
292          * ones that we already returned in earlier rounds.
293          */
294         while (1) {
295                 if (!t->d.size) {
296                         entry_clear(a);
297                         break; /* not found */
298                 }
299                 entry_extract(&t->d, a);
300                 for (skip = t->skip; skip; skip = skip->prev)
301                         if (a->path == skip->ptr)
302                                 break; /* found */
303                 if (!skip)
304                         break;
305                 /* We have processed this entry already. */
306                 update_tree_entry(&t->d);
307         }
308
309         if (!first || !a->path)
310                 return;
311
312         /*
313          * The caller wants "first" from this tree, or nothing.
314          */
315         path = a->path;
316         len = tree_entry_len(a);
317         switch (check_entry_match(first, first_len, path, len)) {
318         case -1:
319                 entry_clear(a);
320         case 0:
321                 return;
322         default:
323                 break;
324         }
325
326         /*
327          * We need to look-ahead -- we suspect that a subtree whose
328          * name is "first" may be hiding behind the current entry "path".
329          */
330         probe = t->d;
331         while (probe.size) {
332                 entry_extract(&probe, a);
333                 path = a->path;
334                 len = tree_entry_len(a);
335                 switch (check_entry_match(first, first_len, path, len)) {
336                 case -1:
337                         entry_clear(a);
338                 case 0:
339                         return;
340                 default:
341                         update_tree_entry(&probe);
342                         break;
343                 }
344                 /* keep looking */
345         }
346         entry_clear(a);
347 }
348
349 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
350 {
351         if (t->d.entry.path == a->path) {
352                 update_tree_entry(&t->d);
353         } else {
354                 /* we have returned this entry early */
355                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
356                 skip->ptr = a->path;
357                 skip->prev = t->skip;
358                 t->skip = skip;
359         }
360 }
361
362 static void free_extended_entry(struct tree_desc_x *t)
363 {
364         struct tree_desc_skip *p, *s;
365
366         for (s = t->skip; s; s = p) {
367                 p = s->prev;
368                 free(s);
369         }
370 }
371
372 static inline int prune_traversal(struct name_entry *e,
373                                   struct traverse_info *info,
374                                   struct strbuf *base,
375                                   int still_interesting)
376 {
377         if (!info->pathspec || still_interesting == 2)
378                 return 2;
379         if (still_interesting < 0)
380                 return still_interesting;
381         return tree_entry_interesting(e, base, 0, info->pathspec);
382 }
383
384 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
385 {
386         int error = 0;
387         struct name_entry *entry = xmalloc(n*sizeof(*entry));
388         int i;
389         struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
390         struct strbuf base = STRBUF_INIT;
391         int interesting = 1;
392         char *traverse_path;
393
394         for (i = 0; i < n; i++)
395                 tx[i].d = t[i];
396
397         if (info->prev) {
398                 strbuf_grow(&base, info->pathlen);
399                 make_traverse_path(base.buf, info->prev, &info->name);
400                 base.buf[info->pathlen-1] = '/';
401                 strbuf_setlen(&base, info->pathlen);
402                 traverse_path = xstrndup(base.buf, info->pathlen);
403         } else {
404                 traverse_path = xstrndup(info->name.path, info->pathlen);
405         }
406         info->traverse_path = traverse_path;
407         for (;;) {
408                 int trees_used;
409                 unsigned long mask, dirmask;
410                 const char *first = NULL;
411                 int first_len = 0;
412                 struct name_entry *e = NULL;
413                 int len;
414
415                 for (i = 0; i < n; i++) {
416                         e = entry + i;
417                         extended_entry_extract(tx + i, e, NULL, 0);
418                 }
419
420                 /*
421                  * A tree may have "t-2" at the current location even
422                  * though it may have "t" that is a subtree behind it,
423                  * and another tree may return "t".  We want to grab
424                  * all "t" from all trees to match in such a case.
425                  */
426                 for (i = 0; i < n; i++) {
427                         e = entry + i;
428                         if (!e->path)
429                                 continue;
430                         len = tree_entry_len(e);
431                         if (!first) {
432                                 first = e->path;
433                                 first_len = len;
434                                 continue;
435                         }
436                         if (name_compare(e->path, len, first, first_len) < 0) {
437                                 first = e->path;
438                                 first_len = len;
439                         }
440                 }
441
442                 if (first) {
443                         for (i = 0; i < n; i++) {
444                                 e = entry + i;
445                                 extended_entry_extract(tx + i, e, first, first_len);
446                                 /* Cull the ones that are not the earliest */
447                                 if (!e->path)
448                                         continue;
449                                 len = tree_entry_len(e);
450                                 if (name_compare(e->path, len, first, first_len))
451                                         entry_clear(e);
452                         }
453                 }
454
455                 /* Now we have in entry[i] the earliest name from the trees */
456                 mask = 0;
457                 dirmask = 0;
458                 for (i = 0; i < n; i++) {
459                         if (!entry[i].path)
460                                 continue;
461                         mask |= 1ul << i;
462                         if (S_ISDIR(entry[i].mode))
463                                 dirmask |= 1ul << i;
464                         e = &entry[i];
465                 }
466                 if (!mask)
467                         break;
468                 interesting = prune_traversal(e, info, &base, interesting);
469                 if (interesting < 0)
470                         break;
471                 if (interesting) {
472                         trees_used = info->fn(n, mask, dirmask, entry, info);
473                         if (trees_used < 0) {
474                                 error = trees_used;
475                                 if (!info->show_all_errors)
476                                         break;
477                         }
478                         mask &= trees_used;
479                 }
480                 for (i = 0; i < n; i++)
481                         if (mask & (1ul << i))
482                                 update_extended_entry(tx + i, entry + i);
483         }
484         free(entry);
485         for (i = 0; i < n; i++)
486                 free_extended_entry(tx + i);
487         free(tx);
488         free(traverse_path);
489         info->traverse_path = NULL;
490         strbuf_release(&base);
491         return error;
492 }
493
494 struct dir_state {
495         void *tree;
496         unsigned long size;
497         unsigned char sha1[20];
498 };
499
500 static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
501 {
502         int namelen = strlen(name);
503         while (t->size) {
504                 const char *entry;
505                 const struct object_id *oid;
506                 int entrylen, cmp;
507
508                 oid = tree_entry_extract(t, &entry, mode);
509                 entrylen = tree_entry_len(&t->entry);
510                 update_tree_entry(t);
511                 if (entrylen > namelen)
512                         continue;
513                 cmp = memcmp(name, entry, entrylen);
514                 if (cmp > 0)
515                         continue;
516                 if (cmp < 0)
517                         break;
518                 if (entrylen == namelen) {
519                         hashcpy(result, oid->hash);
520                         return 0;
521                 }
522                 if (name[entrylen] != '/')
523                         continue;
524                 if (!S_ISDIR(*mode))
525                         break;
526                 if (++entrylen == namelen) {
527                         hashcpy(result, oid->hash);
528                         return 0;
529                 }
530                 return get_tree_entry(oid->hash, name + entrylen, result, mode);
531         }
532         return -1;
533 }
534
535 int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
536 {
537         int retval;
538         void *tree;
539         unsigned long size;
540         unsigned char root[20];
541
542         tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
543         if (!tree)
544                 return -1;
545
546         if (name[0] == '\0') {
547                 hashcpy(sha1, root);
548                 free(tree);
549                 return 0;
550         }
551
552         if (!size) {
553                 retval = -1;
554         } else {
555                 struct tree_desc t;
556                 init_tree_desc(&t, tree, size);
557                 retval = find_tree_entry(&t, name, sha1, mode);
558         }
559         free(tree);
560         return retval;
561 }
562
563 /*
564  * This is Linux's built-in max for the number of symlinks to follow.
565  * That limit, of course, does not affect git, but it's a reasonable
566  * choice.
567  */
568 #define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
569
570 /**
571  * Find a tree entry by following symlinks in tree_sha (which is
572  * assumed to be the root of the repository).  In the event that a
573  * symlink points outside the repository (e.g. a link to /foo or a
574  * root-level link to ../foo), the portion of the link which is
575  * outside the repository will be returned in result_path, and *mode
576  * will be set to 0.  It is assumed that result_path is uninitialized.
577  * If there are no symlinks, or the end result of the symlink chain
578  * points to an object inside the repository, result will be filled in
579  * with the sha1 of the found object, and *mode will hold the mode of
580  * the object.
581  *
582  * See the code for enum follow_symlink_result for a description of
583  * the return values.
584  */
585 enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode)
586 {
587         int retval = MISSING_OBJECT;
588         struct dir_state *parents = NULL;
589         size_t parents_alloc = 0;
590         size_t i, parents_nr = 0;
591         unsigned char current_tree_sha1[20];
592         struct strbuf namebuf = STRBUF_INIT;
593         struct tree_desc t;
594         int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
595
596         init_tree_desc(&t, NULL, 0UL);
597         strbuf_addstr(&namebuf, name);
598         hashcpy(current_tree_sha1, tree_sha1);
599
600         while (1) {
601                 int find_result;
602                 char *first_slash;
603                 char *remainder = NULL;
604
605                 if (!t.buffer) {
606                         void *tree;
607                         unsigned char root[20];
608                         unsigned long size;
609                         tree = read_object_with_reference(current_tree_sha1,
610                                                           tree_type, &size,
611                                                           root);
612                         if (!tree)
613                                 goto done;
614
615                         ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
616                         parents[parents_nr].tree = tree;
617                         parents[parents_nr].size = size;
618                         hashcpy(parents[parents_nr].sha1, root);
619                         parents_nr++;
620
621                         if (namebuf.buf[0] == '\0') {
622                                 hashcpy(result, root);
623                                 retval = FOUND;
624                                 goto done;
625                         }
626
627                         if (!size)
628                                 goto done;
629
630                         /* descend */
631                         init_tree_desc(&t, tree, size);
632                 }
633
634                 /* Handle symlinks to e.g. a//b by removing leading slashes */
635                 while (namebuf.buf[0] == '/') {
636                         strbuf_remove(&namebuf, 0, 1);
637                 }
638
639                 /* Split namebuf into a first component and a remainder */
640                 if ((first_slash = strchr(namebuf.buf, '/'))) {
641                         *first_slash = 0;
642                         remainder = first_slash + 1;
643                 }
644
645                 if (!strcmp(namebuf.buf, "..")) {
646                         struct dir_state *parent;
647                         /*
648                          * We could end up with .. in the namebuf if it
649                          * appears in a symlink.
650                          */
651
652                         if (parents_nr == 1) {
653                                 if (remainder)
654                                         *first_slash = '/';
655                                 strbuf_add(result_path, namebuf.buf,
656                                            namebuf.len);
657                                 *mode = 0;
658                                 retval = FOUND;
659                                 goto done;
660                         }
661                         parent = &parents[parents_nr - 1];
662                         free(parent->tree);
663                         parents_nr--;
664                         parent = &parents[parents_nr - 1];
665                         init_tree_desc(&t, parent->tree, parent->size);
666                         strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
667                         continue;
668                 }
669
670                 /* We could end up here via a symlink to dir/.. */
671                 if (namebuf.buf[0] == '\0') {
672                         hashcpy(result, parents[parents_nr - 1].sha1);
673                         retval = FOUND;
674                         goto done;
675                 }
676
677                 /* Look up the first (or only) path component in the tree. */
678                 find_result = find_tree_entry(&t, namebuf.buf,
679                                               current_tree_sha1, mode);
680                 if (find_result) {
681                         goto done;
682                 }
683
684                 if (S_ISDIR(*mode)) {
685                         if (!remainder) {
686                                 hashcpy(result, current_tree_sha1);
687                                 retval = FOUND;
688                                 goto done;
689                         }
690                         /* Descend the tree */
691                         t.buffer = NULL;
692                         strbuf_remove(&namebuf, 0,
693                                       1 + first_slash - namebuf.buf);
694                 } else if (S_ISREG(*mode)) {
695                         if (!remainder) {
696                                 hashcpy(result, current_tree_sha1);
697                                 retval = FOUND;
698                         } else {
699                                 retval = NOT_DIR;
700                         }
701                         goto done;
702                 } else if (S_ISLNK(*mode)) {
703                         /* Follow a symlink */
704                         unsigned long link_len;
705                         size_t len;
706                         char *contents, *contents_start;
707                         struct dir_state *parent;
708                         enum object_type type;
709
710                         if (follows_remaining-- == 0) {
711                                 /* Too many symlinks followed */
712                                 retval = SYMLINK_LOOP;
713                                 goto done;
714                         }
715
716                         /*
717                          * At this point, we have followed at a least
718                          * one symlink, so on error we need to report this.
719                          */
720                         retval = DANGLING_SYMLINK;
721
722                         contents = read_sha1_file(current_tree_sha1, &type,
723                                                   &link_len);
724
725                         if (!contents)
726                                 goto done;
727
728                         if (contents[0] == '/') {
729                                 strbuf_addstr(result_path, contents);
730                                 free(contents);
731                                 *mode = 0;
732                                 retval = FOUND;
733                                 goto done;
734                         }
735
736                         if (remainder)
737                                 len = first_slash - namebuf.buf;
738                         else
739                                 len = namebuf.len;
740
741                         contents_start = contents;
742
743                         parent = &parents[parents_nr - 1];
744                         init_tree_desc(&t, parent->tree, parent->size);
745                         strbuf_splice(&namebuf, 0, len,
746                                       contents_start, link_len);
747                         if (remainder)
748                                 namebuf.buf[link_len] = '/';
749                         free(contents);
750                 }
751         }
752 done:
753         for (i = 0; i < parents_nr; i++)
754                 free(parents[i].tree);
755         free(parents);
756
757         strbuf_release(&namebuf);
758         return retval;
759 }
760
761 static int match_entry(const struct pathspec_item *item,
762                        const struct name_entry *entry, int pathlen,
763                        const char *match, int matchlen,
764                        enum interesting *never_interesting)
765 {
766         int m = -1; /* signals that we haven't called strncmp() */
767
768         if (item->magic & PATHSPEC_ICASE)
769                 /*
770                  * "Never interesting" trick requires exact
771                  * matching. We could do something clever with inexact
772                  * matching, but it's trickier (and not to forget that
773                  * strcasecmp is locale-dependent, at least in
774                  * glibc). Just disable it for now. It can't be worse
775                  * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
776                  * pattern.
777                  */
778                 *never_interesting = entry_not_interesting;
779         else if (*never_interesting != entry_not_interesting) {
780                 /*
781                  * We have not seen any match that sorts later
782                  * than the current path.
783                  */
784
785                 /*
786                  * Does match sort strictly earlier than path
787                  * with their common parts?
788                  */
789                 m = strncmp(match, entry->path,
790                             (matchlen < pathlen) ? matchlen : pathlen);
791                 if (m < 0)
792                         return 0;
793
794                 /*
795                  * If we come here even once, that means there is at
796                  * least one pathspec that would sort equal to or
797                  * later than the path we are currently looking at.
798                  * In other words, if we have never reached this point
799                  * after iterating all pathspecs, it means all
800                  * pathspecs are either outside of base, or inside the
801                  * base but sorts strictly earlier than the current
802                  * one.  In either case, they will never match the
803                  * subsequent entries.  In such a case, we initialized
804                  * the variable to -1 and that is what will be
805                  * returned, allowing the caller to terminate early.
806                  */
807                 *never_interesting = entry_not_interesting;
808         }
809
810         if (pathlen > matchlen)
811                 return 0;
812
813         if (matchlen > pathlen) {
814                 if (match[pathlen] != '/')
815                         return 0;
816                 if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
817                         return 0;
818         }
819
820         if (m == -1)
821                 /*
822                  * we cheated and did not do strncmp(), so we do
823                  * that here.
824                  */
825                 m = ps_strncmp(item, match, entry->path, pathlen);
826
827         /*
828          * If common part matched earlier then it is a hit,
829          * because we rejected the case where path is not a
830          * leading directory and is shorter than match.
831          */
832         if (!m)
833                 /*
834                  * match_entry does not check if the prefix part is
835                  * matched case-sensitively. If the entry is a
836                  * directory and part of prefix, it'll be rematched
837                  * eventually by basecmp with special treatment for
838                  * the prefix.
839                  */
840                 return 1;
841
842         return 0;
843 }
844
845 /* :(icase)-aware string compare */
846 static int basecmp(const struct pathspec_item *item,
847                    const char *base, const char *match, int len)
848 {
849         if (item->magic & PATHSPEC_ICASE) {
850                 int ret, n = len > item->prefix ? item->prefix : len;
851                 ret = strncmp(base, match, n);
852                 if (ret)
853                         return ret;
854                 base += n;
855                 match += n;
856                 len -= n;
857         }
858         return ps_strncmp(item, base, match, len);
859 }
860
861 static int match_dir_prefix(const struct pathspec_item *item,
862                             const char *base,
863                             const char *match, int matchlen)
864 {
865         if (basecmp(item, base, match, matchlen))
866                 return 0;
867
868         /*
869          * If the base is a subdirectory of a path which
870          * was specified, all of them are interesting.
871          */
872         if (!matchlen ||
873             base[matchlen] == '/' ||
874             match[matchlen - 1] == '/')
875                 return 1;
876
877         /* Just a random prefix match */
878         return 0;
879 }
880
881 /*
882  * Perform matching on the leading non-wildcard part of
883  * pathspec. item->nowildcard_len must be greater than zero. Return
884  * non-zero if base is matched.
885  */
886 static int match_wildcard_base(const struct pathspec_item *item,
887                                const char *base, int baselen,
888                                int *matched)
889 {
890         const char *match = item->match;
891         /* the wildcard part is not considered in this function */
892         int matchlen = item->nowildcard_len;
893
894         if (baselen) {
895                 int dirlen;
896                 /*
897                  * Return early if base is longer than the
898                  * non-wildcard part but it does not match.
899                  */
900                 if (baselen >= matchlen) {
901                         *matched = matchlen;
902                         return !basecmp(item, base, match, matchlen);
903                 }
904
905                 dirlen = matchlen;
906                 while (dirlen && match[dirlen - 1] != '/')
907                         dirlen--;
908
909                 /*
910                  * Return early if base is shorter than the
911                  * non-wildcard part but it does not match. Note that
912                  * base ends with '/' so we are sure it really matches
913                  * directory
914                  */
915                 if (basecmp(item, base, match, baselen))
916                         return 0;
917                 *matched = baselen;
918         } else
919                 *matched = 0;
920         /*
921          * we could have checked entry against the non-wildcard part
922          * that is not in base and does similar never_interesting
923          * optimization as in match_entry. For now just be happy with
924          * base comparison.
925          */
926         return entry_interesting;
927 }
928
929 /*
930  * Is a tree entry interesting given the pathspec we have?
931  *
932  * Pre-condition: either baselen == base_offset (i.e. empty path)
933  * or base[baselen-1] == '/' (i.e. with trailing slash).
934  */
935 static enum interesting do_match(const struct name_entry *entry,
936                                  struct strbuf *base, int base_offset,
937                                  const struct pathspec *ps,
938                                  int exclude)
939 {
940         int i;
941         int pathlen, baselen = base->len - base_offset;
942         enum interesting never_interesting = ps->has_wildcard ?
943                 entry_not_interesting : all_entries_not_interesting;
944
945         GUARD_PATHSPEC(ps,
946                        PATHSPEC_FROMTOP |
947                        PATHSPEC_MAXDEPTH |
948                        PATHSPEC_LITERAL |
949                        PATHSPEC_GLOB |
950                        PATHSPEC_ICASE |
951                        PATHSPEC_EXCLUDE);
952
953         if (!ps->nr) {
954                 if (!ps->recursive ||
955                     !(ps->magic & PATHSPEC_MAXDEPTH) ||
956                     ps->max_depth == -1)
957                         return all_entries_interesting;
958                 return within_depth(base->buf + base_offset, baselen,
959                                     !!S_ISDIR(entry->mode),
960                                     ps->max_depth) ?
961                         entry_interesting : entry_not_interesting;
962         }
963
964         pathlen = tree_entry_len(entry);
965
966         for (i = ps->nr - 1; i >= 0; i--) {
967                 const struct pathspec_item *item = ps->items+i;
968                 const char *match = item->match;
969                 const char *base_str = base->buf + base_offset;
970                 int matchlen = item->len, matched = 0;
971
972                 if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
973                     ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
974                         continue;
975
976                 if (baselen >= matchlen) {
977                         /* If it doesn't match, move along... */
978                         if (!match_dir_prefix(item, base_str, match, matchlen))
979                                 goto match_wildcards;
980
981                         if (!ps->recursive ||
982                             !(ps->magic & PATHSPEC_MAXDEPTH) ||
983                             ps->max_depth == -1)
984                                 return all_entries_interesting;
985
986                         return within_depth(base_str + matchlen + 1,
987                                             baselen - matchlen - 1,
988                                             !!S_ISDIR(entry->mode),
989                                             ps->max_depth) ?
990                                 entry_interesting : entry_not_interesting;
991                 }
992
993                 /* Either there must be no base, or the base must match. */
994                 if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
995                         if (match_entry(item, entry, pathlen,
996                                         match + baselen, matchlen - baselen,
997                                         &never_interesting))
998                                 return entry_interesting;
999
1000                         if (item->nowildcard_len < item->len) {
1001                                 if (!git_fnmatch(item, match + baselen, entry->path,
1002                                                  item->nowildcard_len - baselen))
1003                                         return entry_interesting;
1004
1005                                 /*
1006                                  * Match all directories. We'll try to
1007                                  * match files later on.
1008                                  */
1009                                 if (ps->recursive && S_ISDIR(entry->mode))
1010                                         return entry_interesting;
1011
1012                                 /*
1013                                  * When matching against submodules with
1014                                  * wildcard characters, ensure that the entry
1015                                  * at least matches up to the first wild
1016                                  * character.  More accurate matching can then
1017                                  * be performed in the submodule itself.
1018                                  */
1019                                 if (ps->recursive && S_ISGITLINK(entry->mode) &&
1020                                     !ps_strncmp(item, match + baselen,
1021                                                 entry->path,
1022                                                 item->nowildcard_len - baselen))
1023                                         return entry_interesting;
1024                         }
1025
1026                         continue;
1027                 }
1028
1029 match_wildcards:
1030                 if (item->nowildcard_len == item->len)
1031                         continue;
1032
1033                 if (item->nowildcard_len &&
1034                     !match_wildcard_base(item, base_str, baselen, &matched))
1035                         continue;
1036
1037                 /*
1038                  * Concatenate base and entry->path into one and do
1039                  * fnmatch() on it.
1040                  *
1041                  * While we could avoid concatenation in certain cases
1042                  * [1], which saves a memcpy and potentially a
1043                  * realloc, it turns out not worth it. Measurement on
1044                  * linux-2.6 does not show any clear improvements,
1045                  * partly because of the nowildcard_len optimization
1046                  * in git_fnmatch(). Avoid micro-optimizations here.
1047                  *
1048                  * [1] if match_wildcard_base() says the base
1049                  * directory is already matched, we only need to match
1050                  * the rest, which is shorter so _in theory_ faster.
1051                  */
1052
1053                 strbuf_add(base, entry->path, pathlen);
1054
1055                 if (!git_fnmatch(item, match, base->buf + base_offset,
1056                                  item->nowildcard_len)) {
1057                         strbuf_setlen(base, base_offset + baselen);
1058                         return entry_interesting;
1059                 }
1060
1061                 /*
1062                  * When matching against submodules with
1063                  * wildcard characters, ensure that the entry
1064                  * at least matches up to the first wild
1065                  * character.  More accurate matching can then
1066                  * be performed in the submodule itself.
1067                  */
1068                 if (ps->recursive && S_ISGITLINK(entry->mode) &&
1069                     !ps_strncmp(item, match, base->buf + base_offset,
1070                                 item->nowildcard_len)) {
1071                         strbuf_setlen(base, base_offset + baselen);
1072                         return entry_interesting;
1073                 }
1074
1075                 strbuf_setlen(base, base_offset + baselen);
1076
1077                 /*
1078                  * Match all directories. We'll try to match files
1079                  * later on.
1080                  * max_depth is ignored but we may consider support it
1081                  * in future, see
1082                  * https://public-inbox.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
1083                  */
1084                 if (ps->recursive && S_ISDIR(entry->mode))
1085                         return entry_interesting;
1086         }
1087         return never_interesting; /* No matches */
1088 }
1089
1090 /*
1091  * Is a tree entry interesting given the pathspec we have?
1092  *
1093  * Pre-condition: either baselen == base_offset (i.e. empty path)
1094  * or base[baselen-1] == '/' (i.e. with trailing slash).
1095  */
1096 enum interesting tree_entry_interesting(const struct name_entry *entry,
1097                                         struct strbuf *base, int base_offset,
1098                                         const struct pathspec *ps)
1099 {
1100         enum interesting positive, negative;
1101         positive = do_match(entry, base, base_offset, ps, 0);
1102
1103         /*
1104          * case | entry | positive | negative | result
1105          * -----+-------+----------+----------+-------
1106          *   1  |  file |   -1     |  -1..2   |  -1
1107          *   2  |  file |    0     |  -1..2   |   0
1108          *   3  |  file |    1     |   -1     |   1
1109          *   4  |  file |    1     |    0     |   1
1110          *   5  |  file |    1     |    1     |   0
1111          *   6  |  file |    1     |    2     |   0
1112          *   7  |  file |    2     |   -1     |   2
1113          *   8  |  file |    2     |    0     |   2
1114          *   9  |  file |    2     |    1     |   0
1115          *  10  |  file |    2     |    2     |  -1
1116          * -----+-------+----------+----------+-------
1117          *  11  |  dir  |   -1     |  -1..2   |  -1
1118          *  12  |  dir  |    0     |  -1..2   |   0
1119          *  13  |  dir  |    1     |   -1     |   1
1120          *  14  |  dir  |    1     |    0     |   1
1121          *  15  |  dir  |    1     |    1     |   1 (*)
1122          *  16  |  dir  |    1     |    2     |   0
1123          *  17  |  dir  |    2     |   -1     |   2
1124          *  18  |  dir  |    2     |    0     |   2
1125          *  19  |  dir  |    2     |    1     |   1 (*)
1126          *  20  |  dir  |    2     |    2     |  -1
1127          *
1128          * (*) An exclude pattern interested in a directory does not
1129          * necessarily mean it will exclude all of the directory. In
1130          * wildcard case, it can't decide until looking at individual
1131          * files inside. So don't write such directories off yet.
1132          */
1133
1134         if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1135             positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1136                 return positive;
1137
1138         negative = do_match(entry, base, base_offset, ps, 1);
1139
1140         /* #3, #4, #7, #8, #13, #14, #17, #18 */
1141         if (negative <= entry_not_interesting)
1142                 return positive;
1143
1144         /* #15, #19 */
1145         if (S_ISDIR(entry->mode) &&
1146             positive >= entry_interesting &&
1147             negative == entry_interesting)
1148                 return entry_interesting;
1149
1150         if ((positive == entry_interesting &&
1151              negative >= entry_interesting) || /* #5, #6, #16 */
1152             (positive == all_entries_interesting &&
1153              negative == entry_interesting)) /* #9 */
1154                 return entry_not_interesting;
1155
1156         return all_entries_not_interesting; /* #10, #20 */
1157 }