+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-//
-// Traverse the source tree, parsing all .gitignore files, and print file paths
-// that are ignored by git.
-// The output is suitable to the --exclude-from option of tar.
-// This is useful until the --exclude-vcs-ignores option gets working correctly.
-//
-// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
-// (a lot of code imported from GIT)
-
-#include <assert.h>
-#include <dirent.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
-//
-//---------------------------(IMPORT FROM GIT BEGIN)---------------------------
-
-// Copied from environment.c
-
-static bool ignore_case;
-
-// Copied from git-compat-util.h
-
-/* Sane ctype - no locale, and works with signed chars */
-#undef isascii
-#undef isspace
-#undef isdigit
-#undef isalpha
-#undef isalnum
-#undef isprint
-#undef islower
-#undef isupper
-#undef tolower
-#undef toupper
-#undef iscntrl
-#undef ispunct
-#undef isxdigit
-
-static const unsigned char sane_ctype[256];
-#define GIT_SPACE 0x01
-#define GIT_DIGIT 0x02
-#define GIT_ALPHA 0x04
-#define GIT_GLOB_SPECIAL 0x08
-#define GIT_REGEX_SPECIAL 0x10
-#define GIT_PATHSPEC_MAGIC 0x20
-#define GIT_CNTRL 0x40
-#define GIT_PUNCT 0x80
-#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
-#define isascii(x) (((x) & ~0x7f) == 0)
-#define isspace(x) sane_istest(x,GIT_SPACE)
-#define isdigit(x) sane_istest(x,GIT_DIGIT)
-#define isalpha(x) sane_istest(x,GIT_ALPHA)
-#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
-#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
-#define islower(x) sane_iscase(x, 1)
-#define isupper(x) sane_iscase(x, 0)
-#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
-#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
-#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
- GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
-#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
-#define tolower(x) sane_case((unsigned char)(x), 0x20)
-#define toupper(x) sane_case((unsigned char)(x), 0)
-
-static inline int sane_case(int x, int high)
-{
- if (sane_istest(x, GIT_ALPHA))
- x = (x & ~0x20) | high;
- return x;
-}
-
-static inline int sane_iscase(int x, int is_lower)
-{
- if (!sane_istest(x, GIT_ALPHA))
- return 0;
-
- if (is_lower)
- return (x & 0x20) != 0;
- else
- return (x & 0x20) == 0;
-}
-
-// Copied from ctype.c
-
-enum {
- S = GIT_SPACE,
- A = GIT_ALPHA,
- D = GIT_DIGIT,
- G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
- R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
- P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
- X = GIT_CNTRL,
- U = GIT_PUNCT,
- Z = GIT_CNTRL | GIT_SPACE
-};
-
-static const unsigned char sane_ctype[256] = {
- X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
- X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
- S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
- D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
- P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
- A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
- P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
- A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
- /* Nothing in the 128.. range */
-};
-
-// Copied from hex.c
-
-static const signed char hexval_table[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */
- 0, 1, 2, 3, 4, 5, 6, 7, /* 30-37 */
- 8, 9, -1, -1, -1, -1, -1, -1, /* 38-3f */
- -1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */
- -1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 68-67 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */
- -1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */
- -1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */
- -1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */
- -1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */
- -1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */
- -1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */
- -1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */
-};
-
-// Copied from wildmatch.h
-
-#define WM_CASEFOLD 1
-#define WM_PATHNAME 2
-
-#define WM_NOMATCH 1
-#define WM_MATCH 0
-#define WM_ABORT_ALL -1
-#define WM_ABORT_TO_STARSTAR -2
-
-// Copied from wildmatch.c
-
-typedef unsigned char uchar;
-
-// local modification: remove NEGATE_CLASS(2)
-
-#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
- && *(class) == *(litmatch) \
- && strncmp((char*)class, litmatch, len) == 0)
-
-// local modification: simpilify macros
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-#define ISGRAPH(c) (isprint(c) && !isspace(c))
-#define ISPRINT(c) isprint(c)
-#define ISDIGIT(c) isdigit(c)
-#define ISALNUM(c) isalnum(c)
-#define ISALPHA(c) isalpha(c)
-#define ISCNTRL(c) iscntrl(c)
-#define ISLOWER(c) islower(c)
-#define ISPUNCT(c) ispunct(c)
-#define ISSPACE(c) isspace(c)
-#define ISUPPER(c) isupper(c)
-#define ISXDIGIT(c) isxdigit(c)
-
-/* Match pattern "p" against "text" */
-static int dowild(const uchar *p, const uchar *text, unsigned int flags)
-{
- uchar p_ch;
- const uchar *pattern = p;
-
- for ( ; (p_ch = *p) != '\0'; text++, p++) {
- int matched, match_slash, negated;
- uchar t_ch, prev_ch;
- if ((t_ch = *text) == '\0' && p_ch != '*')
- return WM_ABORT_ALL;
- if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
- t_ch = tolower(t_ch);
- if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
- p_ch = tolower(p_ch);
- switch (p_ch) {
- case '\\':
- /* Literal match with following character. Note that the test
- * in "default" handles the p[1] == '\0' failure case. */
- p_ch = *++p;
- /* FALLTHROUGH */
- default:
- if (t_ch != p_ch)
- return WM_NOMATCH;
- continue;
- case '?':
- /* Match anything but '/'. */
- if ((flags & WM_PATHNAME) && t_ch == '/')
- return WM_NOMATCH;
- continue;
- case '*':
- if (*++p == '*') {
- const uchar *prev_p = p - 2;
- while (*++p == '*') {}
- if (!(flags & WM_PATHNAME))
- /* without WM_PATHNAME, '*' == '**' */
- match_slash = 1;
- else if ((prev_p < pattern || *prev_p == '/') &&
- (*p == '\0' || *p == '/' ||
- (p[0] == '\\' && p[1] == '/'))) {
- /*
- * Assuming we already match 'foo/' and are at
- * <star star slash>, just assume it matches
- * nothing and go ahead match the rest of the
- * pattern with the remaining string. This
- * helps make foo/<*><*>/bar (<> because
- * otherwise it breaks C comment syntax) match
- * both foo/bar and foo/a/bar.
- */
- if (p[0] == '/' &&
- dowild(p + 1, text, flags) == WM_MATCH)
- return WM_MATCH;
- match_slash = 1;
- } else /* WM_PATHNAME is set */
- match_slash = 0;
- } else
- /* without WM_PATHNAME, '*' == '**' */
- match_slash = flags & WM_PATHNAME ? 0 : 1;
- if (*p == '\0') {
- /* Trailing "**" matches everything. Trailing "*" matches
- * only if there are no more slash characters. */
- if (!match_slash) {
- if (strchr((char *)text, '/'))
- return WM_NOMATCH;
- }
- return WM_MATCH;
- } else if (!match_slash && *p == '/') {
- /*
- * _one_ asterisk followed by a slash
- * with WM_PATHNAME matches the next
- * directory
- */
- const char *slash = strchr((char*)text, '/');
- if (!slash)
- return WM_NOMATCH;
- text = (const uchar*)slash;
- /* the slash is consumed by the top-level for loop */
- break;
- }
- while (1) {
- if (t_ch == '\0')
- break;
- /*
- * Try to advance faster when an asterisk is
- * followed by a literal. We know in this case
- * that the string before the literal
- * must belong to "*".
- * If match_slash is false, do not look past
- * the first slash as it cannot belong to '*'.
- */
- if (!is_glob_special(*p)) {
- p_ch = *p;
- if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
- p_ch = tolower(p_ch);
- while ((t_ch = *text) != '\0' &&
- (match_slash || t_ch != '/')) {
- if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
- t_ch = tolower(t_ch);
- if (t_ch == p_ch)
- break;
- text++;
- }
- if (t_ch != p_ch)
- return WM_NOMATCH;
- }
- if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
- if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
- return matched;
- } else if (!match_slash && t_ch == '/')
- return WM_ABORT_TO_STARSTAR;
- t_ch = *++text;
- }
- return WM_ABORT_ALL;
- case '[':
- p_ch = *++p;
- if (p_ch == '^')
- p_ch = '!';
- /* Assign literal 1/0 because of "matched" comparison. */
- negated = p_ch == '!' ? 1 : 0;
- if (negated) {
- /* Inverted character class. */
- p_ch = *++p;
- }
- prev_ch = 0;
- matched = 0;
- do {
- if (!p_ch)
- return WM_ABORT_ALL;
- if (p_ch == '\\') {
- p_ch = *++p;
- if (!p_ch)
- return WM_ABORT_ALL;
- if (t_ch == p_ch)
- matched = 1;
- } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
- p_ch = *++p;
- if (p_ch == '\\') {
- p_ch = *++p;
- if (!p_ch)
- return WM_ABORT_ALL;
- }
- if (t_ch <= p_ch && t_ch >= prev_ch)
- matched = 1;
- else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
- uchar t_ch_upper = toupper(t_ch);
- if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
- matched = 1;
- }
- p_ch = 0; /* This makes "prev_ch" get set to 0. */
- } else if (p_ch == '[' && p[1] == ':') {
- const uchar *s;
- int i;
- for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
- if (!p_ch)
- return WM_ABORT_ALL;
- i = p - s - 1;
- if (i < 0 || p[-1] != ':') {
- /* Didn't find ":]", so treat like a normal set. */
- p = s - 2;
- p_ch = '[';
- if (t_ch == p_ch)
- matched = 1;
- continue;
- }
- if (CC_EQ(s,i, "alnum")) {
- if (ISALNUM(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "alpha")) {
- if (ISALPHA(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "blank")) {
- if (ISBLANK(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "cntrl")) {
- if (ISCNTRL(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "digit")) {
- if (ISDIGIT(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "graph")) {
- if (ISGRAPH(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "lower")) {
- if (ISLOWER(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "print")) {
- if (ISPRINT(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "punct")) {
- if (ISPUNCT(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "space")) {
- if (ISSPACE(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "upper")) {
- if (ISUPPER(t_ch))
- matched = 1;
- else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
- matched = 1;
- } else if (CC_EQ(s,i, "xdigit")) {
- if (ISXDIGIT(t_ch))
- matched = 1;
- } else /* malformed [:class:] string */
- return WM_ABORT_ALL;
- p_ch = 0; /* This makes "prev_ch" get set to 0. */
- } else if (t_ch == p_ch)
- matched = 1;
- } while (prev_ch = p_ch, (p_ch = *++p) != ']');
- if (matched == negated ||
- ((flags & WM_PATHNAME) && t_ch == '/'))
- return WM_NOMATCH;
- continue;
- }
- }
-
- return *text ? WM_NOMATCH : WM_MATCH;
-}
-
-/* Match the "pattern" against the "text" string. */
-static int wildmatch(const char *pattern, const char *text, unsigned int flags)
-{
- // local modification: move WM_CASEFOLD here
- if (ignore_case)
- flags |= WM_CASEFOLD;
-
- return dowild((const uchar*)pattern, (const uchar*)text, flags);
-}
-
-// Copied from dir.h
-
-#define PATTERN_FLAG_NODIR 1
-#define PATTERN_FLAG_ENDSWITH 4
-#define PATTERN_FLAG_MUSTBEDIR 8
-#define PATTERN_FLAG_NEGATIVE 16
-
-// Copied from dir.c
-
-static int fspathncmp(const char *a, const char *b, size_t count)
-{
- return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
-}
-
-static int simple_length(const char *match)
-{
- int len = -1;
-
- for (;;) {
- unsigned char c = *match++;
- len++;
- if (c == '\0' || is_glob_special(c))
- return len;
- }
-}
-
-static int no_wildcard(const char *string)
-{
- return string[simple_length(string)] == '\0';
-}
-
-static void parse_path_pattern(const char **pattern,
- int *patternlen,
- unsigned *flags,
- int *nowildcardlen)
-{
- const char *p = *pattern;
- size_t i, len;
-
- *flags = 0;
- if (*p == '!') {
- *flags |= PATTERN_FLAG_NEGATIVE;
- p++;
- }
- len = strlen(p);
- if (len && p[len - 1] == '/') {
- len--;
- *flags |= PATTERN_FLAG_MUSTBEDIR;
- }
- for (i = 0; i < len; i++) {
- if (p[i] == '/')
- break;
- }
- if (i == len)
- *flags |= PATTERN_FLAG_NODIR;
- *nowildcardlen = simple_length(p);
- /*
- * we should have excluded the trailing slash from 'p' too,
- * but that's one more allocation. Instead just make sure
- * nowildcardlen does not exceed real patternlen
- */
- if (*nowildcardlen > len)
- *nowildcardlen = len;
- if (*p == '*' && no_wildcard(p + 1))
- *flags |= PATTERN_FLAG_ENDSWITH;
- *pattern = p;
- *patternlen = len;
-}
-
-static void trim_trailing_spaces(char *buf)
-{
- char *p, *last_space = NULL;
-
- for (p = buf; *p; p++)
- switch (*p) {
- case ' ':
- if (!last_space)
- last_space = p;
- break;
- case '\\':
- p++;
- if (!*p)
- return;
- /* fallthrough */
- default:
- last_space = NULL;
- }
-
- if (last_space)
- *last_space = '\0';
-}
-
-static int match_basename(const char *basename, int basenamelen,
- const char *pattern, int prefix, int patternlen,
- unsigned flags)
-{
- if (prefix == patternlen) {
- if (patternlen == basenamelen &&
- !fspathncmp(pattern, basename, basenamelen))
- return 1;
- } else if (flags & PATTERN_FLAG_ENDSWITH) {
- /* "*literal" matching against "fooliteral" */
- if (patternlen - 1 <= basenamelen &&
- !fspathncmp(pattern + 1,
- basename + basenamelen - (patternlen - 1),
- patternlen - 1))
- return 1;
- } else {
- // local modification: call wildmatch() directly
- if (!wildmatch(pattern, basename, flags))
- return 1;
- }
- return 0;
-}
-
-static int match_pathname(const char *pathname, int pathlen,
- const char *base, int baselen,
- const char *pattern, int prefix, int patternlen)
-{
- // local modification: remove local variables
-
- /*
- * match with FNM_PATHNAME; the pattern has base implicitly
- * in front of it.
- */
- if (*pattern == '/') {
- pattern++;
- patternlen--;
- prefix--;
- }
-
- /*
- * baselen does not count the trailing slash. base[] may or
- * may not end with a trailing slash though.
- */
- if (pathlen < baselen + 1 ||
- (baselen && pathname[baselen] != '/') ||
- fspathncmp(pathname, base, baselen))
- return 0;
-
- // local modification: simplified because always baselen > 0
- pathname += baselen + 1;
- pathlen -= baselen + 1;
-
- if (prefix) {
- /*
- * if the non-wildcard part is longer than the
- * remaining pathname, surely it cannot match.
- */
- if (prefix > pathlen)
- return 0;
-
- if (fspathncmp(pattern, pathname, prefix))
- return 0;
- pattern += prefix;
- patternlen -= prefix;
- pathname += prefix;
- pathlen -= prefix;
-
- /*
- * If the whole pattern did not have a wildcard,
- * then our prefix match is all we need; we
- * do not need to call fnmatch at all.
- */
- if (!patternlen && !pathlen)
- return 1;
- }
-
- // local modification: call wildmatch() directly
- return !wildmatch(pattern, pathname, WM_PATHNAME);
-}
-
-// Copied from git/utf8.c
-
-static const char utf8_bom[] = "\357\273\277";
-
-//----------------------------(IMPORT FROM GIT END)----------------------------
-
-struct pattern {
- unsigned int flags;
- int nowildcardlen;
- int patternlen;
- int dirlen;
- char pattern[];
-};
-
-static struct pattern **pattern_list;
-static int nr_patterns, alloced_patterns;
-
-// Remember the number of patterns at each directory level
-static int *nr_patterns_at;
-// Track the current/max directory level;
-static int depth, max_depth;
-static bool debug_on;
-static FILE *out_fp, *stat_fp;
-static char *prefix = "";
-static char *progname;
-
-static void __attribute__((noreturn)) perror_exit(const char *s)
-{
- perror(s);
-
- exit(EXIT_FAILURE);
-}
-
-static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
-{
- va_list args;
-
- fprintf(stderr, "%s: error: ", progname);
-
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
-
- exit(EXIT_FAILURE);
-}
-
-static void debug(const char *fmt, ...)
-{
- va_list args;
- int i;
-
- if (!debug_on)
- return;
-
- fprintf(stderr, "[DEBUG] ");
-
- for (i = 0; i < depth * 2; i++)
- fputc(' ', stderr);
-
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
-}
-
-static void *xrealloc(void *ptr, size_t size)
-{
- ptr = realloc(ptr, size);
- if (!ptr)
- perror_exit(progname);
-
- return ptr;
-}
-
-static void *xmalloc(size_t size)
-{
- return xrealloc(NULL, size);
-}
-
-// similar to last_matching_pattern_from_list() in GIT
-static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
-{
- int i;
-
- // Search in the reverse order because the last matching pattern wins.
- for (i = nr_patterns - 1; i >= 0; i--) {
- struct pattern *p = pattern_list[i];
- unsigned int flags = p->flags;
- const char *gitignore_dir = p->pattern + p->patternlen + 1;
- bool ignored;
-
- if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
- continue;
-
- if (flags & PATTERN_FLAG_NODIR) {
- if (!match_basename(path + dirlen + 1,
- pathlen - dirlen - 1,
- p->pattern,
- p->nowildcardlen,
- p->patternlen,
- p->flags))
- continue;
- } else {
- if (!match_pathname(path, pathlen,
- gitignore_dir, p->dirlen,
- p->pattern,
- p->nowildcardlen,
- p->patternlen))
- continue;
- }
-
- debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
- flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
- flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
- gitignore_dir);
-
- ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
- if (ignored)
- debug("Ignore: %s\n", path);
-
- return ignored;
- }
-
- debug("%s: no match\n", path);
-
- return false;
-}
-
-static void add_pattern(const char *string, const char *dir, int dirlen)
-{
- struct pattern *p;
- int patternlen, nowildcardlen;
- unsigned int flags;
-
- parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
-
- if (patternlen == 0)
- return;
-
- p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);
-
- memcpy(p->pattern, string, patternlen);
- p->pattern[patternlen] = 0;
- memcpy(p->pattern + patternlen + 1, dir, dirlen);
- p->pattern[patternlen + 1 + dirlen] = 0;
-
- p->patternlen = patternlen;
- p->nowildcardlen = nowildcardlen;
- p->dirlen = dirlen;
- p->flags = flags;
-
- debug("Add pattern: %s%s%s\n",
- flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
- flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");
-
- if (nr_patterns >= alloced_patterns) {
- alloced_patterns += 128;
- pattern_list = xrealloc(pattern_list,
- sizeof(*pattern_list) * alloced_patterns);
- }
-
- pattern_list[nr_patterns++] = p;
-}
-
-// similar to add_patterns_from_buffer() in GIT
-static void add_patterns_from_gitignore(const char *dir, int dirlen)
-{
- struct stat st;
- char path[PATH_MAX], *buf, *entry;
- size_t size;
- int fd, pathlen, i;
-
- pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
- if (pathlen >= sizeof(path))
- error_exit("%s: too long path was truncated\n", path);
-
- fd = open(path, O_RDONLY | O_NOFOLLOW);
- if (fd < 0) {
- if (errno != ENOENT)
- return perror_exit(path);
- return;
- }
-
- if (fstat(fd, &st) < 0)
- perror_exit(path);
-
- size = st.st_size;
-
- buf = xmalloc(size + 1);
- if (read(fd, buf, st.st_size) != st.st_size)
- perror_exit(path);
-
- buf[st.st_size] = '\n';
- if (close(fd))
- perror_exit(path);
-
- debug("Parse %s\n", path);
-
- entry = buf;
-
- // skip utf8 bom
- if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
- entry += strlen(utf8_bom);
-
- for (i = entry - buf; i < size; i++) {
- if (buf[i] == '\n') {
- if (entry != buf + i && entry[0] != '#') {
- buf[i - (i && buf[i-1] == '\r')] = 0;
- trim_trailing_spaces(entry);
- add_pattern(entry, dir, dirlen);
- }
- entry = buf + i + 1;
- }
- }
-
- free(buf);
-}
-
-// Save the current number of patterns and increment the depth
-static void increment_depth(void)
-{
- if (depth >= max_depth) {
- max_depth += 1;
- nr_patterns_at = xrealloc(nr_patterns_at,
- sizeof(*nr_patterns_at) * max_depth);
- }
-
- nr_patterns_at[depth] = nr_patterns;
- depth++;
-}
-
-// Decrement the depth, and free up the patterns of this directory level.
-static void decrement_depth(void)
-{
- depth--;
- assert(depth >= 0);
-
- while (nr_patterns > nr_patterns_at[depth])
- free(pattern_list[--nr_patterns]);
-}
-
-static void print_path(const char *path)
-{
- // The path always starts with "./"
- assert(strlen(path) >= 2);
-
- // Replace the root directory with a preferred prefix.
- // This is useful for the tar command.
- fprintf(out_fp, "%s%s\n", prefix, path + 2);
-}
-
-static void print_stat(const char *path, struct stat *st)
-{
- if (!stat_fp)
- return;
-
- if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
- return;
-
- assert(strlen(path) >= 2);
-
- fprintf(stat_fp, "%c %9ld %10ld %s\n",
- S_ISLNK(st->st_mode) ? 'l' : '-',
- st->st_size, st->st_mtim.tv_sec, path + 2);
-}
-
-// Traverse the entire directory tree, parsing .gitignore files.
-// Print file paths that are not tracked by git.
-//
-// Return true if all files under the directory are ignored, false otherwise.
-static bool traverse_directory(const char *dir, int dirlen)
-{
- bool all_ignored = true;
- DIR *dirp;
-
- debug("Enter[%d]: %s\n", depth, dir);
- increment_depth();
-
- add_patterns_from_gitignore(dir, dirlen);
-
- dirp = opendir(dir);
- if (!dirp)
- perror_exit(dir);
-
- while (1) {
- struct dirent *d;
- struct stat st;
- char path[PATH_MAX];
- int pathlen;
- bool ignored;
-
- errno = 0;
- d = readdir(dirp);
- if (!d) {
- if (errno)
- perror_exit(dir);
- break;
- }
-
- if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, "."))
- continue;
-
- pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
- if (pathlen >= sizeof(path))
- error_exit("%s: too long path was truncated\n", path);
-
- if (lstat(path, &st) < 0)
- perror_exit(path);
-
- if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) ||
- is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) {
- ignored = true;
- } else {
- if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode))
- // If all the files in a directory are ignored,
- // let's ignore that directory as well. This
- // will avoid empty directories in the tarball.
- ignored = traverse_directory(path, pathlen);
- else
- ignored = false;
- }
-
- if (ignored) {
- print_path(path);
- } else {
- print_stat(path, &st);
- all_ignored = false;
- }
- }
-
- if (closedir(dirp))
- perror_exit(dir);
-
- decrement_depth();
- debug("Leave[%d]: %s\n", depth, dir);
-
- return all_ignored;
-}
-
-static void usage(void)
-{
- fprintf(stderr,
- "usage: %s [options]\n"
- "\n"
- "Show files that are ignored by git\n"
- "\n"
- "options:\n"
- " -d, --debug print debug messages to stderr\n"
- " -e, --exclude PATTERN add the given exclude pattern\n"
- " -h, --help show this help message and exit\n"
- " -i, --ignore-case Ignore case differences between the patterns and the files\n"
- " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n"
- " -p, --prefix PREFIX prefix added to each path (default: empty string)\n"
- " -r, --rootdir DIR root of the source tree (default: current working directory)\n"
- " -s, --stat FILE output the file stat of non-ignored files to a file\n",
- progname);
-}
-
-static void open_output(const char *pathname, FILE **fp)
-{
- if (strcmp(pathname, "-")) {
- *fp = fopen(pathname, "w");
- if (!*fp)
- perror_exit(pathname);
- } else {
- *fp = stdout;
- }
-}
-
-static void close_output(const char *pathname, FILE *fp)
-{
- fflush(fp);
-
- if (ferror(fp))
- error_exit("not all data was written to the output\n");
-
- if (fclose(fp))
- perror_exit(pathname);
-}
-
-int main(int argc, char *argv[])
-{
- const char *output = "-";
- const char *rootdir = ".";
- const char *stat = NULL;
-
- progname = strrchr(argv[0], '/');
- if (progname)
- progname++;
- else
- progname = argv[0];
-
- while (1) {
- static struct option long_options[] = {
- {"debug", no_argument, NULL, 'd'},
- {"help", no_argument, NULL, 'h'},
- {"ignore-case", no_argument, NULL, 'i'},
- {"output", required_argument, NULL, 'o'},
- {"prefix", required_argument, NULL, 'p'},
- {"rootdir", required_argument, NULL, 'r'},
- {"stat", required_argument, NULL, 's'},
- {"exclude", required_argument, NULL, 'x'},
- {},
- };
-
- int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 'd':
- debug_on = true;
- break;
- case 'h':
- usage();
- exit(0);
- case 'i':
- ignore_case = true;
- break;
- case 'o':
- output = optarg;
- break;
- case 'p':
- prefix = optarg;
- break;
- case 'r':
- rootdir = optarg;
- break;
- case 's':
- stat = optarg;
- break;
- case 'x':
- add_pattern(optarg, ".", strlen("."));
- break;
- case '?':
- usage();
- /* fallthrough */
- default:
- exit(EXIT_FAILURE);
- }
- }
-
- open_output(output, &out_fp);
- if (stat && stat[0])
- open_output(stat, &stat_fp);
-
- if (chdir(rootdir))
- perror_exit(rootdir);
-
- add_pattern(".git/", ".", strlen("."));
-
- if (traverse_directory(".", strlen(".")))
- print_path("./");
-
- assert(depth == 0);
-
- while (nr_patterns > 0)
- free(pattern_list[--nr_patterns]);
- free(pattern_list);
- free(nr_patterns_at);
-
- close_output(output, out_fp);
- if (stat_fp)
- close_output(stat, stat_fp);
-
- return 0;
-}