Imported Upstream version 2.17.1 upstream/2.17.1
authorDongHun Kwak <dh0128.kwak@samsung.com>
Wed, 3 Mar 2021 06:16:32 +0000 (15:16 +0900)
committerDongHun Kwak <dh0128.kwak@samsung.com>
Wed, 3 Mar 2021 06:16:32 +0000 (15:16 +0900)
30 files changed:
Documentation/RelNotes/2.13.7.txt [new file with mode: 0644]
Documentation/RelNotes/2.14.4.txt [new file with mode: 0644]
Documentation/RelNotes/2.15.2.txt
Documentation/RelNotes/2.16.4.txt [new file with mode: 0644]
Documentation/RelNotes/2.17.1.txt [new file with mode: 0644]
GIT-VERSION-GEN
RelNotes
apply.c
builtin/fsck.c
builtin/index-pack.c
builtin/submodule--helper.c
builtin/unpack-objects.c
builtin/update-index.c
cache.h
dir.c
fsck.c
fsck.h
git-compat-util.h
git-submodule.sh
path.c
read-cache.c
sha1_file.c
submodule-config.c
submodule-config.h
t/helper/test-path-utils.c
t/lib-pack.sh
t/t0060-path-utils.sh
t/t7415-submodule-names.sh [new file with mode: 0755]
utf8.c
utf8.h

diff --git a/Documentation/RelNotes/2.13.7.txt b/Documentation/RelNotes/2.13.7.txt
new file mode 100644 (file)
index 0000000..09fc014
--- /dev/null
@@ -0,0 +1,20 @@
+Git v2.13.7 Release Notes
+=========================
+
+Fixes since v2.13.6
+-------------------
+
+ * Submodule "names" come from the untrusted .gitmodules file, but we
+   blindly append them to $GIT_DIR/modules to create our on-disk repo
+   paths. This means you can do bad things by putting "../" into the
+   name. We now enforce some rules for submodule names which will cause
+   Git to ignore these malicious names (CVE-2018-11235).
+
+   Credit for finding this vulnerability and the proof of concept from
+   which the test script was adapted goes to Etienne Stalmans.
+
+ * It was possible to trick the code that sanity-checks paths on NTFS
+   into reading random piece of memory (CVE-2018-11233).
+
+Credit for fixing for these bugs goes to Jeff King, Johannes
+Schindelin and others.
diff --git a/Documentation/RelNotes/2.14.4.txt b/Documentation/RelNotes/2.14.4.txt
new file mode 100644 (file)
index 0000000..97755a8
--- /dev/null
@@ -0,0 +1,5 @@
+Git v2.14.4 Release Notes
+=========================
+
+This release is to forward-port the fixes made in the v2.13.7 version
+of Git.  See its release notes for details.
index 9f7e28f..b480e56 100644 (file)
@@ -43,5 +43,8 @@ Fixes since v2.15.1
  * Clarify and enhance documentation for "merge-base --fork-point", as
    it was clear what it computed but not why/what for.
 
+ * This release also contains the fixes made in the v2.13.7 version of
+   Git.  See its release notes for details.
+
 
 Also contains various documentation updates and code clean-ups.
diff --git a/Documentation/RelNotes/2.16.4.txt b/Documentation/RelNotes/2.16.4.txt
new file mode 100644 (file)
index 0000000..6be538b
--- /dev/null
@@ -0,0 +1,5 @@
+Git v2.16.4 Release Notes
+=========================
+
+This release is to forward-port the fixes made in the v2.13.7 version
+of Git.  See its release notes for details.
diff --git a/Documentation/RelNotes/2.17.1.txt b/Documentation/RelNotes/2.17.1.txt
new file mode 100644 (file)
index 0000000..e01384f
--- /dev/null
@@ -0,0 +1,16 @@
+Git v2.17.1 Release Notes
+=========================
+
+Fixes since v2.17
+-----------------
+
+ * This release contains the same fixes made in the v2.13.7 version of
+   Git, covering CVE-2018-11233 and 11235, and forward-ported to
+   v2.14.4, v2.15.2 and v2.16.4 releases.  See release notes to
+   v2.13.7 for details.
+
+ * In addition to the above fixes, this release has support on the
+   server side to reject pushes to repositories that attempt to create
+   such problematic .gitmodules file etc. as tracked contents, to help
+   hosting sites protect their customers by preventing malicious
+   contents from spreading.
index 1b4624c..4baa413 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=GIT-VERSION-FILE
-DEF_VER=v2.17.0
+DEF_VER=v2.17.1
 
 LF='
 '
index 7a6dc06..cde891d 120000 (symlink)
--- a/RelNotes
+++ b/RelNotes
@@ -1 +1 @@
-Documentation/RelNotes/2.17.0.txt
\ No newline at end of file
+Documentation/RelNotes/2.17.1.txt
\ No newline at end of file
diff --git a/apply.c b/apply.c
index 134dc7b..2d1cfe4 100644 (file)
--- a/apply.c
+++ b/apply.c
@@ -3860,9 +3860,9 @@ static int check_unsafe_path(struct patch *patch)
        if (!patch->is_delete)
                new_name = patch->new_name;
 
-       if (old_name && !verify_path(old_name))
+       if (old_name && !verify_path(old_name, patch->old_mode))
                return error(_("invalid path '%s'"), old_name);
-       if (new_name && !verify_path(new_name))
+       if (new_name && !verify_path(new_name, patch->new_mode))
                return error(_("invalid path '%s'"), new_name);
        return 0;
 }
index ef78c6c..028aba5 100644 (file)
@@ -337,7 +337,7 @@ static void check_connectivity(void)
        }
 }
 
-static int fsck_obj(struct object *obj)
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
 {
        int err;
 
@@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj)
 
        if (fsck_walk(obj, NULL, &fsck_obj_options))
                objerror(obj, "broken links");
-       err = fsck_object(obj, NULL, 0, &fsck_obj_options);
+       err = fsck_object(obj, buffer, size, &fsck_obj_options);
        if (err)
                goto out;
 
@@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type,
        }
        obj->flags &= ~(REACHABLE | SEEN);
        obj->flags |= HAS_OBJ;
-       return fsck_obj(obj);
+       return fsck_obj(obj, buffer, size);
 }
 
 static int default_refs;
@@ -504,44 +504,42 @@ static void get_default_heads(void)
        }
 }
 
-static struct object *parse_loose_object(const struct object_id *oid,
-                                        const char *path)
+static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 {
        struct object *obj;
-       void *contents;
        enum object_type type;
        unsigned long size;
+       void *contents;
        int eaten;
 
-       if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0)
-               return NULL;
+       if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) {
+               errors_found |= ERROR_OBJECT;
+               error("%s: object corrupt or missing: %s",
+                     oid_to_hex(oid), path);
+               return 0; /* keep checking other objects */
+       }
 
        if (!contents && type != OBJ_BLOB)
-               die("BUG: read_loose_object streamed a non-blob");
+               BUG("read_loose_object streamed a non-blob");
 
        obj = parse_object_buffer(oid, type, size, contents, &eaten);
-
-       if (!eaten)
-               free(contents);
-       return obj;
-}
-
-static int fsck_loose(const struct object_id *oid, const char *path, void *data)
-{
-       struct object *obj = parse_loose_object(oid, path);
-
        if (!obj) {
                errors_found |= ERROR_OBJECT;
-               error("%s: object corrupt or missing: %s",
+               error("%s: object could not be parsed: %s",
                      oid_to_hex(oid), path);
+               if (!eaten)
+                       free(contents);
                return 0; /* keep checking other objects */
        }
 
        obj->flags &= ~(REACHABLE | SEEN);
        obj->flags |= HAS_OBJ;
-       if (fsck_obj(obj))
+       if (fsck_obj(obj, contents, size))
                errors_found |= ERROR_OBJECT;
-       return 0;
+
+       if (!eaten)
+               free(contents);
+       return 0; /* keep checking other objects, even if we saw an error */
 }
 
 static int fsck_cruft(const char *basename, const char *path, void *data)
@@ -750,6 +748,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
                        }
                        stop_progress(&progress);
                }
+
+               if (fsck_finish(&fsck_obj_options))
+                       errors_found |= ERROR_OBJECT;
        }
 
        for (i = 0; i < argc; i++) {
index bda84a9..7b2f7c0 100644 (file)
@@ -836,6 +836,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
                                blob->object.flags |= FLAG_CHECKED;
                        else
                                die(_("invalid blob object %s"), oid_to_hex(oid));
+                       if (do_fsck_object &&
+                           fsck_object(&blob->object, (void *)data, size, &fsck_options))
+                               die(_("fsck error in packed object"));
                } else {
                        struct object *obj;
                        int eaten;
@@ -853,7 +856,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
                                die(_("invalid %s"), type_name(type));
                        if (do_fsck_object &&
                            fsck_object(obj, buf, size, &fsck_options))
-                               die(_("Error in object"));
+                               die(_("fsck error in packed object"));
                        if (strict && fsck_walk(obj, NULL, &fsck_options))
                                die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
 
@@ -1477,6 +1480,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
        } else
                chmod(final_index_name, 0444);
 
+       if (do_fsck_object)
+               add_packed_git(final_index_name, strlen(final_index_name), 0);
+
        if (!from_stdin) {
                printf("%s\n", sha1_to_hex(hash));
        } else {
@@ -1818,6 +1824,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                      pack_hash);
        else
                close(input_fd);
+
+       if (do_fsck_object && fsck_finish(&fsck_options))
+               die(_("fsck error in pack objects"));
+
        free(objects);
        strbuf_release(&index_name_buf);
        if (pack_name == NULL)
index 6ba8587..4f35c98 100644 (file)
@@ -1817,6 +1817,29 @@ static int is_active(int argc, const char **argv, const char *prefix)
        return !is_submodule_active(the_repository, argv[1]);
 }
 
+/*
+ * Exit non-zero if any of the submodule names given on the command line is
+ * invalid. If no names are given, filter stdin to print only valid names
+ * (which is primarily intended for testing).
+ */
+static int check_name(int argc, const char **argv, const char *prefix)
+{
+       if (argc > 1) {
+               while (*++argv) {
+                       if (check_submodule_name(*argv) < 0)
+                               return 1;
+               }
+       } else {
+               struct strbuf buf = STRBUF_INIT;
+               while (strbuf_getline(&buf, stdin) != EOF) {
+                       if (!check_submodule_name(buf.buf))
+                               printf("%s\n", buf.buf);
+               }
+               strbuf_release(&buf);
+       }
+       return 0;
+}
+
 #define SUPPORT_SUPER_PREFIX (1<<0)
 
 struct cmd_struct {
@@ -1842,6 +1865,7 @@ static struct cmd_struct commands[] = {
        {"push-check", push_check, 0},
        {"absorb-git-dirs", absorb_git_dirs, SUPPORT_SUPER_PREFIX},
        {"is-active", is_active, 0},
+       {"check-name", check_name, 0},
 };
 
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
index 6620fee..c8f1406 100644 (file)
@@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op
        if (!obj_buf)
                die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
        if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
-               die("Error in object");
+               die("fsck error in packed object");
        fsck_options.walk = check_object;
        if (fsck_walk(obj, NULL, &fsck_options))
                die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
@@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
        unpack_all();
        the_hash_algo->update_fn(&ctx, buffer, offset);
        the_hash_algo->final_fn(oid.hash, &ctx);
-       if (strict)
+       if (strict) {
                write_rest();
+               if (fsck_finish(&fsck_options))
+                       die(_("fsck error in pack objects"));
+       }
        if (hashcmp(fill(the_hash_algo->rawsz), oid.hash))
                die("final sha1 did not match");
        use(the_hash_algo->rawsz);
index 58d1c2d..1af8a00 100644 (file)
@@ -364,10 +364,9 @@ static int process_directory(const char *path, int len, struct stat *st)
        return error("%s: is a directory - add files inside instead", path);
 }
 
-static int process_path(const char *path)
+static int process_path(const char *path, struct stat *st, int stat_errno)
 {
        int pos, len;
-       struct stat st;
        const struct cache_entry *ce;
 
        len = strlen(path);
@@ -391,13 +390,13 @@ static int process_path(const char *path)
         * First things first: get the stat information, to decide
         * what to do about the pathname!
         */
-       if (lstat(path, &st) < 0)
-               return process_lstat_error(path, errno);
+       if (stat_errno)
+               return process_lstat_error(path, stat_errno);
 
-       if (S_ISDIR(st.st_mode))
-               return process_directory(path, len, &st);
+       if (S_ISDIR(st->st_mode))
+               return process_directory(path, len, st);
 
-       return add_one_path(ce, path, len, &st);
+       return add_one_path(ce, path, len, st);
 }
 
 static int add_cacheinfo(unsigned int mode, const struct object_id *oid,
@@ -406,7 +405,7 @@ static int add_cacheinfo(unsigned int mode, const struct object_id *oid,
        int size, len, option;
        struct cache_entry *ce;
 
-       if (!verify_path(path))
+       if (!verify_path(path, mode))
                return error("Invalid path '%s'", path);
 
        len = strlen(path);
@@ -449,7 +448,18 @@ static void chmod_path(char flip, const char *path)
 
 static void update_one(const char *path)
 {
-       if (!verify_path(path)) {
+       int stat_errno = 0;
+       struct stat st;
+
+       if (mark_valid_only || mark_skip_worktree_only || force_remove ||
+           mark_fsmonitor_only)
+               st.st_mode = 0;
+       else if (lstat(path, &st) < 0) {
+               st.st_mode = 0;
+               stat_errno = errno;
+       } /* else stat is valid */
+
+       if (!verify_path(path, st.st_mode)) {
                fprintf(stderr, "Ignoring path %s\n", path);
                return;
        }
@@ -475,7 +485,7 @@ static void update_one(const char *path)
                report("remove '%s'", path);
                return;
        }
-       if (process_path(path))
+       if (process_path(path, &st, stat_errno))
                die("Unable to process path %s", path);
        report("add '%s'", path);
 }
@@ -545,7 +555,7 @@ static void read_index_info(int nul_term_line)
                        path_name = uq.buf;
                }
 
-               if (!verify_path(path_name)) {
+               if (!verify_path(path_name, mode)) {
                        fprintf(stderr, "Ignoring path %s\n", path_name);
                        continue;
                }
diff --git a/cache.h b/cache.h
index a61b2d3..0323853 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -634,7 +634,7 @@ extern int unmerged_index(const struct index_state *);
  */
 extern int index_has_changes(struct strbuf *sb);
 
-extern int verify_path(const char *path);
+extern int verify_path(const char *path, unsigned mode);
 extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change);
 extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
 extern void adjust_dirname_case(struct index_state *istate, char *name);
@@ -1165,7 +1165,15 @@ int normalize_path_copy(char *dst, const char *src);
 int longest_ancestor_length(const char *path, struct string_list *prefixes);
 char *strip_path_suffix(const char *path, const char *suffix);
 int daemon_avoid_alias(const char *path);
-extern int is_ntfs_dotgit(const char *name);
+
+/*
+ * These functions match their is_hfs_dotgit() counterparts; see utf8.h for
+ * details.
+ */
+int is_ntfs_dotgit(const char *name);
+int is_ntfs_dotgitmodules(const char *name);
+int is_ntfs_dotgitignore(const char *name);
+int is_ntfs_dotgitattributes(const char *name);
 
 /*
  * Returns true iff "str" could be confused as a command-line option when
diff --git a/dir.c b/dir.c
index dedbf5d..41aac3b 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -2992,7 +2992,7 @@ void untracked_cache_invalidate_path(struct index_state *istate,
 {
        if (!istate->untracked || !istate->untracked->root)
                return;
-       if (!safe_path && !verify_path(path))
+       if (!safe_path && !verify_path(path, 0))
                return;
        invalidate_one_component(istate->untracked, istate->untracked->root,
                                 path, strlen(path));
diff --git a/fsck.c b/fsck.c
index 5c8c12d..9339f31 100644 (file)
--- a/fsck.c
+++ b/fsck.c
 #include "utf8.h"
 #include "sha1-array.h"
 #include "decorate.h"
+#include "oidset.h"
+#include "packfile.h"
+#include "submodule-config.h"
+#include "config.h"
+
+static struct oidset gitmodules_found = OIDSET_INIT;
+static struct oidset gitmodules_done = OIDSET_INIT;
 
 #define FSCK_FATAL -1
 #define FSCK_INFO -2
@@ -44,6 +51,7 @@
        FUNC(MISSING_TAG_ENTRY, ERROR) \
        FUNC(MISSING_TAG_OBJECT, ERROR) \
        FUNC(MISSING_TREE, ERROR) \
+       FUNC(MISSING_TREE_OBJECT, ERROR) \
        FUNC(MISSING_TYPE, ERROR) \
        FUNC(MISSING_TYPE_ENTRY, ERROR) \
        FUNC(MULTIPLE_AUTHORS, ERROR) \
        FUNC(TREE_NOT_SORTED, ERROR) \
        FUNC(UNKNOWN_TYPE, ERROR) \
        FUNC(ZERO_PADDED_DATE, ERROR) \
+       FUNC(GITMODULES_MISSING, ERROR) \
+       FUNC(GITMODULES_BLOB, ERROR) \
+       FUNC(GITMODULES_PARSE, ERROR) \
+       FUNC(GITMODULES_NAME, ERROR) \
+       FUNC(GITMODULES_SYMLINK, ERROR) \
        /* warnings */ \
        FUNC(BAD_FILEMODE, WARN) \
        FUNC(EMPTY_NAME, WARN) \
@@ -561,10 +574,18 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
                has_empty_name |= !*name;
                has_dot |= !strcmp(name, ".");
                has_dotdot |= !strcmp(name, "..");
-               has_dotgit |= (!strcmp(name, ".git") ||
-                              is_hfs_dotgit(name) ||
-                              is_ntfs_dotgit(name));
+               has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
                has_zero_pad |= *(char *)desc.buffer == '0';
+
+               if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
+                       if (!S_ISLNK(mode))
+                               oidset_insert(&gitmodules_found, oid);
+                       else
+                               retval += report(options, &item->object,
+                                                FSCK_MSG_GITMODULES_SYMLINK,
+                                                ".gitmodules is a symbolic link");
+               }
+
                if (update_tree_entry_gently(&desc)) {
                        retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
                        break;
@@ -901,6 +922,66 @@ static int fsck_tag(struct tag *tag, const char *data,
        return fsck_tag_buffer(tag, data, size, options);
 }
 
+struct fsck_gitmodules_data {
+       struct object *obj;
+       struct fsck_options *options;
+       int ret;
+};
+
+static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
+{
+       struct fsck_gitmodules_data *data = vdata;
+       const char *subsection, *key;
+       int subsection_len;
+       char *name;
+
+       if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
+           !subsection)
+               return 0;
+
+       name = xmemdupz(subsection, subsection_len);
+       if (check_submodule_name(name) < 0)
+               data->ret |= report(data->options, data->obj,
+                                   FSCK_MSG_GITMODULES_NAME,
+                                   "disallowed submodule name: %s",
+                                   name);
+       free(name);
+
+       return 0;
+}
+
+static int fsck_blob(struct blob *blob, const char *buf,
+                    unsigned long size, struct fsck_options *options)
+{
+       struct fsck_gitmodules_data data;
+
+       if (!oidset_contains(&gitmodules_found, &blob->object.oid))
+               return 0;
+       oidset_insert(&gitmodules_done, &blob->object.oid);
+
+       if (!buf) {
+               /*
+                * A missing buffer here is a sign that the caller found the
+                * blob too gigantic to load into memory. Let's just consider
+                * that an error.
+                */
+               return report(options, &blob->object,
+                             FSCK_MSG_GITMODULES_PARSE,
+                             ".gitmodules too large to parse");
+       }
+
+       data.obj = &blob->object;
+       data.options = options;
+       data.ret = 0;
+       if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
+                               ".gitmodules", buf, size, &data))
+               data.ret |= report(options, &blob->object,
+                                  FSCK_MSG_GITMODULES_PARSE,
+                                  "could not parse gitmodules blob");
+
+       return data.ret;
+}
+
 int fsck_object(struct object *obj, void *data, unsigned long size,
        struct fsck_options *options)
 {
@@ -908,7 +989,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
                return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
 
        if (obj->type == OBJ_BLOB)
-               return 0;
+               return fsck_blob((struct blob *)obj, data, size, options);
        if (obj->type == OBJ_TREE)
                return fsck_tree((struct tree *) obj, options);
        if (obj->type == OBJ_COMMIT)
@@ -932,3 +1013,52 @@ int fsck_error_function(struct fsck_options *o,
        error("object %s: %s", describe_object(o, obj), message);
        return 1;
 }
+
+int fsck_finish(struct fsck_options *options)
+{
+       int ret = 0;
+       struct oidset_iter iter;
+       const struct object_id *oid;
+
+       oidset_iter_init(&gitmodules_found, &iter);
+       while ((oid = oidset_iter_next(&iter))) {
+               struct blob *blob;
+               enum object_type type;
+               unsigned long size;
+               char *buf;
+
+               if (oidset_contains(&gitmodules_done, oid))
+                       continue;
+
+               blob = lookup_blob(oid);
+               if (!blob) {
+                       ret |= report(options, &blob->object,
+                                     FSCK_MSG_GITMODULES_BLOB,
+                                     "non-blob found at .gitmodules");
+                       continue;
+               }
+
+               buf = read_sha1_file(oid->hash, &type, &size);
+               if (!buf) {
+                       if (is_promisor_object(&blob->object.oid))
+                               continue;
+                       ret |= report(options, &blob->object,
+                                     FSCK_MSG_GITMODULES_MISSING,
+                                     "unable to read .gitmodules blob");
+                       continue;
+               }
+
+               if (type == OBJ_BLOB)
+                       ret |= fsck_blob(blob, buf, size, options);
+               else
+                       ret |= report(options, &blob->object,
+                                     FSCK_MSG_GITMODULES_BLOB,
+                                     "non-blob found at .gitmodules");
+               free(buf);
+       }
+
+
+       oidset_clear(&gitmodules_found);
+       oidset_clear(&gitmodules_done);
+       return ret;
+}
diff --git a/fsck.h b/fsck.h
index 4525510..c3cf5e0 100644 (file)
--- a/fsck.h
+++ b/fsck.h
@@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
 int fsck_object(struct object *obj, void *data, unsigned long size,
        struct fsck_options *options);
 
+/*
+ * Some fsck checks are context-dependent, and may end up queued; run this
+ * after completing all fsck_object() calls in order to resolve any remaining
+ * checks.
+ */
+int fsck_finish(struct fsck_options *options);
+
 #endif
index 07e3832..76cd42b 100644 (file)
@@ -1001,6 +1001,23 @@ static inline int sane_iscase(int x, int is_lower)
                return (x & 0x20) == 0;
 }
 
+/*
+ * Like skip_prefix, but compare case-insensitively. Note that the comparison
+ * is done via tolower(), so it is strictly ASCII (no multi-byte characters or
+ * locale-specific conversions).
+ */
+static inline int skip_iprefix(const char *str, const char *prefix,
+                              const char **out)
+{
+       do {
+               if (!*prefix) {
+                       *out = str;
+                       return 1;
+               }
+       } while (tolower(*str++) == tolower(*prefix++));
+       return 0;
+}
+
 static inline int strtoul_ui(char const *s, int base, unsigned int *result)
 {
        unsigned long ul;
index 2491496..7e27d0a 100755 (executable)
@@ -229,6 +229,11 @@ Use -f if you really want to add it." >&2
                sm_name="$sm_path"
        fi
 
+       if ! git submodule--helper check-name "$sm_name"
+       then
+               die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
+       fi
+
        # perhaps the path exists and is already a git repo, else clone it
        if test -e "$sm_path"
        then
diff --git a/path.c b/path.c
index da8b655..4c4a751 100644 (file)
--- a/path.c
+++ b/path.c
@@ -1305,7 +1305,7 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip)
 
 int is_ntfs_dotgit(const char *name)
 {
-       int len;
+       size_t len;
 
        for (len = 0; ; len++)
                if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) {
@@ -1322,6 +1322,90 @@ int is_ntfs_dotgit(const char *name)
                }
 }
 
+static int is_ntfs_dot_generic(const char *name,
+                              const char *dotgit_name,
+                              size_t len,
+                              const char *dotgit_ntfs_shortname_prefix)
+{
+       int saw_tilde;
+       size_t i;
+
+       if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) {
+               i = len + 1;
+only_spaces_and_periods:
+               for (;;) {
+                       char c = name[i++];
+                       if (!c)
+                               return 1;
+                       if (c != ' ' && c != '.')
+                               return 0;
+               }
+       }
+
+       /*
+        * Is it a regular NTFS short name, i.e. shortened to 6 characters,
+        * followed by ~1, ... ~4?
+        */
+       if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' &&
+           name[7] >= '1' && name[7] <= '4') {
+               i = 8;
+               goto only_spaces_and_periods;
+       }
+
+       /*
+        * Is it a fall-back NTFS short name (for details, see
+        * https://en.wikipedia.org/wiki/8.3_filename?
+        */
+       for (i = 0, saw_tilde = 0; i < 8; i++)
+               if (name[i] == '\0')
+                       return 0;
+               else if (saw_tilde) {
+                       if (name[i] < '0' || name[i] > '9')
+                               return 0;
+               } else if (name[i] == '~') {
+                       if (name[++i] < '1' || name[i] > '9')
+                               return 0;
+                       saw_tilde = 1;
+               } else if (i >= 6)
+                       return 0;
+               else if (name[i] < 0) {
+                       /*
+                        * We know our needles contain only ASCII, so we clamp
+                        * here to make the results of tolower() sane.
+                        */
+                       return 0;
+               } else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i])
+                       return 0;
+
+       goto only_spaces_and_periods;
+}
+
+/*
+ * Inline helper to make sure compiler resolves strlen() on literals at
+ * compile time.
+ */
+static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name,
+                                 const char *dotgit_ntfs_shortname_prefix)
+{
+       return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name),
+                                  dotgit_ntfs_shortname_prefix);
+}
+
+int is_ntfs_dotgitmodules(const char *name)
+{
+       return is_ntfs_dot_str(name, "gitmodules", "gi7eba");
+}
+
+int is_ntfs_dotgitignore(const char *name)
+{
+       return is_ntfs_dot_str(name, "gitignore", "gi250a");
+}
+
+int is_ntfs_dotgitattributes(const char *name)
+{
+       return is_ntfs_dot_str(name, "gitattributes", "gi7d29");
+}
+
 int looks_like_command_line_option(const char *str)
 {
        return str && str[0] == '-';
index 59a73f4..4b35e87 100644 (file)
@@ -752,7 +752,7 @@ struct cache_entry *make_cache_entry(unsigned int mode,
        int size, len;
        struct cache_entry *ce, *ret;
 
-       if (!verify_path(path)) {
+       if (!verify_path(path, mode)) {
                error("Invalid path '%s'", path);
                return NULL;
        }
@@ -817,7 +817,7 @@ int ce_same_name(const struct cache_entry *a, const struct cache_entry *b)
  * Also, we don't want double slashes or slashes at the
  * end that can make pathnames ambiguous.
  */
-static int verify_dotfile(const char *rest)
+static int verify_dotfile(const char *rest, unsigned mode)
 {
        /*
         * The first character was '.', but that
@@ -831,8 +831,13 @@ static int verify_dotfile(const char *rest)
 
        switch (*rest) {
        /*
-        * ".git" followed by  NUL or slash is bad. This
-        * shares the path end test with the ".." case.
+        * ".git" followed by NUL or slash is bad. Note that we match
+        * case-insensitively here, even if ignore_case is not set.
+        * This outlaws ".GIT" everywhere out of an abundance of caution,
+        * since there's really no good reason to allow it.
+        *
+        * Once we've seen ".git", we can also find ".gitmodules", etc (also
+        * case-insensitively).
         */
        case 'g':
        case 'G':
@@ -840,8 +845,15 @@ static int verify_dotfile(const char *rest)
                        break;
                if (rest[2] != 't' && rest[2] != 'T')
                        break;
-               rest += 2;
-       /* fallthrough */
+               if (rest[3] == '\0' || is_dir_sep(rest[3]))
+                       return 0;
+               if (S_ISLNK(mode)) {
+                       rest += 3;
+                       if (skip_iprefix(rest, "modules", &rest) &&
+                           (*rest == '\0' || is_dir_sep(*rest)))
+                               return 0;
+               }
+               break;
        case '.':
                if (rest[1] == '\0' || is_dir_sep(rest[1]))
                        return 0;
@@ -849,7 +861,7 @@ static int verify_dotfile(const char *rest)
        return 1;
 }
 
-int verify_path(const char *path)
+int verify_path(const char *path, unsigned mode)
 {
        char c;
 
@@ -862,12 +874,25 @@ int verify_path(const char *path)
                        return 1;
                if (is_dir_sep(c)) {
 inside:
-                       if (protect_hfs && is_hfs_dotgit(path))
-                               return 0;
-                       if (protect_ntfs && is_ntfs_dotgit(path))
-                               return 0;
+                       if (protect_hfs) {
+                               if (is_hfs_dotgit(path))
+                                       return 0;
+                               if (S_ISLNK(mode)) {
+                                       if (is_hfs_dotgitmodules(path))
+                                               return 0;
+                               }
+                       }
+                       if (protect_ntfs) {
+                               if (is_ntfs_dotgit(path))
+                                       return 0;
+                               if (S_ISLNK(mode)) {
+                                       if (is_ntfs_dotgitmodules(path))
+                                               return 0;
+                               }
+                       }
+
                        c = *path++;
-                       if ((c == '.' && !verify_dotfile(path)) ||
+                       if ((c == '.' && !verify_dotfile(path, mode)) ||
                            is_dir_sep(c) || c == '\0')
                                return 0;
                }
@@ -1184,7 +1209,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 
        if (!ok_to_add)
                return -1;
-       if (!verify_path(ce->name))
+       if (!verify_path(ce->name, ce->ce_mode))
                return error("Invalid path '%s'", ce->name);
 
        if (!skip_df_check &&
index cc0f43e..53f0a36 100644 (file)
@@ -2209,7 +2209,7 @@ int read_loose_object(const char *path,
                goto out;
        }
 
-       if (*type == OBJ_BLOB) {
+       if (*type == OBJ_BLOB && *size > big_file_threshold) {
                if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
                        goto out;
        } else {
index 602ba8c..e5f4901 100644 (file)
@@ -190,6 +190,31 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache,
        return NULL;
 }
 
+int check_submodule_name(const char *name)
+{
+       /* Disallow empty names */
+       if (!*name)
+               return -1;
+
+       /*
+        * Look for '..' as a path component. Check both '/' and '\\' as
+        * separators rather than is_dir_sep(), because we want the name rules
+        * to be consistent across platforms.
+        */
+       goto in_component; /* always start inside component */
+       while (*name) {
+               char c = *name++;
+               if (c == '/' || c == '\\') {
+in_component:
+                       if (name[0] == '.' && name[1] == '.' &&
+                           (!name[2] || name[2] == '/' || name[2] == '\\'))
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
 static int name_and_item_from_var(const char *var, struct strbuf *name,
                                  struct strbuf *item)
 {
@@ -201,6 +226,12 @@ static int name_and_item_from_var(const char *var, struct strbuf *name,
                return 0;
 
        strbuf_add(name, subsection, subsection_len);
+       if (check_submodule_name(name->buf) < 0) {
+               warning(_("ignoring suspicious submodule name: %s"), name->buf);
+               strbuf_release(name);
+               return 0;
+       }
+
        strbuf_addstr(item, key);
 
        return 1;
index a5503a5..17e2970 100644 (file)
@@ -48,4 +48,11 @@ extern const struct submodule *submodule_from_cache(struct repository *repo,
                                                    const char *key);
 extern void submodule_free(void);
 
+/*
+ * Returns 0 if the name is syntactically acceptable as a submodule "name"
+ * (e.g., that may be found in the subsection of a .gitmodules file) and -1
+ * otherwise.
+ */
+int check_submodule_name(const char *name);
+
 #endif /* SUBMODULE_CONFIG_H */
index 2b3c509..9484655 100644 (file)
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "string-list.h"
+#include "utf8.h"
 
 /*
  * A "string_list_each_func_t" function that normalizes an entry from
@@ -170,6 +171,11 @@ static struct test_data dirname_data[] = {
        { NULL,              NULL     }
 };
 
+static int is_dotgitmodules(const char *path)
+{
+       return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path);
+}
+
 int cmd_main(int argc, const char **argv)
 {
        if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) {
@@ -270,6 +276,20 @@ int cmd_main(int argc, const char **argv)
        if (argc == 2 && !strcmp(argv[1], "dirname"))
                return test_function(dirname_data, posix_dirname, argv[1]);
 
+       if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) {
+               int res = 0, expect = 1, i;
+               for (i = 2; i < argc; i++)
+                       if (!strcmp("--not", argv[i]))
+                               expect = !expect;
+                       else if (expect != is_dotgitmodules(argv[i]))
+                               res = error("'%s' is %s.gitmodules", argv[i],
+                                           expect ? "not " : "");
+                       else
+                               fprintf(stderr, "ok: '%s' is %s.gitmodules\n",
+                                       argv[i], expect ? "" : "not ");
+               return !!res;
+       }
+
        fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
                argv[1] ? argv[1] : "(there was none)");
        return 1;
index 7509846..4674899 100644 (file)
@@ -79,6 +79,18 @@ pack_obj () {
                ;;
        esac
 
+       # If it's not a delta, we can convince pack-objects to generate a pack
+       # with just our entry, and then strip off the header (12 bytes) and
+       # trailer (20 bytes).
+       if test -z "$2"
+       then
+               echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
+               size=$(wc -c <pack_obj.tmp) &&
+               dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
+               rm -f pack_obj.tmp
+               return
+       fi
+
        echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
        return 1
 }
index 7ea2bb5..3f3357e 100755 (executable)
@@ -349,4 +349,90 @@ test_submodule_relative_url "(null)" "ssh://hostname:22/repo" "../subrepo" "ssh:
 test_submodule_relative_url "(null)" "user@host:path/to/repo" "../subrepo" "user@host:path/to/subrepo"
 test_submodule_relative_url "(null)" "user@host:repo" "../subrepo" "user@host:subrepo"
 
+test_expect_success 'match .gitmodules' '
+       test-path-utils is_dotgitmodules \
+               .gitmodules \
+               \
+               .git${u200c}modules \
+               \
+               .Gitmodules \
+               .gitmoduleS \
+               \
+               ".gitmodules " \
+               ".gitmodules." \
+               ".gitmodules  " \
+               ".gitmodules. " \
+               ".gitmodules ." \
+               ".gitmodules.." \
+               ".gitmodules   " \
+               ".gitmodules.  " \
+               ".gitmodules . " \
+               ".gitmodules  ." \
+               \
+               ".Gitmodules " \
+               ".Gitmodules." \
+               ".Gitmodules  " \
+               ".Gitmodules. " \
+               ".Gitmodules ." \
+               ".Gitmodules.." \
+               ".Gitmodules   " \
+               ".Gitmodules.  " \
+               ".Gitmodules . " \
+               ".Gitmodules  ." \
+               \
+               GITMOD~1 \
+               gitmod~1 \
+               GITMOD~2 \
+               gitmod~3 \
+               GITMOD~4 \
+               \
+               "GITMOD~1 " \
+               "gitmod~2." \
+               "GITMOD~3  " \
+               "gitmod~4. " \
+               "GITMOD~1 ." \
+               "gitmod~2   " \
+               "GITMOD~3.  " \
+               "gitmod~4 . " \
+               \
+               GI7EBA~1 \
+               gi7eba~9 \
+               \
+               GI7EB~10 \
+               GI7EB~11 \
+               GI7EB~99 \
+               GI7EB~10 \
+               GI7E~100 \
+               GI7E~101 \
+               GI7E~999 \
+               ~1000000 \
+               ~9999999 \
+               \
+               --not \
+               ".gitmodules x"  \
+               ".gitmodules .x" \
+               \
+               " .gitmodules" \
+               \
+               ..gitmodules \
+               \
+               gitmodules \
+               \
+               .gitmodule \
+               \
+               ".gitmodules x " \
+               ".gitmodules .x" \
+               \
+               GI7EBA~ \
+               GI7EBA~0 \
+               GI7EBA~~1 \
+               GI7EBA~X \
+               Gx7EBA~1 \
+               GI7EBX~1 \
+               \
+               GI7EB~1 \
+               GI7EB~01 \
+               GI7EB~1X
+'
+
 test_done
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
new file mode 100755 (executable)
index 0000000..a770d92
--- /dev/null
@@ -0,0 +1,154 @@
+#!/bin/sh
+
+test_description='check handling of .. in submodule names
+
+Exercise the name-checking function on a variety of names, and then give a
+real-world setup that confirms we catch this in practice.
+'
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-pack.sh
+
+test_expect_success 'check names' '
+       cat >expect <<-\EOF &&
+       valid
+       valid/with/paths
+       EOF
+
+       git submodule--helper check-name >actual <<-\EOF &&
+       valid
+       valid/with/paths
+
+       ../foo
+       /../foo
+       ..\foo
+       \..\foo
+       foo/..
+       foo/../
+       foo\..
+       foo\..\
+       foo/../bar
+       EOF
+
+       test_cmp expect actual
+'
+
+test_expect_success 'create innocent subrepo' '
+       git init innocent &&
+       git -C innocent commit --allow-empty -m foo
+'
+
+test_expect_success 'submodule add refuses invalid names' '
+       test_must_fail \
+               git submodule add --name ../../modules/evil "$PWD/innocent" evil
+'
+
+test_expect_success 'add evil submodule' '
+       git submodule add "$PWD/innocent" evil &&
+
+       mkdir modules &&
+       cp -r .git/modules/evil modules &&
+       write_script modules/evil/hooks/post-checkout <<-\EOF &&
+       echo >&2 "RUNNING POST CHECKOUT"
+       EOF
+
+       git config -f .gitmodules submodule.evil.update checkout &&
+       git config -f .gitmodules --rename-section \
+               submodule.evil submodule.../../modules/evil &&
+       git add modules &&
+       git commit -am evil
+'
+
+# This step seems like it shouldn't be necessary, since the payload is
+# contained entirely in the evil submodule. But due to the vagaries of the
+# submodule code, checking out the evil module will fail unless ".git/modules"
+# exists. Adding another submodule (with a name that sorts before "evil") is an
+# easy way to make sure this is the case in the victim clone.
+test_expect_success 'add other submodule' '
+       git submodule add "$PWD/innocent" another-module &&
+       git add another-module &&
+       git commit -am another
+'
+
+test_expect_success 'clone evil superproject' '
+       git clone --recurse-submodules . victim >output 2>&1 &&
+       ! grep "RUNNING POST CHECKOUT" output
+'
+
+test_expect_success 'fsck detects evil superproject' '
+       test_must_fail git fsck
+'
+
+test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       git -C dst.git config transfer.fsckObjects true &&
+       test_must_fail git push dst.git HEAD
+'
+
+test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       git -C dst.git config transfer.fsckObjects true &&
+       git -C dst.git config transfer.unpackLimit 1 &&
+       test_must_fail git push dst.git HEAD
+'
+
+# Normally our packs contain commits followed by trees followed by blobs. This
+# reverses the order, which requires backtracking to find the context of a
+# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
+test_expect_success 'create oddly ordered pack' '
+       git checkout --orphan odd &&
+       git rm -rf --cached . &&
+       git add .gitmodules &&
+       git commit -m odd &&
+       {
+               pack_header 3 &&
+               pack_obj $(git rev-parse HEAD:.gitmodules) &&
+               pack_obj $(git rev-parse HEAD^{tree}) &&
+               pack_obj $(git rev-parse HEAD)
+       } >odd.pack &&
+       pack_trailer odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       test_must_fail git -C dst.git unpack-objects --strict <odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       test_must_fail git -C dst.git index-pack --strict --stdin <odd.pack
+'
+
+test_expect_success 'fsck detects symlinked .gitmodules file' '
+       git init symlink &&
+       (
+               cd symlink &&
+
+               # Make the tree directly to avoid index restrictions.
+               #
+               # Because symlinks store the target as a blob, choose
+               # a pathname that could be parsed as a .gitmodules file
+               # to trick naive non-symlink-aware checking.
+               tricky="[foo]bar=true" &&
+               content=$(git hash-object -w ../.gitmodules) &&
+               target=$(printf "$tricky" | git hash-object -w --stdin) &&
+               tree=$(
+                       {
+                               printf "100644 blob $content\t$tricky\n" &&
+                               printf "120000 blob $target\t.gitmodules\n"
+                       } | git mktree
+               ) &&
+               commit=$(git commit-tree $tree) &&
+
+               # Check not only that we fail, but that it is due to the
+               # symlink detector; this grep string comes from the config
+               # variable name and will not be translated.
+               test_must_fail git fsck 2>output &&
+               grep gitmodulesSymlink output
+       )
+'
+
+test_done
diff --git a/utf8.c b/utf8.c
index 2c27ce0..f04c244 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -620,28 +620,33 @@ static ucs_char_t next_hfs_char(const char **in)
        }
 }
 
-int is_hfs_dotgit(const char *path)
+static int is_hfs_dot_generic(const char *path,
+                             const char *needle, size_t needle_len)
 {
        ucs_char_t c;
 
        c = next_hfs_char(&path);
        if (c != '.')
                return 0;
-       c = next_hfs_char(&path);
 
        /*
         * there's a great deal of other case-folding that occurs
-        * in HFS+, but this is enough to catch anything that will
-        * convert to ".git"
+        * in HFS+, but this is enough to catch our fairly vanilla
+        * hard-coded needles.
         */
-       if (c != 'g' && c != 'G')
-               return 0;
-       c = next_hfs_char(&path);
-       if (c != 'i' && c != 'I')
-               return 0;
-       c = next_hfs_char(&path);
-       if (c != 't' && c != 'T')
-               return 0;
+       for (; needle_len > 0; needle++, needle_len--) {
+               c = next_hfs_char(&path);
+
+               /*
+                * We know our needles contain only ASCII, so we clamp here to
+                * make the results of tolower() sane.
+                */
+               if (c > 127)
+                       return 0;
+               if (tolower(c) != *needle)
+                       return 0;
+       }
+
        c = next_hfs_char(&path);
        if (c && !is_dir_sep(c))
                return 0;
@@ -649,6 +654,35 @@ int is_hfs_dotgit(const char *path)
        return 1;
 }
 
+/*
+ * Inline wrapper to make sure the compiler resolves strlen() on literals at
+ * compile time.
+ */
+static inline int is_hfs_dot_str(const char *path, const char *needle)
+{
+       return is_hfs_dot_generic(path, needle, strlen(needle));
+}
+
+int is_hfs_dotgit(const char *path)
+{
+       return is_hfs_dot_str(path, "git");
+}
+
+int is_hfs_dotgitmodules(const char *path)
+{
+       return is_hfs_dot_str(path, "gitmodules");
+}
+
+int is_hfs_dotgitignore(const char *path)
+{
+       return is_hfs_dot_str(path, "gitignore");
+}
+
+int is_hfs_dotgitattributes(const char *path)
+{
+       return is_hfs_dot_str(path, "gitattributes");
+}
+
 const char utf8_bom[] = "\357\273\277";
 
 int skip_utf8_bom(char **text, size_t len)
diff --git a/utf8.h b/utf8.h
index 6bbcf31..da19b43 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -52,8 +52,13 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding);
  * The path should be NUL-terminated, but we will match variants of both ".git\0"
  * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck
  * and verify_path().
+ *
+ * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo".
  */
 int is_hfs_dotgit(const char *path);
+int is_hfs_dotgitmodules(const char *path);
+int is_hfs_dotgitignore(const char *path);
+int is_hfs_dotgitattributes(const char *path);
 
 typedef enum {
        ALIGN_LEFT,