bpf: reject any prog that failed read-only lock

author Daniel Borkmann <daniel@iogearbox.net>

Fri, 15 Jun 2018 00:30:48 +0000 (02:30 +0200)

committer Alexei Starovoitov <ast@kernel.org>

Fri, 15 Jun 2018 18:14:25 +0000 (11:14 -0700)
author Daniel Borkmann <daniel@iogearbox.net>
Fri, 15 Jun 2018 00:30:48 +0000 (02:30 +0200)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 15 Jun 2018 18:14:25 +0000 (11:14 -0700)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 297c56f..108f981 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -469,7 +469,8 @@ struct sock_fprog_kern {
  };
  
  struct bpf_binary_header {
-       unsigned int pages;
+       u16 pages;
+       u16 locked:1;
         u8 image[];
  };
  
@@ -671,15 +672,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
  
  #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
  
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
  static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
  {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
         fp->locked = 1;
-       WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
+       if (set_memory_ro((unsigned long)fp, fp->pages))
+               fp->locked = 0;
+#endif
  }
  
  static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
  {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
         if (fp->locked) {
                 WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
                 /* In case set_memory_rw() fails, we want to be the first
@@ -687,34 +691,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
                  */
                 fp->locked = 0;
         }
+#endif
  }
  
  static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
  {
-       WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
-}
-
-static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
-{
-       WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-}
-#else
-static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
-{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+       hdr->locked = 1;
+       if (set_memory_ro((unsigned long)hdr, hdr->pages))
+               hdr->locked = 0;
+#endif
  }
  
  static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
  {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+       if (hdr->locked) {
+               WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
+               /* In case set_memory_rw() fails, we want to be the first
+                * to crash here instead of some random place later on.
+                */
+               hdr->locked = 0;
+       }
+#endif
  }
-#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
  
  static inline struct bpf_binary_header *
  bpf_jit_binary_hdr(const struct bpf_prog *fp)
@@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
         return (void *)addr;
  }
  
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
+{
+       if (!fp->locked)
+               return -ENOLCK;
+       if (fp->jited) {
+               const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+
+               if (!hdr->locked)
+                       return -ENOLCK;
+       }
+
+       return 0;
+}
+#endif
+
  int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
  static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
  {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1061968..a9e6c04 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
         bpf_fill_ill_insns(hdr, size);
  
         hdr->pages = size / PAGE_SIZE;
+       hdr->locked = 0;
+
         hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
                      PAGE_SIZE - sizeof(*hdr));
         start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1448,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
         return 0;
  }
  
+static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
+{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+       int i, err;
+
+       for (i = 0; i < fp->aux->func_cnt; i++) {
+               err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
+               if (err)
+                       return err;
+       }
+
+       return bpf_prog_check_pages_ro_single(fp);
+#endif
+       return 0;
+}
+
+static void bpf_prog_select_func(struct bpf_prog *fp)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+       u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+       fp->bpf_func = __bpf_prog_ret0_warn;
+#endif
+}
+
  /**
   *     bpf_prog_select_runtime - select exec runtime for BPF program
   *     @fp: bpf_prog populated with internal BPF program
@@ -1458,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
   */
  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
  {
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-       u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+       /* In case of BPF to BPF calls, verifier did all the prep
+        * work with regards to JITing, etc.
+        */
+       if (fp->bpf_func)
+               goto finalize;
  
-       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
-#else
-       fp->bpf_func = __bpf_prog_ret0_warn;
-#endif
+       bpf_prog_select_func(fp);
  
         /* eBPF JITs can rewrite the program in case constant
          * blinding is active. However, in case of error during
@@ -1485,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
                 if (*err)
                         return fp;
         }
+
+finalize:
         bpf_prog_lock_ro(fp);
  
         /* The tail call compatibility check can only be done at
@@ -1493,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
          * all eBPF JITs might immediately support all features.
          */
         *err = bpf_check_tail_call(fp);
-
+       if (*err)
+               return fp;
+
+       /* Checkpoint: at this point onwards any cBPF -> eBPF or
+        * native eBPF program is read-only. If we failed to change
+        * the page attributes (e.g. allocation failure from
+        * splitting large pages), then reject the whole program
+        * in order to guarantee not ending up with any W+X pages
+        * from BPF side in kernel.
+        */
+       *err = bpf_prog_check_pages_ro_locked(fp);
         return fp;
  }
  EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0f62692..35dc466 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1353,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
         if (err < 0)
                 goto free_used_maps;
  
-       /* eBPF program is ready to be JITed */
-       if (!prog->bpf_func)
-               prog = bpf_prog_select_runtime(prog, &err);
+       prog = bpf_prog_select_runtime(prog, &err);
         if (err < 0)
                 goto free_used_maps;
author	Daniel Borkmann <daniel@iogearbox.net>
	Fri, 15 Jun 2018 00:30:48 +0000 (02:30 +0200)
committer	Alexei Starovoitov <ast@kernel.org>
	Fri, 15 Jun 2018 18:14:25 +0000 (11:14 -0700)
include/linux/filter.h		patch | blob | history
kernel/bpf/core.c		patch | blob | history
kernel/bpf/syscall.c		patch | blob | history