error-injection: Support fault injection framework
authorMasami Hiramatsu <mhiramat@kernel.org>
Fri, 12 Jan 2018 17:56:03 +0000 (02:56 +0900)
committerAlexei Starovoitov <ast@kernel.org>
Sat, 13 Jan 2018 01:33:38 +0000 (17:33 -0800)
Support in-kernel fault-injection framework via debugfs.
This allows you to inject a conditional error to specified
function using debugfs interfaces.

Here is the result of test script described in
Documentation/fault-injection/fault-injection.txt

  ===========
  # ./test_fail_function.sh
  1+0 records in
  1+0 records out
  1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0227404 s, 46.1 MB/s
  btrfs-progs v4.4
  See http://btrfs.wiki.kernel.org for more information.

  Label:              (null)
  UUID:               bfa96010-12e9-4360-aed0-42eec7af5798
  Node size:          16384
  Sector size:        4096
  Filesystem size:    1001.00MiB
  Block group profiles:
    Data:             single            8.00MiB
    Metadata:         DUP              58.00MiB
    System:           DUP              12.00MiB
  SSD detected:       no
  Incompat features:  extref, skinny-metadata
  Number of devices:  1
  Devices:
     ID        SIZE  PATH
      1  1001.00MiB  /dev/loop2

  mount: mount /dev/loop2 on /opt/tmpmnt failed: Cannot allocate memory
  SUCCESS!
  ===========

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Documentation/fault-injection/fault-injection.txt
kernel/Makefile
kernel/fail_function.c [new file with mode: 0644]
lib/Kconfig.debug

index 918972b..f4a3246 100644 (file)
@@ -30,6 +30,12 @@ o fail_mmc_request
   injects MMC data errors on devices permitted by setting
   debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
 
+o fail_function
+
+  injects error return on specific functions, which are marked by
+  ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+  under /sys/kernel/debug/fail_function. No boot option supported.
+
 Configure fault-injection capabilities behavior
 -----------------------------------------------
 
@@ -123,6 +129,29 @@ configuration of fault-injection capabilities.
        default is 'N', setting it to 'Y' will disable failure injections
        when dealing with private (address space) futexes.
 
+- /sys/kernel/debug/fail_function/inject:
+
+       Format: { 'function-name' | '!function-name' | '' }
+       specifies the target function of error injection by name.
+       If the function name leads '!' prefix, given function is
+       removed from injection list. If nothing specified ('')
+       injection list is cleared.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+       (read only) shows error injectable functions and what type of
+       error values can be specified. The error type will be one of
+       below;
+       - NULL: retval must be 0.
+       - ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
+       - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
+
+- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
+
+       specifies the "error" return value to inject to the given
+       function for given function. This will be created when
+       user specifies new injection entry.
+
 o Boot option
 
 In order to inject faults while debugfs is not available (early boot time),
@@ -268,6 +297,45 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
 echo "Injecting errors into the module $module... (interrupt to stop)"
 sleep 1000000
 
+------------------------------------------------------------------------------
+
+o Inject open_ctree error while btrfs mount
+
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir -p tmpmnt
+
+FAILTYPE=fail_function
+FAILFUNC=open_ctree
+echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
+echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+mount -t btrfs $DEVICE tmpmnt
+if [ $? -ne 0 ]
+then
+       echo "SUCCESS!"
+else
+       echo "FAILED!"
+       umount tmpmnt
+fi
+
+echo > /sys/kernel/debug/$FAILTYPE/inject
+
+rmdir tmpmnt
+losetup -d $DEVICE
+rm testfile.img
+
+
 Tool to run command with failslab or fail_page_alloc
 ----------------------------------------------------
 In order to make it easier to accomplish the tasks mentioned above, we can use
index 172d151..f85ae5d 100644 (file)
@@ -81,6 +81,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_KCOV) += kcov.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
new file mode 100644 (file)
index 0000000..21b0122
--- /dev/null
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fail_function.c: Function-based error injection
+ */
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
+
+struct fei_attr {
+       struct list_head list;
+       struct kprobe kp;
+       unsigned long retval;
+};
+static DEFINE_MUTEX(fei_lock);
+static LIST_HEAD(fei_attr_list);
+static DECLARE_FAULT_ATTR(fei_fault_attr);
+static struct dentry *fei_debugfs_dir;
+
+static unsigned long adjust_error_retval(unsigned long addr, unsigned long retv)
+{
+       switch (get_injectable_error_type(addr)) {
+       case EI_ETYPE_NULL:
+               if (retv != 0)
+                       return 0;
+               break;
+       case EI_ETYPE_ERRNO:
+               if (retv < (unsigned long)-MAX_ERRNO)
+                       return (unsigned long)-EINVAL;
+               break;
+       case EI_ETYPE_ERRNO_NULL:
+               if (retv != 0 && retv < (unsigned long)-MAX_ERRNO)
+                       return (unsigned long)-EINVAL;
+               break;
+       }
+
+       return retv;
+}
+
+static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr)
+{
+       struct fei_attr *attr;
+
+       attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+       if (attr) {
+               attr->kp.symbol_name = kstrdup(sym, GFP_KERNEL);
+               if (!attr->kp.symbol_name) {
+                       kfree(attr);
+                       return NULL;
+               }
+               attr->kp.pre_handler = fei_kprobe_handler;
+               attr->retval = adjust_error_retval(addr, 0);
+               INIT_LIST_HEAD(&attr->list);
+       }
+       return attr;
+}
+
+static void fei_attr_free(struct fei_attr *attr)
+{
+       if (attr) {
+               kfree(attr->kp.symbol_name);
+               kfree(attr);
+       }
+}
+
+static struct fei_attr *fei_attr_lookup(const char *sym)
+{
+       struct fei_attr *attr;
+
+       list_for_each_entry(attr, &fei_attr_list, list) {
+               if (!strcmp(attr->kp.symbol_name, sym))
+                       return attr;
+       }
+
+       return NULL;
+}
+
+static bool fei_attr_is_valid(struct fei_attr *_attr)
+{
+       struct fei_attr *attr;
+
+       list_for_each_entry(attr, &fei_attr_list, list) {
+               if (attr == _attr)
+                       return true;
+       }
+
+       return false;
+}
+
+static int fei_retval_set(void *data, u64 val)
+{
+       struct fei_attr *attr = data;
+       unsigned long retv = (unsigned long)val;
+       int err = 0;
+
+       mutex_lock(&fei_lock);
+       /*
+        * Since this operation can be done after retval file is removed,
+        * It is safer to check the attr is still valid before accessing
+        * its member.
+        */
+       if (!fei_attr_is_valid(attr)) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       if (attr->kp.addr) {
+               if (adjust_error_retval((unsigned long)attr->kp.addr,
+                                       val) != retv)
+                       err = -EINVAL;
+       }
+       if (!err)
+               attr->retval = val;
+out:
+       mutex_unlock(&fei_lock);
+
+       return err;
+}
+
+static int fei_retval_get(void *data, u64 *val)
+{
+       struct fei_attr *attr = data;
+       int err = 0;
+
+       mutex_lock(&fei_lock);
+       /* Here we also validate @attr to ensure it still exists. */
+       if (!fei_attr_is_valid(attr))
+               err = -ENOENT;
+       else
+               *val = attr->retval;
+       mutex_unlock(&fei_lock);
+
+       return err;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fei_retval_ops, fei_retval_get, fei_retval_set,
+                        "%llx\n");
+
+static int fei_debugfs_add_attr(struct fei_attr *attr)
+{
+       struct dentry *dir;
+
+       dir = debugfs_create_dir(attr->kp.symbol_name, fei_debugfs_dir);
+       if (!dir)
+               return -ENOMEM;
+
+       if (!debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops)) {
+               debugfs_remove_recursive(dir);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void fei_debugfs_remove_attr(struct fei_attr *attr)
+{
+       struct dentry *dir;
+
+       dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir);
+       if (dir)
+               debugfs_remove_recursive(dir);
+}
+
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
+{
+       struct fei_attr *attr = container_of(kp, struct fei_attr, kp);
+
+       if (should_fail(&fei_fault_attr, 1)) {
+               regs_set_return_value(regs, attr->retval);
+               override_function_with_return(regs);
+               /* Kprobe specific fixup */
+               reset_current_kprobe();
+               preempt_enable_no_resched();
+               return 1;
+       }
+
+       return 0;
+}
+NOKPROBE_SYMBOL(fei_kprobe_handler)
+
+static void *fei_seq_start(struct seq_file *m, loff_t *pos)
+{
+       mutex_lock(&fei_lock);
+       return seq_list_start(&fei_attr_list, *pos);
+}
+
+static void fei_seq_stop(struct seq_file *m, void *v)
+{
+       mutex_unlock(&fei_lock);
+}
+
+static void *fei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return seq_list_next(v, &fei_attr_list, pos);
+}
+
+static int fei_seq_show(struct seq_file *m, void *v)
+{
+       struct fei_attr *attr = list_entry(v, struct fei_attr, list);
+
+       seq_printf(m, "%pf\n", attr->kp.addr);
+       return 0;
+}
+
+static const struct seq_operations fei_seq_ops = {
+       .start  = fei_seq_start,
+       .next   = fei_seq_next,
+       .stop   = fei_seq_stop,
+       .show   = fei_seq_show,
+};
+
+static int fei_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &fei_seq_ops);
+}
+
+static void fei_attr_remove(struct fei_attr *attr)
+{
+       fei_debugfs_remove_attr(attr);
+       unregister_kprobe(&attr->kp);
+       list_del(&attr->list);
+       fei_attr_free(attr);
+}
+
+static void fei_attr_remove_all(void)
+{
+       struct fei_attr *attr, *n;
+
+       list_for_each_entry_safe(attr, n, &fei_attr_list, list) {
+               fei_attr_remove(attr);
+       }
+}
+
+static ssize_t fei_write(struct file *file, const char __user *buffer,
+                        size_t count, loff_t *ppos)
+{
+       struct fei_attr *attr;
+       unsigned long addr;
+       char *buf, *sym;
+       int ret;
+
+       /* cut off if it is too long */
+       if (count > KSYM_NAME_LEN)
+               count = KSYM_NAME_LEN;
+       buf = kmalloc(sizeof(char) * (count + 1), GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, buffer, count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       buf[count] = '\0';
+       sym = strstrip(buf);
+
+       mutex_lock(&fei_lock);
+
+       /* Writing just spaces will remove all injection points */
+       if (sym[0] == '\0') {
+               fei_attr_remove_all();
+               ret = count;
+               goto out;
+       }
+       /* Writing !function will remove one injection point */
+       if (sym[0] == '!') {
+               attr = fei_attr_lookup(sym + 1);
+               if (!attr) {
+                       ret = -ENOENT;
+                       goto out;
+               }
+               fei_attr_remove(attr);
+               ret = count;
+               goto out;
+       }
+
+       addr = kallsyms_lookup_name(sym);
+       if (!addr) {
+               ret = -EINVAL;
+               goto out;
+       }
+       if (!within_error_injection_list(addr)) {
+               ret = -ERANGE;
+               goto out;
+       }
+       if (fei_attr_lookup(sym)) {
+               ret = -EBUSY;
+               goto out;
+       }
+       attr = fei_attr_new(sym, addr);
+       if (!attr) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = register_kprobe(&attr->kp);
+       if (!ret)
+               ret = fei_debugfs_add_attr(attr);
+       if (ret < 0)
+               fei_attr_remove(attr);
+       else {
+               list_add_tail(&attr->list, &fei_attr_list);
+               ret = count;
+       }
+out:
+       kfree(buf);
+       mutex_unlock(&fei_lock);
+       return ret;
+}
+
+static const struct file_operations fei_ops = {
+       .open =         fei_open,
+       .read =         seq_read,
+       .write =        fei_write,
+       .llseek =       seq_lseek,
+       .release =      seq_release,
+};
+
+static int __init fei_debugfs_init(void)
+{
+       struct dentry *dir;
+
+       dir = fault_create_debugfs_attr("fail_function", NULL,
+                                       &fei_fault_attr);
+       if (IS_ERR(dir))
+               return PTR_ERR(dir);
+
+       /* injectable attribute is just a symlink of error_inject/list */
+       if (!debugfs_create_symlink("injectable", dir,
+                                   "../error_injection/list"))
+               goto error;
+
+       if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops))
+               goto error;
+
+       fei_debugfs_dir = dir;
+
+       return 0;
+error:
+       debugfs_remove_recursive(dir);
+       return -ENOMEM;
+}
+
+late_initcall(fei_debugfs_init);
index 2a33efd..890d476 100644 (file)
@@ -1551,6 +1551,16 @@ config FAIL_FUTEX
        help
          Provide fault-injection capability for futexes.
 
+config FAIL_FUNCTION
+       bool "Fault-injection capability for functions"
+       depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
+       help
+         Provide function-based fault-injection capability.
+         This will allow you to override a specific function with a return
+         with given return value. As a result, function caller will see
+         an error value and have to handle it. This is useful to test the
+         error handling in various subsystems.
+
 config FAULT_INJECTION_DEBUG_FS
        bool "Debugfs entries for fault-injection capabilities"
        depends on FAULT_INJECTION && SYSFS && DEBUG_FS