genetlink: fix single op policy dump when do is present
author Jakub Kicinski <kuba@kernel.org>
Wed, 9 Nov 2022 18:32:54 +0000 (10:32 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 10 Nov 2022 21:52:51 +0000 (13:52 -0800)
Jonathan reports crashes when running net-next in Meta's fleet.
Stats collection uses ethtool -I which does a per-op policy dump
to check if stats are supported. We don't initialize the dumpit
information if doit succeeds due to evaluation short-circuiting.

The crash may look like this:

   BUG: kernel NULL pointer dereference, address: 0000000000000cc0
   RIP: 0010:netlink_policy_dump_add_policy+0x174/0x2a0
     ctrl_dumppolicy_start+0x19f/0x2f0
     genl_start+0xe7/0x140

Or we may trigger a warning:

   WARNING: CPU: 1 PID: 785 at net/netlink/policy.c:87 netlink_policy_dump_get_policy_idx+0x79/0x80
   RIP: 0010:netlink_policy_dump_get_policy_idx+0x79/0x80
     ctrl_dumppolicy_put_op+0x214/0x360

depending on what garbage we pick up from the stack.

Reported-by: Jonathan Lemon <bsd@meta.com>
Fixes: 26588edbef60 ("genetlink: support split policies in ctrl_dumppolicy_put_op()")
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20221109183254.554051-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 9b7dfc4..600993c 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -282,6 +282,7 @@ genl_cmd_full_to_split(struct genl_split_ops *op,
        return 0;
 }
 
+/* Must make sure that op is initialized to 0 on failure */
 static int
 genl_get_cmd(u32 cmd, u8 flags, const struct genl_family *family,
             struct genl_split_ops *op)
@@ -302,6 +303,21 @@ genl_get_cmd(u32 cmd, u8 flags, const struct genl_family *family,
        return err;
 }
 
+/* For policy dumping only, get ops of both do and dump.
+ * Fail if both are missing, genl_get_cmd() will zero-init in case of failure.
+ */
+static int
+genl_get_cmd_both(u32 cmd, const struct genl_family *family,
+                 struct genl_split_ops *doit, struct genl_split_ops *dumpit)
+{
+       int err1, err2;
+
+       err1 = genl_get_cmd(cmd, GENL_CMD_CAP_DO, family, doit);
+       err2 = genl_get_cmd(cmd, GENL_CMD_CAP_DUMP, family, dumpit);
+
+       return err1 && err2 ? -ENOENT : 0;
+}
+
 static bool
 genl_op_iter_init(const struct genl_family *family, struct genl_op_iter *iter)
 {
@@ -1406,10 +1422,10 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
                ctx->single_op = true;
                ctx->op = nla_get_u32(tb[CTRL_ATTR_OP]);
 
-               if (genl_get_cmd(ctx->op, GENL_CMD_CAP_DO, rt, &doit) &&
-                   genl_get_cmd(ctx->op, GENL_CMD_CAP_DUMP, rt, &dump)) {
+               err = genl_get_cmd_both(ctx->op, rt, &doit, &dump);
+               if (err) {
                        NL_SET_BAD_ATTR(cb->extack, tb[CTRL_ATTR_OP]);
-                       return -ENOENT;
+                       return err;
                }
 
                if (doit.policy) {
@@ -1551,13 +1567,9 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
                if (ctx->single_op) {
                        struct genl_split_ops doit, dumpit;
 
-                       if (genl_get_cmd(ctx->op, GENL_CMD_CAP_DO,
-                                        ctx->rt, &doit) &&
-                           genl_get_cmd(ctx->op, GENL_CMD_CAP_DUMP,
-                                        ctx->rt, &dumpit)) {
-                               WARN_ON(1);
+                       if (WARN_ON(genl_get_cmd_both(ctx->op, ctx->rt,
+                                                     &doit, &dumpit)))
                                return -ENOENT;
-                       }
 
                        if (ctrl_dumppolicy_put_op(skb, cb, &doit, &dumpit))
                                return skb->len;