bpf: Document EFAULT changes for sockopt
author Stanislav Fomichev <sdf@google.com>
Thu, 11 May 2023 17:04:56 +0000 (10:04 -0700)
committer Martin KaFai Lau <martin.lau@kernel.org>
Sat, 13 May 2023 23:55:46 +0000 (16:55 -0700)
Also add examples showing how to correctly handle large optlens.
This is less relevant now that we no longer return EFAULT, but
it is still the correct thing to do.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230511170456.1759459-5-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Documentation/bpf/prog_cgroup_sockopt.rst

index 172f957..1226a94 100644 (file)
@@ -98,10 +98,65 @@ can access only the first ``PAGE_SIZE`` of that data. So it has to options:
   indicates that the kernel should use BPF's trimmed ``optval``.
 
 When the BPF program returns with the ``optlen`` greater than
-``PAGE_SIZE``, the userspace will receive ``EFAULT`` errno.
+``PAGE_SIZE``, userspace will receive the original kernel
+buffers without any modifications that the BPF program might have
+applied.
 
 Example
 =======
 
+The recommended way to handle socket options in a BPF program is as follows:
+
+.. code-block:: c
+
+       SEC("cgroup/getsockopt")
+       int getsockopt(struct bpf_sockopt *ctx)
+       {
+               /* Custom socket option. */
+               if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+                       ctx->retval = 0;
+                       optval[0] = ...;
+                       ctx->optlen = 1;
+                       return 1;
+               }
+
+               /* Modify kernel's socket option. */
+               if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+                       ctx->retval = 0;
+                       optval[0] = ...;
+                       ctx->optlen = 1;
+                       return 1;
+               }
+
+               /* If optval is larger than PAGE_SIZE, use the kernel's buffer. */
+               if (ctx->optlen > PAGE_SIZE)
+                       ctx->optlen = 0;
+
+               return 1;
+       }
+
+       SEC("cgroup/setsockopt")
+       int setsockopt(struct bpf_sockopt *ctx)
+       {
+               /* Custom socket option. */
+               if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+                       /* do something */
+                       ctx->optlen = -1;
+                       return 1;
+               }
+
+               /* Modify kernel's socket option. */
+               if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+                       optval[0] = ...;
+                       return 1;
+               }
+
+               /* If optval is larger than PAGE_SIZE, use the kernel's buffer. */
+               if (ctx->optlen > PAGE_SIZE)
+                       ctx->optlen = 0;
+
+               return 1;
+       }
+
 See ``tools/testing/selftests/bpf/progs/sockopt_sk.c`` for an example
 of BPF program that handles socket options.