selftests/dma: add test application for DMA_MAP_BENCHMARK
author Barry Song <song.bao.hua@hisilicon.com>
Mon, 16 Nov 2020 06:08:48 +0000 (19:08 +1300)
committer Christoph Hellwig <hch@lst.de>
Fri, 27 Nov 2020 09:33:42 +0000 (10:33 +0100)
This patch provides the test application for DMA_MAP_BENCHMARK.

Before running the test application, we need to bind a device to the
dma_map_benchmark driver. For example, unbind "xxx" from its original
driver and bind it to dma_map_benchmark:

echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override
echo xxx > /sys/bus/platform/drivers/xxx/unbind
echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind

Another example for PCI devices:
echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override
echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind
echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind

The below command will run 16 threads on numa node 0 for 10 seconds on
the device bound to dma_map_benchmark platform_driver or pci_driver:
./dma_map_benchmark -t 16 -s 10 -n 0
dma mapping benchmark: threads:16 seconds:10 node:0 dir:BIDIRECTIONAL
average map latency(us):1.1 standard deviation:1.9
average unmap latency(us):0.5 standard deviation:0.8

Cc: Will Deacon <will@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
MAINTAINERS
tools/testing/selftests/dma/Makefile [new file with mode: 0644]
tools/testing/selftests/dma/config [new file with mode: 0644]
tools/testing/selftests/dma/dma_map_benchmark.c [new file with mode: 0644]

index a008b70..c88abd1 100644 (file)
@@ -5237,6 +5237,12 @@ F:       include/linux/dma-mapping.h
 F:     include/linux/dma-map-ops.h
 F:     kernel/dma/
 
+DMA MAPPING BENCHMARK
+M:     Barry Song <song.bao.hua@hisilicon.com>
+L:     iommu@lists.linux-foundation.org
+F:     kernel/dma/map_benchmark.c
+F:     tools/testing/selftests/dma/
+
 DMA-BUF HEAPS FRAMEWORK
 M:     Sumit Semwal <sumit.semwal@linaro.org>
 R:     Benjamin Gaignard <benjamin.gaignard@linaro.org>
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
new file mode 100644 (file)
index 0000000..aa8e8b5
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Build rules for the dma_map_benchmark test application.
+# Adds the usr/include directory four levels up to the header search path.
+CFLAGS += -I../../../../usr/include/

+# Program to build; presumably picked up by the rules in ../lib.mk (confirm).
+TEST_GEN_PROGS := dma_map_benchmark

+include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
new file mode 100644 (file)
index 0000000..6102ee3
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
new file mode 100644 (file)
index 0000000..7065163
--- /dev/null
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+/* ioctl request issued on the benchmark file; carries a struct map_benchmark */
+#define DMA_MAP_BENCHMARK      _IOWR('d', 1, struct map_benchmark)
+/* upper bounds main() enforces on the -t and -s options */
+#define DMA_MAP_MAX_THREADS    1024
+#define DMA_MAP_MAX_SECONDS     300

+/*
+ * Values accepted for the -d option; they also index directions[].
+ * NOTE(review): presumably these must match the kernel side - confirm.
+ */
+#define DMA_MAP_BIDIRECTIONAL  0
+#define DMA_MAP_TO_DEVICE      1
+#define DMA_MAP_FROM_DEVICE    2
+
+/*
+ * Printable names of the DMA directions, indexed by the DMA_MAP_* direction
+ * value (0 = bidirectional, 1 = to device, 2 = from device). The table and
+ * the strings it points to are read-only.
+ */
+static const char *const directions[] = {
+       "BIDIRECTIONAL",
+       "TO_DEVICE",
+       "FROM_DEVICE",
+};
+
+/*
+ * Argument block passed to the DMA_MAP_BENCHMARK ioctl.
+ *
+ * main() fills in threads, seconds, node, dma_bits and dma_dir before the
+ * call and reads the four latency fields after it returns. The latency
+ * fields are divided by 10.0 before being printed as microseconds.
+ * NOTE(review): the layout presumably has to match the kernel's definition
+ * of this structure - confirm before reordering or resizing any field.
+ */
+struct map_benchmark {
+       __u64 avg_map_100ns; /* average map latency in 100ns */
+       __u64 map_stddev; /* standard deviation of map latency */
+       __u64 avg_unmap_100ns; /* average unmap latency in 100ns */
+       __u64 unmap_stddev; /* standard deviation of unmap latency */
+       __u32 threads; /* how many threads will do map/unmap in parallel */
+       __u32 seconds; /* how long the test will last */
+       __s32 node; /* which numa node this benchmark will run on */
+       __u32 dma_bits; /* DMA addressing capability */
+       __u32 dma_dir; /* DMA data direction */
+       __u64 expansion[10];    /* For future use */
+};
+
+/*
+ * Parse a decimal command-line argument into an int.
+ *
+ * Returns 0 and stores the value in *out on success. Returns -1 when the
+ * string holds no digits, has trailing characters after the number, or the
+ * value does not fit in an int.
+ */
+static int parse_int(const char *str, int *out)
+{
+       char *end;
+       long val = strtol(str, &end, 10);

+       /* the round-trip through int rejects values that would be truncated */
+       if (end == str || *end != '\0' || (long)(int)val != val)
+               return -1;

+       *out = (int)val;
+       return 0;
+}

+/*
+ * Command-line front end for the DMA map benchmark.
+ *
+ * Options:
+ *   -t <threads>  number of parallel threads, 1-DMA_MAP_MAX_THREADS (default 1)
+ *   -s <seconds>  test duration, 1-DMA_MAP_MAX_SECONDS (default 20)
+ *   -n <node>     NUMA node to run on (default -1)
+ *   -b <bits>     DMA mask width, 20-64 (default 32)
+ *   -d <dir>      DMA direction: 0 bidirectional, 1 to device, 2 from device
+ *
+ * Validates the options, issues the DMA_MAP_BENCHMARK ioctl on
+ * /sys/kernel/debug/dma_map_benchmark and prints the latencies it reports.
+ * Returns 0 on success, -1 on an unrecognised option, and exits with status 1
+ * on an invalid value or a failed open/ioctl.
+ */
+int main(int argc, char **argv)
+{
+       /* zeroed so the "future use" fields never carry stack garbage */
+       struct map_benchmark map = {0};
+       int fd, opt;
+       /* default single thread, run 20 seconds on NUMA_NO_NODE */
+       int threads = 1, seconds = 20, node = -1;
+       /* default dma mask 32bit, bidirectional DMA */
+       int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;

+       while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+               int *dest;

+               switch (opt) {
+               case 't':
+                       dest = &threads;
+                       break;
+               case 's':
+                       dest = &seconds;
+                       break;
+               case 'n':
+                       dest = &node;
+                       break;
+               case 'b':
+                       dest = &bits;
+                       break;
+               case 'd':
+                       dest = &dir;
+                       break;
+               default:
+                       /* getopt() has already reported the bad option */
+                       return -1;
+               }

+               /* reject non-numeric input instead of treating it as 0 */
+               if (parse_int(optarg, dest)) {
+                       fprintf(stderr, "invalid value '%s' for option -%c\n",
+                               optarg, opt);
+                       exit(1);
+               }
+       }

+       if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
+               fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
+                       DMA_MAP_MAX_THREADS);
+               exit(1);
+       }

+       if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
+               fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
+                       DMA_MAP_MAX_SECONDS);
+               exit(1);
+       }

+       /* suppose the minimum DMA zone is 1MB in the world */
+       if (bits < 20 || bits > 64) {
+               fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
+               exit(1);
+       }

+       if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
+                       dir != DMA_MAP_FROM_DEVICE) {
+               fprintf(stderr, "invalid dma direction\n");
+               exit(1);
+       }

+       fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
+       if (fd == -1) {
+               perror("open");
+               exit(1);
+       }

+       map.seconds = seconds;
+       map.threads = threads;
+       map.node = node;
+       map.dma_bits = bits;
+       map.dma_dir = dir;
+       /* pass the request code directly; it does not fit a signed int */
+       if (ioctl(fd, DMA_MAP_BENCHMARK, &map)) {
+               perror("ioctl");
+               exit(1);
+       }
+       close(fd);

+       printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
+                       threads, seconds, node, directions[dir]);
+       /* results are reported in units of 100ns; convert to microseconds */
+       printf("average map latency(us):%.1f standard deviation:%.1f\n",
+                       map.avg_map_100ns/10.0, map.map_stddev/10.0);
+       printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
+                       map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);

+       return 0;
+}