--- /dev/null
+.TH syscount 8 "2017-02-15" "USER COMMANDS"
+.SH NAME
+syscount \- Summarize syscall counts and latencies.
+.SH SYNOPSIS
+.B syscount [-h] [-p PID] [-i INTERVAL] [-T TOP] [-x] [-L] [-m] [-P] [-l]
+.SH DESCRIPTION
+This tool traces syscall entry and exit tracepoints and summarizes either the
+number of syscalls of each type, or the number of syscalls per process. It can
+also collect latency (invocation time) for each syscall or each process.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc. Linux 4.7+ is required to attach a BPF program to the
+raw_syscalls:sys_{enter,exit} tracepoints, used by this tool.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-p PID
+Trace only this process.
+.TP
+\-i INTERVAL
+Print the summary at the specified interval (in seconds).
+.TP
+\-T TOP
+Print only this many entries. Default: 10.
+.TP
+\-x
+Trace only failed syscalls (i.e., the return value from the syscall was < 0).
+.TP
+\-m
+Display times in milliseconds. Default: microseconds.
+.TP
+\-P
+Summarize by process and not by syscall.
+.TP
+\-l
+List the syscalls recognized by the tool (hard-coded list). Syscalls beyond this
+list will still be displayed, as "[unknown: nnn]" where nnn is the syscall
+number.
+.SH EXAMPLES
+.TP
+Summarize all syscalls by syscall:
+#
+.B syscount
+.TP
+Summarize all syscalls by process:
+#
+.B syscount \-P
+.TP
+Summarize only failed syscalls:
+#
+.B syscount \-x
+.TP
+Trace PID 181 only:
+#
+.B syscount \-p 181
+.TP
+Summarize syscalls counts and latencies:
+#
+.B syscount \-L
+.SH FIELDS
+.TP
+PID
+Process ID
+.TP
+COMM
+Process name
+.TP
+SYSCALL
+Syscall name, or "[unknown: nnn]" for syscalls that aren't recognized
+.TP
+COUNT
+The number of events
+.TP
+TIME
+The total elapsed time (in us or ms)
+.SH OVERHEAD
+For most applications, the overhead should be manageable if they perform 1000's
+or even 10,000's of syscalls per second. For higher rates, the overhead may
+become considerable. For example, tracing a loop of 4 million calls to geteuid(),
+slows it down by 1.85x when tracing only syscall counts, and slows it down by
+more than 5x when tracing syscall counts and latencies. However, this represents
+a rate of >3.5 million syscalls per second, which should not be typical.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+funccount(8), ucalls(8), argdist(8), trace(8), funclatency(8)
--- /dev/null
+#!/usr/bin/env python
+#
+# syscount Summarize syscall counts and latencies.
+#
+# USAGE: syscount [-p PID] [-i INTERVAL] [-T TOP] [-x] [-L] [-m] [-P] [-l]
+#
+# Copyright 2017, Sasha Goldshtein.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 15-Feb-2017 Sasha Goldshtein Created this.
+
+from bcc import BPF
+from itertools import izip_longest
+from time import sleep, strftime
+import argparse
+import sys
+
+#
+# Syscall table for Linux x86_64, not very recent.
+# Automatically generated from strace/linux/x86_64/syscallent.h using the
+# following command:
+#
+# cat syscallent.h | awk -F, '{ gsub(/[ \t"}]/, "", $4);
+# gsub(/[ \t/*]/, "", $5);
+# print " "$5": \""$4"\","; }
+# BEGIN { print "syscalls = {" } END { print "}" }'
+#
+syscalls = {
+ 0: "read",
+ 1: "write",
+ 2: "open",
+ 3: "close",
+ 4: "stat",
+ 5: "fstat",
+ 6: "lstat",
+ 7: "poll",
+ 8: "lseek",
+ 9: "mmap",
+ 10: "mprotect",
+ 11: "munmap",
+ 12: "brk",
+ 13: "rt_sigaction",
+ 14: "rt_sigprocmask",
+ 15: "rt_sigreturn",
+ 16: "ioctl",
+ 17: "pread",
+ 18: "pwrite",
+ 19: "readv",
+ 20: "writev",
+ 21: "access",
+ 22: "pipe",
+ 23: "select",
+ 24: "sched_yield",
+ 25: "mremap",
+ 26: "msync",
+ 27: "mincore",
+ 28: "madvise",
+ 29: "shmget",
+ 30: "shmat",
+ 31: "shmctl",
+ 32: "dup",
+ 33: "dup2",
+ 34: "pause",
+ 35: "nanosleep",
+ 36: "getitimer",
+ 37: "alarm",
+ 38: "setitimer",
+ 39: "getpid",
+ 40: "sendfile",
+ 41: "socket",
+ 42: "connect",
+ 43: "accept",
+ 44: "sendto",
+ 45: "recvfrom",
+ 46: "sendmsg",
+ 47: "recvmsg",
+ 48: "shutdown",
+ 49: "bind",
+ 50: "listen",
+ 51: "getsockname",
+ 52: "getpeername",
+ 53: "socketpair",
+ 54: "setsockopt",
+ 55: "getsockopt",
+ 56: "clone",
+ 57: "fork",
+ 58: "vfork",
+ 59: "execve",
+ 60: "_exit",
+ 61: "wait4",
+ 62: "kill",
+ 63: "uname",
+ 64: "semget",
+ 65: "semop",
+ 66: "semctl",
+ 67: "shmdt",
+ 68: "msgget",
+ 69: "msgsnd",
+ 70: "msgrcv",
+ 71: "msgctl",
+ 72: "fcntl",
+ 73: "flock",
+ 74: "fsync",
+ 75: "fdatasync",
+ 76: "truncate",
+ 77: "ftruncate",
+ 78: "getdents",
+ 79: "getcwd",
+ 80: "chdir",
+ 81: "fchdir",
+ 82: "rename",
+ 83: "mkdir",
+ 84: "rmdir",
+ 85: "creat",
+ 86: "link",
+ 87: "unlink",
+ 88: "symlink",
+ 89: "readlink",
+ 90: "chmod",
+ 91: "fchmod",
+ 92: "chown",
+ 93: "fchown",
+ 94: "lchown",
+ 95: "umask",
+ 96: "gettimeofday",
+ 97: "getrlimit",
+ 98: "getrusage",
+ 99: "sysinfo",
+ 100: "times",
+ 101: "ptrace",
+ 102: "getuid",
+ 103: "syslog",
+ 104: "getgid",
+ 105: "setuid",
+ 106: "setgid",
+ 107: "geteuid",
+ 108: "getegid",
+ 109: "setpgid",
+ 110: "getppid",
+ 111: "getpgrp",
+ 112: "setsid",
+ 113: "setreuid",
+ 114: "setregid",
+ 115: "getgroups",
+ 116: "setgroups",
+ 117: "setresuid",
+ 118: "getresuid",
+ 119: "setresgid",
+ 120: "getresgid",
+ 121: "getpgid",
+ 122: "setfsuid",
+ 123: "setfsgid",
+ 124: "getsid",
+ 125: "capget",
+ 126: "capset",
+ 127: "rt_sigpending",
+ 128: "rt_sigtimedwait",
+ 129: "rt_sigqueueinfo",
+ 130: "rt_sigsuspend",
+ 131: "sigaltstack",
+ 132: "utime",
+ 133: "mknod",
+ 134: "uselib",
+ 135: "personality",
+ 136: "ustat",
+ 137: "statfs",
+ 138: "fstatfs",
+ 139: "sysfs",
+ 140: "getpriority",
+ 141: "setpriority",
+ 142: "sched_setparam",
+ 143: "sched_getparam",
+ 144: "sched_setscheduler",
+ 145: "sched_getscheduler",
+ 146: "sched_get_priority_max",
+ 147: "sched_get_priority_min",
+ 148: "sched_rr_get_interval",
+ 149: "mlock",
+ 150: "munlock",
+ 151: "mlockall",
+ 152: "munlockall",
+ 153: "vhangup",
+ 154: "modify_ldt",
+ 155: "pivot_root",
+ 156: "_sysctl",
+ 157: "prctl",
+ 158: "arch_prctl",
+ 159: "adjtimex",
+ 160: "setrlimit",
+ 161: "chroot",
+ 162: "sync",
+ 163: "acct",
+ 164: "settimeofday",
+ 165: "mount",
+ 166: "umount",
+ 167: "swapon",
+ 168: "swapoff",
+ 169: "reboot",
+ 170: "sethostname",
+ 171: "setdomainname",
+ 172: "iopl",
+ 173: "ioperm",
+ 174: "create_module",
+ 175: "init_module",
+ 176: "delete_module",
+ 177: "get_kernel_syms",
+ 178: "query_module",
+ 179: "quotactl",
+ 180: "nfsservctl",
+ 181: "getpmsg",
+ 182: "putpmsg",
+ 183: "afs_syscall",
+ 184: "tuxcall",
+ 185: "security",
+ 186: "gettid",
+ 187: "readahead",
+ 188: "setxattr",
+ 189: "lsetxattr",
+ 190: "fsetxattr",
+ 191: "getxattr",
+ 192: "lgetxattr",
+ 193: "fgetxattr",
+ 194: "listxattr",
+ 195: "llistxattr",
+ 196: "flistxattr",
+ 197: "removexattr",
+ 198: "lremovexattr",
+ 199: "fremovexattr",
+ 200: "tkill",
+ 201: "time",
+ 202: "futex",
+ 203: "sched_setaffinity",
+ 204: "sched_getaffinity",
+ 205: "set_thread_area",
+ 206: "io_setup",
+ 207: "io_destroy",
+ 208: "io_getevents",
+ 209: "io_submit",
+ 210: "io_cancel",
+ 211: "get_thread_area",
+ 212: "lookup_dcookie",
+ 213: "epoll_create",
+ 214: "epoll_ctl_old",
+ 215: "epoll_wait_old",
+ 216: "remap_file_pages",
+ 217: "getdents64",
+ 218: "set_tid_address",
+ 219: "restart_syscall",
+ 220: "semtimedop",
+ 221: "fadvise64",
+ 222: "timer_create",
+ 223: "timer_settime",
+ 224: "timer_gettime",
+ 225: "timer_getoverrun",
+ 226: "timer_delete",
+ 227: "clock_settime",
+ 228: "clock_gettime",
+ 229: "clock_getres",
+ 230: "clock_nanosleep",
+ 231: "exit_group",
+ 232: "epoll_wait",
+ 233: "epoll_ctl",
+ 234: "tgkill",
+ 235: "utimes",
+ 236: "vserver",
+ 237: "mbind",
+ 238: "set_mempolicy",
+ 239: "get_mempolicy",
+ 240: "mq_open",
+ 241: "mq_unlink",
+ 242: "mq_timedsend",
+ 243: "mq_timedreceive",
+ 244: "mq_notify",
+ 245: "mq_getsetattr",
+ 246: "kexec_load",
+ 247: "waitid",
+ 248: "add_key",
+ 249: "request_key",
+ 250: "keyctl",
+ 251: "ioprio_set",
+ 252: "ioprio_get",
+ 253: "inotify_init",
+ 254: "inotify_add_watch",
+ 255: "inotify_rm_watch",
+ 256: "migrate_pages",
+ 257: "openat",
+ 258: "mkdirat",
+ 259: "mknodat",
+ 260: "fchownat",
+ 261: "futimesat",
+ 262: "newfstatat",
+ 263: "unlinkat",
+ 264: "renameat",
+ 265: "linkat",
+ 266: "symlinkat",
+ 267: "readlinkat",
+ 268: "fchmodat",
+ 269: "faccessat",
+ 270: "pselect6",
+ 271: "ppoll",
+ 272: "unshare",
+ 273: "set_robust_list",
+ 274: "get_robust_list",
+ 275: "splice",
+ 276: "tee",
+ 277: "sync_file_range",
+ 278: "vmsplice",
+ 279: "move_pages",
+ 280: "utimensat",
+ 281: "epoll_pwait",
+ 282: "signalfd",
+ 283: "timerfd_create",
+ 284: "eventfd",
+ 285: "fallocate",
+ 286: "timerfd_settime",
+ 287: "timerfd_gettime",
+ 288: "accept4",
+ 289: "signalfd4",
+ 290: "eventfd2",
+ 291: "epoll_create1",
+ 292: "dup3",
+ 293: "pipe2",
+ 294: "inotify_init1",
+ 295: "preadv",
+ 296: "pwritev",
+ 297: "rt_tgsigqueueinfo",
+ 298: "perf_event_open",
+ 299: "recvmmsg",
+ 300: "fanotify_init",
+ 301: "fanotify_mark",
+ 302: "prlimit64",
+ 303: "name_to_handle_at",
+ 304: "open_by_handle_at",
+ 305: "clock_adjtime",
+ 306: "syncfs",
+ 307: "sendmmsg",
+ 308: "setns",
+ 309: "getcpu",
+ 310: "process_vm_readv",
+ 311: "process_vm_writev",
+ 312: "kcmp",
+ 313: "finit_module",
+}
+
+parser = argparse.ArgumentParser(
+ description="Summarize syscall counts and latencies.")
+parser.add_argument("-p", "--pid", type=int, help="trace only this pid")
+parser.add_argument("-i", "--interval", type=int,
+ help="print summary at this interval (seconds)")
+parser.add_argument("-T", "--top", type=int, default=10,
+ help="print only the top syscalls by count or latency")
+parser.add_argument("-x", "--failures", action="store_true",
+ help="trace only failed syscalls (return < 0)")
+parser.add_argument("-L", "--latency", action="store_true",
+ help="collect syscall latency")
+parser.add_argument("-m", "--milliseconds", action="store_true",
+ help="display latency in milliseconds (default: microseconds)")
+parser.add_argument("-P", "--process", action="store_true",
+ help="count by process and not by syscall")
+parser.add_argument("-l", "--list", action="store_true",
+ help="print list of recognized syscalls and exit")
+args = parser.parse_args()
+
+if args.list:
+ for grp in izip_longest(*(iter(sorted(syscalls.values())),) * 4):
+ print(" ".join(["%-20s" % s for s in grp if s is not None]))
+ sys.exit(0)
+
+text = """
+#ifdef LATENCY
+struct data_t {
+ u64 count;
+ u64 total_ns;
+};
+
+BPF_HASH(start, u64, u64);
+BPF_HASH(data, u32, struct data_t);
+#else
+BPF_HASH(data, u32, u64);
+#endif
+
+#ifdef LATENCY
+TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
+ u64 pid_tgid = bpf_get_current_pid_tgid();
+
+#ifdef FILTER_PID
+ if (pid_tgid >> 32 != FILTER_PID)
+ return 0;
+#endif
+
+ u64 t = bpf_ktime_get_ns();
+ start.update(&pid_tgid, &t);
+ return 0;
+}
+#endif
+
+TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
+ u64 pid_tgid = bpf_get_current_pid_tgid();
+
+#ifdef FILTER_PID
+ if (pid_tgid >> 32 != FILTER_PID)
+ return 0;
+#endif
+
+#ifdef FILTER_FAILED
+ if (args->ret >= 0)
+ return 0;
+#endif
+
+#ifdef BY_PROCESS
+ u32 key = pid_tgid >> 32;
+#else
+ u32 key = args->id;
+#endif
+
+#ifdef LATENCY
+ struct data_t *val, zero = {};
+ u64 *start_ns = start.lookup(&pid_tgid);
+ if (!start_ns)
+ return 0;
+
+ val = data.lookup_or_init(&key, &zero);
+ val->count++;
+ val->total_ns = bpf_ktime_get_ns() - *start_ns;
+#else
+ u64 *val, zero = 0;
+ val = data.lookup_or_init(&key, &zero);
+ ++(*val);
+#endif
+ return 0;
+}
+"""
+
+if args.pid:
+ text = ("#define FILTER_PID %d\n" % args.pid) + text
+if args.failures:
+ text = "#define FILTER_FAILED\n" + text
+if args.latency:
+ text = "#define LATENCY\n" + text
+if args.process:
+ text = "#define BY_PROCESS\n" + text
+
+bpf = BPF(text=text)
+
+def print_stats():
+ if args.latency:
+ print_latency_stats()
+ else:
+ print_count_stats()
+
+agg_colname = "PID COMM" if args.process else "SYSCALL"
+time_colname = "TIME (ms)" if args.milliseconds else "TIME (us)"
+
+def comm_for_pid(pid):
+ try:
+ return open("/proc/%d/comm" % pid).read().strip()
+ except Exception:
+ return "[unknown]"
+
+def agg_colval(key):
+ if args.process:
+ return "%-6d %-15s" % (key.value, comm_for_pid(key.value))
+ else:
+ return syscalls.get(key.value, "[unknown: %d]" % key.value)
+
+def print_count_stats():
+ data = bpf["data"]
+ print("[%s]" % strftime("%H:%M:%S"))
+ print("%-22s %8s" % (agg_colname, "COUNT"))
+ for k, v in sorted(data.items(), key=lambda kv: -kv[1].value)[:args.top]:
+ if k.value == 0xFFFFFFFF:
+ continue # happens occasionally, we don't need it
+ print("%-22s %8d" % (agg_colval(k), v.value))
+ print("")
+ data.clear()
+
+def print_latency_stats():
+ data = bpf["data"]
+ print("[%s]" % strftime("%H:%M:%S"))
+ print("%-22s %8s %16s" % (agg_colname, "COUNT", time_colname))
+ for k, v in sorted(data.items(), key=lambda kv: -kv[1].total_ns)[:args.top]:
+ if k.value == 0xFFFFFFFF:
+ continue # happens occasionally, we don't need it
+ print(("%-22s %8d " + ("%16.6f" if args.milliseconds else "%16.3f")) %
+ (agg_colval(k), v.count,
+ v.total_ns / (1e6 if args.milliseconds else 1e3)))
+ print("")
+ data.clear()
+
+print("Tracing %ssyscalls, printing top %d... Ctrl+C to quit." %
+ ("failed " if args.failures else "", args.top))
+while True:
+ try:
+ sleep(args.interval or 999999999)
+ print_stats()
+ except KeyboardInterrupt:
+ if not args.interval:
+ print_stats()
+ break
+
--- /dev/null
+Demonstrations of syscount, the Linux/eBPF version.
+
+
+syscount summarizes syscall counts across the system or a specific process,
+with optional latency information. It is very useful for general workload
+characterization, for example:
+
+# syscount
+Tracing syscalls, printing top 10... Ctrl+C to quit.
+[09:39:04]
+SYSCALL COUNT
+write 10739
+read 10584
+wait4 1460
+nanosleep 1457
+select 795
+rt_sigprocmask 689
+clock_gettime 653
+rt_sigaction 128
+futex 86
+ioctl 83
+^C
+
+These are the top 10 entries; you can get more by using the -T switch. Here,
+the output indicates that the write and read syscalls were very common, followed
+immediately by wait4, nanosleep, and so on. By default, syscount counts across
+the entire system, but we can point it to a specific process of interest:
+
+# syscount -p $(pidof dd)
+Tracing syscalls, printing top 10... Ctrl+C to quit.
+[09:40:21]
+SYSCALL COUNT
+read 7878397
+write 7878397
+^C
+
+Indeed, dd's workload is a bit easier to characterize. Occasionally, the count
+of syscalls is not enough, and you'd also want an aggregate latency:
+
+# syscount -L
+Tracing syscalls, printing top 10... Ctrl+C to quit.
+[09:41:32]
+SYSCALL COUNT TIME (us)
+select 16 3415860.022
+nanosleep 291 12038.707
+ftruncate 1 122.939
+write 4 63.389
+stat 1 23.431
+fstat 1 5.088
+[unknown: 321] 32 4.965
+timerfd_settime 1 4.830
+ioctl 3 4.802
+kill 1 4.342
+^C
+
+The select and nanosleep calls are responsible for a lot of time, but remember
+these are blocking calls. This output was taken from a mostly idle system. Note
+the "unknown" entry -- syscall 321 is the bpf() syscall, which is not in the
+table used by this tool (borrowed from strace sources).
+
+Another direction would be to understand which processes are making a lot of
+syscalls, thus responsible for a lot of activity. This is what the -P switch
+does:
+
+# syscount -P
+Tracing syscalls, printing top 10... Ctrl+C to quit.
+[09:58:13]
+PID COMM COUNT
+13820 vim 548
+30216 sshd 149
+29633 bash 72
+25188 screen 70
+25776 mysqld 30
+31285 python 10
+529 systemd-udevd 9
+1 systemd 8
+494 systemd-journal 5
+^C
+
+This is again from a mostly idle system over an interval of a few seconds.
+
+Sometimes, you'd only care about failed syscalls -- these are the ones that
+might be worth investigating with follow-up tools like opensnoop, execsnoop,
+or trace. Use the -x switch for this; the following example also demonstrates
+the -i switch, for printing at predefined intervals:
+
+# syscount -x -i 5
+Tracing failed syscalls, printing top 10... Ctrl+C to quit.
+[09:44:16]
+SYSCALL COUNT
+futex 13
+getxattr 10
+stat 8
+open 6
+wait4 3
+access 2
+[unknown: 321] 1
+
+[09:44:21]
+SYSCALL COUNT
+futex 12
+getxattr 10
+[unknown: 321] 2
+wait4 1
+access 1
+pause 1
+^C
+
+USAGE:
+# syscount -h
+usage: syscount.py [-h] [-p PID] [-i INTERVAL] [-T TOP] [-x] [-L] [-m] [-P]
+ [-l]
+
+Summarize syscall counts and latencies.
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PID, --pid PID trace only this pid
+ -i INTERVAL, --interval INTERVAL
+ print summary at this interval (seconds)
+ -T TOP, --top TOP print only the top syscalls by count or latency
+ -x, --failures trace only failed syscalls (return < 0)
+ -L, --latency collect syscall latency
+ -m, --milliseconds display latency in milliseconds (default:
+ microseconds)
+ -P, --process count by process and not by syscall
+ -l, --list print list of recognized syscalls and exit