add print_linear_hist() for linear histograms
authorBrendan Gregg <brendan.d.gregg@gmail.com>
Wed, 14 Dec 2016 19:31:00 +0000 (11:31 -0800)
committerBrendan Gregg <brendan.d.gregg@gmail.com>
Wed, 14 Dec 2016 19:43:51 +0000 (11:43 -0800)
docs/reference_guide.md
src/python/bcc/table.py

index c8c5277..708806a 100644 (file)
@@ -62,6 +62,7 @@ This guide is incomplete. If something feels missing, check the bcc and kernel s
         - [4. values()](#4-values)
         - [5. clear()](#5-clear)
         - [6. print_log2_hist()](#6-print_log2_hist)
+        - [7. print_linear_hist()](#7-print_linear_hist)
     - [Helpers](#helpers)
         - [1. ksym()](#1-ksym)
         - [2. ksymaddr()](#2-ksymaddr)
@@ -1010,6 +1011,65 @@ Examples in situ:
 [search /examples](https://github.com/iovisor/bcc/search?q=print_log2_hist+path%3Aexamples+language%3Apython&type=Code),
 [search /tools](https://github.com/iovisor/bcc/search?q=print_log2_hist+path%3Atools+language%3Apython&type=Code)
 
+### 7. print_linear_hist()
+
+Syntax: ```table.print_linear_hist(val_type="value", section_header="Bucket ptr", section_print_fn=None, bucket_fn=None)```
+
+Prints a table as a linear histogram in ASCII. This is intended to visualize small integer ranges, e.g., 0 to 100.
+
+Arguments:
+
+- val_type: optional, column header.
+- section_header: if the histogram has a secondary key, multiple tables will print and section_header can be used as a header description for each.
+- section_print_fn: if section_print_fn is not None, it will be passed the bucket value, and the string it returns is printed in that table's section header.
+- bucket_fn: if bucket_fn is not None, it will be applied to each secondary key to produce the bucket value that the tables are grouped by.
+
+Example:
+
+```Python
+b = BPF(text="""
+BPF_HISTOGRAM(dist);
+
+int kprobe__blk_account_io_completion(struct pt_regs *ctx, struct request *req)
+{
+       dist.increment(req->__data_len / 1024);
+       return 0;
+}
+""")
+[...]
+
+b["dist"].print_linear_hist("kbytes")
+```
+
+Output:
+
+```
+     kbytes        : count     distribution
+        0          : 3        |******                                  |
+        1          : 0        |                                        |
+        2          : 0        |                                        |
+        3          : 0        |                                        |
+        4          : 19       |****************************************|
+        5          : 0        |                                        |
+        6          : 0        |                                        |
+        7          : 0        |                                        |
+        8          : 4        |********                                |
+        9          : 0        |                                        |
+        10         : 0        |                                        |
+        11         : 0        |                                        |
+        12         : 0        |                                        |
+        13         : 0        |                                        |
+        14         : 0        |                                        |
+        15         : 0        |                                        |
+        16         : 2        |****                                    |
+[...]
+```
+
+This is an efficient way to summarize data, as the summarization is performed in-kernel, and only the values in the count column are passed to user space.
+
+Examples in situ:
+[search /examples](https://github.com/iovisor/bcc/search?q=print_linear_hist+path%3Aexamples+language%3Apython&type=Code),
+[search /tools](https://github.com/iovisor/bcc/search?q=print_linear_hist+path%3Atools+language%3Apython&type=Code)
+
 ## Helpers
 
 Some helper methods provided by bcc. Note that since we're in Python, we can import any Python library and their methods, including, for example, the libraries: argparse, collections, ctypes, datetime, re, socket, struct, subprocess, sys, and time.
index 516cfa1..f47a4ca 100644 (file)
@@ -78,6 +78,27 @@ def _print_log2_hist(vals, val_type):
         print(body % (low, high, val, stars,
                       _stars(val, val_max, stars)))
 
+def _print_linear_hist(vals, val_type):
+    """Print vals as an ASCII linear histogram, one row per index.
+
+    vals is a sequence of counts indexed by the value they were recorded
+    for, and val_type is the text used as the header of the value column.
+    Rows are printed for index 0 up to the last index holding a non-zero
+    count, so trailing empty slots are omitted. Nothing at all is printed
+    (not even the header) when every count is zero. The distribution
+    column is stars_max characters wide and is filled in by _stars().
+    """
+    global stars_max
+    idx_max = -1
+    val_max = 0
+
+    # one pass to find the last populated index and the largest count
+    for i, v in enumerate(vals):
+        if v > 0: idx_max = i
+        if v > val_max: val_max = v
+
+    if idx_max < 0:
+        # no non-zero counts: print neither header nor rows
+        return
+
+    header = "     %-13s : count     distribution"
+    body = "        %-10d : %-8d |%-*s|"
+    stars = stars_max
+
+    print(header % val_type)
+    for i in range(0, idx_max + 1):
+        val = vals[i]
+        print(body % (i, val, stars,
+                      _stars(val, val_max, stars)))
+
 
 def Table(bpf, map_id, map_fd, keytype, leaftype, **kwargs):
     """Table(bpf, map_id, map_fd, keytype, leaftype, **kwargs)
@@ -291,6 +312,43 @@ class TableBase(MutableMapping):
                 vals[k.value] = v.value
             _print_log2_hist(vals, val_type)
 
+    def print_linear_hist(self, val_type="value", section_header="Bucket ptr",
+            section_print_fn=None, bucket_fn=None):
+        """print_linear_hist(val_type="value", section_header="Bucket ptr",
+                           section_print_fn=None, bucket_fn=None)
+
+        Prints a table as a linear histogram. This is intended to span integer
+        ranges, eg, from 0 to 100. The val_type argument is optional, and is a
+        column header.  If the histogram has a secondary key, multiple tables
+        will print and section_header can be used as a header description for
+        each.  If section_print_fn is not None, it will be passed the bucket
+        value to format into a string as it sees fit. If bucket_fn is not None,
+        it will be used to produce a bucket value for the histogram keys.
+
+        When the key is a ctypes Structure, its first field selects the
+        bucket (one table per bucket) and its second field is the histogram
+        slot; otherwise the key's value is the slot. The slot list starts at
+        65 entries and grows as needed, so memory scales with the largest
+        slot seen.
+        """
+        min_slots = 65
+
+        def store(vals, slot, count):
+            # grow on demand: a fixed-size list raised IndexError for any
+            # slot past its end (eg, slot 100 with 65 entries)
+            if slot >= len(vals):
+                vals.extend([0] * (slot + 1 - len(vals)))
+            vals[slot] = count
+
+        if isinstance(self.Key(), ct.Structure):
+            tmp = {}
+            f1 = self.Key._fields_[0][0]
+            f2 = self.Key._fields_[1][0]
+            for k, v in self.items():
+                bucket = getattr(k, f1)
+                if bucket_fn:
+                    bucket = bucket_fn(bucket)
+                # allocate the slot list only the first time a bucket is seen
+                if bucket not in tmp:
+                    tmp[bucket] = [0] * min_slots
+                store(tmp[bucket], getattr(k, f2), v.value)
+            for bucket, vals in tmp.items():
+                if section_print_fn:
+                    print("\n%s = %s" % (section_header,
+                        section_print_fn(bucket)))
+                else:
+                    print("\n%s = %r" % (section_header, bucket))
+                _print_linear_hist(vals, val_type)
+        else:
+            vals = [0] * min_slots
+            for k, v in self.items():
+                store(vals, k.value, v.value)
+            _print_linear_hist(vals, val_type)
+
 
 class HashTable(TableBase):
     def __init__(self, *args, **kwargs):