tracer/structure: add more tests and a benchmark
author: Stefan Sauer <ensonic@users.sf.net>
Fri, 16 Dec 2016 13:07:45 +0000 (14:07 +0100)
committer: Stefan Sauer <ensonic@users.sf.net>
Tue, 20 Dec 2016 07:24:57 +0000 (08:24 +0100)
tracer/tracer/structure.py
tracer/tracer/structure_perf.py [new file with mode: 0644]
tracer/tracer/structure_test.py

index f41e2e6..ecb91f1 100644 (file)
@@ -1,5 +1,14 @@
+import logging
 import re
 
+logger = logging.getLogger('structure')
+
+# Matches a backslash-escaped character whose backslash is not itself preceded
+# by a backslash; group 1 captures the escaped character.
+UNESCAPE = re.compile(r'(?<!\\)\\(.)')
+
+# Type names whose values are converted with int(). This has to be a
+# collection of whole names: joined into one string, `t in INT_TYPES` was a
+# substring test that also accepted fragments such as '', 't8u' or '16'.
+INT_TYPES = frozenset(
+    ("int", "uint", "int8", "uint8", "int16", "uint16", "int32", "uint32", "int64", "uint64")
+)
+
 class Structure(object):
     '''Gst Structure parser.'''
 
@@ -9,84 +18,77 @@ class Structure(object):
         self.types = {}
         self.values = {}
         self.pos = 0
-        self.valid = False
-        try:
-            self._parse(self.text)
-            self.valid = True
-        except ValueError:
-            pass
+        self._parse(self.text)
 
     def __repr__(self):
+        '''Return the text this structure was parsed from.'''
         return self.text
 
+    def _find_eos(self, s):
+        '''Return the length of the quoted string at the start of `s`.
+
+        `s` is the text right after an opening double quote. The result is
+        the offset just past the closing quote, i.e. the first double quote
+        that is not escaped. A quote is escaped when an odd number of
+        backslashes precedes it; an even number is a run of escaped
+        backslashes followed by the real terminator.
+
+        Raises ValueError when `s` holds no unescaped double quote.
+        '''
+        start = 0
+        while True:
+            # searching from an offset avoids re-slicing the string and keeps
+            # the characters before the quote available for the escape check
+            p = s.index('"', start)
+            backslashes = 0
+            # the `backslashes < p` bound stops at the start of the string, so
+            # a quote at offset 0 never wraps around to s[-1]
+            while backslashes < p and s[p - 1 - backslashes] == '\\':
+                backslashes += 1
+            if backslashes % 2 == 0:
+                return p + 1
+            start = p + 1
+
     def _parse(self, s):
         scan = True
+        #logger.debug("===: '%s'", s)
         # parse id
         p = s.find(',')
         if p == -1:
             p = s.index(';')
             scan = False
         self.name = s[:p]
-        s = s[(p + 2):]  # skip 'name, '
-        self.pos += p + 2
         # parse fields
         while scan:
+            s = s[(p + 2):]  # skip 'name, ' / 'value, '
+            self.pos += p + 2
+            #logger.debug("...: '%s'", s)
             p = s.index('=')
             k = s[:p]
-            s = s[(p + 1):]  # skip 'key='
-            self.pos += p + 1
-            p = s.index('(')
-            s = s[(p + 1):]  # skip '('
-            self.pos += p + 1
+            if not s[p + 1] == '(':
+                self.pos += p + 1
+                raise ValueError
+            s = s[(p + 2):]  # skip 'key=('
+            self.pos += p + 2
             p = s.index(')')
             t = s[:p]
             s = s[(p + 1):]  # skip 'type)'
             self.pos += p + 1
-            if t == 'structure':
-                p = s.index('"')
-                s = s[(p + 1):]  # skip '"'
-                self.pos += p + 1
-                # find next '"' without preceeding '\'
-                sub = s
-                sublen = 0
-                while True:
-                    p = sub.index('"')
-                    sublen += p + 1
-                    if sub[p - 1] != '\\':
-                        sub = None
-                        break;
-                    sub = sub[(p + 1):]
-                if not sub:
-                    sub = s[:(sublen - 1)]
-                    # unescape \., but not \\. (using a backref)
-                    # FIXME: try to combine
-                    # also:
-                    # unescape = re.compile('search')
-                    # unescape.sub('replacement', sub)
-                    sub = re.sub(r'\\\\', r'\\', sub)
-                    sub = re.sub(r'(?<!\\)\\(.)', r'\1', sub)
-                    sub = re.sub(r'(?<!\\)\\(.)', r'\1', sub)
-                    # recurse
-                    v = Structure(sub)
-                    if s[sublen] == ';':
-                        scan = False
-                    s = s[(sublen + 2):]
-                    self.pos += sublen + 2
-                else:
+
+            if s[0] == '"':
+                s = s[1:]  # skip '"'
+                self.pos += 1
+                p = self._find_eos(s)
+                if p == -1:
                     raise ValueError
+                v = s[:(p - 1)]
+                if s[p] == ';':
+                    scan = False
+                # unescape \., but not \\. (using a backref)
+                # need a reverse for re.escape()
+                v = v.replace('\\\\', '\\')
+                v = UNESCAPE.sub(r'\1', v)
             else:
                 p = s.find(',')
                 if p == -1:
                     p = s.index(';')
                     scan = False
                 v= s[:p]
-                s = s[(p + 2):]  # skip "value, "
-                self.pos += p + 2
-                if t == 'string' and v[0] == '"':
-                    v = v[1:-1]
-                elif t in ['int', 'uint', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64' ]:
-                    v = int(v)
+
+            if t == 'structure':
+                v = Structure(v)
+            elif t == 'string' and v[0] == '"':
+                v = v[1:-1]
+            elif t in INT_TYPES:
+                v = int(v)
             self.types[k] = t
             self.values[k] = v
-
-        self.valid = True
+        self.pos += p + 1
diff --git a/tracer/tracer/structure_perf.py b/tracer/tracer/structure_perf.py
new file mode 100644 (file)
index 0000000..3af5a29
--- /dev/null
@@ -0,0 +1,67 @@
+import timeit
+
+from structure import Structure
+from gi.repository import Gst
+Gst.init(None)
+
+PLAIN_STRUCTURE = r'thread-rusage, thread-id=(guint64)37268592, ts=(guint64)79416000, average-cpuload=(uint)1000, current-cpuload=(uint)1000, time=(guint64)79418045;'
+NESTED_STRUCTURE = r'latency.class, src=(structure)"scope\,\ type\=\(type\)gchararray\,\ related-to\=\(GstTracerValueScope\)GST_TRACER_VALUE_SCOPE_PAD\;", sink=(structure)"scope\,\ type\=\(type\)gchararray\,\ related-to\=\(GstTracerValueScope\)GST_TRACER_VALUE_SCOPE_PAD\;", time=(structure)"value\,\ type\=\(type\)guint64\,\ description\=\(string\)\"time\\\ it\\\ took\\\ for\\\ the\\\ buffer\\\ to\\\ go\\\ from\\\ src\\\ to\\\ sink\\\ ns\"\,\ flags\=\(GstTracerValueFlags\)GST_TRACER_VALUE_FLAGS_AGGREGATED\,\ min\=\(guint64\)0\,\ max\=\(guint64\)18446744073709551615\;";'
+
+# parsed once up front so that the nat_get_* benchmarks time only the accessor
+NAT_STRUCTURE = Structure(PLAIN_STRUCTURE)
+# the result of from_string() is indexed with [0] to obtain the object the
+# gi_get_* benchmarks call get_name()/get_value() on
+GI_STRUCTURE = Gst.Structure.from_string(PLAIN_STRUCTURE)[0]
+
+# native python impl
+
+def nat_parse_plain():
+    '''Benchmark payload: parse PLAIN_STRUCTURE with the python Structure.'''
+    s = Structure(PLAIN_STRUCTURE)
+
+def nat_parse_nested():
+    '''Benchmark payload: parse NESTED_STRUCTURE with the python Structure.'''
+    s = Structure(NESTED_STRUCTURE)
+
+def nat_get_name():
+    '''Benchmark payload: read the name of the pre-parsed NAT_STRUCTURE.'''
+    return NAT_STRUCTURE.name
+
+def nat_get_value():
+    '''Benchmark payload: look up 'thread-id' in the pre-parsed NAT_STRUCTURE.'''
+    return NAT_STRUCTURE.values['thread-id']
+
+# gstreamer impl via gi
+
+def gi_parse_plain():
+    '''Benchmark payload: parse PLAIN_STRUCTURE with Gst.Structure.from_string.'''
+    s = Gst.Structure.from_string(PLAIN_STRUCTURE)[0]
+
+def gi_parse_nested():
+    '''Benchmark payload: parse NESTED_STRUCTURE with Gst.Structure.from_string.'''
+    s = Gst.Structure.from_string(NESTED_STRUCTURE)[0]
+
+def gi_get_name():
+    '''Benchmark payload: call get_name() on the pre-parsed GI_STRUCTURE.'''
+    return GI_STRUCTURE.get_name()
+
+def gi_get_value():
+    '''Benchmark payload: call get_value('thread-id') on the pre-parsed GI_STRUCTURE.'''
+    return GI_STRUCTURE.get_value('thread-id')
+
+def perf(method, n, flavor):
+    '''Time `n` calls of a benchmark function and print the result.
+
+    method: name of a zero-argument function importable from __main__
+    n: number of calls to time
+    flavor: label printed in front of the measurement
+
+    Returns the elapsed time in seconds, so that callers binding the result
+    get a number instead of None.
+    '''
+    t = timeit.timeit(method + '()', 'from __main__ import ' + method, number=n)
+    # a run too short for the clock to resolve would otherwise divide by zero
+    rate = (n / t) if t else float('inf')
+    print("%6s: %lf s, (%lf calls/s)" % (flavor, t, rate))
+    return t
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--iterations', default=10000, type=int,
+                        help='number of iterations (default: 10000)')
+    args = parser.parse_args()
+    n = args.iterations
+
+    # every benchmark exists as a nat_<name> / gi_<name> pair; run each pair
+    # back to back so that the two timings are printed next to each other
+    for name in ('parse_plain', 'parse_nested', 'get_name', 'get_value'):
+        print(name + ":")
+        perf('nat_' + name, n, 'native')
+        perf('gi_' + name, n, 'gi')
index 9db6afe..ece7ec3 100644 (file)
@@ -1,7 +1,10 @@
+import logging
 import unittest
 
 from tracer.structure import Structure
 
+logging.basicConfig(level=logging.INFO)
+
 BAD_NAME = r'foo bar'
 BAD_KEY = r'foo, bar'
 BAD_TYPE1 = r'foo, bar=['
@@ -14,43 +17,48 @@ MISC_TYPES_STRUCTURE = r'foo, key1=(string)"value", key2=(int)5, key3=(boolean)t
 
 NESTED_STRUCTURE = r'foo, nested=(structure)"bar\,\ key1\=\(int\)0\,\ key2\=\(int\)5\;";'
 
+REGRESSIONS = [
+    r'query, thread-id=(guint64)139839438879824, ts=(guint64)220860464, pad-ix=(uint)8, element-ix=(uint)9, peer-pad-ix=(uint)9, peer-element-ix=(uint)8, name=(string)accept-caps, structure=(structure)"GstQueryAcceptCaps\,\ caps\=\(GstCaps\)\"audio/mpeg\\\,\\\ mpegversion\\\=\\\(int\\\)4\\\,\\\ framed\\\=\\\(boolean\\\)true\\\,\\\ stream-format\\\=\\\(string\\\)raw\\\,\\\ level\\\=\\\(string\\\)2\\\,\\\ base-profile\\\=\\\(string\\\)lc\\\,\\\ profile\\\=\\\(string\\\)lc\\\,\\\ codec_data\\\=\\\(buffer\\\)1210\\\,\\\ rate\\\=\\\(int\\\)44100\\\,\\\ channels\\\=\\\(int\\\)2\"\,\ result\=\(boolean\)false\;", have-res=(boolean)0, res=(boolean)0;',
+    r'message, thread-id=(guint64)139838900680560, ts=(guint64)1000451258, element-ix=(uint)2, name=(string)tag, structure=(structure)"GstMessageTag\,\ taglist\=\(taglist\)\"taglist\\\,\\\ datetime\\\=\\\(datetime\\\)2009-03-05T12:57:08Z\\\,\\\ private-qt-tag\\\=\\\(sample\\\)\\\{\\\ 00000019677373740000001164617461000000010000000030:None:R3N0U2VnbWVudCwgZmxhZ3M9KEdzdFNlZ21lbnRGbGFncylHU1RfU0VHTUVOVF9GTEFHX05PTkUsIHJhdGU9KGRvdWJsZSkxLCBhcHBsaWVkLXJhdGU9KGRvdWJsZSkxLCBmb3JtYXQ9KEdzdEZvcm1hdClHU1RfRk9STUFUX1RJTUUsIGJhc2U9KGd1aW50NjQpMCwgb2Zmc2V0PShndWludDY0KTAsIHN0YXJ0PShndWludDY0KTAsIHN0b3A9KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTUsIHRpbWU9KGd1aW50NjQpMCwgcG9zaXRpb249KGd1aW50NjQpMCwgZHVyYXRpb249KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTU7AA__:YXBwbGljYXRpb24veC1nc3QtcXQtZ3NzdC10YWcsIHN0eWxlPShzdHJpbmcpaXR1bmVzOwA_\\\,\\\ 0000001e6773746400000016646174610000000100000000313335353130:None:R3N0U2VnbWVudCwgZmxhZ3M9KEdzdFNlZ21lbnRGbGFncylHU1RfU0VHTUVOVF9GTEFHX05PTkUsIHJhdGU9KGRvdWJsZSkxLCBhcHBsaWVkLXJhdGU9KGRvdWJsZSkxLCBmb3JtYXQ9KEdzdEZvcm1hdClHU1RfRk9STUFUX1RJTUUsIGJhc2U9KGd1aW50NjQpMCwgb2Zmc2V0PShndWludDY0KTAsIHN0YXJ0PShndWludDY0KTAsIHN0b3A9KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTUsIHRpbWU9KGd1aW50NjQpMCwgcG9zaXRpb249KGd1aW50NjQpMCwgZHVyYXRpb249KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTU7AA__:YXBwbGljYXRpb24veC1nc3QtcXQtZ3N0ZC10YWcsIHN0eWxlPShzdHJpbmcpaXR1bmVzOwA_\\\,\\\ 0000003867737364000000306461746100000001000000004244354241453530354d4d313239353033343539373733353435370000000000:None:R3N0U2VnbWVudCwgZmxhZ3M9KEdzdFNlZ21lbnRGbGFncylHU1RfU0VHTUVOVF9GTEFHX05PTkUsIHJhdGU9KGRvdWJsZSkxLCBhcHBsaWVkLXJhdGU9KGRvdWJsZSkxLCBmb3JtYXQ9KEdzdEZvcm1hdClHU1RfRk9STUFUX1RJTUUsIGJhc2U9KGd1aW50NjQpMCwgb2Zmc2V0PShndWludDY0KTAsIHN0YXJ0PShndWludDY0KTAsIHN0b3A9KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTUsIHRpbWU9KGd1aW50NjQpMCwgcG9zaXRpb249KGd1aW50NjQpMCwgZHVyYXRpb249KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTU7AA__:YXBwbGljYXRpb24veC1nc3QtcXQtZ3NzZC10YWcsIHN0eWxlPShzdHJpbmcpaXR1bmVzOwA_\\\,\\\ 
0000009867737075000000906461746100000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:None:R3N0U2VnbWVudCwgZmxhZ3M9KEdzdFNlZ21lbnRGbGFncylHU1RfU0VHTUVOVF9GTEFHX05PTkUsIHJhdGU9KGRvdWJsZSkxLCBhcHBsaWVkLXJhdGU9KGRvdWJsZSkxLCBmb3JtYXQ9KEdzdEZvcm1hdClHU1RfRk9STUFUX1RJTUUsIGJhc2U9KGd1aW50NjQpMCwgb2Zmc2V0PShndWludDY0KTAsIHN0YXJ0PShndWludDY0KTAsIHN0b3A9KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTUsIHRpbWU9KGd1aW50NjQpMCwgcG9zaXRpb249KGd1aW50NjQpMCwgZHVyYXRpb249KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTU7AA__:YXBwbGljYXRpb24veC1nc3QtcXQtZ3NwdS10YWcsIHN0eWxlPShzdHJpbmcpaXR1bmVzOwA_\\\,\\\ 000000986773706d000000906461746100000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:None:R3N0U2VnbWVudCwgZmxhZ3M9KEdzdFNlZ21lbnRGbGFncylHU1RfU0VHTUVOVF9GTEFHX05PTkUsIHJhdGU9KGRvdWJsZSkxLCBhcHBsaWVkLXJhdGU9KGRvdWJsZSkxLCBmb3JtYXQ9KEdzdEZvcm1hdClHU1RfRk9STUFUX1RJTUUsIGJhc2U9KGd1aW50NjQpMCwgb2Zmc2V0PShndWludDY0KTAsIHN0YXJ0PShndWludDY0KTAsIHN0b3A9KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTUsIHRpbWU9KGd1aW50NjQpMCwgcG9zaXRpb249KGd1aW50NjQpMCwgZHVyYXRpb249KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTU7AA__:YXBwbGljYXRpb24veC1nc3QtcXQtZ3NwbS10YWcsIHN0eWxlPShzdHJpbmcpaXR1bmVzOwA_\\\,\\\ 
0000011867736868000001106461746100000001000000007631302e6c736361636865332e632e796f75747562652e636f6d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:None:R3N0U2VnbWVudCwgZmxhZ3M9KEdzdFNlZ21lbnRGbGFncylHU1RfU0VHTUVOVF9GTEFHX05PTkUsIHJhdGU9KGRvdWJsZSkxLCBhcHBsaWVkLXJhdGU9KGRvdWJsZSkxLCBmb3JtYXQ9KEdzdEZvcm1hdClHU1RfRk9STUFUX1RJTUUsIGJhc2U9KGd1aW50NjQpMCwgb2Zmc2V0PShndWludDY0KTAsIHN0YXJ0PShndWludDY0KTAsIHN0b3A9KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTUsIHRpbWU9KGd1aW50NjQpMCwgcG9zaXRpb249KGd1aW50NjQpMCwgZHVyYXRpb249KGd1aW50NjQpMTg0NDY3NDQwNzM3MDk1NTE2MTU7AA__:YXBwbGljYXRpb24veC1nc3QtcXQtZ3NoaC10YWcsIHN0eWxlPShzdHJpbmcpaXR1bmVzOwA_\\\ \\\}\\\,\\\ container-format\\\=\\\(string\\\)\\\"ISO\\\\\\\ MP4/M4A\\\"\\\;\"\;";',
+]
+
 class TestStructure(unittest.TestCase):
 
     def test_handles_bad_name(self):
-        structure = Structure(BAD_NAME)
-        self.assertFalse(structure.valid)
-        self.assertEquals(structure.pos, 0)
+        structure = None
+        with self.assertRaises(ValueError):
+            structure = Structure(BAD_NAME)
 
     def test_handles_bad_key(self):
-        structure = Structure(BAD_KEY)
-        self.assertFalse(structure.valid)
-        self.assertEquals(structure.pos, 5)
+        structure = None
+        with self.assertRaises(ValueError):
+            structure = Structure(BAD_KEY)
 
     def test_handles_bad_type1(self):
-        structure = Structure(BAD_TYPE1)
-        self.assertFalse(structure.valid)
-        self.assertEquals(structure.pos, 9)
+        structure = None
+        with self.assertRaises(ValueError):
+            structure = Structure(BAD_TYPE1)
 
     def test_handles_bad_type2(self):
-        structure = Structure(BAD_TYPE2)
-        self.assertFalse(structure.valid)
-        self.assertEquals(structure.pos, 10)
+        structure = None
+        with self.assertRaises(ValueError):
+            structure = Structure(BAD_TYPE2)
 
     def test_parses_empty_structure(self):
+        # parsing must not raise and .text must equal the input
         structure = Structure(EMPTY_STRUCTURE)
-        self.assertTrue(structure.valid)
+        self.assertEqual(structure.text, EMPTY_STRUCTURE)
 
     def test_parses_name_in_empty_structure(self):
+        # EMPTY_STRUCTURE is defined outside this hunk; presumably name only
         structure = Structure(EMPTY_STRUCTURE)
-        self.assertEquals(structure.name, 'foo')
+        self.assertEqual(structure.name, 'foo')
 
     def test_parses_single_value_structure(self):
+        # parsing must not raise and .text must equal the input
         structure = Structure(SINGLE_VALUE_STRUCTURE)
-        self.assertTrue(structure.valid)
+        self.assertEqual(structure.text, SINGLE_VALUE_STRUCTURE)
 
     def test_parses_name(self):
+        # the text in front of the first ',' ends up in .name
         structure = Structure(SINGLE_VALUE_STRUCTURE)
-        self.assertEquals(structure.name, 'foo')
+        self.assertEqual(structure.name, 'foo')
 
     def test_parses_key(self):
         structure = Structure(SINGLE_VALUE_STRUCTURE)
@@ -59,23 +67,25 @@ class TestStructure(unittest.TestCase):
 
     def test_parses_type(self):
+        # the text between '(' and ')' is stored per key in .types
         structure = Structure(SINGLE_VALUE_STRUCTURE)
-        self.assertEquals(structure.types['key'], 'string')
+        self.assertEqual(structure.types['key'], 'string')
 
     def test_parses_string_value(self):
+        # key1=(string)"value": the surrounding quotes are not part of the value
         structure = Structure(MISC_TYPES_STRUCTURE)
-        self.assertEquals(structure.values['key1'], 'value')
+        self.assertEqual(structure.values['key1'], 'value')
 
     def test_parses_int_value(self):
+        # key2=(int)5: integer types are stored as int, not as text
         structure = Structure(MISC_TYPES_STRUCTURE)
-        self.assertEquals(structure.values['key2'], 5)
+        self.assertEqual(structure.values['key2'], 5)
 
     def test_parses_nested_structure(self):
+        # a structure-typed field must parse without raising
         structure = Structure(NESTED_STRUCTURE)
-        self.assertTrue(structure.valid)
-        sub = structure.values['nested']
-        self.assertTrue(sub.valid)
+        self.assertEqual(structure.text, NESTED_STRUCTURE)
 
     def test_nested_structure_has_sub_structure(self):
+        # the nested value is itself parsed into a Structure instance
         structure = Structure(NESTED_STRUCTURE)
-        self.assertEquals(structure.types['nested'], 'structure')
+        self.assertEqual(structure.types['nested'], 'structure')
         self.assertIsInstance(structure.values['nested'], Structure)
+
+    def test_regressions(self):
+        # every entry of REGRESSIONS must parse without raising; .text is
+        # checked the same way the other parse tests check it
+        for s in REGRESSIONS:
+            structure = Structure(s)
+            self.assertEqual(structure.text, s)