hwdb: add a grammar-based generator for vendor/model and class tables
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Mon, 2 Oct 2017 11:19:22 +0000 (13:19 +0200)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Mon, 2 Oct 2017 11:19:22 +0000 (13:19 +0200)
This is rather slow (1 m 45 s on my laptop), but since it'd be only used
once per release, maybe this doesn't matter that much.

Output is identical to ids-update.pl with the set of source files committed in
the grandparent.

hwdb/ids_parser.py [new file with mode: 0755]

diff --git a/hwdb/ids_parser.py b/hwdb/ids_parser.py
new file mode 100755 (executable)
index 0000000..691b7fa
--- /dev/null
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+
+import re
+import sys
+from pyparsing import (Word, White, Literal, Regex,
+                       LineEnd, SkipTo,
+                       ZeroOrMore, OneOrMore, Combine, Optional, Suppress,
+                       stringEnd, pythonStyleComment)
+
+EOL = LineEnd().suppress()
+NUM1 = Word('0123456789abcdefABCDEF', exact=1)
+NUM2 = Word('0123456789abcdefABCDEF', exact=2)
+NUM3 = Word('0123456789abcdefABCDEF', exact=3)
+NUM4 = Word('0123456789abcdefABCDEF', exact=4)
+NUM6 = Word('0123456789abcdefABCDEF', exact=6)
+TAB = White('\t', exact=1).suppress()
+COMMENTLINE = pythonStyleComment + EOL
+EMPTYLINE = LineEnd()
+text_eol = lambda name: Regex(r'[^\n]+')(name) + EOL
+# text_eol = lambda name: Word(printables + ' ' + '®üäßçõãİó ×²⁶´‐“\u200E\u200B')(name) + EOL
+
+def klass_grammar():
+    klass_line = Literal('C ').suppress() + NUM2('klass') + text_eol('text')
+    subclass_line = TAB + NUM2('subclass') + text_eol('text')
+    protocol_line = TAB + TAB + NUM2('protocol') + text_eol('name')
+    subclass = (subclass_line('SUBCLASS') -
+                ZeroOrMore(protocol_line('PROTOCOLS*')
+                           ^ COMMENTLINE.suppress()))
+    klass = (klass_line('KLASS') -
+             ZeroOrMore(subclass('SUBCLASSES*')
+                        ^ COMMENTLINE.suppress()))
+    return klass
+
+def usb_ids_grammar():
+    vendor_line = NUM4('vendor') + text_eol('text')
+    device_line = TAB + NUM4('device') + text_eol('text')
+    vendor = (vendor_line('VENDOR') +
+             ZeroOrMore(device_line('VENDOR_DEV*') ^ COMMENTLINE.suppress()))
+
+    klass = klass_grammar()
+
+    other_line = (Literal('AT ') ^ Literal('HID ') ^ Literal('R ')
+                  ^ Literal('PHY ') ^ Literal('BIAS ') ^ Literal('HUT ')
+                  ^ Literal('L ') ^ Literal('VT ') ^ Literal('HCC ')) + text_eol('text')
+    other_group = (other_line - ZeroOrMore(TAB + text_eol('text')))
+
+    commentgroup = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
+    grammar = OneOrMore(vendor('VENDORS*') ^ klass('CLASSES*')
+                        ^ other_group.suppress() ^ commentgroup) + stringEnd()
+
+    grammar.parseWithTabs()
+    return grammar
+
+def pci_ids_grammar():
+    vendor_line = NUM4('vendor') + text_eol('text')
+    device_line = TAB + NUM4('device') + text_eol('text')
+    subvendor_line = TAB + TAB + NUM4('a') + White(' ') + NUM4('b') + text_eol('name')
+
+    device = (device_line('DEVICE') +
+              ZeroOrMore(subvendor_line('SUBVENDORS*') ^ COMMENTLINE.suppress()))
+    vendor = (vendor_line('VENDOR') +
+              ZeroOrMore(device('DEVICES*') ^ COMMENTLINE.suppress()))
+
+    klass = klass_grammar()
+
+    commentgroup = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
+    grammar = OneOrMore(vendor('VENDORS*') ^ klass('CLASSES*')
+                        ^ commentgroup) + stringEnd()
+
+    grammar.parseWithTabs()
+    return grammar
+
+def sdio_ids_grammar():
+    vendor_line = NUM4('vendor') + text_eol('text')
+    device_line = TAB + NUM4('device') + text_eol('text')
+    vendor = (vendor_line('VENDOR') +
+              ZeroOrMore(device_line('DEVICES*') ^ COMMENTLINE.suppress()))
+
+    klass = klass_grammar()
+
+    commentgroup = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
+    grammar = OneOrMore(vendor('VENDORS*') ^ klass('CLASSES*') ^ commentgroup) + stringEnd()
+
+    grammar.parseWithTabs()
+    return grammar
+
+def oui_grammar(type):
+    prefix_line = (Combine(NUM2 - Suppress('-') - NUM2 - Suppress('-') - NUM2)('prefix')
+                   - Literal('(hex)') -  text_eol('text'))
+    if type == 'small':
+        vendor_line = (NUM3('start') - '000-' - NUM3('end') - 'FFF'
+                       - Literal('(base 16)') - text_eol('text2'))
+    elif type == 'medium':
+        vendor_line = (NUM1('start') - '00000-' - NUM1('end') - 'FFFFF'
+                       - Literal('(base 16)') - text_eol('text2'))
+    else:
+        assert type == 'large'
+        vendor_line = (NUM6('start')
+                       - Literal('(base 16)') - text_eol('text2'))
+
+    extra_line = TAB - TAB - TAB - TAB - SkipTo(EOL)
+    vendor = prefix_line + vendor_line + ZeroOrMore(extra_line) + Optional(EMPTYLINE)
+
+    grammar = (Literal('OUI') + text_eol('header')
+               + text_eol('header') + text_eol('header') + EMPTYLINE
+               + OneOrMore(vendor('VENDORS*')) + stringEnd())
+
+    grammar.parseWithTabs()
+    return grammar
+
+
+def header(file, *sources):
+    print('''\
+# This file is part of systemd.
+#
+# Data imported from:{}{}'''.format(' ' if len(sources) == 1 else '\n#   ',
+                                    '\n#   '.join(sources)),
+          file=file)
+
+def usb_vendor_model(p):
+    with open('20-usb-vendor-model.hwdb', 'wt') as out:
+        header(out, 'http://www.linux-usb.org/usb.ids')
+
+        for vendor_group in p.VENDORS:
+            vendor = vendor_group.VENDOR.vendor.upper()
+            text = vendor_group.VENDOR.text.strip()
+            print(f'',
+                  f'usb:v{vendor}*',
+                  f' ID_VENDOR_FROM_DATABASE={text}', sep='\n', file=out)
+
+            for vendor_dev in vendor_group.VENDOR_DEV:
+                device = vendor_dev.device.upper()
+                text = vendor_dev.text.strip()
+                print(f'',
+                      f'usb:v{vendor}p{device}*',
+                      f' ID_MODEL_FROM_DATABASE={text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+def usb_classes(p):
+    with open('20-usb-classes.hwdb', 'wt') as out:
+        header(out, 'http://www.linux-usb.org/usb.ids')
+
+        for klass_group in p.CLASSES:
+            klass = klass_group.KLASS.klass.upper()
+            text = klass_group.KLASS.text.strip()
+
+            if klass != '00' and not re.match(r'(\?|None|Unused)\s*$', text):
+                print(f'',
+                      f'usb:v*p*d*dc{klass}*',
+                      f' ID_USB_CLASS_FROM_DATABASE={text}', sep='\n', file=out)
+
+            for subclass_group in klass_group.SUBCLASSES:
+                subclass = subclass_group.subclass.upper()
+                text = subclass_group.text.strip()
+                if subclass != '00' and not re.match(r'(\?|None|Unused)\s*$', text):
+                    print(f'',
+                          f'usb:v*p*d*dc{klass}dsc{subclass}*',
+                          f' ID_USB_SUBCLASS_FROM_DATABASE={text}', sep='\n', file=out)
+
+                for protocol_group in subclass_group.PROTOCOLS:
+                    protocol = protocol_group.protocol.upper()
+                    text = protocol_group.name.strip()
+                    if klass != '00' and not re.match(r'(\?|None|Unused)\s*$', text):
+                        print(f'',
+                              f'usb:v*p*d*dc{klass}dsc{subclass}dp{protocol}*',
+                              f' ID_USB_PROTOCOL_FROM_DATABASE={text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+def pci_vendor_model(p):
+    with open('20-pci-vendor-model.hwdb', 'wt') as out:
+        header(out, 'http://pci-ids.ucw.cz/v2.2/pci.ids')
+
+        for vendor_group in p.VENDORS:
+            vendor = vendor_group.VENDOR.vendor.upper()
+            text = vendor_group.VENDOR.text.strip()
+            print(f'',
+                  f'pci:v0000{vendor}*',
+                  f' ID_VENDOR_FROM_DATABASE={text}', sep='\n', file=out)
+
+            for device_group in vendor_group.DEVICES:
+                device = device_group.device.upper()
+                text = device_group.text.strip()
+                print(f'',
+                      f'pci:v0000{vendor}d0000{device}*',
+                      f' ID_MODEL_FROM_DATABASE={text}', sep='\n', file=out)
+
+                for subvendor_group in device_group.SUBVENDORS:
+                    sub_vendor = subvendor_group.a.upper()
+                    sub_model = subvendor_group.b.upper()
+                    sub_text = subvendor_group.name.strip()
+                    if sub_text.startswith(text):
+                        sub_text = sub_text[len(text):].lstrip()
+                    if sub_text:
+                        sub_text = f' ({sub_text})'
+                    print(f'',
+                          f'pci:v0000{vendor}d0000{device}sv0000{sub_vendor}sd0000{sub_model}*',
+                          f' ID_MODEL_FROM_DATABASE={text}{sub_text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+def pci_classes(p):
+    with open('20-pci-classes.hwdb', 'wt') as out:
+        header(out, 'http://pci-ids.ucw.cz/v2.2/pci.ids')
+
+        for klass_group in p.CLASSES:
+            klass = klass_group.KLASS.klass.upper()
+            text = klass_group.KLASS.text.strip()
+
+            print(f'',
+                  f'pci:v*d*sv*sd*bc{klass}*',
+                  f' ID_PCI_CLASS_FROM_DATABASE={text}', sep='\n', file=out)
+
+            for subclass_group in klass_group.SUBCLASSES:
+                subclass = subclass_group.subclass.upper()
+                text = subclass_group.text.strip()
+                print(f'',
+                      f'pci:v*d*sv*sd*bc{klass}sc{subclass}*',
+                      f' ID_PCI_SUBCLASS_FROM_DATABASE={text}', sep='\n', file=out)
+
+                for protocol_group in subclass_group.PROTOCOLS:
+                    protocol = protocol_group.protocol.upper()
+                    text = protocol_group.name.strip()
+                    print(f'',
+                          f'pci:v*d*sv*sd*bc{klass}sc{subclass}i{protocol}*',
+                          f' ID_PCI_INTERFACE_FROM_DATABASE={text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+def sdio_vendor_model(p):
+    with open('20-sdio-vendor-model.hwdb', 'wt') as out:
+        header(out, 'hwdb/sdio.ids')
+
+        for vendor_group in p.VENDORS:
+            vendor = vendor_group.VENDOR.vendor.upper()
+            text = vendor_group.VENDOR.text.strip()
+            print(f'',
+                  f'sdio:c*v{vendor}*',
+                  f' ID_VENDOR_FROM_DATABASE={text}', sep='\n', file=out)
+
+            for device_group in vendor_group.DEVICES:
+                device = device_group.device.upper()
+                text = device_group.text.strip()
+                print(f'',
+                      f'sdio:c*v{vendor}d{device}*',
+                      f' ID_MODEL_FROM_DATABASE={text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+def sdio_classes(p):
+    with open('20-sdio-classes.hwdb', 'wt') as out:
+        header(out, 'hwdb/sdio.ids')
+
+        for klass_group in p.CLASSES:
+            klass = klass_group.KLASS.klass.upper()
+            text = klass_group.KLASS.text.strip()
+
+            print(f'',
+                  f'sdio:c{klass}v*d*',
+                  f' ID_SDIO_CLASS_FROM_DATABASE={text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+# MAC Address Block Large/Medium/Small
+# Large  MA-L 24/24 bit (OUI)
+# Medium MA-M 28/20 bit (OUI prefix owned by IEEE)
+# Small  MA-S 36/12 bit (OUI prefix owned by IEEE)
+def oui(p1, p2, p3):
+    with open('20-OUI.hwdb', 'wt') as out:
+        header(out,
+               'https://services13.ieee.org/RST/standards-ra-web/rest/assignments/download/?registry=MA-L&format=txt',
+               'https://services13.ieee.org/RST/standards-ra-web/rest/assignments/download/?registry=MA-M&format=txt',
+               'https://services13.ieee.org/RST/standards-ra-web/rest/assignments/download/?registry=MA-S&format=txt')
+
+        prefixes = set()
+
+        for p, check in ((p1, False), (p2, False), (p3, True)):
+            for vendor_group in p.VENDORS:
+                prefix = vendor_group.prefix.upper()
+                if check:
+                    if prefix in prefixes:
+                        continue
+                else:
+                    prefixes.add(prefix)
+                start = vendor_group.start.upper()
+                end = vendor_group.end.upper()
+
+                if end and start != end:
+                    print(f'{prefix:} {start} != {end}', file=sys.stderr)
+                text = vendor_group.text.strip()
+
+                print(f'',
+                      f'OUI:{prefix}{start if end else ""}*',
+                      f' ID_OUI_FROM_DATABASE={text}', sep='\n', file=out)
+    print(f'Wrote {out.name}')
+
+if __name__ == '__main__':
+    p = usb_ids_grammar().parseFile(open('usb.ids'))
+    usb_vendor_model(p)
+    usb_classes(p)
+
+    p = pci_ids_grammar().parseFile(open('pci.ids'))
+    pci_vendor_model(p)
+    pci_classes(p)
+
+    p = pci_ids_grammar().parseFile(open('sdio.ids'))
+    sdio_vendor_model(p)
+    sdio_classes(p)
+
+    p = oui_grammar('small').parseFile(open('ma-small.txt'))
+    p2 = oui_grammar('medium').parseFile(open('ma-medium.txt'))
+    p3 = oui_grammar('large').parseFile(open('ma-large.txt'))
+
+    oui(p, p2, p3)