#!/usr/bin/env python3
"""Generate systemd hwdb files from usb.ids, pci.ids, sdio.ids and IEEE OUI lists.

The input files are parsed with pyparsing grammars and written out as
``20-*.hwdb`` files in the current working directory.

Per the original commit message, a full run is rather slow (about 1 m 45 s
on the author's laptop), which was considered acceptable for a tool that is
only run once per release, and the output was identical to that of
ids-update.pl for the source files committed at that time.
"""

import re
import sys
from pyparsing import (Word, White, Literal, Regex,
                       LineEnd, SkipTo,
                       ZeroOrMore, OneOrMore, Combine, Optional, Suppress,
                       stringEnd, pythonStyleComment)

EOL = LineEnd().suppress()
NUM1 = Word('0123456789abcdefABCDEF', exact=1)
NUM2 = Word('0123456789abcdefABCDEF', exact=2)
NUM3 = Word('0123456789abcdefABCDEF', exact=3)
NUM4 = Word('0123456789abcdefABCDEF', exact=4)
NUM6 = Word('0123456789abcdefABCDEF', exact=6)
TAB = White('\t', exact=1).suppress()
COMMENTLINE = pythonStyleComment + EOL
EMPTYLINE = LineEnd()

# Descriptions consisting only of one of these placeholders are not emitted
# by usb_classes(). Compiled once instead of at every call site.
_PLACEHOLDER_RE = re.compile(r'(\?|None|Unused)\s*$')


def text_eol(name):
    """Return an element matching the rest of the line, stored under *name*.

    The trailing end-of-line is consumed but suppressed from the results.
    """
    return Regex(r'[^\n]+')(name) + EOL
# Alternative kept from the original source (stricter character set):
# text_eol = lambda name: Word(printables + ' ' + '®üäßçõãİó ×²⁶´‐“\u200E\u200B')(name) + EOL


def klass_grammar():
    """Return the grammar for one ``C xx ...`` class group.

    A class line may be followed by tab-indented subclass lines, each of
    which may be followed by double-tab-indented protocol lines. Comment
    lines between them are accepted and dropped.
    """
    klass_line = Literal('C ').suppress() + NUM2('klass') + text_eol('text')
    subclass_line = TAB + NUM2('subclass') + text_eol('text')
    protocol_line = TAB + TAB + NUM2('protocol') + text_eol('name')
    subclass = (subclass_line('SUBCLASS') -
                ZeroOrMore(protocol_line('PROTOCOLS*')
                           ^ COMMENTLINE.suppress()))
    klass = (klass_line('KLASS') -
             ZeroOrMore(subclass('SUBCLASSES*')
                        ^ COMMENTLINE.suppress()))
    return klass


def usb_ids_grammar():
    """Return the grammar for usb.ids.

    Results expose ``VENDORS`` (each with ``VENDOR`` and ``VENDOR_DEV``) and
    ``CLASSES``. The other top-level sections (AT, HID, R, PHY, BIAS, HUT,
    L, VT, HCC) are parsed but suppressed.
    """
    vendor_line = NUM4('vendor') + text_eol('text')
    device_line = TAB + NUM4('device') + text_eol('text')
    vendor = (vendor_line('VENDOR') +
              ZeroOrMore(device_line('VENDOR_DEV*') ^ COMMENTLINE.suppress()))

    klass = klass_grammar()

    other_line = (Literal('AT ') ^ Literal('HID ') ^ Literal('R ')
                  ^ Literal('PHY ') ^ Literal('BIAS ') ^ Literal('HUT ')
                  ^ Literal('L ') ^ Literal('VT ') ^ Literal('HCC ')) + text_eol('text')
    other_group = (other_line - ZeroOrMore(TAB + text_eol('text')))

    commentgroup = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
    grammar = OneOrMore(vendor('VENDORS*') ^ klass('CLASSES*')
                        ^ other_group.suppress() ^ commentgroup) + stringEnd()

    # Tabs are significant in the input, so keep pyparsing from expanding them.
    grammar.parseWithTabs()
    return grammar


def pci_ids_grammar():
    """Return the grammar for pci.ids.

    Results expose ``VENDORS`` (each with ``VENDOR`` and ``DEVICES``; each
    device with ``SUBVENDORS``) and ``CLASSES``.
    """
    vendor_line = NUM4('vendor') + text_eol('text')
    device_line = TAB + NUM4('device') + text_eol('text')
    subvendor_line = TAB + TAB + NUM4('a') + White(' ') + NUM4('b') + text_eol('name')

    device = (device_line('DEVICE') +
              ZeroOrMore(subvendor_line('SUBVENDORS*') ^ COMMENTLINE.suppress()))
    vendor = (vendor_line('VENDOR') +
              ZeroOrMore(device('DEVICES*') ^ COMMENTLINE.suppress()))

    klass = klass_grammar()

    commentgroup = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
    grammar = OneOrMore(vendor('VENDORS*') ^ klass('CLASSES*')
                        ^ commentgroup) + stringEnd()

    # Tabs are significant in the input, so keep pyparsing from expanding them.
    grammar.parseWithTabs()
    return grammar


def sdio_ids_grammar():
    """Return the grammar for sdio.ids.

    Results expose ``VENDORS`` (each with ``VENDOR`` and ``DEVICES``) and
    ``CLASSES``.
    """
    vendor_line = NUM4('vendor') + text_eol('text')
    device_line = TAB + NUM4('device') + text_eol('text')
    vendor = (vendor_line('VENDOR') +
              ZeroOrMore(device_line('DEVICES*') ^ COMMENTLINE.suppress()))

    klass = klass_grammar()

    commentgroup = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
    grammar = OneOrMore(vendor('VENDORS*') ^ klass('CLASSES*') ^ commentgroup) + stringEnd()

    # Tabs are significant in the input, so keep pyparsing from expanding them.
    grammar.parseWithTabs()
    return grammar


def oui_grammar(type):
    """Return the grammar for one of the IEEE MAC address block listings.

    *type* selects the layout of the ``(base 16)`` line and must be
    ``'small'``, ``'medium'`` or ``'large'``. The parameter name shadows the
    builtin; it is kept so that keyword callers continue to work.

    Raises:
        ValueError: if *type* is not one of the three supported values.
    """
    prefix_line = (Combine(NUM2 - Suppress('-') - NUM2 - Suppress('-') - NUM2)('prefix')
                   - Literal('(hex)') - text_eol('text'))
    if type == 'small':
        vendor_line = (NUM3('start') - '000-' - NUM3('end') - 'FFF'
                       - Literal('(base 16)') - text_eol('text2'))
    elif type == 'medium':
        vendor_line = (NUM1('start') - '00000-' - NUM1('end') - 'FFFFF'
                       - Literal('(base 16)') - text_eol('text2'))
    elif type == 'large':
        vendor_line = (NUM6('start')
                       - Literal('(base 16)') - text_eol('text2'))
    else:
        # An assert would vanish under "python -O" and silently select the
        # 'large' layout for any unknown value.
        raise ValueError(f'unknown OUI registry type: {type!r}')

    extra_line = TAB - TAB - TAB - TAB - SkipTo(EOL)
    vendor = prefix_line + vendor_line + ZeroOrMore(extra_line) + Optional(EMPTYLINE)

    grammar = (Literal('OUI') + text_eol('header')
               + text_eol('header') + text_eol('header') + EMPTYLINE
               + OneOrMore(vendor('VENDORS*')) + stringEnd())

    # Tabs are significant in the input, so keep pyparsing from expanding them.
    grammar.parseWithTabs()
    return grammar


def header(file, *sources):
    """Write the hwdb file preamble naming the data *sources* to *file*.

    A single source is listed on the "Data imported from:" line itself;
    otherwise every source goes on its own comment line below it.
    """
    print('''\
# This file is part of systemd.
#
# Data imported from:{}{}'''.format(' ' if len(sources) == 1 else '\n# ',
                                   '\n# '.join(sources)),
          file=file)


def _write_entry(out, match, key, value):
    """Write one record to *out*: blank line, match pattern, one property."""
    print('', match, f' {key}={value}', sep='\n', file=out)


def _parse_file(grammar, path):
    """Parse the UTF-8 text file at *path* with *grammar*, closing it afterwards."""
    with open(path, encoding='utf-8') as source:
        return grammar.parseFile(source)


def usb_vendor_model(p):
    """Write 20-usb-vendor-model.hwdb from parsed usb.ids results *p*."""
    with open('20-usb-vendor-model.hwdb', 'wt', encoding='utf-8') as out:
        header(out, 'http://www.linux-usb.org/usb.ids')

        for vendor_group in p.VENDORS:
            vendor = vendor_group.VENDOR.vendor.upper()
            text = vendor_group.VENDOR.text.strip()
            _write_entry(out, f'usb:v{vendor}*', 'ID_VENDOR_FROM_DATABASE', text)

            for vendor_dev in vendor_group.VENDOR_DEV:
                device = vendor_dev.device.upper()
                text = vendor_dev.text.strip()
                _write_entry(out, f'usb:v{vendor}p{device}*',
                             'ID_MODEL_FROM_DATABASE', text)
        print(f'Wrote {out.name}')


def usb_classes(p):
    """Write 20-usb-classes.hwdb from parsed usb.ids results *p*.

    Entries whose description is only "?", "None" or "Unused" are skipped,
    as are class and subclass entries with id 00.
    """
    with open('20-usb-classes.hwdb', 'wt', encoding='utf-8') as out:
        header(out, 'http://www.linux-usb.org/usb.ids')

        for klass_group in p.CLASSES:
            klass = klass_group.KLASS.klass.upper()
            text = klass_group.KLASS.text.strip()

            if klass != '00' and not _PLACEHOLDER_RE.match(text):
                _write_entry(out, f'usb:v*p*d*dc{klass}*',
                             'ID_USB_CLASS_FROM_DATABASE', text)

            for subclass_group in klass_group.SUBCLASSES:
                subclass = subclass_group.subclass.upper()
                text = subclass_group.text.strip()
                if subclass != '00' and not _PLACEHOLDER_RE.match(text):
                    _write_entry(out, f'usb:v*p*d*dc{klass}dsc{subclass}*',
                                 'ID_USB_SUBCLASS_FROM_DATABASE', text)

                for protocol_group in subclass_group.PROTOCOLS:
                    protocol = protocol_group.protocol.upper()
                    text = protocol_group.name.strip()
                    # NOTE(review): this tests klass, not protocol, unlike the
                    # two checks above. Kept as-is because the commit claims
                    # output parity with ids-update.pl; confirm intent.
                    if klass != '00' and not _PLACEHOLDER_RE.match(text):
                        _write_entry(out,
                                     f'usb:v*p*d*dc{klass}dsc{subclass}dp{protocol}*',
                                     'ID_USB_PROTOCOL_FROM_DATABASE', text)
        print(f'Wrote {out.name}')


def pci_vendor_model(p):
    """Write 20-pci-vendor-model.hwdb from parsed pci.ids results *p*."""
    with open('20-pci-vendor-model.hwdb', 'wt', encoding='utf-8') as out:
        header(out, 'http://pci-ids.ucw.cz/v2.2/pci.ids')

        for vendor_group in p.VENDORS:
            vendor = vendor_group.VENDOR.vendor.upper()
            text = vendor_group.VENDOR.text.strip()
            _write_entry(out, f'pci:v0000{vendor}*', 'ID_VENDOR_FROM_DATABASE', text)

            for device_group in vendor_group.DEVICES:
                device = device_group.device.upper()
                text = device_group.text.strip()
                _write_entry(out, f'pci:v0000{vendor}d0000{device}*',
                             'ID_MODEL_FROM_DATABASE', text)

                for subvendor_group in device_group.SUBVENDORS:
                    sub_vendor = subvendor_group.a.upper()
                    sub_model = subvendor_group.b.upper()
                    sub_text = subvendor_group.name.strip()
                    # Avoid repeating the device name when the subsystem
                    # name merely extends it; only the remainder is kept.
                    if sub_text.startswith(text):
                        sub_text = sub_text[len(text):].lstrip()
                    if sub_text:
                        sub_text = f' ({sub_text})'
                    _write_entry(
                        out,
                        f'pci:v0000{vendor}d0000{device}sv0000{sub_vendor}sd0000{sub_model}*',
                        'ID_MODEL_FROM_DATABASE', f'{text}{sub_text}')
        print(f'Wrote {out.name}')


def pci_classes(p):
    """Write 20-pci-classes.hwdb from parsed pci.ids results *p*."""
    with open('20-pci-classes.hwdb', 'wt', encoding='utf-8') as out:
        header(out, 'http://pci-ids.ucw.cz/v2.2/pci.ids')

        for klass_group in p.CLASSES:
            klass = klass_group.KLASS.klass.upper()
            text = klass_group.KLASS.text.strip()
            _write_entry(out, f'pci:v*d*sv*sd*bc{klass}*',
                         'ID_PCI_CLASS_FROM_DATABASE', text)

            for subclass_group in klass_group.SUBCLASSES:
                subclass = subclass_group.subclass.upper()
                text = subclass_group.text.strip()
                _write_entry(out, f'pci:v*d*sv*sd*bc{klass}sc{subclass}*',
                             'ID_PCI_SUBCLASS_FROM_DATABASE', text)

                for protocol_group in subclass_group.PROTOCOLS:
                    protocol = protocol_group.protocol.upper()
                    text = protocol_group.name.strip()
                    _write_entry(out,
                                 f'pci:v*d*sv*sd*bc{klass}sc{subclass}i{protocol}*',
                                 'ID_PCI_INTERFACE_FROM_DATABASE', text)
        print(f'Wrote {out.name}')


def sdio_vendor_model(p):
    """Write 20-sdio-vendor-model.hwdb from parsed sdio.ids results *p*."""
    with open('20-sdio-vendor-model.hwdb', 'wt', encoding='utf-8') as out:
        header(out, 'hwdb/sdio.ids')

        for vendor_group in p.VENDORS:
            vendor = vendor_group.VENDOR.vendor.upper()
            text = vendor_group.VENDOR.text.strip()
            _write_entry(out, f'sdio:c*v{vendor}*', 'ID_VENDOR_FROM_DATABASE', text)

            for device_group in vendor_group.DEVICES:
                device = device_group.device.upper()
                text = device_group.text.strip()
                _write_entry(out, f'sdio:c*v{vendor}d{device}*',
                             'ID_MODEL_FROM_DATABASE', text)
        print(f'Wrote {out.name}')


def sdio_classes(p):
    """Write 20-sdio-classes.hwdb from parsed sdio.ids results *p*."""
    with open('20-sdio-classes.hwdb', 'wt', encoding='utf-8') as out:
        header(out, 'hwdb/sdio.ids')

        for klass_group in p.CLASSES:
            klass = klass_group.KLASS.klass.upper()
            text = klass_group.KLASS.text.strip()
            _write_entry(out, f'sdio:c{klass}v*d*', 'ID_SDIO_CLASS_FROM_DATABASE', text)
        print(f'Wrote {out.name}')


# MAC Address Block Large/Medium/Small
# Large  MA-L 24/24 bit (OUI)
# Medium MA-M 28/20 bit (OUI prefix owned by IEEE)
# Small  MA-S 36/12 bit (OUI prefix owned by IEEE)
def oui(p1, p2, p3):
    """Write 20-OUI.hwdb from three parsed OUI listings.

    Prefixes seen in *p1* and *p2* are remembered; entries of *p3* whose
    prefix was already seen are skipped. A mismatch between an entry's
    ``start`` and ``end`` fields is reported on stderr but still written.
    """
    with open('20-OUI.hwdb', 'wt', encoding='utf-8') as out:
        header(out,
               'https://services13.ieee.org/RST/standards-ra-web/rest/assignments/download/?registry=MA-L&format=txt',
               'https://services13.ieee.org/RST/standards-ra-web/rest/assignments/download/?registry=MA-M&format=txt',
               'https://services13.ieee.org/RST/standards-ra-web/rest/assignments/download/?registry=MA-S&format=txt')

        prefixes = set()

        for p, check in ((p1, False), (p2, False), (p3, True)):
            for vendor_group in p.VENDORS:
                prefix = vendor_group.prefix.upper()
                if check:
                    if prefix in prefixes:
                        continue
                else:
                    prefixes.add(prefix)
                start = vendor_group.start.upper()
                # Listings without an 'end' field yield an empty string here.
                end = vendor_group.end.upper()

                if end and start != end:
                    print(f'{prefix:} {start} != {end}', file=sys.stderr)
                text = vendor_group.text.strip()

                _write_entry(out, f'OUI:{prefix}{start if end else ""}*',
                             'ID_OUI_FROM_DATABASE', text)
        print(f'Wrote {out.name}')


def main():
    """Parse the id files in the current directory and write all hwdb files."""
    p = _parse_file(usb_ids_grammar(), 'usb.ids')
    usb_vendor_model(p)
    usb_classes(p)

    p = _parse_file(pci_ids_grammar(), 'pci.ids')
    pci_vendor_model(p)
    pci_classes(p)

    # sdio.ids has its own grammar; the PCI one was used here by mistake.
    p = _parse_file(sdio_ids_grammar(), 'sdio.ids')
    sdio_vendor_model(p)
    sdio_classes(p)

    p = _parse_file(oui_grammar('small'), 'ma-small.txt')
    p2 = _parse_file(oui_grammar('medium'), 'ma-medium.txt')
    p3 = _parse_file(oui_grammar('large'), 'ma-large.txt')

    oui(p, p2, p3)


if __name__ == '__main__':
    main()