directives: split global directives from backend-specific ones
author H. Peter Anvin <hpa@zytor.com>
Mon, 13 Jul 2009 18:54:31 +0000 (14:54 -0400)
committer H. Peter Anvin <hpa@zytor.com>
Mon, 13 Jul 2009 18:57:48 +0000 (14:57 -0400)
Split out the global directives into a separate section, that allows
the switch() in the main code to be slightly faster.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
directives.dat
directives.pl [new file with mode: 0755]

index 3898821..b08d387 100644 (file)
 ;;
 ;; List of global NASM directives (including backend-specific ones)
 ;;
+
+; --- Global directives
 absolute
 bits
 common
 cpu
 debug
 default
-export
 extern
 float
 global
-group
-import
-library
 list
-map
-module
-org
-osabi
-safeseh
 section
 segment
-uppercase
 warning
+
+; --- Format-specific directives
+export                         ; outcoff, outobj
+group                          ; outobj
+import                         ; outobj
+library                                ; outrdf2
+map                            ; outbin
+module                         ; outrdf2
+org                            ; outbin
+osabi                          ; outelf
+safeseh                                ; outcoff
+uppercase                      ; outieee, outobj
diff --git a/directives.pl b/directives.pl
new file mode 100755 (executable)
index 0000000..d8bc6e8
--- /dev/null
@@ -0,0 +1,171 @@
+#!/usr/bin/perl
+## --------------------------------------------------------------------------
+##
+##   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+##   See the file AUTHORS included with the NASM distribution for
+##   the specific copyright holders.
+##
+##   Redistribution and use in source and binary forms, with or without
+##   modification, are permitted provided that the following
+##   conditions are met:
+##
+##   * Redistributions of source code must retain the above copyright
+##     notice, this list of conditions and the following disclaimer.
+##   * Redistributions in binary form must reproduce the above
+##     copyright notice, this list of conditions and the following
+##     disclaimer in the documentation and/or other materials provided
+##     with the distribution.
+##
+##     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+##     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+##     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+##     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+##     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+##     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+##     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+##     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+##     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+##     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+##     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+##     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+##     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# Generate a perfect hash for directive parsing
+#
+# Usage: directives.pl {h|c} directives.dat outfile
+#
+
+require 'phash.ph';     # presumably supplies the perfect-hash helpers called later
+
+my($output, $directives_dat, $outfile) = @ARGV;
+
+@directives = ();       # directive names, in the order they appear in the input
+# One directive per line; anything after the name must be a ';' or '#' comment.
+open(DD, '<', $directives_dat)
+    or die "$0: cannot open: ${directives_dat}: $!\n";
+while (defined(my $line = <DD>)) {
+    chomp $line;
+    if ($line =~ /^\s*([[:alnum:]]+)\s*(|[\;\#].*)$/) {
+        push(@directives, $1);
+    }
+}
+close(DD);
+
+if ($output eq 'h') {
+    # Header mode: write the directive enum and the lookup prototypes.
+    open(H, '>', $outfile)
+        or die "$0: cannot create: ${outfile}: $!\n";
+    print H "/*\n";
+    print H " * This file is generated from directives.dat\n";
+    print H " * by directives.pl; do not edit.\n";
+    print H " */\n";
+    print H "\n";
+    print H "#ifndef NASM_DIRECTIVES_H\n";
+    print H "#define NASM_DIRECTIVES_H\n";
+    print H "\n";
+    # D_NONE is enumerator 0; the directives follow in input-file order.
+    print H "enum directives {\n";
+    print H "    D_NONE";
+    foreach my $d (@directives) {
+        print H ",\n    D_\U$d";
+    }
+    print H "\n};\n\n";
+    printf H "extern const char * const directives[%d];\n",
+        scalar(@directives)+1;
+    print H "enum directives find_directive(const char *token);\n\n";
+    print H "#endif /* NASM_DIRECTIVES_H */\n";
+    close(H) or die "$0: error writing: ${outfile}: $!\n";
+} elsif ($output eq 'c') {
+    # C mode: map each directive to its zero-based index, rejecting repeats.
+    %directive = ();
+    $n = 0;
+    foreach my $d (@directives) {
+        if (exists($directive{$d})) {
+            die "$0: $directives_dat: duplicate directive: $d\n";
+        }
+        $directive{$d} = $n++;  # This is zero-based, unlike the enum!
+    }
+
+    # defined(@array) is fatal on Perl >= 5.22; an empty result means failure.
+    @hashinfo = gen_perfect_hash(\%directive);
+    if (!@hashinfo) {
+        die "$0: no hash found\n";
+    }
+
+    # Paranoia...
+    verify_hash_table(\%directive, \@hashinfo);
+
+    ($n, $sv, $g) = @hashinfo;
+    die if ($n & ($n-1));   # the generated "& (n-1)" masks need a power of two
+
+    open(C, '>', $outfile)
+        or die "$0: cannot create: ${outfile}: $!\n";
+
+    # Banner comment marking the output file as generated.
+    print C "/*\n",
+            " * This file is generated from directives.dat\n",
+            " * by directives.pl; do not edit.\n",
+            " */\n",
+            "\n";
+
+    # Headers included by the generated file, emitted in this order.
+    foreach my $header ('"compiler.h"', '<string.h>', '"nasm.h"',
+                        '"hashtbl.h"', '"directives.h"') {
+        print C "#include $header\n";
+    }
+    print C "\n";
+
+    # Name table: the first slot holds NULL, then one string per directive,
+    # so the array has one more entry than there are directives.
+    my $table_size = scalar(@directives) + 1;
+    print C "const char * const directives[$table_size] = {\n";
+    print C join(",\n", '    NULL', map { "    \"$_\"" } @directives);
+    print C "\n};\n\n";
+
+    print C "enum directives find_directive(const char *token)\n";
+    print C "{\n";
+
+    # Put a large value in unused slots.  This makes it extremely unlikely
+    # that any combination that involves unused slot will pass the range test.
+    # This speeds up rejection of unrecognized tokens, i.e. identifiers.
+    print C "#define UNUSED 16383\n";
+
+    # $g interleaves the two tables: even entries feed hash1, odd entries
+    # feed hash2; undefined entries are written out as UNUSED.
+    foreach my $half (0, 1) {
+        print C "    static const int16_t hash", $half+1, "[$n] = {\n";
+        for (my $i = 0; $i < $n; $i++) {
+            my $h = ${$g}[$i*2+$half];
+            print C "        ", defined($h) ? $h : 'UNUSED', ",\n";
+        }
+        print C "    };\n";
+    }
+
+    print C  "    uint32_t k1, k2;\n";
+    print C  "    uint64_t crc;\n";
+    # For correct overflow behavior, "ix" should be unsigned of the same
+    # width as the hash arrays.
+    print C  "    uint16_t ix;\n";
+    print C  "\n";
+    printf C "    crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
+        $$sv[0], $$sv[1];
+    print C  "    k1 = (uint32_t)crc;\n";
+    print C  "    k2 = (uint32_t)(crc >> 32);\n";
+    print C  "\n";
+    printf C "    ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+    printf C "    if (ix >= %d)\n", scalar(@directives);
+    print C  "        return D_NONE;\n";
+    print C  "\n";
+    print C  "    ix++;\n";    # Account for D_NONE
+    print C  "    if (nasm_stricmp(token, directives[ix]))\n";
+    print C  "        return D_NONE;\n";
+    print C  "\n";
+    print C  "    return ix;\n";
+    print C  "}\n";
+    close(C) or die "$0: error writing: ${outfile}: $!\n";
+} else {
+    die "Usage: $0 (h|c) directives.dat outfile\n";  # unknown or missing mode
+}