AVX-512: Fix instruction match function
author Jin Kyu Song <jin.kyu.song@intel.com>
Thu, 22 Aug 2013 02:29:10 +0000 (19:29 -0700)
committer Cyrill Gorcunov <gorcunov@gmail.com>
Thu, 22 Aug 2013 15:37:37 +0000 (19:37 +0400)
When an instruction allows broadcasting, the memory element size is
different from the size of a normal memory operand.
This information is provided in the decoflags field, so the match
function should try to match those properties before it reports a failure.

Signed-off-by: Jin Kyu Song <jin.kyu.song@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
assemble.c
nasm.h
tables.h

diff --git a/assemble.c b/assemble.c
index 6054d4a..83971f6 100644 (file)
--- a/assemble.c
+++ b/assemble.c
@@ -1915,10 +1915,22 @@ static enum match_result find_match(const struct itemplate **tempp,
     enum match_result m, merr;
     opflags_t xsizeflags[MAX_OPERANDS];
     bool opsizemissing = false;
+    int8_t broadcast = -1;
     int i;
 
+    /* find the position of broadcasting operand */
     for (i = 0; i < instruction->operands; i++)
-        xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
+        if (instruction->oprs[i].decoflags & BRDCAST_MASK) {
+            broadcast = i;
+            break;
+        }
+
+    /* broadcasting uses a different data element size */
+    for (i = 0; i < instruction->operands; i++)
+        if (i == broadcast)
+            xsizeflags[i] = instruction->oprs[i].decoflags & BRSIZE_MASK;
+        else
+            xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
 
     merr = MERR_INVALOP;
 
@@ -1936,7 +1948,10 @@ static enum match_result find_match(const struct itemplate **tempp,
              * Missing operand size and a candidate for fuzzy matching...
              */
             for (i = 0; i < temp->operands; i++)
-                xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
+                if (i == broadcast)
+                    xsizeflags[i] |= temp->deco[i] & BRSIZE_MASK;
+                else
+                    xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
             opsizemissing = true;
         }
         if (m > merr)
@@ -1962,7 +1977,10 @@ static enum match_result find_match(const struct itemplate **tempp,
         if ((xsizeflags[i] & (xsizeflags[i]-1)))
             goto done;                /* No luck */
 
-        instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
+        if (i == broadcast)
+            instruction->oprs[i].decoflags |= xsizeflags[i];
+        else
+            instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
     }
 
     /* Try matching again... */
@@ -2107,7 +2125,16 @@ static enum match_result matches(const struct itemplate *itemp,
         } else if ((itemp->opd[i] & SIZE_MASK) &&
                    (itemp->opd[i] & SIZE_MASK) != (type & SIZE_MASK)) {
             if (type & SIZE_MASK) {
-                return MERR_INVALOP;
+                /*
+                 * when broadcasting, the element size depends on
+                 * the instruction type. decorator flag should match.
+                 */
+#define MATCH_BRSZ(bits) (((type & SIZE_MASK) == BITS##bits) &&             \
+                          ((itemp->deco[i] & BRSIZE_MASK) == BR_BITS##bits))
+                if (!((deco & BRDCAST_MASK) &&
+                      (MATCH_BRSZ(32) || MATCH_BRSZ(64)))) {
+                    return MERR_INVALOP;
+                }
             } else if (!is_class(REGISTER, type)) {
                 /*
                  * Note: we don't honor extrinsic operand sizes for registers,
diff --git a/nasm.h b/nasm.h
index 628ec43..e46b5ca 100644 (file)
--- a/nasm.h
+++ b/nasm.h
@@ -1038,6 +1038,7 @@ enum decorator_tokens {
  * ..........................1..... broadcast
  * .........................1...... static rounding
  * ........................1....... SAE
+ * ......................11........ broadcast element size
  */
 #define OP_GENVAL(val, bits, shift)     (((val) & ((UINT64_C(1) << (bits)) - 1)) << (shift))
 
@@ -1096,10 +1097,23 @@ enum decorator_tokens {
 #define SAE_MASK                OP_GENMASK(SAE_BITS, SAE_SHIFT)
 #define GEN_SAE(bit)            OP_GENBIT(bit, SAE_SHIFT)
 
+/*
+ * Broadcasting element size.
+ *
+ * Bits: 8 - 9
+ */
+#define BRSIZE_SHIFT            (8)
+#define BRSIZE_BITS             (2)
+#define BRSIZE_MASK             OP_GENMASK(BRSIZE_BITS, BRSIZE_SHIFT)
+#define GEN_BRSIZE(bit)         OP_GENBIT(bit, BRSIZE_SHIFT)
+
+#define BR_BITS32               GEN_BRSIZE(0)
+#define BR_BITS64               GEN_BRSIZE(1)
+
 #define MASK                    OPMASK_MASK             /* Opmask (k1 ~ 7) can be used */
 #define Z                       Z_MASK
-#define B32                     BRDCAST_MASK            /* {1to16} : load+op instruction can broadcast when it is reg-reg operation */
-#define B64                     BRDCAST_MASK            /* {1to8}  : There are two definitions just for conforming to SDM */
+#define B32                     (BRDCAST_MASK|BR_BITS32) /* {1to16} : broadcast 32b * 16 to zmm(512b) */ 
+#define B64                     (BRDCAST_MASK|BR_BITS64) /* {1to8}  : broadcast 64b *  8 to zmm(512b) */
 #define ER                      STATICRND_MASK          /* ER(Embedded Rounding) == Static rounding mode */
 #define SAE                     SAE_MASK                /* SAE(Suppress All Exception) */
 
diff --git a/tables.h b/tables.h
index d0db3b3..4b14566 100644 (file)
--- a/tables.h
+++ b/tables.h
@@ -62,7 +62,7 @@ extern const char * const nasm_insn_names[];
 extern const char * const nasm_reg_names[];
 /* regflags.c */
 typedef uint64_t opflags_t;
-typedef uint8_t  decoflags_t;
+typedef uint16_t  decoflags_t;
 extern const opflags_t nasm_reg_flags[];
 /* regvals.c */
 extern const int nasm_regvals[];