[Arm64] ASIMD Shift instructions (#36552)
author Egor Chesakov <Egor.Chesakov@microsoft.com>
Thu, 21 May 2020 22:05:21 +0000 (15:05 -0700)
committer GitHub <noreply@github.com>
Thu, 21 May 2020 22:05:21 +0000 (15:05 -0700)
* sqrshl

* sqrshrn

* sqrshrn2

* sqrshrun

* sqrshrun2

* sqshl

* sqshlu

* sqshrn

* sqshrn2

* sqshrun

* sqshrun2

* srshl

* sshl

* uqrshl

* uqrshrn

* uqrshrn2

* uqshl

* uqshrn

* uqshrn2

* urshl

* ushl

src/coreclr/src/jit/codegenarm64.cpp
src/coreclr/src/jit/emitarm64.cpp
src/coreclr/src/jit/emitarm64.h
src/coreclr/src/jit/emitfmtsarm64.h
src/coreclr/src/jit/instr.cpp
src/coreclr/src/jit/instr.h
src/coreclr/src/jit/instrsarm64.h

index f6b6ee3..2392eb8 100644 (file)
@@ -4312,17 +4312,10 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
 
     instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
 
-    if (varTypeIsFloating(baseType))
-    {
-        GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg);
-    }
-    else
-    {
-        emitAttr attr = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi) ? EA_16BYTE : EA_8BYTE;
-        insOpts  opt  = genGetSimdInsOpt(attr, baseType);
+    emitAttr attr = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi) ? EA_16BYTE : EA_8BYTE;
+    insOpts  opt  = genGetSimdInsOpt(attr, baseType);
 
-        GetEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
-    }
+    GetEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
 
     genProduceReg(simdNode);
 }
@@ -4362,43 +4355,39 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
     instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
     assert((ins == INS_fcvtn) || (ins == INS_xtn));
 
-    if (ins == INS_fcvtn)
+    instruction ins2 = (ins == INS_fcvtn) ? INS_fcvtn2 : INS_xtn2;
+
+    insOpts opt  = INS_OPTS_NONE;
+    insOpts opt2 = INS_OPTS_NONE;
+
+    // This is not the same as genGetSimdInsOpt()
+    // Basetype is the source operand type
+    // However encoding is based on the destination operand type which is 1/2 the basetype.
+    switch (baseType)
     {
-        GetEmitter()->emitIns_R_R(INS_fcvtn, EA_8BYTE, targetReg, op1Reg);
-        GetEmitter()->emitIns_R_R(INS_fcvtn2, EA_8BYTE, targetReg, op2Reg);
+        case TYP_ULONG:
+        case TYP_LONG:
+        case TYP_DOUBLE:
+            opt  = INS_OPTS_2S;
+            opt2 = INS_OPTS_4S;
+            break;
+        case TYP_UINT:
+        case TYP_INT:
+            opt  = INS_OPTS_4H;
+            opt2 = INS_OPTS_8H;
+            break;
+        case TYP_USHORT:
+        case TYP_SHORT:
+            opt  = INS_OPTS_8B;
+            opt2 = INS_OPTS_16B;
+            break;
+        default:
+            assert(!"Unsupported narrowing element type");
+            unreached();
     }
-    else
-    {
-        insOpts opt  = INS_OPTS_NONE;
-        insOpts opt2 = INS_OPTS_NONE;
 
-        // This is not the same as genGetSimdInsOpt()
-        // Basetype is the soure operand type
-        // However encoding is based on the destination operand type which is 1/2 the basetype.
-        switch (baseType)
-        {
-            case TYP_ULONG:
-            case TYP_LONG:
-                opt  = INS_OPTS_2S;
-                opt2 = INS_OPTS_4S;
-                break;
-            case TYP_UINT:
-            case TYP_INT:
-                opt  = INS_OPTS_4H;
-                opt2 = INS_OPTS_8H;
-                break;
-            case TYP_USHORT:
-            case TYP_SHORT:
-                opt  = INS_OPTS_8B;
-                opt2 = INS_OPTS_16B;
-                break;
-            default:
-                assert(!"Unsupported narrowing element type");
-                unreached();
-        }
-        GetEmitter()->emitIns_R_R(INS_xtn, EA_8BYTE, targetReg, op1Reg, opt);
-        GetEmitter()->emitIns_R_R(INS_xtn2, EA_16BYTE, targetReg, op2Reg, opt2);
-    }
+    GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg, opt);
+    GetEmitter()->emitIns_R_R(ins2, EA_16BYTE, targetReg, op2Reg, opt2);
 
     genProduceReg(simdNode);
 }
@@ -5463,35 +5452,35 @@ void CodeGen::genArm64EmitterUnitTests()
 
     // tbl Vd, {Vt}, Vm
     theEmitter->emitIns_R_R_R(INS_tbl, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbl, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbl, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbx Vd, {Vt}, Vm
     theEmitter->emitIns_R_R_R(INS_tbx, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbx, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbx, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbl Vd, {Vt, Vt2}, Vm
     theEmitter->emitIns_R_R_R(INS_tbl_2regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbl_2regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbl_2regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbx Vd, {Vt, Vt2}, Vm
     theEmitter->emitIns_R_R_R(INS_tbx_2regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbx_2regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbx_2regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbl Vd, {Vt, Vt2, Vt3}, Vm
     theEmitter->emitIns_R_R_R(INS_tbl_3regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbl_3regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbl_3regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbx Vd, {Vt, Vt2, Vt3}, Vm
     theEmitter->emitIns_R_R_R(INS_tbx_3regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbx_3regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbx_3regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbl Vd, {Vt, Vt2, Vt3, Vt4}, Vm
     theEmitter->emitIns_R_R_R(INS_tbl_4regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbl_4regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbl_4regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
     // tbx Vd, {Vt, Vt2, Vt3, Vt4}, Vm
     theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B);
 
 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
 
@@ -7938,17 +7927,18 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R(INS_ursqrte, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_2S);
     theEmitter->emitIns_R_R(INS_ursqrte, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_4S);
 
-    // INS_fcvtl
-    theEmitter->emitIns_R_R(INS_fcvtl, EA_4BYTE, REG_V0, REG_V1);
+    // fcvtl{2} vector
+    theEmitter->emitIns_R_R(INS_fcvtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_4H);
+    theEmitter->emitIns_R_R(INS_fcvtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_8H);
+    theEmitter->emitIns_R_R(INS_fcvtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+    theEmitter->emitIns_R_R(INS_fcvtl2, EA_16BYTE, REG_V5, REG_V6, INS_OPTS_4S);
 
-    // INS_fcvtl2
-    theEmitter->emitIns_R_R(INS_fcvtl2, EA_4BYTE, REG_V0, REG_V1);
+    // fcvtn{2} vector
+    theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_4H);
+    theEmitter->emitIns_R_R(INS_fcvtn2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_8H);
+    theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+    theEmitter->emitIns_R_R(INS_fcvtn2, EA_16BYTE, REG_V5, REG_V6, INS_OPTS_4S);
 
-    // INS_fcvtn
-    theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V0, REG_V1);
-
-    // INS_fcvtn2
-    theEmitter->emitIns_R_R(INS_fcvtn2, EA_8BYTE, REG_V0, REG_V1);
 #endif
 
 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
@@ -8188,196 +8178,200 @@ void CodeGen::genArm64EmitterUnitTests()
     // R_R_I  vector operations, one dest, one source reg, one immed
     //
 
+    // Some of the test cases below might appear redundant since they emit same combinations of instruction x size x
+    // vector arrangements. However, these are added to verify that the split constant encoding works with both small
+    // and large constants.
+
     genDefineTempLabel(genCreateTempLabel());
 
-    // 'sshr' scalar
+    // sshr scalar
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'sshr' vector
+    // sshr vector
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'ssra' scalar
+    // ssra scalar
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'ssra' vector
+    // ssra vector
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'srshr' scalar
+    // srshr scalar
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'srshr' vector
+    // srshr vector
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'srsra' scalar
+    // srsra scalar
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'srsra' vector
+    // srsra vector
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'shl' scalar
-    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
+    // shl scalar
+    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 0);
     theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
     theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
 
-    // 'shl' vector
-    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    // shl vector
+    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 0, INS_OPTS_8B);
     theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
-    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 8, INS_OPTS_4H);
     theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
-    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 16, INS_OPTS_2S);
     theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
-    theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 32, INS_OPTS_2D);
     theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
 
-    // 'ushr' scalar
+    // ushr scalar
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'ushr' vector
+    // ushr vector
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'usra' scalar
+    // usra scalar
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'usra' vector
+    // usra vector
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'urshr' scalar
+    // urshr scalar
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'urshr' vector
+    // urshr vector
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'ursra' scalar
+    // ursra scalar
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'srsra' vector
+    // ursra vector
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'sri' scalar
+    // sri scalar
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
-    theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
+    theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 64);
 
-    // 'sri' vector
+    // sri vector
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
     theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
-    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 64, INS_OPTS_2D);
 
-    // 'sli' scalar
-    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
+    // sli scalar
+    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 0);
     theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
     theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
     theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
     theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
 
-    // 'sli' vector
-    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    // sli vector
+    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 0, INS_OPTS_8B);
     theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
-    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 8, INS_OPTS_4H);
     theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
-    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 16, INS_OPTS_2S);
     theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
-    theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+    theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 32, INS_OPTS_2D);
     theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
 
-    // 'sshll' vector
+    // sshll{2} vector
     theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
     theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
@@ -8385,7 +8379,7 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
     theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
 
-    // 'ushll' vector
+    // ushll{2} vector
     theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
     theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
@@ -8393,23 +8387,23 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
     theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
 
-    // 'shrn' vector
+    // shrn{2} vector
     theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
 
-    // 'rshrn' vector
+    // rshrn{2} vector
     theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
-    theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 8, INS_OPTS_16B);
     theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
-    theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 16, INS_OPTS_8H);
     theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
-    theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 32, INS_OPTS_4S);
 
-    // 'sxtl' vector
+    // sxtl{2} vector
     theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
     theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
     theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
@@ -8417,7 +8411,7 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
     theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
 
-    // 'uxtl' vector
+    // uxtl{2} vector
     theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
     theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
     theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
@@ -8425,6 +8419,195 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
     theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
 
+    // sqrshrn scalar
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_1BYTE, REG_V0, REG_V1, 1, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_1BYTE, REG_V2, REG_V3, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_2BYTE, REG_V4, REG_V5, 9, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_2BYTE, REG_V6, REG_V7, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_4BYTE, REG_V8, REG_V9, 17, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_4BYTE, REG_V10, REG_V11, 32, INS_OPTS_NONE);
+
+    // sqrshrn{2} vector
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_8BYTE, REG_V2, REG_V3, 8, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn2, EA_16BYTE, REG_V4, REG_V5, 1, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn2, EA_16BYTE, REG_V6, REG_V7, 8, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_8BYTE, REG_V8, REG_V9, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_8BYTE, REG_V10, REG_V11, 16, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn2, EA_16BYTE, REG_V12, REG_V13, 9, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn2, EA_16BYTE, REG_V14, REG_V15, 16, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_8BYTE, REG_V16, REG_V17, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn, EA_8BYTE, REG_V18, REG_V18, 32, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sqrshrn2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S);
+
+    // sqrshrun scalar
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_1BYTE, REG_V0, REG_V1, 1, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_1BYTE, REG_V0, REG_V1, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_2BYTE, REG_V2, REG_V3, 9, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_2BYTE, REG_V2, REG_V3, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_4BYTE, REG_V4, REG_V5, 17, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_4BYTE, REG_V4, REG_V5, 32, INS_OPTS_NONE);
+
+    // sqrshrun{2} vector
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_8BYTE, REG_V2, REG_V3, 8, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun2, EA_16BYTE, REG_V4, REG_V5, 1, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun2, EA_16BYTE, REG_V6, REG_V7, 8, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_8BYTE, REG_V8, REG_V9, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_8BYTE, REG_V10, REG_V11, 16, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun2, EA_16BYTE, REG_V12, REG_V13, 9, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun2, EA_16BYTE, REG_V14, REG_V15, 16, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_8BYTE, REG_V16, REG_V17, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun, EA_8BYTE, REG_V18, REG_V18, 32, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sqrshrun2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S);
+
+    // sqshl scalar
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_1BYTE, REG_V0, REG_V1, 0, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_1BYTE, REG_V2, REG_V3, 7, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_2BYTE, REG_V4, REG_V5, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_2BYTE, REG_V6, REG_V7, 15, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_4BYTE, REG_V8, REG_V9, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_4BYTE, REG_V10, REG_V11, 31, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_8BYTE, REG_V12, REG_V13, 32, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_8BYTE, REG_V14, REG_V15, 63, INS_OPTS_NONE);
+
+    // sqshl vector
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sqshl, EA_16BYTE, REG_V12, REG_V13, 63, INS_OPTS_2D);
+
+    // sqshlu scalar
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_1BYTE, REG_V0, REG_V1, 0, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_1BYTE, REG_V2, REG_V3, 7, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_2BYTE, REG_V4, REG_V5, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_2BYTE, REG_V6, REG_V7, 15, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_4BYTE, REG_V8, REG_V9, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_4BYTE, REG_V10, REG_V11, 31, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_8BYTE, REG_V12, REG_V13, 32, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_8BYTE, REG_V14, REG_V15, 63, INS_OPTS_NONE);
+
+    // sqshlu vector
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sqshlu, EA_16BYTE, REG_V12, REG_V13, 63, INS_OPTS_2D);
+
+    // sqshrn scalar
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_1BYTE, REG_V0, REG_V1, 1, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_1BYTE, REG_V2, REG_V3, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_2BYTE, REG_V4, REG_V5, 9, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_2BYTE, REG_V6, REG_V7, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_4BYTE, REG_V8, REG_V9, 17, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_4BYTE, REG_V10, REG_V11, 32, INS_OPTS_NONE);
+
+    // sqshrn{2} vector
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_8BYTE, REG_V2, REG_V3, 8, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqshrn2, EA_16BYTE, REG_V4, REG_V5, 1, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqshrn2, EA_16BYTE, REG_V6, REG_V7, 8, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_8BYTE, REG_V8, REG_V9, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_8BYTE, REG_V10, REG_V11, 16, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqshrn2, EA_16BYTE, REG_V12, REG_V13, 9, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqshrn2, EA_16BYTE, REG_V14, REG_V15, 16, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_8BYTE, REG_V16, REG_V17, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqshrn, EA_8BYTE, REG_V18, REG_V18, 32, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqshrn2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sqshrn2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S);
+
+    // sqshrun scalar
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_1BYTE, REG_V0, REG_V1, 1, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_1BYTE, REG_V2, REG_V3, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_2BYTE, REG_V4, REG_V5, 9, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_2BYTE, REG_V6, REG_V7, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_4BYTE, REG_V8, REG_V9, 17, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_4BYTE, REG_V10, REG_V11, 32, INS_OPTS_NONE);
+
+    // sqshrun{2} vector
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_8BYTE, REG_V2, REG_V3, 8, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_sqshrun2, EA_16BYTE, REG_V4, REG_V5, 1, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqshrun2, EA_16BYTE, REG_V6, REG_V7, 8, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_8BYTE, REG_V8, REG_V9, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_8BYTE, REG_V10, REG_V11, 16, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_sqshrun2, EA_16BYTE, REG_V12, REG_V13, 9, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqshrun2, EA_16BYTE, REG_V14, REG_V15, 16, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_8BYTE, REG_V16, REG_V17, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqshrun, EA_8BYTE, REG_V18, REG_V18, 32, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_sqshrun2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_sqshrun2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S);
+
+    // uqrshrn scalar
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_1BYTE, REG_V0, REG_V1, 1, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_1BYTE, REG_V2, REG_V3, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_2BYTE, REG_V4, REG_V5, 9, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_2BYTE, REG_V6, REG_V7, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_4BYTE, REG_V8, REG_V9, 17, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_4BYTE, REG_V10, REG_V11, 32, INS_OPTS_NONE);
+
+    // uqrshrn{2} vector
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_8BYTE, REG_V2, REG_V3, 8, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn2, EA_16BYTE, REG_V4, REG_V5, 1, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn2, EA_16BYTE, REG_V6, REG_V7, 8, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_8BYTE, REG_V8, REG_V9, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_8BYTE, REG_V10, REG_V11, 16, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn2, EA_16BYTE, REG_V12, REG_V13, 9, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn2, EA_16BYTE, REG_V14, REG_V15, 16, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_8BYTE, REG_V16, REG_V17, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn, EA_8BYTE, REG_V18, REG_V18, 32, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_uqrshrn2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S);
+
+    // uqshl scalar
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_1BYTE, REG_V0, REG_V1, 0, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_1BYTE, REG_V2, REG_V3, 7, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_2BYTE, REG_V4, REG_V5, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_2BYTE, REG_V6, REG_V7, 15, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_4BYTE, REG_V8, REG_V9, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_4BYTE, REG_V10, REG_V11, 31, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_8BYTE, REG_V12, REG_V13, 32, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_8BYTE, REG_V14, REG_V15, 63, INS_OPTS_NONE);
+
+    // uqshl vector
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_uqshl, EA_16BYTE, REG_V12, REG_V13, 63, INS_OPTS_2D);
+
+    // uqshrn scalar
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_1BYTE, REG_V0, REG_V1, 1, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_1BYTE, REG_V2, REG_V3, 8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_2BYTE, REG_V4, REG_V5, 9, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_2BYTE, REG_V6, REG_V7, 16, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_4BYTE, REG_V8, REG_V9, 17, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_4BYTE, REG_V10, REG_V11, 32, INS_OPTS_NONE);
+
+    // uqshrn{2} vector
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_8BYTE, REG_V2, REG_V3, 8, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V4, REG_V5, 1, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V6, REG_V7, 8, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_8BYTE, REG_V8, REG_V9, 9, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_8BYTE, REG_V10, REG_V11, 16, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V12, REG_V13, 9, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V14, REG_V15, 16, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_8BYTE, REG_V16, REG_V17, 17, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_uqshrn, EA_8BYTE, REG_V18, REG_V18, 32, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S);
+
 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
 
 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
@@ -8738,6 +8921,54 @@ void CodeGen::genArm64EmitterUnitTests()
 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
 
 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+    // srshl scalar
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+
+    // srshl vector
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_srshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
+    // sshl scalar
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+
+    // sshl vector
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_sshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
+    // urshl scalar
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+
+    // urshl vector
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_urshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
+    // ushl scalar
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+
+    // ushl vector
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_ushl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
     // addhn vector
     theEmitter->emitIns_R_R_R(INS_addhn, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
     theEmitter->emitIns_R_R_R(INS_addhn, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
@@ -8838,6 +9069,36 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R_R(INS_sqadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
     theEmitter->emitIns_R_R_R(INS_sqadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
 
+    // sqrshl scalar
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+    // sqrshl vector
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_sqrshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
+    // sqshl scalar
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+    // sqshl vector
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_sqshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
     // sqsub scalar
     theEmitter->emitIns_R_R_R(INS_sqsub, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
     theEmitter->emitIns_R_R_R(INS_sqsub, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
@@ -8960,6 +9221,36 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_R_R(INS_uqadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
     theEmitter->emitIns_R_R_R(INS_uqadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
 
+    // uqrshl scalar
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+    // uqrshl vector
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_uqrshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
+    // uqshl scalar
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+    // uqshl vector
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+    theEmitter->emitIns_R_R_R(INS_uqshl, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D);
+
     // uqsub scalar
     theEmitter->emitIns_R_R_R(INS_uqsub, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
     theEmitter->emitIns_R_R_R(INS_uqsub, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
index 5780ffb..7df6edf 100644 (file)
@@ -604,20 +604,23 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             break;
 
         case IF_DV_2N: // DV_2N   .........iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - scalar)
-            assert(id->idOpSize() == EA_8BYTE);
+            ins      = id->idIns();
+            datasize = id->idOpSize();
             assert(insOptsNone(id->idInsOpt()));
             assert(isVectorRegister(id->idReg1()));
             assert(isVectorRegister(id->idReg2()));
-            assert(isValidImmShift(emitGetInsSC(id), EA_8BYTE));
+            assert(isValidVectorShiftAmount(emitGetInsSC(id), datasize, emitInsIsVectorRightShift(ins)));
             break;
 
         case IF_DV_2O: // DV_2O   .Q.......iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - vector)
-            assert(isValidVectorDatasize(id->idOpSize()));
-            assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+            ins      = id->idIns();
+            datasize = id->idOpSize();
+            elemsize = optGetElemsize(id->idInsOpt());
+            assert(isValidVectorDatasize(datasize));
+            assert(isValidArrangement(datasize, id->idInsOpt()));
             assert(isVectorRegister(id->idReg1()));
             assert(isVectorRegister(id->idReg2()));
-            elemsize = optGetElemsize(id->idInsOpt());
-            assert(isValidImmShift(emitGetInsSC(id), elemsize));
+            assert(isValidVectorShiftAmount(emitGetInsSC(id), elemsize, emitInsIsVectorRightShift(ins)));
             break;
 
         case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[]  (umov/smov    - to general)
@@ -1376,13 +1379,13 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
     // clang-format off
     const static insFormat insFormats[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                ) fmt,
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            ) fmt,
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        ) fmt,
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) fmt,
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) fmt,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) fmt,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
+        #define INST1(id, nm, info, fmt, e1                                ) fmt,
+        #define INST2(id, nm, info, fmt, e1, e2                            ) fmt,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) fmt,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) fmt,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) fmt,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) fmt,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
         #include "instrs.h"
     };
     // clang-format on
@@ -1393,76 +1396,81 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
     return insFormats[ins];
 }
 
-// INST_FP is 1
-#define LD 2
-#define ST 4
-#define CMP 8
+#define LD 1  // instInfo flag tested by emitInsIsLoad and emitInsIsLoadOrStore
+#define ST 2  // instInfo flag tested by emitInsIsStore and emitInsIsLoadOrStore
+#define CMP 4 // instInfo flag tested by emitInsIsCompare
+#define RSH 8 // instInfo flag tested by emitInsIsVectorRightShift
 
 // clang-format off
 /*static*/ const BYTE CodeGenInterface::instInfo[] =
 {
-    #define INST1(id, nm, fp, ldst, fmt, e1                                ) ldst | INST_FP*fp,
-    #define INST2(id, nm, fp, ldst, fmt, e1, e2                            ) ldst | INST_FP*fp,
-    #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        ) ldst | INST_FP*fp,
-    #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) ldst | INST_FP*fp,
-    #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) ldst | INST_FP*fp,
-    #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) ldst | INST_FP*fp,
-    #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
+    #define INST1(id, nm, info, fmt, e1                                ) info,
+    #define INST2(id, nm, info, fmt, e1, e2                            ) info,
+    #define INST3(id, nm, info, fmt, e1, e2, e3                        ) info,
+    #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) info,
+    #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) info,
+    #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) info,
+    #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) info,
     #include "instrs.h"
 };
 // clang-format on
 
-/*****************************************************************************
- *
- *  Returns true if the instruction is some kind of compare or test instruction
- */
-
+//------------------------------------------------------------------------
+// emitInsIsCompare: Returns true if the instruction is some kind of compare or test instruction.
+//
 bool emitter::emitInsIsCompare(instruction ins)
 {
     // We have pseudo ins like lea which are not included in emitInsLdStTab.
     if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & CMP) ? true : false;
+        return (CodeGenInterface::instInfo[ins] & CMP) != 0;
     else
         return false;
 }
 
-/*****************************************************************************
- *
- *  Returns true if the instruction is some kind of load instruction
- */
-
+//------------------------------------------------------------------------
+// emitInsIsLoad: Returns true if the instruction is some kind of load instruction.
+//
 bool emitter::emitInsIsLoad(instruction ins)
 {
     // We have pseudo ins like lea which are not included in emitInsLdStTab.
     if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & LD) ? true : false;
+        return (CodeGenInterface::instInfo[ins] & LD) != 0;
     else
         return false;
 }
-/*****************************************************************************
- *
- *  Returns true if the instruction is some kind of store instruction
- */
 
+//------------------------------------------------------------------------
+// emitInsIsStore: Returns true if the instruction is some kind of store instruction.
+//
 bool emitter::emitInsIsStore(instruction ins)
 {
     // We have pseudo ins like lea which are not included in emitInsLdStTab.
     if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & ST) ? true : false;
+        return (CodeGenInterface::instInfo[ins] & ST) != 0;
     else
         return false;
 }
 
-/*****************************************************************************
- *
- *  Returns true if the instruction is some kind of load/store instruction
- */
-
+//------------------------------------------------------------------------
+// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load or store instruction.
+//
 bool emitter::emitInsIsLoadOrStore(instruction ins)
 {
     // We have pseudo ins like lea which are not included in emitInsLdStTab.
     if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & (LD | ST)) ? true : false;
+        return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0;
+    else
+        return false;
+}
+
+//------------------------------------------------------------------------
+// emitInsIsVectorRightShift: Returns true if the instruction's instInfo entry has the RSH (ASIMD right shift) flag.
+//
+bool emitter::emitInsIsVectorRightShift(instruction ins)
+{
+    // Values past the end of instInfo (presumably pseudo ins like lea) have no flags entry, so report false.
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & RSH) != 0;
     else
         return false;
 }
@@ -1470,6 +1478,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins)
 #undef LD
 #undef ST
 #undef CMP
+#undef RHS
 
 /*****************************************************************************
  *
@@ -1481,101 +1490,101 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
     // clang-format off
     const static code_t insCodes1[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                ) e1,
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            ) e1,
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        ) e1,
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) e1,
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) e1,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) e1,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
+        #define INST1(id, nm, info, fmt, e1                                ) e1,
+        #define INST2(id, nm, info, fmt, e1, e2                            ) e1,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e1,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e1,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e1,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e1,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
         #include "instrs.h"
     };
     const static code_t insCodes2[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            ) e2,
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        ) e2,
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) e2,
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) e2,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) e2,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            ) e2,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e2,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e2,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e2,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e2,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
         #include "instrs.h"
     };
     const static code_t insCodes3[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        ) e3,
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) e3,
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) e3,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) e3,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e3,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e3,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e3,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e3,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
         #include "instrs.h"
     };
     const static code_t insCodes4[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) e4,
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) e4,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) e4,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e4,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e4,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e4,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
         #include "instrs.h"
     };
     const static code_t insCodes5[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) e5,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) e5,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e5,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e5,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
         #include "instrs.h"
     };
     const static code_t insCodes6[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) e6,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e6,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
         #include "instrs.h"
     };
     const static code_t insCodes7[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            )
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
         #include "instrs.h"
     };
     const static code_t insCodes8[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            )
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
         #include "instrs.h"
     };
     const static code_t insCodes9[] =
     {
-        #define INST1(id, nm, fp, ldst, fmt, e1                                )
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                            )
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            )
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
         #include "instrs.h"
     };
     // clang-format on
@@ -1596,6 +1605,7 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
     const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L};
     const static insFormat formatEncode4H[4] = {IF_DV_3E, IF_DV_3A, IF_DV_2L, IF_DV_2M};
     const static insFormat formatEncode4I[4] = {IF_DV_3D, IF_DV_3B, IF_DV_2G, IF_DV_2A};
+    const static insFormat formatEncode4J[4] = {IF_DV_2N, IF_DV_2O, IF_DV_3E, IF_DV_3A};
     const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C};
     const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C};
     const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C};
@@ -1798,6 +1808,17 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
             }
             break;
 
+        case IF_EN4J:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4J[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+
         case IF_EN3A:
             for (index = 0; index < 3; index++)
             {
@@ -4393,15 +4414,21 @@ void emitter::emitIns_R_R(
             break;
 
         case INS_fcvtl:
-        case INS_fcvtl2:
         case INS_fcvtn:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(size == EA_8BYTE);
+            assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+            fmt = IF_DV_2A;
+            break;
+
+        case INS_fcvtl2:
         case INS_fcvtn2:
             assert(isVectorRegister(reg1));
             assert(isVectorRegister(reg2));
-            assert(isValidVectorDatasize(size));
-            assert(insOptsNone(opt));
-            assert(size == EA_8BYTE); // Narrowing from Double or Widening to Double (Half not supported)
-            fmt = IF_DV_2G;
+            assert(size == EA_16BYTE);
+            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+            fmt = IF_DV_2A;
             break;
 
         case INS_scvtf:
@@ -4797,6 +4824,7 @@ void emitter::emitIns_R_R_I(
         bool       canEncode;
         bitMaskImm bmi;
         unsigned   registerListSize;
+        bool       isRightShift;
 
         case INS_mov:
             // Check for the 'mov' aliases for the vector registers
@@ -4850,19 +4878,21 @@ void emitter::emitIns_R_R_I(
             fmt = IF_DI_2B;
             break;
 
-        case INS_sshr:
-        case INS_ssra:
+        case INS_shl:
+        case INS_sli:
+        case INS_sri:
         case INS_srshr:
         case INS_srsra:
-        case INS_shl:
-        case INS_ushr:
-        case INS_usra:
+        case INS_sshr:
+        case INS_ssra:
         case INS_urshr:
         case INS_ursra:
-        case INS_sri:
-        case INS_sli:
+        case INS_ushr:
+        case INS_usra:
             assert(isVectorRegister(reg1));
             assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
+
             if (insOptsAnyArrangement(opt))
             {
                 // Vector operation
@@ -4870,7 +4900,7 @@ void emitter::emitIns_R_R_I(
                 assert(isValidArrangement(size, opt));
                 elemsize = optGetElemsize(opt);
                 assert(isValidVectorElemsize(elemsize));
-                assert(isValidImmShift(imm, elemsize));
+                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
                 assert(opt != INS_OPTS_1D); // Reserved encoding
                 fmt = IF_DV_2O;
                 break;
@@ -4880,7 +4910,63 @@ void emitter::emitIns_R_R_I(
                 // Scalar operation
                 assert(insOptsNone(opt));
                 assert(size == EA_8BYTE); // only supported size
-                assert(isValidImmShift(imm, size));
+                assert(isValidVectorShiftAmount(imm, size, isRightShift));
+                fmt = IF_DV_2N;
+            }
+            break;
+
+        case INS_sqshl:
+        case INS_uqshl:
+        case INS_sqshlu:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
+
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert(opt != INS_OPTS_1D); // The encoding immh = 1xxx, Q = 0 is reserved
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+                fmt = IF_DV_2O;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsize(size));
+                assert(isValidVectorShiftAmount(imm, size, isRightShift));
+                fmt = IF_DV_2N;
+            }
+            break;
+
+        case INS_sqrshrn:
+        case INS_sqrshrun:
+        case INS_sqshrn:
+        case INS_sqshrun:
+        case INS_uqrshrn:
+        case INS_uqshrn:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
+
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding immh = 1xxx, Q = x is reserved
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+                fmt = IF_DV_2O;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsize(size));
+                assert(size != EA_8BYTE); // The encoding immh = 1xxx is reserved
+                assert(isValidVectorShiftAmount(imm, size, isRightShift));
                 fmt = IF_DV_2N;
             }
             break;
@@ -4890,19 +4976,20 @@ void emitter::emitIns_R_R_I(
             assert(imm == 0);
             __fallthrough;
 
-        case INS_shrn:
         case INS_rshrn:
+        case INS_shrn:
         case INS_sshll:
         case INS_ushll:
             assert(isVectorRegister(reg1));
             assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
             // Vector operation
             assert(size == EA_8BYTE);
             assert(isValidArrangement(size, opt));
             elemsize = optGetElemsize(opt);
             assert(elemsize != EA_8BYTE); // Reserved encodings
             assert(isValidVectorElemsize(elemsize));
-            assert(isValidImmShift(imm, elemsize));
+            assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
             fmt = IF_DV_2O;
             break;
 
@@ -4911,19 +4998,27 @@ void emitter::emitIns_R_R_I(
             assert(imm == 0);
             __fallthrough;
 
-        case INS_shrn2:
         case INS_rshrn2:
+        case INS_shrn2:
+        case INS_sqrshrn2:
+        case INS_sqrshrun2:
+        case INS_sqshrn2:
+        case INS_sqshrun2:
         case INS_sshll2:
+        case INS_uqrshrn2:
+        case INS_uqshrn2:
         case INS_ushll2:
             assert(isVectorRegister(reg1));
             assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
+
             // Vector operation
             assert(size == EA_16BYTE);
             assert(isValidArrangement(size, opt));
             elemsize = optGetElemsize(opt);
-            assert(elemsize != EA_8BYTE); // Reserved encodings
+            assert(elemsize != EA_8BYTE); // The encoding immh = 1xxx, Q = x is reserved
             assert(isValidVectorElemsize(elemsize));
-            assert(isValidImmShift(imm, elemsize));
+            assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
             fmt = IF_DV_2O;
             break;
 
@@ -5571,6 +5666,10 @@ void emitter::emitIns_R_R_R(
         case INS_cmhi:
         case INS_cmhs:
         case INS_cmtst:
+        case INS_srshl:
+        case INS_sshl:
+        case INS_urshl:
+        case INS_ushl:
             assert(isVectorRegister(reg1));
             assert(isVectorRegister(reg2));
             assert(isVectorRegister(reg3));
@@ -5592,8 +5691,12 @@ void emitter::emitIns_R_R_R(
             break;
 
         case INS_sqadd:
+        case INS_sqrshl:
+        case INS_sqshl:
         case INS_sqsub:
         case INS_uqadd:
+        case INS_uqrshl:
+        case INS_uqshl:
         case INS_uqsub:
             assert(isVectorRegister(reg1));
             assert(isVectorRegister(reg2));
@@ -8754,18 +8857,33 @@ void emitter::emitIns_Call(EmitCallType          callType,
     return bits;
 }
 
-/*****************************************************************************
- *
- *   Returns the encoding to shift by 'shift' for an Arm64 vector or scalar instruction
- */
-
-/*static*/ emitter::code_t emitter::insEncodeVectorShift(emitAttr size, ssize_t shift)
+// insEncodeVectorShift: Returns the encoding for the SIMD shift (immediate) instructions.
+//
+// Arguments:
+//    size        - for the scalar variants specifies 'datasize', for the vector variants specifies 'element size'.
+//    shiftAmount - if negative, the operation is a right shift by -shiftAmount; otherwise it is a left shift.
+//
+// Returns:
+//    "immh:immb" field of the instruction that contains the encoded shift amount.
+//
+/*static*/ emitter::code_t emitter::insEncodeVectorShift(emitAttr size, ssize_t shiftAmount)
 {
-    assert(shift < getBitWidth(size));
-
-    code_t imm = (code_t)(getBitWidth(size) + shift);
+    if (shiftAmount < 0)
+    {
+        shiftAmount = -shiftAmount;
+        // The right shift amount must be in the range 1 to the destination element width in bits.
+        assert((shiftAmount > 0) && (shiftAmount <= getBitWidth(size)));
 
-    return imm << 16;
+        code_t imm = (code_t)(2 * getBitWidth(size) - shiftAmount);
+        return imm << 16;
+    }
+    else
+    {
+        // The left shift amount must be in the range 0 to the element width in bits minus 1.
+        assert(shiftAmount < getBitWidth(size));
+        code_t imm = (code_t)(getBitWidth(size) + shiftAmount);
+        return imm << 16;
+    }
 }
 
 /*****************************************************************************
@@ -10595,9 +10713,25 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             elemsize = optGetElemsize(id->idInsOpt());
             code     = emitInsCode(ins, fmt);
             code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeFloatElemsize(elemsize);    // X
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            if ((ins == INS_fcvtl) || (ins == INS_fcvtl2) || (ins == INS_fcvtn) || (ins == INS_fcvtn2))
+            {
+                // fcvtl{2} and fcvtn{2} encode the element size as
+                //   esize = 16 << UInt(sz)
+                if (elemsize == EA_4BYTE)
+                {
+                    code |= 0x00400000; // X
+                }
+                else
+                {
+                    assert(elemsize == EA_2BYTE);
+                }
+            }
+            else
+            {
+                code |= insEncodeFloatElemsize(elemsize); // X
+            }
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
             dst += emitOutput_Instr(dst, code);
             break;
 
@@ -10738,11 +10872,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
 
         case IF_DV_2N: // DV_2N   .........iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - scalar)
-            imm  = emitGetInsSC(id);
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeVectorShift(EA_8BYTE, imm); // iiiiiii
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins) ? -imm : imm); // iiiiiii
+            code |= insEncodeReg_Vd(id->idReg1());                                               // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());                                               // nnnnn
             dst += emitOutput_Instr(dst, code);
             break;
 
@@ -10750,10 +10885,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             imm      = emitGetInsSC(id);
             elemsize = optGetElemsize(id->idInsOpt());
             code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeVectorShift(elemsize, imm); // iiiiiii
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            code |= insEncodeVectorsize(id->idOpSize());                                         // Q
+            code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins) ? -imm : imm); // iiiiiii
+            code |= insEncodeReg_Vd(id->idReg1());                                               // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());                                               // nnnnn
             dst += emitOutput_Instr(dst, code);
             break;
 
@@ -12345,6 +12480,23 @@ void emitter::emitDispIns(
             break;
 
         case IF_DV_2A: // DV_2A   .Q.......X...... ......nnnnnddddd      Vd Vn   (fabs, fcvt - vector)
+            if ((ins == INS_fcvtl) || (ins == INS_fcvtl2))
+            {
+                emitDispVectorReg(id->idReg1(), optWidenElemsize(id->idInsOpt()), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            else if ((ins == INS_fcvtn) || (ins == INS_fcvtn2))
+            {
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), optWidenElemsize(id->idInsOpt()), false);
+            }
+            else
+            {
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            break;
+
         case IF_DV_2P: // DV_2P   ................ ......nnnnnddddd      Vd Vn   (aes*, sha1su1)
             emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
             emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
@@ -12492,7 +12644,14 @@ void emitter::emitDispIns(
             }
             else
             {
-                elemsize = optGetElemsize(id->idInsOpt());
+                if ((ins == INS_saddlv) || (ins == INS_uaddlv))
+                {
+                    elemsize = optGetElemsize(optWidenDstArrangement(id->idInsOpt()));
+                }
+                else
+                {
+                    elemsize = optGetElemsize(id->idInsOpt());
+                }
                 emitDispReg(id->idReg1(), elemsize, true);
                 emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
             }
@@ -13931,6 +14090,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
                     result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
 
+                case INS_fcvtl:
+                case INS_fcvtl2:
+                case INS_fcvtn:
+                case INS_fcvtn2:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -13983,14 +14150,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
                     result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
 
-                case INS_fcvtl:
-                case INS_fcvtl2:
-                case INS_fcvtn:
-                case INS_fcvtn2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-
                 case INS_frecpe:
                 case INS_frecpx:
                 case INS_frsqrte:
@@ -14297,6 +14456,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
                 case INS_shadd:
                 case INS_shsub:
                 case INS_srhadd:
+                case INS_srshl:
+                case INS_sshl:
                 case INS_smax:
                 case INS_smaxp:
                 case INS_smin:
@@ -14308,6 +14469,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
                 case INS_uhadd:
                 case INS_uhsub:
                 case INS_urhadd:
+                case INS_urshl:
+                case INS_ushl:
                 case INS_uzp1:
                 case INS_uzp2:
                 case INS_zip1:
@@ -14346,6 +14509,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
                 case INS_mul:
                 case INS_mla:
                 case INS_mls:
+                case INS_sqshl:
+                case INS_sqrshl:
+                case INS_uqrshl:
+                case INS_uqshl:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
                     result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
@@ -14475,9 +14642,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
 
                 case INS_rshrn:
                 case INS_rshrn2:
-                case INS_ssra:
                 case INS_srshr:
+                case INS_sqshrn:
+                case INS_sqshrn2:
+                case INS_ssra:
                 case INS_urshr:
+                case INS_uqshrn:
+                case INS_uqshrn2:
                 case INS_usra:
                     if (id->idOpSize() == EA_16BYTE)
                     {
@@ -14497,6 +14668,29 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
                     result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
 
+                case INS_sqrshrn:
+                case INS_sqrshrn2:
+                case INS_sqrshrun:
+                case INS_sqrshrun2:
+                case INS_sqshrun:
+                case INS_sqshrun2:
+                case INS_sqshl:
+                case INS_sqshlu:
+                case INS_uqrshrn:
+                case INS_uqrshrn2:
+                case INS_uqshl:
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
index 96bbb37..9c365c0 100644 (file)
@@ -87,6 +87,7 @@ bool emitInsIsCompare(instruction ins);
 bool emitInsIsLoad(instruction ins);
 bool emitInsIsStore(instruction ins);
 bool emitInsIsLoadOrStore(instruction ins);
+bool emitInsIsVectorRightShift(instruction ins);
 emitAttr emitInsTargetRegSize(instrDesc* id);
 emitAttr emitInsLoadStoreSize(instrDesc* id);
 
@@ -300,8 +301,8 @@ static code_t insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2);
 // Returns the encoding to select 'index' for an Arm64 'mul' elem instruction
 static code_t insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index);
 
-// Returns the encoding to shift by 'shift' bits for an Arm64 vector or scalar instruction
-static code_t insEncodeVectorShift(emitAttr size, ssize_t shift);
+// Returns the encoding for ASIMD Shift instruction.
+static code_t insEncodeVectorShift(emitAttr size, ssize_t shiftAmount);
 
 // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
 static code_t insEncodeElemsize(emitAttr size);
@@ -517,6 +518,13 @@ inline static unsigned isValidImmShift(ssize_t imm, emitAttr size)
     return (imm >= 0) && (imm < getBitWidth(size));
 }
 
+// Returns true if 'shiftAmount' represents a valid shift for the given 'size'.
+//
+// Arguments:
+//    shiftAmount - the immediate shift amount to validate
+//    size        - the size whose bit width (as reported by getBitWidth) bounds the shift
+//    rightShift  - true to validate a right shift, false to validate a left shift
+//
+// Return Value:
+//    Non-zero when the amount is in range:
+//      right shift - [1, getBitWidth(size)]
+//      left shift  - [0, getBitWidth(size) - 1]
+//
+// Notes:
+//    The two ranges are selected exclusively by 'rightShift'. In particular, a right
+//    shift by zero is rejected; it must not be accepted through the left-shift range.
+inline static unsigned isValidVectorShiftAmount(ssize_t shiftAmount, emitAttr size, bool rightShift)
+{
+    if (rightShift)
+    {
+        return (shiftAmount >= 1) && (shiftAmount <= getBitWidth(size));
+    }
+
+    return (shiftAmount >= 0) && (shiftAmount < getBitWidth(size));
+}
+
 inline static bool isValidGeneralDatasize(emitAttr size)
 {
     return (size == EA_8BYTE) || (size == EA_4BYTE);
index 16244c1..c39f85d 100644 (file)
@@ -61,6 +61,7 @@ IF_DEF(EN4F, IS_NONE, NONE) // Instruction has 4 possible encoding types, type F
 IF_DEF(EN4G, IS_NONE, NONE) // Instruction has 4 possible encoding types, type G
 IF_DEF(EN4H, IS_NONE, NONE) // Instruction has 4 possible encoding types, type H
 IF_DEF(EN4I, IS_NONE, NONE) // Instruction has 4 possible encoding types, type I
+IF_DEF(EN4J, IS_NONE, NONE) // Instruction has 4 possible encoding types, type J
 IF_DEF(EN3A, IS_NONE, NONE) // Instruction has 3 possible encoding types, type A
 IF_DEF(EN3B, IS_NONE, NONE) // Instruction has 3 possible encoding types, type B
 IF_DEF(EN3C, IS_NONE, NONE) // Instruction has 3 possible encoding types, type C
index 969efe5..ec776cb 100644 (file)
@@ -57,13 +57,13 @@ const char* CodeGen::genInsName(instruction ins)
         #include "instrs.h"
 
 #elif defined(TARGET_ARM64)
-        #define INST1(id, nm, fp, ldst, fmt, e1                                 ) nm,
-        #define INST2(id, nm, fp, ldst, fmt, e1, e2                             ) nm,
-        #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                         ) nm,
-        #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                     ) nm,
-        #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                 ) nm,
-        #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6             ) nm,
-        #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm,
+        #define INST1(id, nm, ldst, fmt, e1                                 ) nm,
+        #define INST2(id, nm, ldst, fmt, e1, e2                             ) nm,
+        #define INST3(id, nm, ldst, fmt, e1, e2, e3                         ) nm,
+        #define INST4(id, nm, ldst, fmt, e1, e2, e3, e4                     ) nm,
+        #define INST5(id, nm, ldst, fmt, e1, e2, e3, e4, e5                 ) nm,
+        #define INST6(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6             ) nm,
+        #define INST9(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm,
         #include "instrs.h"
 
 #else
index 26ba6ee..3c0404c 100644 (file)
@@ -37,13 +37,13 @@ enum instruction : unsigned
     INS_lea,   // Not a real instruction. It is used for load the address of stack locals
 
 #elif defined(TARGET_ARM64)
-    #define INST1(id, nm, fp, ldst, fmt, e1                                ) INS_##id,
-    #define INST2(id, nm, fp, ldst, fmt, e1, e2                            ) INS_##id,
-    #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3                        ) INS_##id,
-    #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4                    ) INS_##id,
-    #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5                ) INS_##id,
-    #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6            ) INS_##id,
-    #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) INS_##id,
+    #define INST1(id, nm, ldst, fmt, e1                                ) INS_##id,
+    #define INST2(id, nm, ldst, fmt, e1, e2                            ) INS_##id,
+    #define INST3(id, nm, ldst, fmt, e1, e2, e3                        ) INS_##id,
+    #define INST4(id, nm, ldst, fmt, e1, e2, e3, e4                    ) INS_##id,
+    #define INST5(id, nm, ldst, fmt, e1, e2, e3, e4, e5                ) INS_##id,
+    #define INST6(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6            ) INS_##id,
+    #define INST9(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) INS_##id,
     #include "instrs.h"
 
     INS_lea,   // Not a real instruction. It is used for load the address of stack locals
index 4569606..41d3344 100644 (file)
@@ -7,14 +7,17 @@
  *
  *          id      -- the enum name for the instruction
  *          nm      -- textual name (for assembly dipslay)
- *          fp      -- floating point instruction
- *          ld/st/cmp   -- load/store/compare instruction
+ *          info    -- miscellaneous instruction info (load/store/compare/ASIMD right shift)
  *          fmt     -- encoding format used by this instruction
  *          e1      -- encoding 1
  *          e2      -- encoding 2
  *          e3      -- encoding 3
  *          e4      -- encoding 4
  *          e5      -- encoding 5
+ *          e6      -- encoding 6
+ *          e7      -- encoding 7
+ *          e8      -- encoding 8
+ *          e9      -- encoding 9
  *
 ******************************************************************************/
 
@@ -45,7 +48,7 @@
 #endif
 
 /*****************************************************************************/
-/*               The following is ARM64-specific                               */
+/*               The following is ARM64-specific                             */
 /*****************************************************************************/
 
 // If you're adding a new instruction:
 //     emitInsMayWriteMultipleRegs in emitArm64.cpp.
 
 // clang-format off
-INST9(invalid, "INVALID", 0, 0, IF_NONE,  BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE,   BAD_CODE,     BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE)
+INST9(invalid,     "INVALID",      0,      IF_NONE,   BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE,    BAD_CODE)
 
-//    enum     name     FP LD/ST            DR_2E        DR_2G        DI_1B        DI_1D        DV_3C        DV_2B        DV_2C        DV_2E        DV_2F
-INST9(mov,     "mov",    0, 0, IF_EN9,    0x2A0003E0,  0x11000000,  0x52800000,  0x320003E0,  0x0EA01C00,  0x0E003C00,  0x4E001C00,  0x5E000400,  0x6E000400)
+//    enum         name            info               DR_2E        DR_2G        DI_1B        DI_1D        DV_3C        DV_2B        DV_2C        DV_2E        DV_2F
+INST9(mov,         "mov",          0,      IF_EN9,    0x2A0003E0,  0x11000000,  0x52800000,  0x320003E0,  0x0EA01C00,  0x0E003C00,  0x4E001C00,  0x5E000400,  0x6E000400)
                                    //  mov     Rd,Rm                DR_2E  X0101010000mmmmm 00000011111ddddd   2A00 03E0
                                    //  mov     Rd,Rn                DR_2G  X001000100000000 000000nnnnnddddd   1100 0000   mov to/from SP only
                                    //  mov     Rd,imm(i16,hw)       DI_1B  X10100101hwiiiii iiiiiiiiiiiddddd   5280 0000   imm(i16,hw)
@@ -68,8 +71,8 @@ INST9(mov,     "mov",    0, 0, IF_EN9,    0x2A0003E0,  0x11000000,  0x52800000,
                                    //  mov     Vd,Vn[]              DV_2E  01011110000iiiii 000001nnnnnddddd   5E00 0400   Vd,Vn[] (scalar by elem)
                                    //  mov     Vd[],Vn[]            DV_2F  01101110000iiiii 0jjjj1nnnnnddddd   6E00 0400   Vd[],Vn[] (from/to elem)
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DR_3C        DI_2A        DV_3A        DV_3E
-INST6(add,     "add",    0, 0, IF_EN6A,   0x0B000000,  0x0B000000,  0x0B200000,  0x11000000,  0x0E208400,  0x5EE08400)
+//    enum         name            info               DR_3A        DR_3B        DR_3C        DI_2A        DV_3A        DV_3E
+INST6(add,         "add",          0,      IF_EN6A,   0x0B000000,  0x0B000000,  0x0B200000,  0x11000000,  0x0E208400,  0x5EE08400)
                                    //  add     Rd,Rn,Rm             DR_3A  X0001011000mmmmm 000000nnnnnddddd   0B00 0000   Rd,Rn,Rm
                                    //  add     Rd,Rn,(Rm,shk,imm)   DR_3B  X0001011sh0mmmmm ssssssnnnnnddddd   0B00 0000   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  add     Rd,Rn,(Rm,ext,shl)   DR_3C  X0001011001mmmmm ooosssnnnnnddddd   0B20 0000   ext(Rm) LSL imm(0-4)
@@ -77,7 +80,7 @@ INST6(add,     "add",    0, 0, IF_EN6A,   0x0B000000,  0x0B000000,  0x0B200000,
                                    //  add     Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 100001nnnnnddddd   0E20 8400   Vd,Vn,Vm  (vector)
                                    //  add     Vd,Vn,Vm             DV_3E  01011110111mmmmm 100001nnnnnddddd   5EE0 8400   Vd,Vn,Vm  (scalar)
 
-INST6(sub,     "sub",    0, 0, IF_EN6A,   0x4B000000,  0x4B000000,  0x4B200000,  0x51000000,  0x2E208400,  0x7EE08400)
+INST6(sub,         "sub",          0,      IF_EN6A,   0x4B000000,  0x4B000000,  0x4B200000,  0x51000000,  0x2E208400,  0x7EE08400)
                                    //  sub     Rd,Rn,Rm             DR_3A  X1001011000mmmmm 000000nnnnnddddd   4B00 0000   Rd,Rn,Rm
                                    //  sub     Rd,Rn,(Rm,shk,imm)   DR_3B  X1001011sh0mmmmm ssssssnnnnnddddd   4B00 0000   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  sub     Rd,Rn,(Rm,ext,shl)   DR_3C  X1001011001mmmmm ooosssnnnnnddddd   4B20 0000   ext(Rm) LSL imm(0-4)
@@ -85,1776 +88,1772 @@ INST6(sub,     "sub",    0, 0, IF_EN6A,   0x4B000000,  0x4B000000,  0x4B200000,
                                    //  sub     Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 100001nnnnnddddd   2E20 8400   Vd,Vn,Vm  (vector)
                                    //  sub     Vd,Vn,Vm             DV_3E  01111110111mmmmm 100001nnnnnddddd   7EE0 8400   Vd,Vn,Vm  (scalar)
 
-//    enum     name     FP LD/ST            LS_2D        LS_3F        LS_2E        LS_2F        LS_3G        LS_2G
-INST6(ld1,     "ld1",   0, LD, IF_EN6B,   0x0C407000,  0x0CC07000,  0x0CDF7000,  0x0D400000,  0x0DC00000,  0x0DDF0000)
-                                   // C7.2.170 LD1 (multiple structures, one register variant)
+//    enum         name            info               LS_2D        LS_3F        LS_2E        LS_2F        LS_3G        LS_2G
+INST6(ld1,         "ld1",          LD,     IF_EN6B,   0x0C407000,  0x0CC07000,  0x0CDF7000,  0x0D400000,  0x0DC00000,  0x0DDF0000)
+                                   // LD1 (multiple structures, one register variant)
                                    //  ld1     {Vt},[Xn]            LS_2D  0Q00110001000000 0111ssnnnnnttttt   0C40 7000   base register
                                    //  ld1     {Vt},[Xn],Xm         LS_3F  0Q001100110mmmmm 0111ssnnnnnttttt   0CC0 7000   post-indexed by a register
                                    //  ld1     {Vt},[Xn],#imm       LS_2E  0Q00110011011111 0111ssnnnnnttttt   0CDF 7000   post-indexed by an immediate
-                                   // C7.2.171 LD1 (single structure)
+                                   // LD1 (single structure)
                                    //  ld1     {Vt}[],[Xn]          LS_2F  0Q00110101000000 xx0Sssnnnnnttttt   0D40 0000   base register
                                    //  ld1     {Vt}[],[Xn],Xm       LS_3G  0Q001101110mmmmm xx0Sssnnnnnttttt   0DC0 0000   post-indexed by a register
                                    //  ld1     {Vt}[],[Xn],#imm     LS_2G  0Q00110111011111 xx0Sssnnnnnttttt   0DDF 0000   post-indexed by an immediate
 
-INST6(ld2,     "ld2",   0, LD, IF_EN6B,   0x0C408000,  0x0CC08000,  0x0CDF8000,  0x0D600000,  0x0DE00000,  0x0DFF0000)
-                                   // C7.2.173 LD2 (multiple structures)
+INST6(ld2,         "ld2",          LD,     IF_EN6B,   0x0C408000,  0x0CC08000,  0x0CDF8000,  0x0D600000,  0x0DE00000,  0x0DFF0000)
+                                   // LD2 (multiple structures)
                                    //  ld2     {Vt,Vt2},[Xn]        LS_2D  0Q00110001000000 1000ssnnnnnttttt   0C40 8000   base register
                                    //  ld2     {Vt,Vt2},[Xn],Xm     LS_3F  0Q001100110mmmmm 1000ssnnnnnttttt   0CC0 8000   post-indexed by a register
                                    //  ld2     {Vt,Vt2},[Xn],#imm   LS_2E  0Q001100110mmmmm 1000ssnnnnnttttt   0CDF 8000   post-indexed by an immediate
-                                   // C7.2.174 LD2 (single structure)
+                                   // LD2 (single structure)
                                    //  ld2     {Vt,Vt2}[],[Xn]      LS_2F  0Q00110101100000 xx0Sssnnnnnttttt   0D60 0000   base register
                                    //  ld2     {Vt,Vt2}[],[Xn],Xm   LS_3G  0Q001101111mmmmm xx0Sssnnnnnttttt   0DE0 0000   post-indexed by a register
                                    //  ld2     {Vt,Vt2}[],[Xn],#imm LS_2G  0Q00110111111111 xx0Sssnnnnnttttt   0DFF 0000   post-indexed by an immediate
 
-INST6(ld3,     "ld3",   0, LD, IF_EN6B,   0x0C404000,  0x0CC04000,  0x0CDF4000,  0x0D402000,  0x0DC02000,  0x0DDF2000)
-                                   // C7.2.176 LD3 (multiple structures)
+INST6(ld3,         "ld3",          LD,     IF_EN6B,   0x0C404000,  0x0CC04000,  0x0CDF4000,  0x0D402000,  0x0DC02000,  0x0DDF2000)
+                                   // LD3 (multiple structures)
                                    //  ld3     {Vt-Vt3},[Xn]        LS_2D  0Q00110001000000 0100ssnnnnnttttt   0C40 4000   base register
                                    //  ld3     {Vt-Vt3},[Xn],Xm     LS_3F  0Q001100110mmmmm 0100ssnnnnnttttt   0CC0 4000   post-indexed by a register
                                    //  ld3     {Vt-Vt3},[Xn],#imm   LS_2E  0Q001100110mmmmm 0100ssnnnnnttttt   0CDF 4000   post-indexed by an immediate
-                                   // C7.2.177 LD3 (single structure)
+                                   // LD3 (single structure)
                                    //  ld3     {Vt-Vt3}[],[Xn]      LS_2F  0Q00110101000000 xx1Sssnnnnnttttt   0D40 2000   base register
                                    //  ld3     {Vt-Vt3}[],[Xn],Xm   LS_3G  0Q001101110mmmmm xx1Sssnnnnnttttt   0DC0 2000   post-indexed by a register
                                    //  ld3     {Vt-Vt3}[],[Xn],#imm LS_2G  0Q00110111011111 xx1Sssnnnnnttttt   0DDF 2000   post-indexed by an immediate
 
-INST6(ld4,     "ld4",   0, LD, IF_EN6B,   0x0C400000,  0x0CC00000,  0x0CDF0000,  0x0D602000,  0x0DE02000,  0x0DFF2000)
-                                   // C7.2.179 LD4 (multiple structures)
+INST6(ld4,         "ld4",          LD,     IF_EN6B,   0x0C400000,  0x0CC00000,  0x0CDF0000,  0x0D602000,  0x0DE02000,  0x0DFF2000)
+                                   // LD4 (multiple structures)
                                    //  ld4     {Vt-Vt4},[Xn]        LS_2D  0Q00110001000000 0000ssnnnnnttttt   0C40 0000   base register
                                    //  ld4     {Vt-Vt4},[Xn],Xm     LS_3F  0Q001100110mmmmm 0000ssnnnnnttttt   0CC0 0000   post-indexed by a register
                                    //  ld4     {Vt-Vt4},[Xn],#imm   LS_2E  0Q00110011011111 0000ssnnnnnttttt   0CDF 0000   post-indexed by an immediate
-                                   // C7.2.180 LD4 (single structure)
+                                   // LD4 (single structure)
                                    //  ld4     {Vt-Vt4}[],[Xn]      LS_2F  0Q00110101100000 xx1Sssnnnnnttttt   0D60 2000   base register
                                    //  ld4     {Vt-Vt4}[],[Xn],Xm   LS_3G  0Q001101111mmmmm xx1Sssnnnnnttttt   0DE0 2000   post-indexed by a register
                                    //  ld4     {Vt-Vt4}[],[Xn],#imm LS_2G  0Q00110111111111 xx1Sssnnnnnttttt   0DFF 2000   post-indexed by an immediate
 
-INST6(st1,     "st1",   0, LD, IF_EN6B,   0x0C007000,  0x0C807000,  0x0C9F7000,  0x0D000000,  0x0D800000,  0x0D9F0000)
-                                   // C7.2.313 ST1 (multiple structures, one register variant)
+INST6(st1,         "st1",          ST,     IF_EN6B,   0x0C007000,  0x0C807000,  0x0C9F7000,  0x0D000000,  0x0D800000,  0x0D9F0000)
+                                   // ST1 (multiple structures, one register variant)
                                    //  st1     {Vt},[Xn]            LS_2D  0Q00110000000000 0111ssnnnnnttttt   0C00 7000   base register
                                    //  st1     {Vt},[Xn],Xm         LS_3F  0Q001100100mmmmm 0111ssnnnnnttttt   0C80 7000   post-indexed by a register
                                    //  st1     {Vt},[Xn],#imm       LS_2E  0Q00110010011111 0111ssnnnnnttttt   0C9F 7000   post-indexed by an immediate
-                                   // C7.2.314 ST1 (single structure)
+                                   // ST1 (single structure)
                                    //  st1     {Vt}[],[Xn]          LS_2F  0Q00110100000000 xx0Sssnnnnnttttt   0D00 0000   base register
                                    //  st1     {Vt}[],[Xn],Xm       LS_3G  0Q001101100mmmmm xx0Sssnnnnnttttt   0D80 0000   post-indexed by a register
                                    //  st1     {Vt}[],[Xn],#imm     LS_2G  0Q00110110011111 xx0Sssnnnnnttttt   0D9F 0000   post-indexed by an immediate
 
-INST6(st2,     "st2",   0, ST, IF_EN6B,   0x0C008000,  0x0C808000,  0x0C9F8000,  0x0D200000,  0x0DA00000,  0x0DBF0000)
-                                   // C7.2.315 ST2 (multiple structures)
+INST6(st2,         "st2",          ST,     IF_EN6B,   0x0C008000,  0x0C808000,  0x0C9F8000,  0x0D200000,  0x0DA00000,  0x0DBF0000)
+                                   // ST2 (multiple structures)
                                    //  st2     {Vt,Vt2},[Xn]        LS_2D  0Q00110000000000 1000ssnnnnnttttt   0C00 8000   base register
                                    //  st2     {Vt,Vt2},[Xn],Xm     LS_3F  0Q001100100mmmmm 1000ssnnnnnttttt   0C80 8000   post-indexed by a register
                                    //  st2     {Vt,Vt2},[Xn],#imm   LS_2E  0Q00110010011111 1000ssnnnnnttttt   0C9F 8000   post-indexed by an immediate
-                                   // C7.2.316 ST2 (single structure)
+                                   // ST2 (single structure)
                                    //  st2     {Vt,Vt2}[],[Xn]      LS_2F  0Q00110100100000 xx0Sssnnnnnttttt   0D20 0000   base register
                                    //  st2     {Vt,Vt2}[],[Xn],Xm   LS_3G  0Q001101101mmmmm xx0Sssnnnnnttttt   0DA0 0000   post-indexed by a register
                                    //  st2     {Vt,Vt2}[],[Xn],#imm LS_2G  0Q00110110111111 xx0Sssnnnnnttttt   0DBF 0000   post-indexed by an immediate
 
-INST6(st3,     "st3",   0, ST, IF_EN6B,   0x0C004000,  0x0C804000,  0x0C9F4000,  0x0D002000,  0x0D802000,  0x0D9F2000)
-                                   // C7.2.317 ST3 (multiple structures)
+INST6(st3,         "st3",          ST,     IF_EN6B,   0x0C004000,  0x0C804000,  0x0C9F4000,  0x0D002000,  0x0D802000,  0x0D9F2000)
+                                   // ST3 (multiple structures)
                                    //  st3     {Vt-Vt3},[Xn]        LS_2D  0Q00110000000000 0100ssnnnnnttttt   0C00 4000   base register
                                    //  st3     {Vt-Vt3},[Xn],Xm     LS_3F  0Q001100100mmmmm 0100ssnnnnnttttt   0C80 4000   post-indexed by a register
                                    //  st3     {Vt-Vt3},[Xn],#imm   LS_2E  0Q00110010011111 0100ssnnnnnttttt   0C9F 4000   post-indexed by an immediate
-                                   // C7.2.318 ST3 (single structure)
+                                   // ST3 (single structure)
                                    //  st3     {Vt-Vt3}[],[Xn]      LS_2F  0Q00110100000000 xx1Sssnnnnnttttt   0D00 2000   base register
                                    //  st3     {Vt-Vt3}[],[Xn],Xm   LS_3G  0Q001101100mmmmm xx1Sssnnnnnttttt   0D80 2000   post-indexed by a register
                                    //  st3     {Vt-Vt3}[],[Xn],#imm LS_2G  0Q00110110011111 xx1Sssnnnnnttttt   0D9F 2000   post-indexed by an immediate
 
-INST6(st4,     "st4",   0, ST, IF_EN6B,   0x0C000000,  0x0C800000,  0x0C9F0000,  0x0D202000,  0x0DA02000,  0x0DBF2000)
-                                   // C7.2.319 ST4 (multiple structures)
+INST6(st4,         "st4",          ST,     IF_EN6B,   0x0C000000,  0x0C800000,  0x0C9F0000,  0x0D202000,  0x0DA02000,  0x0DBF2000)
+                                   // ST4 (multiple structures)
                                    //  st4     {Vt-Vt4},[Xn]        LS_2D  0Q00110000000000 0000ssnnnnnttttt   0C00 0000   base register
                                    //  st4     {Vt-Vt4},[Xn],Xm     LS_3F  0Q001100100mmmmm 0000ssnnnnnttttt   0C80 0000   post-indexed by a register
                                    //  st4     {Vt-Vt4},[Xn],#imm   LS_2E  0Q00110010011111 0000ssnnnnnttttt   0C9F 0000   post-indexed by an immediate
-                                   // C7.2.320 ST4 (single structure)
+                                   // ST4 (single structure)
                                    //  st4     {Vt-Vt4}[],[Xn]      LS_2F  0Q00110100100000 xx1Sssnnnnnttttt   0D20 2000   base register
                                    //  st4     {Vt-Vt4}[],[Xn],Xm   LS_3G  0Q001101101mmmmm xx1Sssnnnnnttttt   0DA0 2000   post-indexed by a register
                                    //  st4     {Vt-Vt4}[],[Xn],#imm LS_2G  0Q00110110111111 xx1Sssnnnnnttttt   0DBF 2000   post-indexed by an immediate
 
-//    enum     name     FP LD/ST            LS_2A        LS_2B        LS_2C        LS_3A        LS_1A
-INST5(ldr,     "ldr",    0,LD, IF_EN5A,   0xB9400000,  0xB9400000,  0xB8400000,  0xB8600800,  0x18000000)
+//    enum         name            info               LS_2A        LS_2B        LS_2C        LS_3A        LS_1A
+INST5(ldr,         "ldr",          LD,     IF_EN5A,   0xB9400000,  0xB9400000,  0xB8400000,  0xB8600800,  0x18000000)
                                    //  ldr     Rt,[Xn]              LS_2A  1X11100101000000 000000nnnnnttttt   B940 0000
                                    //  ldr     Rt,[Xn+pimm12]       LS_2B  1X11100101iiiiii iiiiiinnnnnttttt   B940 0000   imm(0-4095<<{2,3})
                                    //  ldr     Rt,[Xn+simm9]        LS_2C  1X111000010iiiii iiiiPPnnnnnttttt   B840 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  ldr     Rt,[Xn,(Rm,ext,shl)] LS_3A  1X111000011mmmmm oooS10nnnnnttttt   B860 0800   [Xn, ext(Rm) LSL {0,2,3}]
                                    //  ldr     Vt/Rt,[PC+simm19<<2] LS_1A  XX011V00iiiiiiii iiiiiiiiiiittttt   1800 0000   [PC +- imm(1MB)]
 
-INST5(ldrsw,   "ldrsw",  0,LD, IF_EN5A,   0xB9800000,  0xB9800000,  0xB8800000,  0xB8A00800,  0x98000000)
+INST5(ldrsw,       "ldrsw",        LD,     IF_EN5A,   0xB9800000,  0xB9800000,  0xB8800000,  0xB8A00800,  0x98000000)
                                    //  ldrsw   Rt,[Xn]              LS_2A  1011100110000000 000000nnnnnttttt   B980 0000
                                    //  ldrsw   Rt,[Xn+pimm12]       LS_2B  1011100110iiiiii iiiiiinnnnnttttt   B980 0000   imm(0-4095<<2)
                                    //  ldrsw   Rt,[Xn+simm9]        LS_2C  10111000100iiiii iiiiPPnnnnnttttt   B880 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  ldrsw   Rt,[Xn,(Rm,ext,shl)] LS_3A  10111000101mmmmm oooS10nnnnnttttt   B8A0 0800   [Xn, ext(Rm) LSL {0,2}]
                                    //  ldrsw   Rt,[PC+simm19<<2]    LS_1A  10011000iiiiiiii iiiiiiiiiiittttt   9800 0000   [PC +- imm(1MB)]
 
-//    enum     name     FP LD/ST            DV_2G        DV_2H        DV_2I        DV_1A        DV_1B
-INST5(fmov,    "fmov",   0, 0, IF_EN5B,   0x1E204000,  0x1E260000,  0x1E270000,  0x1E201000,  0x0F00F400)
+//    enum         name            info               DV_2G        DV_2H        DV_2I        DV_1A        DV_1B
+INST5(fmov,        "fmov",         0,      IF_EN5B,   0x1E204000,  0x1E260000,  0x1E270000,  0x1E201000,  0x0F00F400)
                                    //  fmov    Vd,Vn                DV_2G  000111100X100000 010000nnnnnddddd   1E20 4000   Vd,Vn    (scalar)
                                    //  fmov    Rd,Vn                DV_2H  X00111100X100110 000000nnnnnddddd   1E26 0000   Rd,Vn    (scalar, to general)
                                    //  fmov    Vd,Rn                DV_2I  X00111100X100111 000000nnnnnddddd   1E27 0000   Vd,Rn    (scalar, from general)
                                    //  fmov    Vd,immfp             DV_1A  000111100X1iiiii iii10000000ddddd   1E20 1000   Vd,immfp (scalar)
                                    //  fmov    Vd,immfp             DV_1B  0QX0111100000iii 111101iiiiiddddd   0F00 F400   Vd,immfp (immediate vector)
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DI_2C        DV_3C        DV_1B
-INST5(orr,     "orr",    0, 0, IF_EN5C,   0x2A000000,  0x2A000000,  0x32000000,  0x0EA01C00,  0x0F001400)
+//    enum         name            info               DR_3A        DR_3B        DI_2C        DV_3C        DV_1B
+INST5(orr,         "orr",          0,      IF_EN5C,   0x2A000000,  0x2A000000,  0x32000000,  0x0EA01C00,  0x0F001400)
                                    //  orr     Rd,Rn,Rm             DR_3A  X0101010000mmmmm 000000nnnnnddddd   2A00 0000
                                    //  orr     Rd,Rn,(Rm,shk,imm)   DR_3B  X0101010sh0mmmmm iiiiiinnnnnddddd   2A00 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  orr     Rd,Rn,imm(N,r,s)     DI_2C  X01100100Nrrrrrr ssssssnnnnnddddd   3200 0000   imm(N,r,s)
                                    //  orr     Vd,Vn,Vm             DV_3C  0Q001110101mmmmm 000111nnnnnddddd   0EA0 1C00   Vd,Vn,Vm
                                    //  orr     Vd,imm8              DV_1B  0Q00111100000iii ---101iiiiiddddd   0F00 1400   Vd imm8  (immediate vector)
 
-//    enum     name     FP LD/ST            LS_2A        LS_2B        LS_2C        LS_3A
-INST4(ldrb,    "ldrb",   0,LD, IF_EN4A,   0x39400000,  0x39400000,  0x38400000,  0x38600800)
+//    enum         name            info               LS_2A        LS_2B        LS_2C        LS_3A
+INST4(ldrb,        "ldrb",         LD,     IF_EN4A,   0x39400000,  0x39400000,  0x38400000,  0x38600800)
                                    //  ldrb    Rt,[Xn]              LS_2A  0011100101000000 000000nnnnnttttt   3940 0000
                                    //  ldrb    Rt,[Xn+pimm12]       LS_2B  0011100101iiiiii iiiiiinnnnnttttt   3940 0000   imm(0-4095)
                                    //  ldrb    Rt,[Xn+simm9]        LS_2C  00111000010iiiii iiiiPPnnnnnttttt   3840 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  ldrb    Rt,[Xn,(Rm,ext,shl)] LS_3A  00111000011mmmmm oooS10nnnnnttttt   3860 0800   [Xn, ext(Rm)]
 
-INST4(ldrh,    "ldrh",   0,LD, IF_EN4A,   0x79400000,  0x79400000,  0x78400000,  0x78600800)
+INST4(ldrh,        "ldrh",         LD,     IF_EN4A,   0x79400000,  0x79400000,  0x78400000,  0x78600800)
                                    //  ldrh    Rt,[Xn]              LS_2A  0111100101000000 000000nnnnnttttt   7940 0000
                                    //  ldrh    Rt,[Xn+pimm12]       LS_2B  0111100101iiiiii iiiiiinnnnnttttt   7940 0000   imm(0-4095<<1)
                                    //  ldrh    Rt,[Xn+simm9]        LS_2C  01111000010iiiii iiiiPPnnnnnttttt   7840 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  ldrh    Rt,[Xn,(Rm,ext,shl)] LS_3A  01111000011mmmmm oooS10nnnnnttttt   7860 0800   [Xn, ext(Rm) LSL {0,1}]
 
-INST4(ldrsb,   "ldrsb",  0,LD, IF_EN4A,   0x39800000,  0x39800000,  0x38800000,  0x38A00800)
+INST4(ldrsb,       "ldrsb",        LD,     IF_EN4A,   0x39800000,  0x39800000,  0x38800000,  0x38A00800)
                                    //  ldrsb   Rt,[Xn]              LS_2A  001110011X000000 000000nnnnnttttt   3980 0000
                                    //  ldrsb   Rt,[Xn+pimm12]       LS_2B  001110011Xiiiiii iiiiiinnnnnttttt   3980 0000   imm(0-4095)
                                    //  ldrsb   Rt,[Xn+simm9]        LS_2C  001110001X0iiiii iiii01nnnnnttttt   3880 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  ldrsb   Rt,[Xn,(Rm,ext,shl)] LS_3A  001110001X1mmmmm oooS10nnnnnttttt   38A0 0800   [Xn, ext(Rm)]
 
-INST4(ldrsh,   "ldrsh",  0,LD, IF_EN4A,   0x79800000,  0x79800000,  0x78800000,  0x78A00800)
+INST4(ldrsh,       "ldrsh",        LD,     IF_EN4A,   0x79800000,  0x79800000,  0x78800000,  0x78A00800)
                                    //  ldrsh   Rt,[Xn]              LS_2A  011110011X000000 000000nnnnnttttt   7980 0000
                                    //  ldrsh   Rt,[Xn+pimm12]       LS_2B  011110011Xiiiiii iiiiiinnnnnttttt   7980 0000   imm(0-4095<<1)
                                    //  ldrsh   Rt,[Xn+simm9]        LS_2C  011110001X0iiiii iiiiPPnnnnnttttt   7880 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  ldrsh   Rt,[Xn,(Rm,ext,shl)] LS_3A  011110001X1mmmmm oooS10nnnnnttttt   78A0 0800   [Xn, ext(Rm) LSL {0,1}]
 
-INST4(str,     "str",    0,ST, IF_EN4A,   0xB9000000,  0xB9000000,  0xB8000000,  0xB8200800)
+INST4(str,         "str",          ST,     IF_EN4A,   0xB9000000,  0xB9000000,  0xB8000000,  0xB8200800)
                                    //  str     Rt,[Xn]              LS_2A  1X11100100000000 000000nnnnnttttt   B900 0000
                                    //  str     Rt,[Xn+pimm12]       LS_2B  1X11100100iiiiii iiiiiinnnnnttttt   B900 0000   imm(0-4095<<{2,3})
                                    //  str     Rt,[Xn+simm9]        LS_2C  1X111000000iiiii iiiiPPnnnnnttttt   B800 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  str     Rt,[Xn,(Rm,ext,shl)] LS_3A  1X111000001mmmmm oooS10nnnnnttttt   B820 0800   [Xn, ext(Rm)]
 
-INST4(strb,    "strb",   0,ST, IF_EN4A,   0x39000000,  0x39000000,  0x38000000,  0x38200800)
+INST4(strb,        "strb",         ST,     IF_EN4A,   0x39000000,  0x39000000,  0x38000000,  0x38200800)
                                    //  strb    Rt,[Xn]              LS_2A  0011100100000000 000000nnnnnttttt   3900 0000
                                    //  strb    Rt,[Xn+pimm12]       LS_2B  0011100100iiiiii iiiiiinnnnnttttt   3900 0000   imm(0-4095)
                                    //  strb    Rt,[Xn+simm9]        LS_2C  00111000000iiiii iiiiPPnnnnnttttt   3800 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  strb    Rt,[Xn,(Rm,ext,shl)] LS_3A  00111000001mmmmm oooS10nnnnnttttt   3820 0800   [Xn, ext(Rm)]
 
-INST4(strh,    "strh",   0,ST, IF_EN4A,   0x79000000,  0x79000000,  0x78000000,  0x78200800)
+INST4(strh,        "strh",         ST,     IF_EN4A,   0x79000000,  0x79000000,  0x78000000,  0x78200800)
                                    //  strh    Rt,[Xn]              LS_2A  0111100100000000 000000nnnnnttttt   7900 0000
                                    //  strh    Rt,[Xn+pimm12]       LS_2B  0111100100iiiiii iiiiiinnnnnttttt   7900 0000   imm(0-4095<<1)
                                    //  strh    Rt,[Xn+simm9]        LS_2C  01111000000iiiii iiiiPPnnnnnttttt   7800 0000   [Xn imm(-256..+255) pre/post/no inc]
                                    //  strh    Rt,[Xn,(Rm,ext,shl)] LS_3A  01111000001mmmmm oooS10nnnnnttttt   7820 0800   [Xn, ext(Rm)]
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DR_3C        DI_2A
-INST4(adds,    "adds",   0, 0, IF_EN4B,   0x2B000000,  0x2B000000,  0x2B200000,  0x31000000)
+//    enum         name            info               DR_3A        DR_3B        DR_3C        DI_2A
+INST4(adds,        "adds",         0,      IF_EN4B,   0x2B000000,  0x2B000000,  0x2B200000,  0x31000000)
                                    //  adds    Rd,Rn,Rm             DR_3A  X0101011000mmmmm 000000nnnnnddddd   2B00 0000
                                    //  adds    Rd,Rn,(Rm,shk,imm)   DR_3B  X0101011sh0mmmmm ssssssnnnnnddddd   2B00 0000   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  adds    Rd,Rn,(Rm,ext,shl)   DR_3C  X0101011001mmmmm ooosssnnnnnddddd   2B20 0000   ext(Rm) LSL imm(0-4)
                                    //  adds    Rd,Rn,i12            DI_2A  X0110001shiiiiii iiiiiinnnnnddddd   3100 0000   imm(i12,sh)
 
-INST4(subs,    "subs",   0, 0, IF_EN4B,   0x6B000000,  0x6B000000,  0x6B200000,  0x71000000)
+INST4(subs,        "subs",         0,      IF_EN4B,   0x6B000000,  0x6B000000,  0x6B200000,  0x71000000)
                                    //  subs    Rd,Rn,Rm             DR_3A  X1101011000mmmmm 000000nnnnnddddd   6B00 0000
                                    //  subs    Rd,Rn,(Rm,shk,imm)   DR_3B  X1101011sh0mmmmm ssssssnnnnnddddd   6B00 0000   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  subs    Rd,Rn,(Rm,ext,shl)   DR_3C  X1101011001mmmmm ooosssnnnnnddddd   6B20 0000   ext(Rm) LSL imm(0-4)
                                    //  subs    Rd,Rn,i12            DI_2A  X1110001shiiiiii iiiiiinnnnnddddd   7100 0000   imm(i12,sh)
 
-//    enum     name     FP LD/ST            DR_2A        DR_2B        DR_2C        DI_1A
-INST4(cmp,     "cmp",    0,CMP,IF_EN4C,   0x6B00001F,  0x6B00001F,  0x6B20001F,  0x7100001F)
+//    enum         name            info               DR_2A        DR_2B        DR_2C        DI_1A
+INST4(cmp,         "cmp",          CMP,    IF_EN4C,   0x6B00001F,  0x6B00001F,  0x6B20001F,  0x7100001F)
                                    //  cmp     Rn,Rm                DR_2A  X1101011000mmmmm 000000nnnnn11111   6B00 001F
                                    //  cmp     Rn,(Rm,shk,imm)      DR_2B  X1101011sh0mmmmm ssssssnnnnn11111   6B00 001F   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  cmp     Rn,(Rm,ext,shl)      DR_2C  X1101011001mmmmm ooosssnnnnn11111   6B20 001F   ext(Rm) LSL imm(0-4)
                                    //  cmp     Rn,i12               DI_1A  X111000100iiiiii iiiiiinnnnn11111   7100 001F   imm(i12,sh)
 
-INST4(cmn,     "cmn",    0,CMP,IF_EN4C,   0x2B00001F,  0x2B00001F,  0x2B20001F,  0x3100001F)
+INST4(cmn,         "cmn",          CMP,    IF_EN4C,   0x2B00001F,  0x2B00001F,  0x2B20001F,  0x3100001F)
                                    //  cmn     Rn,Rm                DR_2A  X0101011000mmmmm 000000nnnnn11111   2B00 001F
                                    //  cmn     Rn,(Rm,shk,imm)      DR_2B  X0101011sh0mmmmm ssssssnnnnn11111   2B00 001F   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  cmn     Rn,(Rm,ext,shl)      DR_2C  X0101011001mmmmm ooosssnnnnn11111   2B20 001F   ext(Rm) LSL imm(0-4)
                                    //  cmn     Rn,i12               DI_1A  X0110001shiiiiii iiiiiinnnnn11111   3100 001F   imm(0-4095)
 
-//    enum     name     FP LD/ST            DV_3B        DV_3D        DV_3BI       DV_3DI
-INST4(fmul,    "fmul",   0, 0, IF_EN4D,   0x2E20DC00,  0x1E200800,  0x0F809000,  0x5F809000)
+//    enum         name            info               DV_3B        DV_3D        DV_3BI       DV_3DI
+INST4(fmul,        "fmul",         0,      IF_EN4D,   0x2E20DC00,  0x1E200800,  0x0F809000,  0x5F809000)
                                    //  fmul    Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 110111nnnnnddddd   2E20 DC00   Vd,Vn,Vm   (vector)
                                    //  fmul    Vd,Vn,Vm             DV_3D  000111100X1mmmmm 000010nnnnnddddd   1E20 0800   Vd,Vn,Vm   (scalar)
                                    //  fmul    Vd,Vn,Vm[]           DV_3BI 0Q0011111XLmmmmm 1001H0nnnnnddddd   0F80 9000   Vd,Vn,Vm[] (vector by elem)
                                    //  fmul    Vd,Vn,Vm[]           DV_3DI 010111111XLmmmmm 1001H0nnnnnddddd   5F80 9000   Vd,Vn,Vm[] (scalar by elem)
 
-INST4(fmulx,   "fmulx",  0, 0, IF_EN4D,   0x0E20DC00,  0x5E20DC00,  0x2F809000,  0x7F809000)
+INST4(fmulx,       "fmulx",        0,      IF_EN4D,   0x0E20DC00,  0x5E20DC00,  0x2F809000,  0x7F809000)
                                    //  fmulx   Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 110111nnnnnddddd   0E20 DC00   Vd,Vn,Vm   (vector)
                                    //  fmulx   Vd,Vn,Vm             DV_3D  010111100X1mmmmm 110111nnnnnddddd   5E20 DC00   Vd,Vn,Vm   (scalar)
                                    //  fmulx   Vd,Vn,Vm[]           DV_3BI 0Q1011111XLmmmmm 1001H0nnnnnddddd   2F80 9000   Vd,Vn,Vm[] (vector by elem)
                                    //  fmulx   Vd,Vn,Vm[]           DV_3DI 011111111XLmmmmm 1001H0nnnnnddddd   7F80 9000   Vd,Vn,Vm[] (scalar by elem)
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DI_2C        DV_3C
-INST4(and,     "and",    0, 0, IF_EN4E,   0x0A000000,  0x0A000000,  0x12000000,  0x0E201C00)
+//    enum         name            info               DR_3A        DR_3B        DI_2C        DV_3C
+INST4(and,         "and",          0,      IF_EN4E,   0x0A000000,  0x0A000000,  0x12000000,  0x0E201C00)
                                    //  and     Rd,Rn,Rm             DR_3A  X0001010000mmmmm 000000nnnnnddddd   0A00 0000
                                    //  and     Rd,Rn,(Rm,shk,imm)   DR_3B  X0001010sh0mmmmm iiiiiinnnnnddddd   0A00 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  and     Rd,Rn,imm(N,r,s)     DI_2C  X00100100Nrrrrrr ssssssnnnnnddddd   1200 0000   imm(N,r,s)
                                    //  and     Vd,Vn,Vm             DV_3C  0Q001110001mmmmm 000111nnnnnddddd   0E20 1C00   Vd,Vn,Vm
 
-INST4(eor,     "eor",    0, 0, IF_EN4E,   0x4A000000,  0x4A000000,  0x52000000,  0x2E201C00)
+INST4(eor,         "eor",          0,      IF_EN4E,   0x4A000000,  0x4A000000,  0x52000000,  0x2E201C00)
                                    //  eor     Rd,Rn,Rm             DR_3A  X1001010000mmmmm 000000nnnnnddddd   4A00 0000
                                    //  eor     Rd,Rn,(Rm,shk,imm)   DR_3B  X1001010sh0mmmmm iiiiiinnnnnddddd   4A00 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  eor     Rd,Rn,imm(N,r,s)     DI_2C  X10100100Nrrrrrr ssssssnnnnnddddd   5200 0000   imm(N,r,s)
                                    //  eor     Vd,Vn,Vm             DV_3C  0Q101110001mmmmm 000111nnnnnddddd   2E20 1C00   Vd,Vn,Vm
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DV_3C        DV_1B
-INST4(bic,     "bic",    0, 0, IF_EN4F,   0x0A200000,  0x0A200000,  0x0E601C00,  0x2F001400)
+//    enum         name            info               DR_3A        DR_3B        DV_3C        DV_1B
+INST4(bic,         "bic",          0,      IF_EN4F,   0x0A200000,  0x0A200000,  0x0E601C00,  0x2F001400)
                                    //  bic     Rd,Rn,Rm             DR_3A  X0001010001mmmmm 000000nnnnnddddd   0A20 0000
                                    //  bic     Rd,Rn,(Rm,shk,imm)   DR_3B  X0001010sh1mmmmm iiiiiinnnnnddddd   0A20 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  bic     Vd,Vn,Vm             DV_3C  0Q001110011mmmmm 000111nnnnnddddd   0E60 1C00   Vd,Vn,Vm
                                    //  bic     Vd,imm8              DV_1B  0Q10111100000iii ---101iiiiiddddd   2F00 1400   Vd imm8  (immediate vector)
 
-//    enum     name     FP LD/ST            DR_2E        DR_2F        DV_2M        DV_2L
-INST4(neg,     "neg",    0, 0, IF_EN4G,   0x4B0003E0,  0x4B0003E0,  0x2E20B800,  0x7E20B800)
+//    enum         name            info               DR_2E        DR_2F        DV_2M        DV_2L
+INST4(neg,         "neg",          0,      IF_EN4G,   0x4B0003E0,  0x4B0003E0,  0x2E20B800,  0x7E20B800)
                                    //  neg     Rd,Rm                DR_2E  X1001011000mmmmm 00000011111ddddd   4B00 03E0
                                    //  neg     Rd,(Rm,shk,imm)      DR_2F  X1001011sh0mmmmm ssssss11111ddddd   4B00 03E0   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  neg     Vd,Vn                DV_2M  0Q101110XX100000 101110nnnnnddddd   2E20 B800   Vd,Vn    (vector)
                                    //  neg     Vd,Vn                DV_2L  01111110XX100000 101110nnnnnddddd   7E20 B800   Vd,Vn    (scalar)
 
-//    enum     name     FP LD/ST            DV_3E        DV_3A      DV_2L        DV_2M
-INST4(cmeq,    "cmeq",  0, 0, IF_EN4H,   0x7EE08C00,  0x2E208C00,  0x5E209800,  0x0E209800)
+//    enum         name            info               DV_3E        DV_3A      DV_2L        DV_2M
+INST4(cmeq,        "cmeq",         0,      IF_EN4H,   0x7EE08C00,  0x2E208C00,  0x5E209800,  0x0E209800)
                                    //  cmeq    Vd,Vn,Vm             DV_3E  01111110111mmmmm 100011nnnnnddddd   7EE0 8C00   Vd,Vn,Vm   (scalar)
                                    //  cmeq    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 100011nnnnnddddd   2E20 8C00   Vd,Vn,Vm   (vector)
                                    //  cmeq    Vd,Vn                DV_2L  01011110XX100000 100110nnnnnddddd   5E20 9800   Vd,Vn      (scalar)
                                    //  cmeq    Vd,Vn                DV_2M  0Q001110XX100000 100110nnnnnddddd   0E20 9800   Vd,Vn      (vector)
 
-INST4(cmge,    "cmge",  0, 0, IF_EN4H,   0x5EE03C00,  0x0E203C00,  0x7E208800,  0x2E208800)
+INST4(cmge,        "cmge",         0,      IF_EN4H,   0x5EE03C00,  0x0E203C00,  0x7E208800,  0x2E208800)
                                    //  cmge    Vd,Vn,Vm             DV_3E  01011110111mmmmm 001111nnnnnddddd   5EE0 3C00   Vd,Vn,Vm   (scalar)
                                    //  cmge    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 001111nnnnnddddd   0E20 3C00   Vd,Vn,Vm   (vector)
                                    //  cmge    Vd,Vn                DV_2L  01111110XX100000 100010nnnnnddddd   7E20 8800   Vd,Vn      (scalar)
                                    //  cmge    Vd,Vn                DV_2M  0Q101110XX100000 100010nnnnnddddd   2E20 8800   Vd,Vn      (vector)
 
-INST4(cmgt,    "cmgt",  0, 0, IF_EN4H,   0x5EE03400,  0x0E203400,  0x5E208800,  0x0E208800)
+INST4(cmgt,        "cmgt",         0,      IF_EN4H,   0x5EE03400,  0x0E203400,  0x5E208800,  0x0E208800)
                                    //  cmgt    Vd,Vn,Vm             DV_3E  01011110111mmmmm 001101nnnnnddddd   5EE0 3400   Vd,Vn,Vm   (scalar)
                                    //  cmgt    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 001101nnnnnddddd   0E20 3400   Vd,Vn,Vm   (vector)
                                    //  cmgt    Vd,Vn                DV_2L  01011110XX100000 100010nnnnnddddd   5E20 8800   Vd,Vn      (scalar)
                                    //  cmgt    Vd,Vn                DV_2M  0Q001110XX100000 100010nnnnnddddd   0E20 8800   Vd,Vn      (vector)
 
-//    enum     name     FP LD/ST            DV_3D        DV_3B      DV_2G        DV_2A
-INST4(fcmeq,   "fcmeq", 0, 0, IF_EN4I,   0x5E20E400,  0x0E20E400,  0x5EA0D800,  0x0EA0D800)
+//    enum         name            info               DV_3D        DV_3B        DV_2G        DV_2A
+INST4(fcmeq,       "fcmeq",        0,      IF_EN4I,   0x5E20E400,  0x0E20E400,  0x5EA0D800,  0x0EA0D800)
                                    //  fcmeq   Vd,Vn,Vm             DV_3D  010111100X1mmmmm 111001nnnnnddddd   5E20 E400   Vd Vn Vm   (scalar)
                                    //  fcmeq   Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 111001nnnnnddddd   0E20 E400   Vd,Vn,Vm   (vector)
                                    //  fcmeq   Vd,Vn                DV_2G  010111101X100000 110110nnnnnddddd   5EA0 D800   Vd Vn      (scalar)
                                    //  fcmeq   Vd,Vn                DV_2A  0Q0011101X100000 110110nnnnnddddd   0EA0 D800   Vd Vn      (vector)
 
-INST4(fcmge,   "fcmge", 0, 0, IF_EN4I,   0x7E20E400,  0x2E20E400,  0x7EA0C800,  0x2EA0C800)
+INST4(fcmge,       "fcmge",        0,      IF_EN4I,   0x7E20E400,  0x2E20E400,  0x7EA0C800,  0x2EA0C800)
                                    //  fcmge   Vd,Vn,Vm             DV_3D  011111100X1mmmmm 111001nnnnnddddd   7E20 E400   Vd Vn Vm   (scalar)
                                    //  fcmge   Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 111001nnnnnddddd   2E20 E400   Vd,Vn,Vm   (vector)
                                    //  fcmge   Vd,Vn                DV_2G  011111101X100000 110010nnnnnddddd   7EA0 C800   Vd Vn      (scalar)
                                    //  fcmge   Vd,Vn                DV_2A  0Q1011101X100000 110010nnnnnddddd   2EA0 C800   Vd Vn      (vector)
 
-INST4(fcmgt,   "fcmgt", 0, 0, IF_EN4I,   0x7EA0E400,  0x2EA0E400,  0x5EA0C800,  0x0EA0C800)
+INST4(fcmgt,       "fcmgt",        0,      IF_EN4I,   0x7EA0E400,  0x2EA0E400,  0x5EA0C800,  0x0EA0C800)
                                    //  fcmgt   Vd,Vn,Vm             DV_3D  011111101X1mmmmm 111001nnnnnddddd   7EA0 E400   Vd Vn Vm   (scalar)
                                    //  fcmgt   Vd,Vn,Vm             DV_3B  0Q1011101X1mmmmm 111001nnnnnddddd   2EA0 E400   Vd,Vn,Vm   (vector)
                                    //  fcmgt   Vd,Vn                DV_2G  010111101X100000 110010nnnnnddddd   5EA0 C800   Vd Vn      (scalar)
                                    //  fcmgt   Vd,Vn                DV_2A  0Q0011101X100000 110010nnnnnddddd   0EA0 C800   Vd Vn      (vector)
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DI_2C
-INST3(ands,    "ands",   0, 0, IF_EN3A,   0x6A000000,  0x6A000000,  0x72000000)
+//    enum         name            info               DV_2N        DV_2O        DV_3E        DV_3A
+INST4(sqshl,       "sqshl",        0,      IF_EN4J,   0x5F007400,  0x0F007400,  0x5E204C00,  0x0E204C00)
+                                   //  sqshl   Vd,Vn,imm            DV_2N  010111110iiiiiii 011101nnnnnddddd   5F00 7400   Vd Vn imm  (left shift - scalar)
+                                   //  sqshl   Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 011101nnnnnddddd   0F00 7400   Vd Vn imm  (left shift - vector)
+                                   //  sqshl   Vd,Vn,Vm             DV_3E  01011110XX1mmmmm 010011nnnnnddddd   5E20 4C00   Vd Vn Vm   (scalar)
+                                   //  sqshl   Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 010011nnnnnddddd   0E20 4C00   Vd Vn Vm   (vector)
+
+INST4(uqshl,       "uqshl",        0,      IF_EN4J,   0x7F007400,  0x2F007400,  0x7E204C00,  0x2E204C00)
+                                   //  uqshl   Vd,Vn,imm            DV_2N  011111110iiiiiii 011101nnnnnddddd   7F00 7400   Vd Vn imm  (left shift - scalar)
+                                   //  uqshl   Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 011101nnnnnddddd   2F00 7400   Vd Vn imm  (left shift - vector)
+                                   //  uqshl   Vd,Vn,Vm             DV_3E  01111110XX1mmmmm 010011nnnnnddddd   7E20 4C00   Vd Vn Vm   (scalar)
+                                   //  uqshl   Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 010011nnnnnddddd   2E20 4C00   Vd Vn Vm   (vector)
+
+//    enum         name            info               DR_3A        DR_3B        DI_2C
+INST3(ands,        "ands",         0,      IF_EN3A,   0x6A000000,  0x6A000000,  0x72000000)
                                    //  ands    Rd,Rn,Rm             DR_3A  X1101010000mmmmm 000000nnnnnddddd   6A00 0000
                                    //  ands    Rd,Rn,(Rm,shk,imm)   DR_3B  X1101010sh0mmmmm iiiiiinnnnnddddd   6A00 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  ands    Rd,Rn,imm(N,r,s)     DI_2C  X11100100Nrrrrrr ssssssnnnnnddddd   7200 0000   imm(N,r,s)
 
-//    enum     name     FP LD/ST            DR_2A        DR_2B        DI_1C
-INST3(tst,     "tst",    0, 0, IF_EN3B,   0x6A00001F,  0x6A00001F,  0x7200001F)
+//    enum         name            info               DR_2A        DR_2B        DI_1C
+INST3(tst,         "tst",          0,      IF_EN3B,   0x6A00001F,  0x6A00001F,  0x7200001F)
                                    //  tst     Rn,Rm                DR_2A  X1101010000mmmmm 000000nnnnn11111   6A00 001F
                                    //  tst     Rn,(Rm,shk,imm)      DR_2B  X1101010sh0mmmmm iiiiiinnnnn11111   6A00 001F   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  tst     Rn,imm(N,r,s)        DI_1C  X11100100Nrrrrrr ssssssnnnnn11111   7200 001F   imm(N,r,s)
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B        DV_3C
-INST3(orn,     "orn",    0, 0, IF_EN3C,   0x2A200000,  0x2A200000,  0x0EE01C00)
+//    enum         name            info               DR_3A        DR_3B        DV_3C
+INST3(orn,         "orn",          0,      IF_EN3C,   0x2A200000,  0x2A200000,  0x0EE01C00)
                                    //  orn     Rd,Rn,Rm             DR_3A  X0101010001mmmmm 000000nnnnnddddd   2A20 0000
                                    //  orn     Rd,Rn,(Rm,shk,imm)   DR_3B  X0101010sh1mmmmm iiiiiinnnnnddddd   2A20 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
                                    //  orn     Vd,Vn,Vm             DV_3C  0Q001110111mmmmm 000111nnnnnddddd   0EE0 1C00   Vd,Vn,Vm
 
-//    enum     name     FP LD/ST            DV_2C        DV_2D       DV_2E
-INST3(dup,     "dup",    0, 0, IF_EN3D,   0x0E000C00,  0x0E000400,  0x5E000400)
+//    enum         name            info               DV_2C        DV_2D       DV_2E
+INST3(dup,         "dup",          0,      IF_EN3D,   0x0E000C00,  0x0E000400,  0x5E000400)
                                    //  dup     Vd,Rn                DV_2C  0Q001110000iiiii 000011nnnnnddddd   0E00 0C00   Vd,Rn   (vector from general)
                                    //  dup     Vd,Vn[]              DV_2D  0Q001110000iiiii 000001nnnnnddddd   0E00 0400   Vd,Vn[] (vector by elem)
                                    //  dup     Vd,Vn[]              DV_2E  01011110000iiiii 000001nnnnnddddd   5E00 0400   Vd,Vn[] (scalar by elem)
 
-//    enum     name     FP LD/ST            DV_3B        DV_3BI       DV_3DI
-INST3(fmla,    "fmla",   0, 0, IF_EN3E,   0x0E20CC00,  0x0F801000,  0x5F801000)
+//    enum         name            info               DV_3B        DV_3BI       DV_3DI
+INST3(fmla,        "fmla",         0,      IF_EN3E,   0x0E20CC00,  0x0F801000,  0x5F801000)
                                    //  fmla    Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 110011nnnnnddddd   0E20 CC00   Vd,Vn,Vm   (vector)
                                    //  fmla    Vd,Vn,Vm[]           DV_3BI 0Q0011111XLmmmmm 0001H0nnnnnddddd   0F80 1000   Vd,Vn,Vm[] (vector by elem)
                                    //  fmla    Vd,Vn,Vm[]           DV_3DI 010111111XLmmmmm 0001H0nnnnnddddd   5F80 1000   Vd,Vn,Vm[] (scalar by elem)
 
-INST3(fmls,    "fmls",   0, 0, IF_EN3E,   0x0EA0CC00,  0x0F805000,  0x5F805000)
+INST3(fmls,        "fmls",         0,      IF_EN3E,   0x0EA0CC00,  0x0F805000,  0x5F805000)
                                    //  fmls    Vd,Vn,Vm             DV_3B  0Q0011101X1mmmmm 110011nnnnnddddd   0EA0 CC00   Vd,Vn,Vm   (vector)
                                    //  fmls    Vd,Vn,Vm[]           DV_3BI 0Q0011111XLmmmmm 0101H0nnnnnddddd   0F80 5000   Vd,Vn,Vm[] (vector by elem)
                                    //  fmls    Vd,Vn,Vm[]           DV_3DI 010111111XLmmmmm 0101H0nnnnnddddd   5F80 5000   Vd,Vn,Vm[] (scalar by elem)
 
-//    enum     name     FP LD/ST            DV_2A        DV_2G        DV_2H
-INST3(fcvtas,  "fcvtas", 0, 0, IF_EN3F,   0x0E21C800,  0x5E21C800,  0x1E240000)
+//    enum         name            info               DV_2A        DV_2G        DV_2H
+INST3(fcvtas,      "fcvtas",       0,      IF_EN3F,   0x0E21C800,  0x5E21C800,  0x1E240000)
                                    //  fcvtas  Vd,Vn                DV_2A  0Q0011100X100001 110010nnnnnddddd   0E21 C800   Vd,Vn    (vector)
                                    //  fcvtas  Vd,Vn                DV_2G  010111100X100001 110010nnnnnddddd   5E21 C800   Vd,Vn    (scalar)
                                    //  fcvtas  Rd,Vn                DV_2H  X00111100X100100 000000nnnnnddddd   1E24 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtau,  "fcvtau", 0, 0, IF_EN3F,   0x2E21C800,  0x7E21C800,  0x1E250000)
+INST3(fcvtau,      "fcvtau",       0,      IF_EN3F,   0x2E21C800,  0x7E21C800,  0x1E250000)
                                    //  fcvtau  Vd,Vn                DV_2A  0Q1011100X100001 110010nnnnnddddd   2E21 C800   Vd,Vn    (vector)
                                    //  fcvtau  Vd,Vn                DV_2G  011111100X100001 110010nnnnnddddd   7E21 C800   Vd,Vn    (scalar)
                                    //  fcvtau  Rd,Vn                DV_2H  X00111100X100101 000000nnnnnddddd   1E25 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtms,  "fcvtms", 0, 0, IF_EN3F,   0x0E21B800,  0x5E21B800,  0x1E300000)
+INST3(fcvtms,      "fcvtms",       0,      IF_EN3F,   0x0E21B800,  0x5E21B800,  0x1E300000)
                                    //  fcvtms  Vd,Vn                DV_2A  0Q0011100X100001 101110nnnnnddddd   0E21 B800   Vd,Vn    (vector)
                                    //  fcvtms  Vd,Vn                DV_2G  010111100X100001 101110nnnnnddddd   5E21 B800   Vd,Vn    (scalar)
                                    //  fcvtms  Rd,Vn                DV_2H  X00111100X110000 000000nnnnnddddd   1E30 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtmu,  "fcvtmu", 0, 0, IF_EN3F,   0x2E21B800,  0x7E21B800,  0x1E310000)
+INST3(fcvtmu,      "fcvtmu",       0,      IF_EN3F,   0x2E21B800,  0x7E21B800,  0x1E310000)
                                    //  fcvtmu  Vd,Vn                DV_2A  0Q1011100X100001 101110nnnnnddddd   2E21 B800   Vd,Vn    (vector)
                                    //  fcvtmu  Vd,Vn                DV_2G  011111100X100001 101110nnnnnddddd   7E21 B800   Vd,Vn    (scalar)
                                    //  fcvtmu  Rd,Vn                DV_2H  X00111100X110001 000000nnnnnddddd   1E31 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtns,  "fcvtns", 0, 0, IF_EN3F,   0x0E21A800,  0x5E21A800,  0x1E200000)
+INST3(fcvtns,      "fcvtns",       0,      IF_EN3F,   0x0E21A800,  0x5E21A800,  0x1E200000)
                                    //  fcvtns  Vd,Vn                DV_2A  0Q0011100X100001 101010nnnnnddddd   0E21 A800   Vd,Vn    (vector)
                                    //  fcvtns  Vd,Vn                DV_2G  010111100X100001 101010nnnnnddddd   5E21 A800   Vd,Vn    (scalar)
                                    //  fcvtns  Rd,Vn                DV_2H  X00111100X100000 000000nnnnnddddd   1E20 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtnu,  "fcvtnu", 0, 0, IF_EN3F,   0x2E21A800,  0x7E21A800,  0x1E210000)
+INST3(fcvtnu,      "fcvtnu",       0,      IF_EN3F,   0x2E21A800,  0x7E21A800,  0x1E210000)
                                    //  fcvtnu  Vd,Vn                DV_2A  0Q1011100X100001 101010nnnnnddddd   2E21 A800   Vd,Vn    (vector)
                                    //  fcvtnu  Vd,Vn                DV_2G  011111100X100001 101010nnnnnddddd   7E21 A800   Vd,Vn    (scalar)
                                    //  fcvtnu  Rd,Vn                DV_2H  X00111100X100001 000000nnnnnddddd   1E21 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtps,  "fcvtps", 0, 0, IF_EN3F,   0x0EA1A800,  0x5EA1A800,  0x1E280000)
+INST3(fcvtps,      "fcvtps",       0,      IF_EN3F,   0x0EA1A800,  0x5EA1A800,  0x1E280000)
                                    //  fcvtps  Vd,Vn                DV_2A  0Q0011101X100001 101010nnnnnddddd   0EA1 A800   Vd,Vn    (vector)
                                    //  fcvtps  Vd,Vn                DV_2G  010111101X100001 101010nnnnnddddd   5EA1 A800   Vd,Vn    (scalar)
                                    //  fcvtps  Rd,Vn                DV_2H  X00111100X101000 000000nnnnnddddd   1E28 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtpu,  "fcvtpu", 0, 0, IF_EN3F,   0x2EA1A800,  0x7EA1A800,  0x1E290000)
+INST3(fcvtpu,      "fcvtpu",       0,      IF_EN3F,   0x2EA1A800,  0x7EA1A800,  0x1E290000)
                                    //  fcvtpu  Vd,Vn                DV_2A  0Q1011101X100001 101010nnnnnddddd   2EA1 A800   Vd,Vn    (vector)
                                    //  fcvtpu  Vd,Vn                DV_2G  011111101X100001 101010nnnnnddddd   7EA1 A800   Vd,Vn    (scalar)
                                    //  fcvtpu  Rd,Vn                DV_2H  X00111100X101001 000000nnnnnddddd   1E29 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtzs,  "fcvtzs", 0, 0, IF_EN3F,   0x0EA1B800,  0x5EA1B800,  0x1E380000)
+INST3(fcvtzs,      "fcvtzs",       0,      IF_EN3F,   0x0EA1B800,  0x5EA1B800,  0x1E380000)
                                    //  fcvtzs  Vd,Vn                DV_2A  0Q0011101X100001 101110nnnnnddddd   0EA1 B800   Vd,Vn    (vector)
                                    //  fcvtzs  Vd,Vn                DV_2G  010111101X100001 101110nnnnnddddd   5EA1 B800   Vd,Vn    (scalar)
                                    //  fcvtzs  Rd,Vn                DV_2H  X00111100X111000 000000nnnnnddddd   1E38 0000   Rd,Vn    (scalar, to general)
 
-INST3(fcvtzu,  "fcvtzu", 0, 0, IF_EN3F,   0x2EA1B800,  0x7EA1B800,  0x1E390000)
+INST3(fcvtzu,      "fcvtzu",       0,      IF_EN3F,   0x2EA1B800,  0x7EA1B800,  0x1E390000)
                                    //  fcvtzu  Vd,Vn                DV_2A  0Q1011101X100001 101110nnnnnddddd   2EA1 B800   Vd,Vn    (vector)
                                    //  fcvtzu  Vd,Vn                DV_2G  011111101X100001 101110nnnnnddddd   7EA1 B800   Vd,Vn    (scalar)
                                    //  fcvtzu  Rd,Vn                DV_2H  X00111100X111001 000000nnnnnddddd   1E39 0000   Rd,Vn    (scalar, to general)
 
-//    enum     name     FP LD/ST            DV_2A        DV_2G        DV_2I
-INST3(scvtf,   "scvtf",  0, 0, IF_EN3G,   0x0E21D800,  0x5E21D800,  0x1E220000)
+//    enum         name            info               DV_2A        DV_2G        DV_2I
+INST3(scvtf,       "scvtf",        0,      IF_EN3G,   0x0E21D800,  0x5E21D800,  0x1E220000)
                                    //  scvtf   Vd,Vn                DV_2A  0Q0011100X100001 110110nnnnnddddd   0E21 D800   Vd,Vn    (vector)
                                    //  scvtf   Vd,Vn                DV_2G  010111100X100001 110110nnnnnddddd   5E21 D800   Vd,Vn    (scalar)
                                    //  scvtf   Vd,Rn                DV_2I  X00111100X100010 000000nnnnnddddd   1E22 0000   Vd,Rn    (scalar, from general)
 
-INST3(ucvtf,   "ucvtf",  0, 0, IF_EN3G,   0x2E21D800,  0x7E21D800,  0x1E230000)
+INST3(ucvtf,       "ucvtf",        0,      IF_EN3G,   0x2E21D800,  0x7E21D800,  0x1E230000)
                                    //  ucvtf   Vd,Vn                DV_2A  0Q1011100X100001 110110nnnnnddddd   2E21 D800   Vd,Vn    (vector)
                                    //  ucvtf   Vd,Vn                DV_2G  011111100X100001 110110nnnnnddddd   7E21 D800   Vd,Vn    (scalar)
                                    //  ucvtf   Vd,Rn                DV_2I  X00111100X100011 000000nnnnnddddd   1E23 0000   Vd,Rn    (scalar, from general)
 
-INST3(mul,     "mul",    0, 0, IF_EN3H,   0x1B007C00,  0x0E209C00,  0x0F008000)
+INST3(mul,         "mul",          0,      IF_EN3H,   0x1B007C00,  0x0E209C00,  0x0F008000)
                                    //  mul     Rd,Rn,Rm             DR_3A  X0011011000mmmmm 011111nnnnnddddd   1B00 7C00
                                    //  mul     Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 100111nnnnnddddd   0E20 9C00   Vd,Vn,Vm   (vector)
                                    //  mul     Vd,Vn,Vm[]           DV_3AI 0Q001111XXLMmmmm 1000H0nnnnnddddd   0F00 8000   Vd,Vn,Vm[] (vector by elem)
 
-//    enum     name     FP LD/ST            DR_2E        DR_2F        DV_2M
-INST3(mvn,     "mvn",    0, 0, IF_EN3I,   0x2A2003E0,  0x2A2003E0,  0x2E205800)
+//    enum         name            info               DR_2E        DR_2F        DV_2M
+INST3(mvn,         "mvn",          0,      IF_EN3I,   0x2A2003E0,  0x2A2003E0,  0x2E205800)
                                    //  mvn     Rd,Rm                DR_2E  X0101010001mmmmm 00000011111ddddd   2A20 03E0
                                    //  mvn     Rd,(Rm,shk,imm)      DR_2F  X0101010sh1mmmmm iiiiii11111ddddd   2A20 03E0   Rm {LSL,LSR,ASR} imm(0-63)
                                    //  mvn     Vd,Vn                DV_2M  0Q10111000100000 010110nnnnnddddd   2E20 5800   Vd,Vn    (vector)
 
-//    enum     name     FP LD/ST            LS_2D        LS_3F        LS_2E
-INST3(ld1_2regs,"ld1",   0,LD, IF_EN3J,   0x0C40A000,  0x0CC0A000,  0x0CDFA000)
-                                   // C7.2.170 LD1 (multiple structures, two registers variant)
+//    enum         name            info               LS_2D        LS_3F        LS_2E
+INST3(ld1_2regs,   "ld1",          LD,     IF_EN3J,   0x0C40A000,  0x0CC0A000,  0x0CDFA000)
+                                   // LD1 (multiple structures, two registers variant)
                                    //  ld1     {Vt,Vt2},[Xn]        LS_2D  0Q00110001000000 1010ssnnnnnttttt   0C40 A000   base register
                                    //  ld1     {Vt,Vt2},[Xn],Xm     LS_3F  0Q001100110mmmmm 1010ssnnnnnttttt   0CC0 A000   post-indexed by a register
                                    //  ld1     {Vt,Vt2},[Xn],#imm   LS_2E  0Q00110011011111 1010ssnnnnnttttt   0CDF A000   post-indexed by an immediate
 
-INST3(ld1_3regs,"ld1",   0,LD, IF_EN3J,   0x0C406000,  0x0CC06000,  0x0CDF6000)
-                                   // C7.2.170 LD1 (multiple structures, three registers variant)
+INST3(ld1_3regs,   "ld1",          LD,     IF_EN3J,   0x0C406000,  0x0CC06000,  0x0CDF6000)
+                                   // LD1 (multiple structures, three registers variant)
                                    //  ld1     {Vt-Vt3},[Xn]        LS_2D  0Q00110001000000 0110ssnnnnnttttt   0C40 6000   base register
                                    //  ld1     {Vt-Vt3},[Xn],Xm     LS_3F  0Q001100110mmmmm 0110ssnnnnnttttt   0CC0 6000   post-indexed by a register
                                    //  ld1     {Vt-Vt3},[Xn],#imm   LS_2E  0Q00110011011111 0110ssnnnnnttttt   0CDF 6000   post-indexed by an immediate
 
-INST3(ld1_4regs,"ld1",   0,LD, IF_EN3J,   0x0C402000,  0x0CC02000,  0x0CDF2000)
-                                   // C7.2.170 LD1 (multiple structures, four registers variant)
+INST3(ld1_4regs,   "ld1",          LD,     IF_EN3J,   0x0C402000,  0x0CC02000,  0x0CDF2000)
+                                   // LD1 (multiple structures, four registers variant)
                                    //  ld1     {Vt-Vt4},[Xn]        LS_2D  0Q00110001000000 0010ssnnnnnttttt   0C40 2000   base register
                                    //  ld1     {Vt-Vt4},[Xn],Xm     LS_3F  0Q001100110mmmmm 0010ssnnnnnttttt   0CC0 2000   post-indexed by a register
                                    //  ld1     {Vt-Vt4},[Xn],#imm   LS_2E  0Q00110011011111 0010ssnnnnnttttt   0CDF 2000   post-indexed by an immediate
 
-INST3(st1_2regs,"st1",   0,ST, IF_EN3J,   0x0C00A000,  0x0C80A000,  0x0C9FA000)
-                                   // C7.2.313 ST1 (multiple structures, two registers variant)
+INST3(st1_2regs,   "st1",          ST,     IF_EN3J,   0x0C00A000,  0x0C80A000,  0x0C9FA000)
+                                   // ST1 (multiple structures, two registers variant)
                                    //  st1     {Vt,Vt2},[Xn]        LS_2D  0Q00110000000000 1010ssnnnnnttttt   0C00 A000   base register
                                    //  st1     {Vt,Vt2},[Xn],Xm     LS_3F  0Q001100100mmmmm 1010ssnnnnnttttt   0C80 A000   post-indexed by a register
                                    //  st1     {Vt,Vt2},[Xn],#imm   LS_2E  0Q00110010011111 1010ssnnnnnttttt   0C9F A000   post-indexed by an immediate
 
-INST3(st1_3regs,"st1",   0,ST, IF_EN3J,   0x0C006000,  0x0C806000,  0x0C9F6000)
-                                   // C7.2.313 ST1 (multiple structures, three registers variant)
+INST3(st1_3regs,   "st1",          ST,     IF_EN3J,   0x0C006000,  0x0C806000,  0x0C9F6000)
+                                   // ST1 (multiple structures, three registers variant)
                                    //  st1     {Vt-Vt3},[Xn]        LS_2D  0Q00110000000000 0110ssnnnnnttttt   0C00 6000   base register
                                    //  st1     {Vt-Vt3},[Xn],Xm     LS_3F  0Q001100100mmmmm 0110ssnnnnnttttt   0C80 6000   post-indexed by a register
                                    //  st1     {Vt-Vt3},[Xn],#imm   LS_2E  0Q00110010011111 0110ssnnnnnttttt   0C9F 6000   post-indexed by an immediate
 
-INST3(st1_4regs,"st1",   0,ST, IF_EN3J,   0x0C002000,  0x0C802000,  0x0C9F2000)
-                                   // C7.2.313 ST1 (multiple structures, four registers variant)
+INST3(st1_4regs,   "st1",          ST,     IF_EN3J,   0x0C002000,  0x0C802000,  0x0C9F2000)
+                                   // ST1 (multiple structures, four registers variant)
                                    //  st1     {Vt-Vt4},[Xn]        LS_2D  0Q00110000000000 0010XXnnnnnttttt   0C00 2000   base register
                                    //  st1     {Vt-Vt4},[Xn],Xm     LS_3F  0Q001100100mmmmm 0010XXnnnnnttttt   0C80 2000   post-indexed by a register
                                    //  st1     {Vt-Vt4},[Xn],#imm   LS_2E  0Q00110010011111 0010XXnnnnnttttt   0C9F 2000   post-indexed by an immediate
 
-INST3(ld1r,    "ld1r",   0,LD, IF_EN3J,   0x0D40C000,  0x0DC0C000,  0x0DDFC000)
-                                   // C7.2.172 LD1R
+INST3(ld1r,        "ld1r",         LD,     IF_EN3J,   0x0D40C000,  0x0DC0C000,  0x0DDFC000)
                                    //  ld1r    {Vt},[Xn]            LS_2D  0Q00110101000000 1100ssnnnnnttttt   0D40 C000   base register
                                    //  ld1r    {Vt},[Xn],Xm         LS_3F  0Q001101110mmmmm 1100ssnnnnnttttt   0DC0 C000   post-indexed by a register
                                    //  ld1r    {Vt},[Xn],#1         LS_2E  0Q00110111011111 1100ssnnnnnttttt   0DDF C000   post-indexed by an immediate
 
-INST3(ld2r,    "ld2r",   0,LD, IF_EN3J,   0x0D60C000,  0x0DE0C000,  0x0DFFC000)
-                                   // C7.2.175 LD2R
+INST3(ld2r,        "ld2r",         LD,     IF_EN3J,   0x0D60C000,  0x0DE0C000,  0x0DFFC000)
                                    //  ld2r    {Vt,Vt2},[Xn]        LS_2D  0Q00110101100000 1100ssnnnnnttttt   0D60 C000   base register
                                    //  ld2r    {Vt,Vt2},[Xn],Xm     LS_3F  0Q001101111mmmmm 1100ssnnnnnttttt   0DE0 C000   post-indexed by a register
                                    //  ld2r    {Vt,Vt2},[Xn],#2     LS_2E  0Q00110111111111 1100ssnnnnnttttt   0DFF C000   post-indexed by an immediate
 
-INST3(ld3r,    "ld3r",   0,LD, IF_EN3J,   0x0D40E000,  0x0DC0E000,  0x0DDFE000)
-                                   // C7.2.178 LD3R
+INST3(ld3r,        "ld3r",         LD,     IF_EN3J,   0x0D40E000,  0x0DC0E000,  0x0DDFE000)
                                    //  ld3r    {Vt-Vt3},[Xn]        LS_2D  0Q00110101000000 1110ssnnnnnttttt   0D40 E000   base register
                                    //  ld3r    {Vt-Vt3},[Xn],Xm     LS_3F  0Q001101110mmmmm 1110ssnnnnnttttt   0DC0 E000   post-indexed by a register
                                    //  ld3r    {Vt-Vt3},[Xn],#4     LS_2E  0Q00110111011111 1110ssnnnnnttttt   0DDF E000   post-indexed by an immediate
 
-INST3(ld4r,    "ld4r",   0,LD, IF_EN3J,   0x0D60E000,  0x0DE0E000,  0x0DFFE000)
-                                   // C7.2.181 LD4R
+INST3(ld4r,        "ld4r",         LD,     IF_EN3J,   0x0D60E000,  0x0DE0E000,  0x0DFFE000)
                                    //  ld4r    {Vt-Vt4},[Xn]        LS_2D  0Q00110101100000 1110ssnnnnnttttt   0D60 E000   base register
                                    //  ld4r    {Vt-Vt4},[Xn],Xm     LS_3F  0Q001101111mmmmm 1110ssnnnnnttttt   0DE0 E000   post-indexed by a register
                                    //  ld4r    {Vt-Vt4},[Xn],#8     LS_2E  0Q00110111111111 1110ssnnnnnttttt   0DFF E000   post-indexed by an immediate
 
-INST3(smull,   "smull",  0, 0, IF_EN3K,   0x9B207C00,  0x0E20C000,  0x0F00A000)
-                                   // C6.2.243 SMULL
-                                   // C7.2.272 SMULL, SMULL2 (by element)
-                                   // C7.2.273 SMULL, SMULL2 (vector)
+INST3(smull,       "smull",        0,      IF_EN3K,   0x9B207C00,  0x0E20C000,  0x0F00A000)
                                    //  smull   Rd,Rn,Rm             DR_3A  10011011001mmmmm 011111nnnnnddddd   9B20 7C00
                                    //  smull   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 110000nnnnnddddd   0E20 C000   Vd,Vn,Vm   (vector)
                                    //  smull   Vd,Vn,Vm[]           DV_3HI 00001111XXLMmmmm 1010H0nnnnnddddd   0F00 A000   Vd,Vn,Vm[] (vector by elem)
 
-INST3(umull,   "umull",  0, 0, IF_EN3K,   0x9BA07C00,  0x2E20C000,  0x2F00A000)
-                                   // C6.2.340 UMULL
-                                   // C7.2.362 UMULL, UMULL2 (by element)
-                                   // C7.2.363 UMULL, UMULL2 (vector)
+INST3(umull,       "umull",        0,      IF_EN3K,   0x9BA07C00,  0x2E20C000,  0x2F00A000)
                                    //  umull   Rd,Rn,Rm             DR_3A  10011011101mmmmm 011111nnnnnddddd   9BA0 7C00
                                    //  umull   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 110000nnnnnddddd   2E20 C000   Vd,Vn,Vm   (vector)
                                    //  umull   Vd,Vn,Vm[]           DV_3HI 00101111XXLMmmmm 1010H0nnnnnddddd   2F00 A000   Vd,Vn,Vm[] (vector by elem)
 
-//    enum     name     FP LD/ST            DR_2E        DR_2F
-INST2(negs,    "negs",   0, 0, IF_EN2A,   0x6B0003E0,  0x6B0003E0)
+//    enum         name            info               DR_2E        DR_2F
+INST2(negs,        "negs",         0,      IF_EN2A,   0x6B0003E0,  0x6B0003E0)
                                    //  negs    Rd,Rm                DR_2E  X1101011000mmmmm 00000011111ddddd   6B00 03E0
                                    //  negs    Rd,(Rm,shk,imm)      DR_2F  X1101011sh0mmmmm ssssss11111ddddd   6B00 03E0   Rm {LSL,LSR,ASR} imm(0-63)
 
-//    enum     name     FP LD/ST            DR_3A        DR_3B
-INST2(bics,    "bics",   0, 0, IF_EN2B,   0x6A200000,  0x6A200000)
+//    enum         name            info             DR_3A        DR_3B
+INST2(bics,        "bics",         0,      IF_EN2B,   0x6A200000,  0x6A200000)
                                    //  bics    Rd,Rn,Rm             DR_3A  X1101010001mmmmm 000000nnnnnddddd   6A20 0000
                                    //  bics    Rd,Rn,(Rm,shk,imm)   DR_3B  X1101010sh1mmmmm iiiiiinnnnnddddd   6A20 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
 
-INST2(eon,     "eon",    0, 0, IF_EN2B,   0x4A200000,  0x4A200000)
+INST2(eon,         "eon",          0,      IF_EN2B,   0x4A200000,  0x4A200000)
                                    //  eon     Rd,Rn,Rm             DR_3A  X1001010001mmmmm 000000nnnnnddddd   4A20 0000
                                    //  eon     Rd,Rn,(Rm,shk,imm)   DR_3B  X1001010sh1mmmmm iiiiiinnnnnddddd   4A20 0000   Rm {LSL,LSR,ASR,ROR} imm(0-63)
 
-//    enum     name     FP LD/ST            DR_3A        DI_2C
-INST2(lsl,     "lsl",    0, 0, IF_EN2C,   0x1AC02000,  0x53000000)
+//    enum         name            info               DR_3A        DI_2D
+INST2(lsl,         "lsl",          0,      IF_EN2C,   0x1AC02000,  0x53000000)
                                    //  lsl     Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001000nnnnnddddd   1AC0 2000
                                    //  lsl     Rd,Rn,imm6           DI_2D  X10100110Xrrrrrr ssssssnnnnnddddd   5300 0000   imm(N,r,s)
 
-INST2(lsr,     "lsr",    0, 0, IF_EN2C,   0x1AC02400,  0x53000000)
+INST2(lsr,         "lsr",          0,      IF_EN2C,   0x1AC02400,  0x53000000)
                                    //  lsr     Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001001nnnnnddddd   1AC0 2400
                                    //  lsr     Rd,Rn,imm6           DI_2D  X10100110Xrrrrrr ssssssnnnnnddddd   5300 0000   imm(N,r,s)
 
-INST2(asr,     "asr",    0, 0, IF_EN2C,   0x1AC02800,  0x13000000)
+INST2(asr,         "asr",          0,      IF_EN2C,   0x1AC02800,  0x13000000)
                                    //  asr     Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001010nnnnnddddd   1AC0 2800
                                    //  asr     Rd,Rn,imm6           DI_2D  X00100110Xrrrrrr ssssssnnnnnddddd   1300 0000   imm(N,r,s)
 
-//    enum     name     FP LD/ST            DR_3A        DI_2B
-INST2(ror,     "ror",    0, 0, IF_EN2D,   0x1AC02C00,  0x13800000)
+//    enum         name            info               DR_3A        DI_2B
+INST2(ror,         "ror",          0,      IF_EN2D,   0x1AC02C00,  0x13800000)
                                    //  ror     Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001011nnnnnddddd   1AC0 2C00
                                    //  ror     Rd,Rn,imm6           DI_2B  X00100111X0nnnnn ssssssnnnnnddddd   1380 0000   imm(0-63)
 
-//    enum     name     FP LD/ST            LS_3B        LS_3C
-INST2(ldp,     "ldp",    0,LD, IF_EN2E,   0x29400000,  0x28400000)
+//    enum         name            info               LS_3B        LS_3C
+INST2(ldp,         "ldp",          LD,     IF_EN2E,   0x29400000,  0x28400000)
                                    //  ldp     Rt,Ra,[Xn]           LS_3B  X010100101000000 0aaaaannnnnttttt   2940 0000   [Xn imm7]
                                    //  ldp     Rt,Ra,[Xn+simm7]     LS_3C  X010100PP1iiiiii iaaaaannnnnttttt   2840 0000   [Xn imm7 LSL {} pre/post/no inc]
 
-INST2(ldpsw,   "ldpsw",  0,LD, IF_EN2E,   0x69400000,  0x68400000)
+INST2(ldpsw,       "ldpsw",        LD,     IF_EN2E,   0x69400000,  0x68400000)
                                    //  ldpsw   Rt,Ra,[Xn]           LS_3B  0110100101000000 0aaaaannnnnttttt   6940 0000   [Xn imm7]
                                    //  ldpsw   Rt,Ra,[Xn+simm7]     LS_3C  0110100PP1iiiiii iaaaaannnnnttttt   6840 0000   [Xn imm7 LSL {} pre/post/no inc]
 
-INST2(stp,     "stp",    0,ST, IF_EN2E,   0x29000000,  0x28000000)
+INST2(stp,         "stp",          ST,     IF_EN2E,   0x29000000,  0x28000000)
                                    //  stp     Rt,Ra,[Xn]           LS_3B  X010100100000000 0aaaaannnnnttttt   2900 0000   [Xn imm7]
                                    //  stp     Rt,Ra,[Xn+simm7]     LS_3C  X010100PP0iiiiii iaaaaannnnnttttt   2800 0000   [Xn imm7 LSL {} pre/post/no inc]
 
-INST2(ldnp,    "ldnp",   0,LD, IF_EN2E,   0x28400000,  0x28400000)
+INST2(ldnp,        "ldnp",         LD,     IF_EN2E,   0x28400000,  0x28400000)
                                    //  ldnp    Rt,Ra,[Xn]           LS_3B  X010100001000000 0aaaaannnnnttttt   2840 0000   [Xn imm7]
                                    //  ldnp    Rt,Ra,[Xn+simm7]     LS_3C  X010100001iiiiii iaaaaannnnnttttt   2840 0000   [Xn imm7 LSL {}]
 
-INST2(stnp,    "stnp",   0,ST, IF_EN2E,   0x28000000,  0x28000000)
+INST2(stnp,        "stnp",         ST,     IF_EN2E,   0x28000000,  0x28000000)
                                    //  stnp    Rt,Ra,[Xn]           LS_3B  X010100000000000 0aaaaannnnnttttt   2800 0000   [Xn imm7]
                                    //  stnp    Rt,Ra,[Xn+simm7]     LS_3C  X010100000iiiiii iaaaaannnnnttttt   2800 0000   [Xn imm7 LSL {}]
 
-INST2(ccmp,    "ccmp",   0,CMP,IF_EN2F,   0x7A400000,  0x7A400800)
+INST2(ccmp,        "ccmp",         CMP,    IF_EN2F,   0x7A400000,  0x7A400800)
                                    //  ccmp    Rn,Rm,  nzcv,cond    DR_2I  X1111010010mmmmm cccc00nnnnn0nzcv   7A40 0000         nzcv, cond
                                    //  ccmp    Rn,imm5,nzcv,cond    DI_1F  X1111010010iiiii cccc10nnnnn0nzcv   7A40 0800   imm5, nzcv, cond
 
-INST2(ccmn,    "ccmn",   0,CMP,IF_EN2F,   0x3A400000,  0x3A400800)
+INST2(ccmn,        "ccmn",         CMP,    IF_EN2F,   0x3A400000,  0x3A400800)
                                    //  ccmn    Rn,Rm,  nzcv,cond    DR_2I  X0111010010mmmmm cccc00nnnnn0nzcv   3A40 0000         nzcv, cond
                                    //  ccmn    Rn,imm5,nzcv,cond    DI_1F  X0111010010iiiii cccc10nnnnn0nzcv   3A40 0800   imm5, nzcv, cond
 
-//    enum     name     FP LD/ST            DV_2C        DV_2F
-INST2(ins,     "ins",    0, 0, IF_EN2H,   0x4E001C00,  0x6E000400)
+//    enum         name            info               DV_2C        DV_2F
+INST2(ins,         "ins",          0,      IF_EN2H,   0x4E001C00,  0x6E000400)
                                    //  ins     Vd[],Rn              DV_2C  01001110000iiiii 000111nnnnnddddd   4E00 1C00   Vd[],Rn   (from general)
                                    //  ins     Vd[],Vn[]            DV_2F  01101110000iiiii 0jjjj1nnnnnddddd   6E00 0400   Vd[],Vn[] (from/to elem)
 
-//    enum     name     FP LD/ST            DV_3B        DV_3D
-INST2(fadd,    "fadd",   0, 0, IF_EN2G,   0x0E20D400,  0x1E202800)
+//    enum         name            info               DV_3B        DV_3D
+INST2(fadd,        "fadd",         0,      IF_EN2G,   0x0E20D400,  0x1E202800)
                                    //  fadd    Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 110101nnnnnddddd   0E20 D400   Vd,Vn,Vm  (vector)
                                    //  fadd    Vd,Vn,Vm             DV_3D  000111100X1mmmmm 001010nnnnnddddd   1E20 2800   Vd,Vn,Vm  (scalar)
 
-INST2(fsub,    "fsub",   0, 0, IF_EN2G,   0x0EA0D400,  0x1E203800)
+INST2(fsub,        "fsub",         0,      IF_EN2G,   0x0EA0D400,  0x1E203800)
                                    //  fsub    Vd,Vn,Vm             DV_3B  0Q0011101X1mmmmm 110101nnnnnddddd   0EA0 D400   Vd,Vn,Vm  (vector)
                                    //  fsub    Vd,Vn,Vm             DV_3D  000111100X1mmmmm 001110nnnnnddddd   1E20 3800   Vd,Vn,Vm  (scalar)
 
-INST2(fdiv,    "fdiv",   0, 0, IF_EN2G,   0x2E20FC00,  0x1E201800)
+INST2(fdiv,        "fdiv",         0,      IF_EN2G,   0x2E20FC00,  0x1E201800)
                                    //  fdiv    Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 111111nnnnnddddd   2E20 FC00   Vd,Vn,Vm  (vector)
                                    //  fdiv    Vd,Vn,Vm             DV_3D  000111100X1mmmmm 000110nnnnnddddd   1E20 1800   Vd,Vn,Vm  (scalar)
 
-INST2(fmax,    "fmax",   0, 0, IF_EN2G,   0x0E20F400,  0x1E204800)
+INST2(fmax,        "fmax",         0,      IF_EN2G,   0x0E20F400,  0x1E204800)
                                    //  fmax    Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 111101nnnnnddddd   0E20 F400   Vd,Vn,Vm  (vector)
                                    //  fmax    Vd,Vn,Vm             DV_3D  000111100X1mmmmm 010010nnnnnddddd   1E20 4800   Vd,Vn,Vm  (scalar)
 
-INST2(fmaxnm,  "fmaxnm", 0, 0, IF_EN2G,   0x0E20C400,  0x1E206800)
+INST2(fmaxnm,      "fmaxnm",       0,      IF_EN2G,   0x0E20C400,  0x1E206800)
                                    //  fmaxnm  Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 110001nnnnnddddd   0E20 C400   Vd,Vn,Vm  (vector)
                                    //  fmaxnm  Vd,Vn,Vm             DV_3D  000111100X1mmmmm 011010nnnnnddddd   1E20 6800   Vd,Vn,Vm  (scalar)
 
-INST2(fmin,    "fmin",   0, 0, IF_EN2G,   0x0EA0F400,  0x1E205800)
+INST2(fmin,        "fmin",         0,      IF_EN2G,   0x0EA0F400,  0x1E205800)
                                    //  fmin    Vd,Vn,Vm             DV_3B  0Q0011101X1mmmmm 111101nnnnnddddd   0EA0 F400   Vd,Vn,Vm  (vector)
                                    //  fmin    Vd,Vn,Vm             DV_3D  000111100X1mmmmm 010110nnnnnddddd   1E20 5800   Vd,Vn,Vm  (scalar)
 
-INST2(fminnm,  "fminnm", 0, 0, IF_EN2G,   0x0EA0C400,  0x1E207800)
+INST2(fminnm,      "fminnm",       0,      IF_EN2G,   0x0EA0C400,  0x1E207800)
                                    //  fminnm  Vd,Vn,Vm             DV_3B  0Q0011101X1mmmmm 110001nnnnnddddd   0EA0 C400   Vd,Vn,Vm  (vector)
                                    //  fminnm  Vd,Vn,Vm             DV_3D  000111100X1mmmmm 011110nnnnnddddd   1E20 7800   Vd,Vn,Vm  (scalar)
 
-INST2(fabd,    "fabd",   0, 0, IF_EN2G,   0x2EA0D400,  0x7EA0D400)
+INST2(fabd,        "fabd",         0,      IF_EN2G,   0x2EA0D400,  0x7EA0D400)
                                    //  fabd    Vd,Vn,Vm             DV_3B  0Q1011101X1mmmmm 110101nnnnnddddd   2EA0 D400   Vd,Vn,Vm  (vector)
                                    //  fabd    Vd,Vn,Vm             DV_3D  011111101X1mmmmm 110101nnnnnddddd   7EA0 D400   Vd,Vn,Vm  (scalar)
 
-INST2(facge,   "facge",  0, 0, IF_EN2G,   0x2E20EC00,  0x7E20EC00)
+INST2(facge,       "facge",        0,      IF_EN2G,   0x2E20EC00,  0x7E20EC00)
                                    //  facge   Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 111011nnnnnddddd   2E20 EC00   Vd,Vn,Vm  (vector)
                                    //  facge   Vd,Vn,Vm             DV_3D  011111100X1mmmmm 111011nnnnnddddd   7E20 EC00   Vd,Vn,Vm  (scalar)
 
-INST2(facgt,   "facgt",  0, 0, IF_EN2G,   0x2EA0EC00,  0x7EA0EC00)
+INST2(facgt,       "facgt",        0,      IF_EN2G,   0x2EA0EC00,  0x7EA0EC00)
                                    //  facgt   Vd,Vn,Vm             DV_3B  0Q1011101X1mmmmm 111011nnnnnddddd   2EA0 EC00   Vd,Vn,Vm  (vector)
                                    //  facgt   Vd,Vn,Vm             DV_3D  011111101X1mmmmm 111011nnnnnddddd   7EA0 EC00   Vd,Vn,Vm  (scalar)
 
-INST2(frecps,  "frecps", 0, 0, IF_EN2G,   0x0E20FC00,  0x5E20FC00)
-                                   // C7.2.138 FRECPS
+INST2(frecps,      "frecps",       0,      IF_EN2G,   0x0E20FC00,  0x5E20FC00)
                                    //  frecps  Vd,Vn,Vm            DV_3B  0Q0011100X1mmmmm 111111nnnnnddddd    0E20 FC00   Vd,Vn,Vm  (vector)
                                    //  frecps  Vd,Vn,Vm            DV_3D  010111100X1mmmmm 111111nnnnnddddd    5E20 FC00   Vd,Vn,Vm  (scalar)
 
-INST2(frsqrts, "frsqrts",0, 0, IF_EN2G,   0x0EA0FC00,  0x5EA0FC00)
-                                   // C7.2.163 FRSQRTS
+INST2(frsqrts,     "frsqrts",      0,      IF_EN2G,   0x0EA0FC00,  0x5EA0FC00)
                                    //  frsqrts Vd,Vn,Vm            DV_3B  0Q0011101X1mmmmm 111111nnnnnddddd    0EA0 FC00   Vd,Vn,Vm  (vector)
                                    //  frsqrts Vd,Vn,Vm            DV_3D  010111101X1mmmmm 111111nnnnnddddd    5EA0 FC00   Vd,Vn,Vm  (scalar)
 
-//    enum     name     FP LD/ST            DV_2K        DV_1C
-INST2(fcmp,    "fcmp",   0, 0, IF_EN2I,   0x1E202000,  0x1E202008)
+//    enum         name            info               DV_2K        DV_1C
+INST2(fcmp,        "fcmp",         0,      IF_EN2I,   0x1E202000,  0x1E202008)
                                    //  fcmp    Vn,Vm                DV_2K  000111100X1mmmmm 001000nnnnn00000   1E20 2000   Vn Vm
                                    //  fcmp    Vn,#0.0              DV_1C  000111100X100000 001000nnnnn01000   1E20 2008   Vn #0.0
 
-INST2(fcmpe,   "fcmpe",  0, 0, IF_EN2I,   0x1E202010,  0x1E202018)
+INST2(fcmpe,       "fcmpe",        0,      IF_EN2I,   0x1E202010,  0x1E202018)
                                    //  fcmpe   Vn,Vm                DV_2K  000111100X1mmmmm 001000nnnnn10000   1E20 2010   Vn Vm
                                    //  fcmpe   Vn,#0.0              DV_1C  000111100X100000 001000nnnnn11000   1E20 2018   Vn #0.0
 
-//    enum     name     FP LD/ST            DV_2A        DV_2G
-INST2(fabs,    "fabs",   0, 0, IF_EN2J,   0x0EA0F800,  0x1E20C000)
+//    enum         name            info               DV_2A        DV_2G
+INST2(fabs,        "fabs",         0,      IF_EN2J,   0x0EA0F800,  0x1E20C000)
                                    //  fabs    Vd,Vn                DV_2A  0Q0011101X100000 111110nnnnnddddd   0EA0 F800   Vd,Vn    (vector)
                                    //  fabs    Vd,Vn                DV_2G  000111100X100000 110000nnnnnddddd   1E20 C000   Vd,Vn    (scalar)
 
-INST2(fcmle,   "fcmle",  0, 0, IF_EN2J,   0x2EA0D800,  0x7EA0D800)
+INST2(fcmle,       "fcmle",        0,      IF_EN2J,   0x2EA0D800,  0x7EA0D800)
                                    //  fcmle   Vd,Vn                DV_2A  0Q1011101X100000 110110nnnnnddddd   2EA0 D800   Vd,Vn    (vector)
                                    //  fcmle   Vd,Vn                DV_2G  011111101X100000 110110nnnnnddddd   7EA0 D800   Vd,Vn    (scalar)
 
-INST2(fcmlt,   "fcmlt",  0, 0, IF_EN2J,   0x0EA0E800,  0x5EA0E800)
+INST2(fcmlt,       "fcmlt",        0,      IF_EN2J,   0x0EA0E800,  0x5EA0E800)
                                    //  fcmlt   Vd,Vn                DV_2A  0Q0011101X100000 111010nnnnnddddd   0EA0 E800   Vd,Vn    (vector)
                                    //  fcmlt   Vd,Vn                DV_2G  010111101X100000 111010nnnnnddddd   5EA0 E800   Vd,Vn    (scalar)
 
-INST2(fneg,    "fneg",   0, 0, IF_EN2J,   0x2EA0F800,  0x1E214000)
+INST2(fneg,        "fneg",         0,      IF_EN2J,   0x2EA0F800,  0x1E214000)
                                    //  fneg    Vd,Vn                DV_2A  0Q1011101X100000 111110nnnnnddddd   2EA0 F800   Vd,Vn    (vector)
                                    //  fneg    Vd,Vn                DV_2G  000111100X100001 010000nnnnnddddd   1E21 4000   Vd,Vn    (scalar)
 
-INST2(frecpe,  "frecpe", 0, 0, IF_EN2J,   0x0EA1D800,  0x5EA1D800)
+INST2(frecpe,      "frecpe",       0,      IF_EN2J,   0x0EA1D800,  0x5EA1D800)
                                    //  frecpe  Vd,Vn                DV_2A  0Q0011101X100001 110110nnnnnddddd   0EA1 D800   Vd,Vn    (vector)
                                    //  frecpe  Vd,Vn                DV_2G  010111101X100001 110110nnnnnddddd   5EA1 D800   Vd,Vn    (scalar)
 
-INST2(frintn,  "frintn", 0, 0, IF_EN2J,   0x0E218800,  0x1E244000)
+INST2(frintn,      "frintn",       0,      IF_EN2J,   0x0E218800,  0x1E244000)
                                    //  frintn  Vd,Vn                DV_2A  0Q0011100X100001 100010nnnnnddddd   0E21 8800   Vd,Vn    (vector)
                                    //  frintn  Vd,Vn                DV_2G  000111100X100100 010000nnnnnddddd   1E24 4000   Vd,Vn    (scalar)
 
-INST2(frintp,  "frintp", 0, 0, IF_EN2J,   0x0EA18800,  0x1E24C000)
+INST2(frintp,      "frintp",       0,      IF_EN2J,   0x0EA18800,  0x1E24C000)
                                    //  frintp  Vd,Vn                DV_2A  0Q0011101X100001 100010nnnnnddddd   0EA1 8800   Vd,Vn    (vector)
                                    //  frintp  Vd,Vn                DV_2G  000111100X100100 110000nnnnnddddd   1E24 C000   Vd,Vn    (scalar)
 
-INST2(frintm,  "frintm", 0, 0, IF_EN2J,   0x0E219800,  0x1E254000)
+INST2(frintm,      "frintm",       0,      IF_EN2J,   0x0E219800,  0x1E254000)
                                    //  frintm  Vd,Vn                DV_2A  0Q0011100X100001 100110nnnnnddddd   0E21 9800   Vd,Vn    (vector)
                                    //  frintm  Vd,Vn                DV_2G  000111100X100101 010000nnnnnddddd   1E25 4000   Vd,Vn    (scalar)
 
-INST2(frintz,  "frintz", 0, 0, IF_EN2J,   0x0EA19800,  0x1E25C000)
+INST2(frintz,      "frintz",       0,      IF_EN2J,   0x0EA19800,  0x1E25C000)
                                    //  frintz  Vd,Vn                DV_2A  0Q0011101X100001 100110nnnnnddddd   0EA1 9800   Vd,Vn    (vector)
                                    //  frintz  Vd,Vn                DV_2G  000111100X100101 110000nnnnnddddd   1E25 C000   Vd,Vn    (scalar)
 
-INST2(frinta,  "frinta", 0, 0, IF_EN2J,   0x2E218800,  0x1E264000)
+INST2(frinta,      "frinta",       0,      IF_EN2J,   0x2E218800,  0x1E264000)
                                    //  frinta  Vd,Vn                DV_2A  0Q1011100X100001 100010nnnnnddddd   2E21 8800   Vd,Vn    (vector)
                                    //  frinta  Vd,Vn                DV_2G  000111100X100110 010000nnnnnddddd   1E26 4000   Vd,Vn    (scalar)
 
-INST2(frintx,  "frintx", 0, 0, IF_EN2J,   0x2E219800,  0x1E274000)
+INST2(frintx,      "frintx",       0,      IF_EN2J,   0x2E219800,  0x1E274000)
                                    //  frintx  Vd,Vn                DV_2A  0Q1011100X100001 100110nnnnnddddd   2E21 9800   Vd,Vn    (vector)
                                    //  frintx  Vd,Vn                DV_2G  000111100X100111 010000nnnnnddddd   1E27 4000   Vd,Vn    (scalar)
 
-INST2(frinti,  "frinti", 0, 0, IF_EN2J,   0x2EA19800,  0x1E27C000)
+INST2(frinti,      "frinti",       0,      IF_EN2J,   0x2EA19800,  0x1E27C000)
                                    //  frinti  Vd,Vn                DV_2A  0Q1011101X100001 100110nnnnnddddd   2EA1 9800   Vd,Vn    (vector)
                                    //  frinti  Vd,Vn                DV_2G  000111100X100111 110000nnnnnddddd   1E27 C000   Vd,Vn    (scalar)
 
-INST2(frsqrte, "frsqrte",0, 0, IF_EN2J,   0x2EA1D800,  0x7EA1D800)
-                                   // C7.2.162 FRSQRTE
+INST2(frsqrte,     "frsqrte",      0,      IF_EN2J,   0x2EA1D800,  0x7EA1D800)
                                    //  frsqrte Vd,Vn                DV_2A  0Q1011101X100001 110110nnnnnddddd   2EA1 D800   Vd,Vn    (vector)
                                    //  frsqrte Vd,Vn                DV_2G  011111101X100001 110110nnnnnddddd   7EA1 D800   Vd,Vn    (scalar)
 
-INST2(fsqrt,   "fsqrt",  0, 0, IF_EN2J,   0x2EA1F800,  0x1E21C000)
-                                   // C7.2.164 FSQRT (vector)
+INST2(fsqrt,       "fsqrt",        0,      IF_EN2J,   0x2EA1F800,  0x1E21C000)
                                    //  fsqrt   Vd,Vn                DV_2A  0Q1011101X100001 111110nnnnnddddd   2EA1 F800   Vd,Vn    (vector)
-                                   // C7.2.165 FSQRT (scalar)
                                    //  fsqrt   Vd,Vn                DV_2G  000111100X100001 110000nnnnnddddd   1E21 C000   Vd,Vn    (scalar)
 
-//    enum     name     FP LD/ST            DV_2M        DV_2L
-INST2(abs,     "abs",    0, 0, IF_EN2K,   0x0E20B800,  0x5E20B800)
+//    enum         name            info               DV_2M        DV_2L
+INST2(abs,         "abs",          0,      IF_EN2K,   0x0E20B800,  0x5E20B800)
                                    //  abs     Vd,Vn                DV_2M  0Q001110XX100000 101110nnnnnddddd   0E20 B800   Vd,Vn    (vector)
                                    //  abs     Vd,Vn                DV_2L  01011110XX100000 101110nnnnnddddd   5E20 B800   Vd,Vn    (scalar)
 
-INST2(cmle,    "cmle",   0, 0, IF_EN2K,   0x2E209800,  0x7E209800)
+INST2(cmle,        "cmle",         0,      IF_EN2K,   0x2E209800,  0x7E209800)
                                    //  cmle    Vd,Vn                DV_2M  0Q101110XX100000 100110nnnnnddddd   2E20 9800   Vd,Vn    (vector)
                                    //  cmle    Vd,Vn                DV_2L  01111110XX100000 100110nnnnnddddd   7E20 9800   Vd,Vn    (scalar)
 
-INST2(cmlt,    "cmlt",   0, 0, IF_EN2K,  0x0E20A800,   0x5E20A800)
+INST2(cmlt,        "cmlt",         0,      IF_EN2K,   0x0E20A800,  0x5E20A800)
                                    //  cmlt    Vd,Vn                DV_2M  0Q001110XX100000 101010nnnnnddddd   0E20 A800   Vd,Vn    (vector)
                                    //  cmlt    Vd,Vn                DV_2L  01011110XX100000 101010nnnnnddddd   5E20 A800   Vd,Vn    (scalar)
 
-//    enum     name     FP LD/ST            DR_2G        DV_2M
-INST2(cls,     "cls",    0, 0, IF_EN2L,   0x5AC01400,  0x0E204800)
+//    enum         name            info               DR_2G        DV_2M
+INST2(cls,         "cls",          0,      IF_EN2L,   0x5AC01400,  0x0E204800)
                                    //  cls     Rd,Rm                DR_2G  X101101011000000 000101nnnnnddddd   5AC0 1400   Rd Rn    (general)
                                    //  cls     Vd,Vn                DV_2M  0Q00111000100000 010010nnnnnddddd   0E20 4800   Vd,Vn    (vector)
 
-INST2(clz,     "clz",    0, 0, IF_EN2L,   0x5AC01000,  0x2E204800)
+INST2(clz,         "clz",          0,      IF_EN2L,   0x5AC01000,  0x2E204800)
                                    //  clz     Rd,Rm                DR_2G  X101101011000000 000100nnnnnddddd   5AC0 1000   Rd Rn    (general)
                                    //  clz     Vd,Vn                DV_2M  0Q10111000100000 010010nnnnnddddd   2E20 4800   Vd,Vn    (vector)
 
-INST2(rbit,    "rbit",   0, 0, IF_EN2L,   0x5AC00000,  0x2E605800)
+INST2(rbit,        "rbit",         0,      IF_EN2L,   0x5AC00000,  0x2E605800)
                                    //  rbit    Rd,Rm                DR_2G  X101101011000000 000000nnnnnddddd   5AC0 0000   Rd Rn    (general)
                                    //  rbit    Vd,Vn                DV_2M  0Q10111001100000 010110nnnnnddddd   2E60 5800   Vd,Vn    (vector)
 
-INST2(rev16,   "rev16",  0, 0, IF_EN2L,   0x5AC00400,  0x0E201800)
+INST2(rev16,       "rev16",        0,      IF_EN2L,   0x5AC00400,  0x0E201800)
                                    //  rev16   Rd,Rm                DR_2G  X101101011000000 000001nnnnnddddd   5AC0 0400   Rd Rn    (general)
                                    //  rev16   Vd,Vn                DV_2M  0Q001110XX100000 000110nnnnnddddd   0E20 1800   Vd,Vn    (vector)
 
-INST2(rev32,   "rev32",  0, 0, IF_EN2L,   0xDAC00800,  0x2E200800)
+INST2(rev32,       "rev32",        0,      IF_EN2L,   0xDAC00800,  0x2E200800)
                                    //  rev32   Rd,Rm                DR_2G  1101101011000000 000010nnnnnddddd   DAC0 0800   Rd Rn    (general)
                                    //  rev32   Vd,Vn                DV_2M  0Q101110XX100000 000010nnnnnddddd   2E20 0800   Vd,Vn    (vector)
 
-//    enum     name     FP LD/ST            DV_3A        DV_3AI
-INST2(mla,     "mla",    0, 0, IF_EN2M,   0x0E209400,  0x2F000000)
+//    enum         name            info               DV_3A        DV_3AI
+INST2(mla,         "mla",          0,      IF_EN2M,   0x0E209400,  0x2F000000)
                                    //  mla     Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 100101nnnnnddddd   0E20 9400   Vd,Vn,Vm   (vector)
                                    //  mla     Vd,Vn,Vm[]           DV_3AI 0Q101111XXLMmmmm 0000H0nnnnnddddd   2F00 0000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(mls,     "mls",    0, 0, IF_EN2M,   0x2E209400,  0x2F004000)
+INST2(mls,         "mls",          0,      IF_EN2M,   0x2E209400,  0x2F004000)
                                    //  mls     Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 100101nnnnnddddd   2E20 9400   Vd,Vn,Vm   (vector)
                                    //  mls     Vd,Vn,Vm[]           DV_3AI 0Q101111XXLMmmmm 0100H0nnnnnddddd   2F00 4000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(smlal,   "smlal",  0, 0, IF_EN2R,   0x0E208000,  0x0F002000)
-                                   // C7.2.267 SMLAL, SMLAL2 (by element)
-                                   // C7.2.268 SMLAL, SMLAL2 (vector)
+INST2(smlal,       "smlal",        0,      IF_EN2R,   0x0E208000,  0x0F002000)
                                    //  smlal     Vd,Vn,Vm           DV_3H  00001110XX1mmmmm 100000nnnnnddddd   0E20 8000   Vd,Vn,Vm   (vector)
                                    //  smlal     Vd,Vn,Vm[]         DV_3HI 00001111XXLMmmmm 0010H0nnnnnddddd   0F00 2000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(smlal2,  "smlal2", 0, 0, IF_EN2R,   0x4E208000,  0x4F002000)
-                                   // C7.2.267 SMLAL, SMLAL2 (by element)
-                                   // C7.2.268 SMLAL, SMLAL2 (vector)
+INST2(smlal2,      "smlal2",       0,      IF_EN2R,   0x4E208000,  0x4F002000)
                                    //  smlal2     Vd,Vn,Vm          DV_3H  01001110XX1mmmmm 100000nnnnnddddd   4E20 8000   Vd,Vn,Vm   (vector)
                                    //  smlal2     Vd,Vn,Vm[]        DV_3HI 01001111XXLMmmmm 0010H0nnnnnddddd   4F00 2000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(smlsl,   "smlsl",  0, 0, IF_EN2R,   0x0E20A000,  0x0F006000)
-                                   // C7.2.269 SMLSL, SMLSL2 (by element)
-                                   // C7.2.270 SMLSL, SMLSL2 (vector)
+INST2(smlsl,       "smlsl",        0,      IF_EN2R,   0x0E20A000,  0x0F006000)
                                    //  smlsl     Vd,Vn,Vm           DV_3H  00001110XX1mmmmm 101000nnnnnddddd   0E20 A000   Vd,Vn,Vm   (vector)
                                    //  smlsl     Vd,Vn,Vm[]         DV_3HI 00001111XXLMmmmm 0110H0nnnnnddddd   0F00 6000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(smlsl2,  "smlsl2", 0, 0, IF_EN2R,   0x4E20A000,  0x4F006000)
-                                   // C7.2.269 SMLSL, SMLSL2 (by element)
-                                   // C7.2.270 SMLSL, SMLSL2 (vector)
+INST2(smlsl2,      "smlsl2",       0,      IF_EN2R,   0x4E20A000,  0x4F006000)
                                    //  smlsl2     Vd,Vn,Vm          DV_3H  01001110XX1mmmmm 101000nnnnnddddd   4E20 A000   Vd,Vn,Vm   (vector)
                                    //  smlsl2     Vd,Vn,Vm[]        DV_3HI 01001111XXLMmmmm 0110H0nnnnnddddd   4F00 6000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(smull2,  "smull2", 0, 0, IF_EN2R,   0x4E20C000,  0x4F00A000)
-                                   // C7.2.272 SMULL, SMULL2 (by element)
-                                   // C7.2.273 SMULL, SMULL2 (vector)
+INST2(smull2,      "smull2",       0,      IF_EN2R,   0x4E20C000,  0x4F00A000)
                                    //  smull2     Vd,Vn,Vm          DV_3H  01001110XX1mmmmm 110000nnnnnddddd   4E20 C000   Vd,Vn,Vm   (vector)
                                    //  smull2     Vd,Vn,Vm[]        DV_3HI 01001111XXLMmmmm 1010H0nnnnnddddd   4F00 A000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(umlal,   "umlal",  0, 0, IF_EN2R,   0x2E208000,  0x2F002000)
-                                   // C7.2.357 UMLAL, UMLAL2 (by element)
-                                   // C7.2.358 UMLAL, UMLAL2 (vector)
+INST2(umlal,       "umlal",        0,      IF_EN2R,   0x2E208000,  0x2F002000)
                                    //  umlal     Vd,Vn,Vm           DV_3H  00101110XX1mmmmm 100000nnnnnddddd   2E20 8000   Vd,Vn,Vm   (vector)
                                    //  umlal     Vd,Vn,Vm[]         DV_3HI 00101111XXLMmmmm 0010H0nnnnnddddd   2F00 2000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(umlal2,  "umlal2", 0, 0, IF_EN2R,   0x6E208000,  0x6F002000)
-                                   // C7.2.357 UMLAL, UMLAL2 (by element)
-                                   // C7.2.358 UMLAL, UMLAL2 (vector)
+INST2(umlal2,      "umlal2",       0,      IF_EN2R,   0x6E208000,  0x6F002000)
                                    //  umlal2     Vd,Vn,Vm          DV_3H  01101110XX1mmmmm 100000nnnnnddddd   6E20 8000   Vd,Vn,Vm   (vector)
                                    //  umlal2     Vd,Vn,Vm[]        DV_3HI 01101111XXLMmmmm 0010H0nnnnnddddd   6F00 2000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(umlsl,   "umlsl",  0, 0, IF_EN2R,   0x2E20A000,  0x2F006000)
-                                   // C7.2.359 UMLSL, UMLSL2 (by element)
-                                   // C7.2.360 UMLSL, UMLSL2 (vector)
+INST2(umlsl,       "umlsl",        0,      IF_EN2R,   0x2E20A000,  0x2F006000)
                                    //  umlsl     Vd,Vn,Vm           DV_3H  00101110XX1mmmmm 101000nnnnnddddd   2E20 A000   Vd,Vn,Vm   (vector)
                                    //  umlsl     Vd,Vn,Vm[]         DV_3HI 00101111XXLMmmmm 0110H0nnnnnddddd   2F00 6000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(umlsl2,  "umlsl2", 0, 0, IF_EN2R,   0x6E20A000,  0x6F006000)
-                                   // C7.2.359 UMLSL, UMLSL2 (by element)
-                                   // C7.2.360 UMLSL, UMLSL2 (vector)
+INST2(umlsl2,      "umlsl2",       0,      IF_EN2R,   0x6E20A000,  0x6F006000)
                                    //  umlsl2     Vd,Vn,Vm          DV_3H  01101110XX1mmmmm 101000nnnnnddddd   6E20 A000   Vd,Vn,Vm   (vector)
                                    //  umlsl2     Vd,Vn,Vm[]        DV_3HI 01101111XXLMmmmm 0110H0nnnnnddddd   6F00 6000   Vd,Vn,Vm[] (vector by elem)
 
-INST2(umull2,  "umull2", 0, 0, IF_EN2R,   0x6E20C000,  0x6F00A000)
-                                   // C7.2.362 UMULL, UMULL2 (by element)
-                                   // C7.2.363 UMULL, UMULL2 (vector)
+INST2(umull2,      "umull2",       0,      IF_EN2R,   0x6E20C000,  0x6F00A000)
                                    //  umull2     Vd,Vn,Vm          DV_3H  01101110XX1mmmmm 110000nnnnnddddd   6E20 C000   Vd,Vn,Vm   (vector)
                                    //  umull2     Vd,Vn,Vm[]        DV_3HI 01101111XXLMmmmm 1010H0nnnnnddddd   6F00 A000   Vd,Vn,Vm[] (vector by elem)
 
-//    enum     name     FP LD/ST            DV_2N        DV_2O
-INST2(sshr,    "sshr",   0, 0, IF_EN2N,   0x5F000400,  0x0F000400)
-                                   //  sshr    Vd,Vn,imm            DV_2N  010111110iiiiiii 000001nnnnnddddd   5F00 0400   Vd Vn imm  (shift - scalar)
-                                   //  sshr    Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 000001nnnnnddddd   0F00 0400   Vd,Vn imm  (shift - vector)
+//    enum         name            info               DV_2N        DV_2O
+INST2(sshr,        "sshr",         RSH,    IF_EN2N,   0x5F000400,  0x0F000400)
+                                   //  sshr    Vd,Vn,imm            DV_2N  010111110iiiiiii 000001nnnnnddddd   5F00 0400   Vd Vn imm  (right shift - scalar)
+                                   //  sshr    Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 000001nnnnnddddd   0F00 0400   Vd,Vn imm  (right shift - vector)
 
-INST2(ssra,    "ssra",   0, 0, IF_EN2N,   0x5F001400,  0x0F001400)
-                                   //  ssra    Vd,Vn,imm            DV_2N  010111110iiiiiii 000101nnnnnddddd   5F00 1400   Vd Vn imm  (shift - scalar)
-                                   //  ssra    Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 000101nnnnnddddd   0F00 1400   Vd,Vn imm  (shift - vector)
+INST2(ssra,        "ssra",         RSH,    IF_EN2N,   0x5F001400,  0x0F001400)
+                                   //  ssra    Vd,Vn,imm            DV_2N  010111110iiiiiii 000101nnnnnddddd   5F00 1400   Vd Vn imm  (right shift - scalar)
+                                   //  ssra    Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 000101nnnnnddddd   0F00 1400   Vd,Vn imm  (right shift - vector)
 
-INST2(srshr,   "srshr",  0, 0, IF_EN2N,   0x5F002400,  0x0F002400)
-                                   //  srshr   Vd,Vn,imm            DV_2N  010111110iiiiiii 001001nnnnnddddd   5F00 0400   Vd Vn imm  (shift - scalar)
-                                   //  srshr   Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 001001nnnnnddddd   0F00 0400   Vd,Vn imm  (shift - vector)
+INST2(srshr,       "srshr",        RSH,    IF_EN2N,   0x5F002400,  0x0F002400)
+                                   //  srshr   Vd,Vn,imm            DV_2N  010111110iiiiiii 001001nnnnnddddd   5F00 2400   Vd Vn imm  (right shift - scalar)
+                                   //  srshr   Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 001001nnnnnddddd   0F00 2400   Vd,Vn imm  (right shift - vector)
 
-INST2(srsra,   "srsra",  0, 0, IF_EN2N,   0x5F003400,  0x0F003400)
-                                   //  srsra   Vd,Vn,imm            DV_2N  010111110iiiiiii 001101nnnnnddddd   5F00 1400   Vd Vn imm  (shift - scalar)
-                                   //  srsra   Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 001101nnnnnddddd   0F00 1400   Vd,Vn imm  (shift - vector)
+INST2(srsra,       "srsra",        RSH,    IF_EN2N,   0x5F003400,  0x0F003400)
+                                   //  srsra   Vd,Vn,imm            DV_2N  010111110iiiiiii 001101nnnnnddddd   5F00 3400   Vd Vn imm  (right shift - scalar)
+                                   //  srsra   Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 001101nnnnnddddd   0F00 3400   Vd,Vn imm  (right shift - vector)
 
-INST2(shl,     "shl",    0, 0, IF_EN2N,   0x5F005400,  0x0F005400)
-                                   //  shl     Vd,Vn,imm            DV_2N  010111110iiiiiii 010101nnnnnddddd   5F00 5400   Vd Vn imm  (shift - scalar)
-                                   //  shl     Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 010101nnnnnddddd   0F00 5400   Vd,Vn imm  (shift - vector)
+INST2(shl,         "shl",          0,      IF_EN2N,   0x5F005400,  0x0F005400)
+                                   //  shl     Vd,Vn,imm            DV_2N  010111110iiiiiii 010101nnnnnddddd   5F00 5400   Vd Vn imm  (left shift - scalar)
+                                   //  shl     Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 010101nnnnnddddd   0F00 5400   Vd,Vn imm  (left shift - vector)
 
-INST2(ushr,    "ushr",   0, 0, IF_EN2N,   0x7F000400,  0x2F000400)
-                                   //  ushr    Vd,Vn,imm            DV_2N  011111110iiiiiii 000001nnnnnddddd   7F00 0400   Vd Vn imm  (shift - scalar)
-                                   //  ushr    Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 000001nnnnnddddd   2F00 0400   Vd,Vn imm  (shift - vector)
+INST2(ushr,        "ushr",         RSH,    IF_EN2N,   0x7F000400,  0x2F000400)
+                                   //  ushr    Vd,Vn,imm            DV_2N  011111110iiiiiii 000001nnnnnddddd   7F00 0400   Vd Vn imm  (right shift - scalar)
+                                   //  ushr    Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 000001nnnnnddddd   2F00 0400   Vd,Vn imm  (right shift - vector)
 
-INST2(usra,    "usra",   0, 0, IF_EN2N,   0x7F001400,  0x2F001400)
-                                   //  usra    Vd,Vn,imm            DV_2N  011111110iiiiiii 000101nnnnnddddd   7F00 1400   Vd Vn imm  (shift - scalar)
-                                   //  usra    Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 000101nnnnnddddd   2F00 1400   Vd,Vn imm  (shift - vector)
+INST2(usra,        "usra",         RSH,    IF_EN2N,   0x7F001400,  0x2F001400)
+                                   //  usra    Vd,Vn,imm            DV_2N  011111110iiiiiii 000101nnnnnddddd   7F00 1400   Vd Vn imm  (right shift - scalar)
+                                   //  usra    Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 000101nnnnnddddd   2F00 1400   Vd,Vn imm  (right shift - vector)
 
-INST2(urshr,   "urshr",  0, 0, IF_EN2N,   0x7F002400,  0x2F002400)
-                                   //  urshr   Vd,Vn,imm            DV_2N  011111110iiiiiii 001001nnnnnddddd   7F00 2400   Vd Vn imm  (shift - scalar)
-                                   //  urshr   Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 001001nnnnnddddd   2F00 2400   Vd,Vn imm  (shift - vector)
+INST2(urshr,       "urshr",        RSH,    IF_EN2N,   0x7F002400,  0x2F002400)
+                                   //  urshr   Vd,Vn,imm            DV_2N  011111110iiiiiii 001001nnnnnddddd   7F00 2400   Vd Vn imm  (right shift - scalar)
+                                   //  urshr   Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 001001nnnnnddddd   2F00 2400   Vd,Vn imm  (right shift - vector)
 
-INST2(ursra,   "ursra",  0, 0, IF_EN2N,   0x7F003400,  0x2F003400)
-                                   //  ursra   Vd,Vn,imm            DV_2N  011111110iiiiiii 001101nnnnnddddd   7F00 3400   Vd Vn imm  (shift - scalar)
-                                   //  ursra   Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 001101nnnnnddddd   2F00 3400   Vd,Vn imm  (shift - vector)
+INST2(ursra,       "ursra",        RSH,    IF_EN2N,   0x7F003400,  0x2F003400)
+                                   //  ursra   Vd,Vn,imm            DV_2N  011111110iiiiiii 001101nnnnnddddd   7F00 3400   Vd Vn imm  (right shift - scalar)
+                                   //  ursra   Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 001101nnnnnddddd   2F00 3400   Vd,Vn imm  (right shift - vector)
 
-INST2(sri,     "sri",    0, 0, IF_EN2N,   0x7F004400,  0x2F004400)
-                                   //  sri     Vd,Vn,imm            DV_2N  011111110iiiiiii 010001nnnnnddddd   7F00 4400   Vd Vn imm  (shift - scalar)
-                                   //  sri     Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 010001nnnnnddddd   2F00 4400   Vd,Vn imm  (shift - vector)
+INST2(sri,         "sri",          RSH,    IF_EN2N,   0x7F004400,  0x2F004400)
+                                   //  sri     Vd,Vn,imm            DV_2N  011111110iiiiiii 010001nnnnnddddd   7F00 4400   Vd Vn imm  (right shift - scalar)
+                                   //  sri     Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 010001nnnnnddddd   2F00 4400   Vd,Vn imm  (right shift - vector)
 
-INST2(sli,     "sli",    0, 0, IF_EN2N,   0x7F005400,  0x2F005400)
-                                   //  sli     Vd,Vn,imm            DV_2N  011111110iiiiiii 010101nnnnnddddd   7F00 5400   Vd Vn imm  (shift - scalar)
-                                   //  sli     Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 010101nnnnnddddd   2F00 5400   Vd,Vn imm  (shift - vector)
+INST2(sli,         "sli",          0,      IF_EN2N,   0x7F005400,  0x2F005400)
+                                   //  sli     Vd,Vn,imm            DV_2N  011111110iiiiiii 010101nnnnnddddd   7F00 5400   Vd Vn imm  (left shift - scalar)
+                                   //  sli     Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 010101nnnnnddddd   2F00 5400   Vd,Vn imm  (left shift - vector)
 
-//    enum     name     FP LD/ST            DV_3E        DV_3A
-INST2(cmhi,    "cmhi",   0, 0, IF_EN2O,   0x7EE03400,  0x2E203400)
+INST2(sqshlu,      "sqshlu",       0,      IF_EN2N,   0x7F006400,  0x2F006400)
+                                   //  sqshlu  Vd,Vn,imm            DV_2N  011111110iiiiiii 011001nnnnnddddd   7F00 6400   Vd Vn imm  (left shift - scalar)
+                                   //  sqshlu  Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 011001nnnnnddddd   2F00 6400   Vd Vn imm  (left shift - vector)
+
+INST2(sqrshrn,     "sqrshrn",      RSH,    IF_EN2N,   0x5F009C00,  0x0F009C00)
+                                   //  sqrshrn Vd,Vn,imm            DV_2N  010111110iiiiiii 100111nnnnnddddd   5F00 9C00   Vd Vn imm  (right shift - scalar)
+                                   //  sqrshrn Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 100111nnnnnddddd   0F00 9C00   Vd Vn imm  (right shift - vector)
+
+INST2(sqrshrun,    "sqrshrun",     RSH,    IF_EN2N,   0x7F008C00,  0x2F008C00)
+                                   //  sqrshrun Vd,Vn,imm           DV_2N  011111110iiiiiii 100011nnnnnddddd   7F00 8C00   Vd Vn imm  (right shift - scalar)
+                                   //  sqrshrun Vd,Vn,imm           DV_2O  0Q1011110iiiiiii 100011nnnnnddddd   2F00 8C00   Vd Vn imm  (right shift - vector)
+
+INST2(sqshrn,      "sqshrn",       RSH,    IF_EN2N,   0x5F009400,  0x0F009400)
+                                   //  sqshrn  Vd,Vn,imm            DV_2N  010111110iiiiiii 100101nnnnnddddd   5F00 9400   Vd Vn imm  (right shift - scalar)
+                                   //  sqshrn  Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 100101nnnnnddddd   0F00 9400   Vd Vn imm  (right shift - vector)
+
+INST2(sqshrun,     "sqshrun",      RSH,    IF_EN2N,   0x7F008400,  0x2F008400)
+                                   //  sqshrun  Vd,Vn,imm           DV_2N  011111110iiiiiii 100001nnnnnddddd   7F00 8400   Vd Vn imm  (right shift - scalar)
+                                   //  sqshrun  Vd,Vn,imm           DV_2O  0Q1011110iiiiiii 100001nnnnnddddd   2F00 8400   Vd Vn imm  (right shift - vector)
+
+INST2(uqrshrn,     "uqrshrn",      RSH,    IF_EN2N,   0x7F009C00,  0x2F009C00)
+                                   //  uqrshrn Vd,Vn,imm            DV_2N  011111110iiiiiii 100111nnnnnddddd   7F00 9C00   Vd Vn imm  (right shift - scalar)
+                                   //  uqrshrn Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 100111nnnnnddddd   2F00 9C00   Vd Vn imm  (right shift - vector)
+
+INST2(uqshrn,      "uqshrn",       RSH,    IF_EN2N,   0x7F009400,  0x2F009400)
+                                   //  uqshrn  Vd,Vn,imm            DV_2N  011111110iiiiiii 100101nnnnnddddd   7F00 9400   Vd Vn imm  (right shift - scalar)
+                                   //  uqshrn  Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 100101nnnnnddddd   2F00 9400   Vd Vn imm  (right shift - vector)
+
+//    enum         name            info               DV_3E        DV_3A
+INST2(cmhi,        "cmhi",         0,      IF_EN2O,   0x7EE03400,  0x2E203400)
                                    //  cmhi    Vd,Vn,Vm             DV_3E  01111110111mmmmm 001101nnnnnddddd   7EE0 3400   Vd,Vn,Vm   (scalar)
                                    //  cmhi    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 001101nnnnnddddd   2E20 3400   Vd,Vn,Vm   (vector)
 
-INST2(cmhs,    "cmhs",   0, 0, IF_EN2O,   0x7EE03C00,  0x2E203C00)
+INST2(cmhs,        "cmhs",         0,      IF_EN2O,   0x7EE03C00,  0x2E203C00)
                                    //  cmhs    Vd,Vn,Vm             DV_3E  01111110111mmmmm 001111nnnnnddddd   7EE0 3C00   Vd,Vn,Vm   (scalar)
                                    //  cmhs    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 001111nnnnnddddd   2E20 3C00   Vd,Vn,Vm   (vector)
 
-INST2(cmtst,   "cmtst",  0, 0, IF_EN2O,   0x5EE08C00,  0x0E208C00)
+INST2(cmtst,       "cmtst",        0,      IF_EN2O,   0x5EE08C00,  0x0E208C00)
                                    //  cmtst   Vd,Vn,Vm             DV_3E  01011110111mmmmm 100011nnnnnddddd   5EE0 8C00   Vd,Vn,Vm   (scalar)
                                    //  cmtst   Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 100011nnnnnddddd   0E20 8C00   Vd,Vn,Vm   (vector)
 
-INST2(sqadd,   "sqadd",  0, 0, IF_EN2O,   0x5E200C00,  0x0E200C00)
-                                   // C7.2.275 SQADD
+INST2(sqadd,       "sqadd",        0,      IF_EN2O,   0x5E200C00,  0x0E200C00)
                                    //  sqadd   Vd,Vn,Vm             DV_3E  01011110XX1mmmmm 000011nnnnnddddd   5E20 0C00   Vd,Vn,Vm   (scalar)
                                    //  sqadd   Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 000011nnnnnddddd   0E20 0C00   Vd,Vn,Vm   (vector)
 
-INST2(sqsub,   "sqsub",  0, 0, IF_EN2O,   0x5E202C00,  0x0E202C00)
-                                   // C7.2.299 SQSUB
+INST2(sqrshl,      "sqrshl",       0,      IF_EN2O,   0x5E205C00,  0x0E205C00)
+                                   //  sqrshl  Vd,Vn,Vm             DV_3E  01011110XX1mmmmm 010111nnnnnddddd   5E20 5C00   Vd,Vn,Vm   (scalar)
+                                   //  sqrshl  Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 010111nnnnnddddd   0E20 5C00   Vd,Vn,Vm   (vector)
+
+INST2(sqsub,       "sqsub",        0,      IF_EN2O,   0x5E202C00,  0x0E202C00)
                                    //  sqsub   Vd,Vn,Vm             DV_3E  01011110XX1mmmmm 001011nnnnnddddd   5E20 2C00   Vd,Vn,Vm   (scalar)
                                    //  sqsub   Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 001011nnnnnddddd   0E20 2C00   Vd,Vn,Vm   (vector)
 
-INST2(uqadd,   "uqadd",  0, 0, IF_EN2O,   0x7E200C00,  0x2E200C00)
-                                   // C7.2.364 UQADD
+INST2(srshl,       "srshl",        0,      IF_EN2O,   0x5E205400,  0x0E205400)
+                                   //  srshl    Vd,Vn,Vm            DV_3E  01011110XX1mmmmm 010101nnnnnddddd   5E20 5400   Vd,Vn,Vm   (scalar)
+                                   //  srshl    Vd,Vn,Vm            DV_3A  0Q001110XX1mmmmm 010101nnnnnddddd   0E20 5400   Vd,Vn,Vm   (vector)
+
+INST2(sshl,        "sshl",         0,      IF_EN2O,   0x5E204400,  0x0E204400)
+                                   //  sshl    Vd,Vn,Vm             DV_3E  01011110XX1mmmmm 010001nnnnnddddd   5E20 4400   Vd,Vn,Vm   (scalar)
+                                   //  sshl    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 010001nnnnnddddd   0E20 4400   Vd,Vn,Vm   (vector)
+
+INST2(uqadd,       "uqadd",        0,      IF_EN2O,   0x7E200C00,  0x2E200C00)
                                    //  uqadd   Vd,Vn,Vm             DV_3E  01111110XX1mmmmm 000011nnnnnddddd   7E20 0C00   Vd,Vn,Vm   (scalar)
                                    //  uqadd   Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 000011nnnnnddddd   2E20 0C00   Vd,Vn,Vm   (vector)
 
-INST2(uqsub,   "uqsub",  0, 0, IF_EN2O,   0x7E202C00,  0x2E202C00)
-                                   // C7.2.370 UQSUB
+INST2(uqrshl,      "uqrshl",       0,      IF_EN2O,   0x7E205C00,  0x2E205C00)
+                                   //  uqrshl  Vd,Vn,Vm             DV_3E  01111110XX1mmmmm 010111nnnnnddddd   7E20 5C00   Vd,Vn,Vm   (scalar)
+                                   //  uqrshl  Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 010111nnnnnddddd   2E20 5C00   Vd,Vn,Vm   (vector)
+
+INST2(uqsub,       "uqsub",        0,      IF_EN2O,   0x7E202C00,  0x2E202C00)
                                    //  uqsub   Vd,Vn,Vm             DV_3E  01111110XX1mmmmm 001011nnnnnddddd   7E20 2C00   Vd,Vn,Vm   (scalar)
                                    //  uqsub   Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 001011nnnnnddddd   2E20 2C00   Vd,Vn,Vm   (vector)
 
-//    enum     name     FP LD/ST            DV_2Q        DV_3B
-INST2(faddp,   "faddp",  0, 0, IF_EN2P,   0x7E30D800,  0x2E20D400)
+INST2(urshl,       "urshl",        0,      IF_EN2O,   0x7E205400,  0x2E205400)
+                                   //  urshl    Vd,Vn,Vm            DV_3E  01111110XX1mmmmm 010101nnnnnddddd   7E20 5400   Vd,Vn,Vm   (scalar)
+                                   //  urshl    Vd,Vn,Vm            DV_3A  0Q101110XX1mmmmm 010101nnnnnddddd   2E20 5400   Vd,Vn,Vm   (vector)
+
+INST2(ushl,        "ushl",         0,      IF_EN2O,   0x7E204400,  0x2E204400)
+                                   //  ushl    Vd,Vn,Vm             DV_3E  01111110XX1mmmmm 010001nnnnnddddd   7E20 4400   Vd,Vn,Vm   (scalar)
+                                   //  ushl    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 010001nnnnnddddd   2E20 4400   Vd,Vn,Vm   (vector)
+
+//    enum         name            info               DV_2Q        DV_3B
+INST2(faddp,       "faddp",        0,      IF_EN2P,   0x7E30D800,  0x2E20D400)
                                    //  faddp   Vd,Vn                DV_2Q  011111100X110000 110110nnnnnddddd   7E30 D800   Vd,Vn      (scalar)
                                    //  faddp   Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 110101nnnnnddddd   2E20 D400   Vd,Vn,Vm   (vector)
 
-INST2(fmaxnmp, "fmaxnmp",0, 0, IF_EN2P,   0x7E30C800,  0x2E20C400)
+INST2(fmaxnmp,     "fmaxnmp",      0,      IF_EN2P,   0x7E30C800,  0x2E20C400)
                                    //  fmaxnmp Vd,Vn                DV_2Q  011111100X110000 110010nnnnnddddd   7E30 C800   Vd,Vn      (scalar)
                                    //  fmaxnmp Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 110001nnnnnddddd   2E20 C400   Vd,Vn,Vm   (vector)
 
-INST2(fmaxp,   "fmaxp",  0, 0, IF_EN2P,   0x7E30F800,  0x2E20F400)
+INST2(fmaxp,       "fmaxp",        0,      IF_EN2P,   0x7E30F800,  0x2E20F400)
                                    //  fmaxp Vd,Vn                  DV_2Q  011111100X110000 111110nnnnnddddd   7E30 F800   Vd,Vn      (scalar)
                                    //  fmaxp Vd,Vn,Vm               DV_3B  0Q1011100X1mmmmm 111101nnnnnddddd   2E20 F400   Vd,Vn,Vm   (vector)
 
-INST2(fminnmp, "fminnmp",0, 0, IF_EN2P,   0x7EB0C800,  0x2EA0C400)
+INST2(fminnmp,     "fminnmp",      0,      IF_EN2P,   0x7EB0C800,  0x2EA0C400)
                                    //  fminnmp Vd,Vn                DV_2Q  011111101X110000 110010nnnnnddddd   7EB0 C800   Vd,Vn      (scalar)
                                    //  fminnmp Vd,Vn,Vm             DV_3B  0Q1011101X1mmmmm 110001nnnnnddddd   2EA0 C400   Vd,Vn,Vm   (vector)
 
-INST2(fminp,   "fminp",  0, 0, IF_EN2P,   0x7EB0F800,  0x2EA0F400)
+INST2(fminp,       "fminp",        0,      IF_EN2P,   0x7EB0F800,  0x2EA0F400)
                                    //  fminp Vd,Vn                  DV_2Q  011111101X110000 111110nnnnnddddd   7EB0 F800   Vd,Vn      (scalar)
                                    //  fminp Vd,Vn,Vm               DV_3B  0Q1011101X1mmmmm 111101nnnnnddddd   2EA0 F400   Vd,Vn,Vm   (vector)
 
-INST2(addp,    "addp",   0, 0, IF_EN2Q,   0x5E31B800,  0x0E20BC00)
+INST2(addp,        "addp",         0,      IF_EN2Q,   0x5E31B800,  0x0E20BC00)
                                    //  addp Vd,Vn                   DV_2S  01011110XX110001 101110nnnnnddddd   5E31 B800   Vd,Vn      (scalar)
                                    //  addp Vd,Vn,Vm                DV_3A  0Q001110XX1mmmmm 101111nnnnnddddd   0E20 BC00   Vd,Vn,Vm   (vector)
 
-INST1(ldar,    "ldar",   0,LD, IF_LS_2A,  0x88DFFC00)
+INST1(ldar,        "ldar",         LD,     IF_LS_2A,  0x88DFFC00)
                                    //  ldar    Rt,[Xn]              LS_2A  1X00100011011111 111111nnnnnttttt   88DF FC00
 
-INST1(ldarb,   "ldarb",  0,LD, IF_LS_2A,  0x08DFFC00)
+INST1(ldarb,       "ldarb",        LD,     IF_LS_2A,  0x08DFFC00)
                                    //  ldarb   Rt,[Xn]              LS_2A  0000100011011111 111111nnnnnttttt   08DF FC00
 
-INST1(ldarh,   "ldarh",  0,LD, IF_LS_2A,  0x48DFFC00)
+INST1(ldarh,       "ldarh",        LD,     IF_LS_2A,  0x48DFFC00)
                                    //  ldarh   Rt,[Xn]              LS_2A  0100100011011111 111111nnnnnttttt   48DF FC00
 
-INST1(ldxr,    "ldxr",   0,LD, IF_LS_2A,  0x885F7C00)
+INST1(ldxr,        "ldxr",         LD,     IF_LS_2A,  0x885F7C00)
                                    //  ldxr    Rt,[Xn]              LS_2A  1X00100001011111 011111nnnnnttttt   885F 7C00
 
-INST1(ldxrb,   "ldxrb",  0,LD, IF_LS_2A,  0x085F7C00)
+INST1(ldxrb,       "ldxrb",        LD,     IF_LS_2A,  0x085F7C00)
                                    //  ldxrb   Rt,[Xn]              LS_2A  0000100001011111 011111nnnnnttttt   085F 7C00
 
-INST1(ldxrh,   "ldxrh",  0,LD, IF_LS_2A,  0x485F7C00)
+INST1(ldxrh,       "ldxrh",        LD,     IF_LS_2A,  0x485F7C00)
                                    //  ldxrh   Rt,[Xn]              LS_2A  0100100001011111 011111nnnnnttttt   485F 7C00
 
-INST1(ldaxr,   "ldaxr",   0,LD, IF_LS_2A,  0x885FFC00)
+INST1(ldaxr,       "ldaxr",        LD,     IF_LS_2A,  0x885FFC00)
                                    //  ldaxr   Rt,[Xn]              LS_2A  1X00100001011111 111111nnnnnttttt   885F FC00
 
-INST1(ldaxrb,  "ldaxrb",  0,LD, IF_LS_2A,  0x085FFC00)
+INST1(ldaxrb,      "ldaxrb",       LD,     IF_LS_2A,  0x085FFC00)
                                    //  ldaxrb  Rt,[Xn]              LS_2A  0000100001011111 111111nnnnnttttt   085F FC00
 
-INST1(ldaxrh,  "ldaxrh",  0,LD, IF_LS_2A,  0x485FFC00)
+INST1(ldaxrh,      "ldaxrh",       LD,     IF_LS_2A,  0x485FFC00)
                                    //  ldaxrh  Rt,[Xn]              LS_2A  0100100001011111 111111nnnnnttttt   485F FC00
 
-INST1(ldur,    "ldur",   0,LD, IF_LS_2C,  0xB8400000)
+INST1(ldur,        "ldur",         LD,     IF_LS_2C,  0xB8400000)
                                    //  ldur    Rt,[Xn+simm9]        LS_2C  1X111000010iiiii iiii00nnnnnttttt   B840 0000   [Xn imm(-256..+255)]
 
-INST1(ldurb,   "ldurb",  0,LD, IF_LS_2C,  0x38400000)
+INST1(ldurb,       "ldurb",        LD,     IF_LS_2C,  0x38400000)
                                    //  ldurb   Rt,[Xn+simm9]        LS_2C  00111000010iiiii iiii00nnnnnttttt   3840 0000   [Xn imm(-256..+255)]
 
-INST1(ldurh,   "ldurh",  0,LD, IF_LS_2C,  0x78400000)
+INST1(ldurh,       "ldurh",        LD,     IF_LS_2C,  0x78400000)
                                    //  ldurh   Rt,[Xn+simm9]        LS_2C  01111000010iiiii iiii00nnnnnttttt   7840 0000   [Xn imm(-256..+255)]
 
-INST1(ldursb,  "ldursb", 0,LD, IF_LS_2C,  0x38800000)
+INST1(ldursb,      "ldursb",       LD,     IF_LS_2C,  0x38800000)
                                    //  ldursb  Rt,[Xn+simm9]        LS_2C  001110001X0iiiii iiii00nnnnnttttt   3880 0000   [Xn imm(-256..+255)]
 
-INST1(ldursh,  "ldursh", 0,LD, IF_LS_2C,  0x78800000)
+INST1(ldursh,      "ldursh",       LD,     IF_LS_2C,  0x78800000)
                                    //  ldursh  Rt,[Xn+simm9]        LS_2C  011110001X0iiiii iiii00nnnnnttttt   7880 0000   [Xn imm(-256..+255)]
 
-INST1(ldursw,  "ldursw", 0,LD, IF_LS_2C,  0xB8800000)
+INST1(ldursw,      "ldursw",       LD,     IF_LS_2C,  0xB8800000)
                                    //  ldursw  Rt,[Xn+simm9]        LS_2C  10111000100iiiii iiii00nnnnnttttt   B880 0000   [Xn imm(-256..+255)]
 
-INST1(stlr,    "stlr",   0,ST, IF_LS_2A,  0x889FFC00)
+INST1(stlr,        "stlr",         ST,     IF_LS_2A,  0x889FFC00)
                                    //  stlr    Rt,[Xn]              LS_2A  1X00100010011111 111111nnnnnttttt   889F FC00
 
-INST1(stlrb,   "stlrb",  0,ST, IF_LS_2A,  0x089FFC00)
+INST1(stlrb,       "stlrb",        ST,     IF_LS_2A,  0x089FFC00)
                                    //  stlrb   Rt,[Xn]              LS_2A  0000100010011111 111111nnnnnttttt   089F FC00
 
-INST1(stlrh,   "stlrh",  0,ST, IF_LS_2A,  0x489FFC00)
+INST1(stlrh,       "stlrh",        ST,     IF_LS_2A,  0x489FFC00)
                                    //  stlrh   Rt,[Xn]              LS_2A  0100100010011111 111111nnnnnttttt   489F FC00
 
-INST1(stxr,    "stxr",   0,ST, IF_LS_3D,  0x88007C00)
+INST1(stxr,        "stxr",         ST,     IF_LS_3D,  0x88007C00)
                                    //  stxr    Ws, Rt,[Xn]          LS_3D  1X001000000sssss 011111nnnnnttttt   8800 7C00
 
-INST1(stxrb,   "stxrb",  0,ST, IF_LS_3D,  0x08007C00)
+INST1(stxrb,       "stxrb",        ST,     IF_LS_3D,  0x08007C00)
                                    //  stxrb   Ws, Rt,[Xn]          LS_3D  00001000000sssss 011111nnnnnttttt   0800 7C00
 
-INST1(stxrh,   "stxrh",  0,ST, IF_LS_3D,  0x48007C00)
+INST1(stxrh,       "stxrh",        ST,     IF_LS_3D,  0x48007C00)
                                    //  stxrh   Ws, Rt,[Xn]          LS_3D  01001000000sssss 011111nnnnnttttt   4800 7C00
 
-INST1(stlxr,   "stlxr",   0,ST, IF_LS_3D,  0x8800FC00)
+INST1(stlxr,       "stlxr",        ST,     IF_LS_3D,  0x8800FC00)
                                    //  stlxr   Ws, Rt,[Xn]          LS_3D  1X001000000sssss 111111nnnnnttttt   8800 FC00
 
-INST1(stlxrb,  "stlxrb",  0,ST, IF_LS_3D,  0x0800FC00)
+INST1(stlxrb,      "stlxrb",       ST,     IF_LS_3D,  0x0800FC00)
                                    //  stlxrb  Ws, Rt,[Xn]          LS_3D  00001000000sssss 111111nnnnnttttt   0800 FC00
 
-INST1(stlxrh,  "stlxrh",  0,ST, IF_LS_3D,  0x4800FC00)
+INST1(stlxrh,      "stlxrh",       ST,     IF_LS_3D,  0x4800FC00)
                                    //  stlxrh  Ws, Rt,[Xn]          LS_3D  01001000000sssss 111111nnnnnttttt   4800 FC00
 
-INST1(stur,    "stur",   0,ST, IF_LS_2C,  0xB8000000)
+INST1(stur,        "stur",         ST,     IF_LS_2C,  0xB8000000)
                                    //  stur    Rt,[Xn+simm9]        LS_2C  1X111000000iiiii iiii00nnnnnttttt   B800 0000   [Xn imm(-256..+255)]
 
-INST1(sturb,   "sturb",  0,ST, IF_LS_2C,  0x38000000)
+INST1(sturb,       "sturb",        ST,     IF_LS_2C,  0x38000000)
                                    //  sturb   Rt,[Xn+simm9]        LS_2C  00111000000iiiii iiii00nnnnnttttt   3800 0000   [Xn imm(-256..+255)]
 
-INST1(sturh,   "sturh",  0,ST, IF_LS_2C,  0x78000000)
+INST1(sturh,       "sturh",        ST,     IF_LS_2C,  0x78000000)
                                    //  sturh   Rt,[Xn+simm9]        LS_2C  01111000000iiiii iiii00nnnnnttttt   7800 0000   [Xn imm(-256..+255)]
 
-INST1(casb,    "casb",   0, LD|ST, IF_LS_3E,  0x08A07C00)
+INST1(casb,        "casb",         LD|ST,  IF_LS_3E,  0x08A07C00)
                                    //  casb    Wm, Wt, [Xn]         LS_3E  00001000101mmmmm 011111nnnnnttttt   08A0 7C00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casab,   "casab",  0, LD|ST, IF_LS_3E,  0x08E07C00)
+INST1(casab,       "casab",        LD|ST,  IF_LS_3E,  0x08E07C00)
                                    //  casab   Wm, Wt, [Xn]         LS_3E  00001000111mmmmm 011111nnnnnttttt   08E0 7C00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casalb,  "casalb", 0, LD|ST, IF_LS_3E,  0x08E0FC00)
+INST1(casalb,      "casalb",       LD|ST,  IF_LS_3E,  0x08E0FC00)
                                    //  casalb  Wm, Wt, [Xn]         LS_3E  00001000111mmmmm 111111nnnnnttttt   08E0 FC00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(caslb,   "caslb",  0, LD|ST, IF_LS_3E,  0x08A0FC00)
+INST1(caslb,       "caslb",        LD|ST,  IF_LS_3E,  0x08A0FC00)
                                    //  caslb   Wm, Wt, [Xn]         LS_3E  00001000101mmmmm 111111nnnnnttttt   08A0 FC00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(cash,    "cash",   0, LD|ST, IF_LS_3E,  0x48A07C00)
+INST1(cash,        "cash",         LD|ST,  IF_LS_3E,  0x48A07C00)
                                    //  cash    Wm, Wt, [Xn]         LS_3E  01001000101mmmmm 011111nnnnnttttt   48A0 7C00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casah,   "casah",  0, LD|ST, IF_LS_3E,  0x48E07C00)
+INST1(casah,       "casah",        LD|ST,  IF_LS_3E,  0x48E07C00)
                                    //  casah   Wm, Wt, [Xn]         LS_3E  01001000111mmmmm 011111nnnnnttttt   48E0 7C00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casalh,  "casalh", 0, LD|ST, IF_LS_3E,  0x48E0FC00)
+INST1(casalh,      "casalh",       LD|ST,  IF_LS_3E,  0x48E0FC00)
                                    //  casalh  Wm, Wt, [Xn]         LS_3E  01001000111mmmmm 111111nnnnnttttt   48E0 FC00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(caslh,   "caslh",  0, LD|ST, IF_LS_3E,  0x48A0FC00)
+INST1(caslh,       "caslh",        LD|ST,  IF_LS_3E,  0x48A0FC00)
                                    //  caslh   Wm, Wt, [Xn]         LS_3E  01001000101mmmmm 111111nnnnnttttt   48A0 FC00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(cas,     "cas",    0, LD|ST, IF_LS_3E,  0x88A07C00)
+INST1(cas,         "cas",          LD|ST,  IF_LS_3E,  0x88A07C00)
                                    //  cas     Rm, Rt, [Xn]         LS_3E  1X001000101mmmmm 011111nnnnnttttt   88A0 7C00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casa,    "casa",   0, LD|ST, IF_LS_3E,  0x88E07C00)
+INST1(casa,        "casa",         LD|ST,  IF_LS_3E,  0x88E07C00)
                                    //  casa    Rm, Rt, [Xn]         LS_3E  1X001000111mmmmm 011111nnnnnttttt   88E0 7C00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casal,   "casal",  0, LD|ST, IF_LS_3E,  0x88E0FC00)
+INST1(casal,       "casal",        LD|ST,  IF_LS_3E,  0x88E0FC00)
                                    //  casal   Rm, Rt, [Xn]         LS_3E  1X001000111mmmmm 111111nnnnnttttt   88E0 FC00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(casl,    "casl",   0, LD|ST, IF_LS_3E,  0x88A0FC00)
+INST1(casl,        "casl",         LD|ST,  IF_LS_3E,  0x88A0FC00)
                                    //  casl    Rm, Rt, [Xn]         LS_3E  1X001000101mmmmm 111111nnnnnttttt   88A0 FC00   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddb,  "ldaddb",  0, LD|ST, IF_LS_3E,  0x38200000)
+INST1(ldaddb,      "ldaddb",       LD|ST,  IF_LS_3E,  0x38200000)
                                    //  ldaddb   Wm, Wt, [Xn]        LS_3E  00111000001mmmmm 000000nnnnnttttt   3820 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddab, "ldaddab", 0, LD|ST, IF_LS_3E,  0x38A00000)
+INST1(ldaddab,     "ldaddab",      LD|ST,  IF_LS_3E,  0x38A00000)
                                    //  ldaddab  Wm, Wt, [Xn]        LS_3E  00111000101mmmmm 000000nnnnnttttt   38A0 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddalb,"ldaddalb",0, LD|ST, IF_LS_3E,  0x38E00000)
+INST1(ldaddalb,    "ldaddalb",     LD|ST,  IF_LS_3E,  0x38E00000)
                                    //  ldaddalb Wm, Wt, [Xn]        LS_3E  00111000111mmmmm 000000nnnnnttttt   38E0 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddlb, "ldaddlb", 0, LD|ST, IF_LS_3E,  0x38600000)
+INST1(ldaddlb,     "ldaddlb",      LD|ST,  IF_LS_3E,  0x38600000)
                                    //  ldaddlb  Wm, Wt, [Xn]        LS_3E  00111000011mmmmm 000000nnnnnttttt   3860 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddh,  "ldaddh",  0, LD|ST, IF_LS_3E,  0x78200000)
+INST1(ldaddh,      "ldaddh",       LD|ST,  IF_LS_3E,  0x78200000)
                                    //  ldaddh   Wm, Wt, [Xn]        LS_3E  01111000001mmmmm 000000nnnnnttttt   7820 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddah, "ldaddah", 0, LD|ST, IF_LS_3E,  0x78A00000)
+INST1(ldaddah,     "ldaddah",      LD|ST,  IF_LS_3E,  0x78A00000)
                                    //  ldaddah  Wm, Wt, [Xn]        LS_3E  01111000101mmmmm 000000nnnnnttttt   78A0 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddalh,"ldaddalh",0, LD|ST, IF_LS_3E,  0x78E00000)
+INST1(ldaddalh,    "ldaddalh",     LD|ST,  IF_LS_3E,  0x78E00000)
                                    //  ldaddalh Wm, Wt, [Xn]        LS_3E  01111000111mmmmm 000000nnnnnttttt   78E0 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddlh, "ldaddlh", 0, LD|ST, IF_LS_3E,  0x78600000)
+INST1(ldaddlh,     "ldaddlh",      LD|ST,  IF_LS_3E,  0x78600000)
                                    //  ldaddlh  Wm, Wt, [Xn]        LS_3E  01111000011mmmmm 000000nnnnnttttt   7860 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldadd,   "ldadd",   0, LD|ST, IF_LS_3E,  0xB8200000)
+INST1(ldadd,       "ldadd",        LD|ST,  IF_LS_3E,  0xB8200000)
                                    //  ldadd    Rm, Rt, [Xn]        LS_3E  1X111000001mmmmm 000000nnnnnttttt   B820 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldadda,  "ldadda",  0, LD|ST, IF_LS_3E,  0xB8A00000)
+INST1(ldadda,      "ldadda",       LD|ST,  IF_LS_3E,  0xB8A00000)
                                    //  ldadda   Rm, Rt, [Xn]        LS_3E  1X111000101mmmmm 000000nnnnnttttt   B8A0 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddal, "ldaddal", 0, LD|ST, IF_LS_3E,  0xB8E00000)
+INST1(ldaddal,     "ldaddal",      LD|ST,  IF_LS_3E,  0xB8E00000)
                                    //  ldaddal  Rm, Rt, [Xn]        LS_3E  1X111000111mmmmm 000000nnnnnttttt   B8E0 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(ldaddl,  "ldaddl",  0, LD|ST, IF_LS_3E,  0xB8600000)
+INST1(ldaddl,      "ldaddl",       LD|ST,  IF_LS_3E,  0xB8600000)
                                    //  ldaddl   Rm, Rt, [Xn]        LS_3E  1X111000011mmmmm 000000nnnnnttttt   B860 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(staddb,  "staddb",  0, ST, IF_LS_3E,  0x38200000)
+INST1(staddb,      "staddb",       ST,     IF_LS_3E,  0x38200000)
                                    //  staddb   Wm, [Xn]            LS_3E  00111000001mmmmm 000000nnnnnttttt   3820 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(staddlb, "staddlb", 0, ST, IF_LS_3E,  0x38600000)
+INST1(staddlb,     "staddlb",      ST,     IF_LS_3E,  0x38600000)
                                    //  staddlb  Wm, [Xn]            LS_3E  00111000011mmmmm 000000nnnnnttttt   3860 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(staddh,  "staddh",  0, ST, IF_LS_3E,  0x78200000)
+INST1(staddh,      "staddh",       ST,     IF_LS_3E,  0x78200000)
                                    //  staddh   Wm, [Xn]            LS_3E  01111000001mmmmm 000000nnnnnttttt   7820 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(staddlh, "staddlh", 0, ST, IF_LS_3E,  0x78600000)
+INST1(staddlh,     "staddlh",      ST,     IF_LS_3E,  0x78600000)
                                    //  staddlh  Wm, [Xn]            LS_3E  01111000011mmmmm 000000nnnnnttttt   7860 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(stadd,   "stadd",   0, ST, IF_LS_3E,  0xB8200000)
+INST1(stadd,       "stadd",        ST,     IF_LS_3E,  0xB8200000)
                                    //  stadd    Rm, [Xn]            LS_3E  1X111000001mmmmm 000000nnnnnttttt   B820 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(staddl,  "staddl",  0, ST, IF_LS_3E,  0xB8600000)
+INST1(staddl,      "staddl",       ST,     IF_LS_3E,  0xB8600000)
                                    //  staddl   Rm, [Xn]            LS_3E  1X111000011mmmmm 000000nnnnnttttt   B860 0000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpb,    "swpb",   0, LD|ST, IF_LS_3E,  0x38208000)
+INST1(swpb,        "swpb",         LD|ST,  IF_LS_3E,  0x38208000)
                                    //  swpb    Wm, Wt, [Xn]         LS_3E  00111000001mmmmm 100000nnnnnttttt   3820 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpab,   "swpab",  0, LD|ST, IF_LS_3E,  0x38A08000)
+INST1(swpab,       "swpab",        LD|ST,  IF_LS_3E,  0x38A08000)
                                    //  swpab   Wm, Wt, [Xn]         LS_3E  00111000101mmmmm 100000nnnnnttttt   38A0 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpalb,  "swpalb", 0, LD|ST, IF_LS_3E,  0x38E08000)
+INST1(swpalb,      "swpalb",       LD|ST,  IF_LS_3E,  0x38E08000)
                                    //  swpalb  Wm, Wt, [Xn]         LS_3E  00111000111mmmmm 100000nnnnnttttt   38E0 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swplb,   "swplb",  0, LD|ST, IF_LS_3E,  0x38608000)
+INST1(swplb,       "swplb",        LD|ST,  IF_LS_3E,  0x38608000)
                                    //  swplb   Wm, Wt, [Xn]         LS_3E  00111000011mmmmm 100000nnnnnttttt   3860 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swph,    "swph",   0, LD|ST, IF_LS_3E,  0x78208000)
+INST1(swph,        "swph",         LD|ST,  IF_LS_3E,  0x78208000)
                                    //  swph    Wm, Wt, [Xn]         LS_3E  01111000001mmmmm 100000nnnnnttttt   7820 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpah,   "swpah",  0, LD|ST, IF_LS_3E,  0x78A08000)
+INST1(swpah,       "swpah",        LD|ST,  IF_LS_3E,  0x78A08000)
                                    //  swpah   Wm, Wt, [Xn]         LS_3E  01111000101mmmmm 100000nnnnnttttt   78A0 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpalh,  "swpalh", 0, LD|ST, IF_LS_3E,  0x78E08000)
+INST1(swpalh,      "swpalh",       LD|ST,  IF_LS_3E,  0x78E08000)
                                    //  swpalh  Wm, Wt, [Xn]         LS_3E  01111000111mmmmm 100000nnnnnttttt   78E0 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swplh,   "swplh",  0, LD|ST, IF_LS_3E,  0x78608000)
+INST1(swplh,       "swplh",        LD|ST,  IF_LS_3E,  0x78608000)
                                    //  swplh   Wm, Wt, [Xn]         LS_3E  01111000011mmmmm 100000nnnnnttttt   7860 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swp,     "swp",    0, LD|ST, IF_LS_3E,  0xB8208000)
+INST1(swp,         "swp",          LD|ST,  IF_LS_3E,  0xB8208000)
                                    //  swp     Rm, Rt, [Xn]         LS_3E  1X111000001mmmmm 100000nnnnnttttt   B820 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpa,    "swpa",   0, LD|ST, IF_LS_3E,  0xB8A08000)
+INST1(swpa,        "swpa",         LD|ST,  IF_LS_3E,  0xB8A08000)
                                    //  swpa    Rm, Rt, [Xn]         LS_3E  1X111000101mmmmm 100000nnnnnttttt   B8A0 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpal,   "swpal",  0, LD|ST, IF_LS_3E,  0xB8E08000)
+INST1(swpal,       "swpal",        LD|ST,  IF_LS_3E,  0xB8E08000)
                                    //  swpal   Rm, Rt, [Xn]         LS_3E  1X111000111mmmmm 100000nnnnnttttt   B8E0 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(swpl,    "swpl",   0, LD|ST, IF_LS_3E,  0xB8608000)
+INST1(swpl,        "swpl",         LD|ST,  IF_LS_3E,  0xB8608000)
                                    //  swpl    Rm, Rt, [Xn]         LS_3E  1X111000011mmmmm 100000nnnnnttttt   B860 8000   Rm Rt Rn ARMv8.1 LSE Atomics
 
-INST1(adr,     "adr",    0, 0, IF_DI_1E,  0x10000000)
+INST1(adr,         "adr",          0,      IF_DI_1E,  0x10000000)
                                    //  adr     Rd, simm21           DI_1E  0ii10000iiiiiiii iiiiiiiiiiiddddd   1000 0000   Rd simm21
 
-INST1(adrp,    "adrp",   0, 0, IF_DI_1E,  0x90000000)
+INST1(adrp,        "adrp",         0,      IF_DI_1E,  0x90000000)
                                    //  adrp    Rd, simm21           DI_1E  1ii10000iiiiiiii iiiiiiiiiiiddddd   9000 0000   Rd simm21
 
-INST1(b,       "b",      0, 0, IF_BI_0A,  0x14000000)
+INST1(b,           "b",            0,      IF_BI_0A,  0x14000000)
                                    //  b       simm26               BI_0A  000101iiiiiiiiii iiiiiiiiiiiiiiii   1400 0000   simm26:00
 
-INST1(b_tail,  "b",      0, 0, IF_BI_0C,  0x14000000)
+INST1(b_tail,      "b",            0,      IF_BI_0C,  0x14000000)
                                    //  b       simm26               BI_0C  000101iiiiiiiiii iiiiiiiiiiiiiiii   1400 0000   simm26:00, same as b representing a tail call of bl.
 
-INST1(bl_local,"bl",     0, 0, IF_BI_0A,  0x94000000)
+INST1(bl_local,    "bl",           0,      IF_BI_0A,  0x94000000)
                                    //  bl      simm26               BI_0A  100101iiiiiiiiii iiiiiiiiiiiiiiii   9400 0000   simm26:00, same as bl, but with a BasicBlock target.
 
-INST1(bl,      "bl",     0, 0, IF_BI_0C,  0x94000000)
+INST1(bl,          "bl",           0,      IF_BI_0C,  0x94000000)
                                    //  bl      simm26               BI_0C  100101iiiiiiiiii iiiiiiiiiiiiiiii   9400 0000   simm26:00
 
-INST1(br,      "br",     0, 0, IF_BR_1A,  0xD61F0000)
+INST1(br,          "br",           0,      IF_BR_1A,  0xD61F0000)
                                    //  br      Rn                   BR_1A  1101011000011111 000000nnnnn00000   D61F 0000, an indirect branch like switch expansion
 
-INST1(br_tail, "br",     0, 0, IF_BR_1B,  0xD61F0000)
+INST1(br_tail,     "br",           0,      IF_BR_1B,  0xD61F0000)
                                    //  br      Rn                   BR_1B  1101011000011111 000000nnnnn00000   D61F 0000, same as br representing a tail call of blr. Encode target with Reg3.
 
-INST1(blr,     "blr",    0, 0, IF_BR_1B,  0xD63F0000)
+INST1(blr,         "blr",          0,      IF_BR_1B,  0xD63F0000)
                                    //  blr     Rn                   BR_1B  1101011000111111 000000nnnnn00000   D63F 0000, Encode target with Reg3.
 
-INST1(ret,     "ret",    0, 0, IF_BR_1A,  0xD65F0000)
+INST1(ret,         "ret",          0,      IF_BR_1A,  0xD65F0000)
                                    //  ret     Rn                   BR_1A  1101011001011111 000000nnnnn00000   D65F 0000
 
-INST1(beq,     "beq",    0, 0, IF_BI_0B,  0x54000000)
+INST1(beq,         "beq",          0,      IF_BI_0B,  0x54000000)
                                    //  beq     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00000   5400 0000   simm19:00
 
-INST1(bne,     "bne",    0, 0, IF_BI_0B,  0x54000001)
+INST1(bne,         "bne",          0,      IF_BI_0B,  0x54000001)
                                    //  bne     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00001   5400 0001   simm19:00
 
-INST1(bhs,     "bhs",    0, 0, IF_BI_0B,  0x54000002)
+INST1(bhs,         "bhs",          0,      IF_BI_0B,  0x54000002)
                                    //  bhs     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00010   5400 0002   simm19:00
 
-INST1(blo,     "blo",    0, 0, IF_BI_0B,  0x54000003)
+INST1(blo,         "blo",          0,      IF_BI_0B,  0x54000003)
                                    //  blo     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00011   5400 0003   simm19:00
 
-INST1(bmi,     "bmi",    0, 0, IF_BI_0B,  0x54000004)
+INST1(bmi,         "bmi",          0,      IF_BI_0B,  0x54000004)
                                    //  bmi     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00100   5400 0004   simm19:00
 
-INST1(bpl,     "bpl",    0, 0, IF_BI_0B,  0x54000005)
+INST1(bpl,         "bpl",          0,      IF_BI_0B,  0x54000005)
                                    //  bpl     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00101   5400 0005   simm19:00
 
-INST1(bvs,     "bvs",    0, 0, IF_BI_0B,  0x54000006)
+INST1(bvs,         "bvs",          0,      IF_BI_0B,  0x54000006)
                                    //  bvs     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00110   5400 0006   simm19:00
 
-INST1(bvc,     "bvc",    0, 0, IF_BI_0B,  0x54000007)
+INST1(bvc,         "bvc",          0,      IF_BI_0B,  0x54000007)
                                    //  bvc     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii00111   5400 0007   simm19:00
 
-INST1(bhi,     "bhi",    0, 0, IF_BI_0B,  0x54000008)
+INST1(bhi,         "bhi",          0,      IF_BI_0B,  0x54000008)
                                    //  bhi     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii01000   5400 0008   simm19:00
 
-INST1(bls,     "bls",    0, 0, IF_BI_0B,  0x54000009)
+INST1(bls,         "bls",          0,      IF_BI_0B,  0x54000009)
                                    //  bls     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii01001   5400 0009   simm19:00
 
-INST1(bge,     "bge",    0, 0, IF_BI_0B,  0x5400000A)
+INST1(bge,         "bge",          0,      IF_BI_0B,  0x5400000A)
                                    //  bge     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii01010   5400 000A   simm19:00
 
-INST1(blt,     "blt",    0, 0, IF_BI_0B,  0x5400000B)
+INST1(blt,         "blt",          0,      IF_BI_0B,  0x5400000B)
                                    //  blt     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii01011   5400 000B   simm19:00
 
-INST1(bgt,     "bgt",    0, 0, IF_BI_0B,  0x5400000C)
+INST1(bgt,         "bgt",          0,      IF_BI_0B,  0x5400000C)
                                    //  bgt     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii01100   5400 000C   simm19:00
 
-INST1(ble,     "ble",    0, 0, IF_BI_0B,  0x5400000D)
+INST1(ble,         "ble",          0,      IF_BI_0B,  0x5400000D)
                                    //  ble     simm19               BI_0B  01010100iiiiiiii iiiiiiiiiii01101   5400 000D   simm19:00
 
-INST1(cbz,     "cbz",    0, 0, IF_BI_1A,  0x34000000)
+INST1(cbz,         "cbz",          0,      IF_BI_1A,  0x34000000)
                                    //  cbz     Rt, simm19           BI_1A  X0110100iiiiiiii iiiiiiiiiiittttt   3400 0000   Rt simm19:00
 
-INST1(cbnz,    "cbnz",   0, 0, IF_BI_1A,  0x35000000)
+INST1(cbnz,        "cbnz",         0,      IF_BI_1A,  0x35000000)
                                    //  cbnz    Rt, simm19           BI_1A  X0110101iiiiiiii iiiiiiiiiiittttt   3500 0000   Rt simm19:00
 
-INST1(tbz,     "tbz",    0, 0, IF_BI_1B,  0x36000000)
+INST1(tbz,         "tbz",          0,      IF_BI_1B,  0x36000000)
                                    //  tbz     Rt, imm6, simm14     BI_1B  B0110110bbbbbiii iiiiiiiiiiittttt   3600 0000   Rt imm6, simm14:00
 
-INST1(tbnz,    "tbnz",   0, 0, IF_BI_1B,  0x37000000)
+INST1(tbnz,        "tbnz",         0,      IF_BI_1B,  0x37000000)
                                    //  tbnz    Rt, imm6, simm14     BI_1B  B0110111bbbbbiii iiiiiiiiiiittttt   3700 0000   Rt imm6, simm14:00
 
-INST1(movk,    "movk",   0, 0, IF_DI_1B,  0x72800000)
+INST1(movk,        "movk",         0,      IF_DI_1B,  0x72800000)
                                    //  movk    Rd,imm(i16,hw)       DI_1B  X11100101hwiiiii iiiiiiiiiiiddddd   7280 0000   imm(i16,hw)
 
-INST1(movn,    "movn",   0, 0, IF_DI_1B,  0x12800000)
+INST1(movn,        "movn",         0,      IF_DI_1B,  0x12800000)
                                    //  movn    Rd,imm(i16,hw)       DI_1B  X00100101hwiiiii iiiiiiiiiiiddddd   1280 0000   imm(i16,hw)
 
-INST1(movz,    "movz",   0, 0, IF_DI_1B,  0x52800000)
+INST1(movz,        "movz",         0,      IF_DI_1B,  0x52800000)
                                    //  movz    Rd,imm(i16,hw)       DI_1B  X10100101hwiiiii iiiiiiiiiiiddddd   5280 0000   imm(i16,hw)
 
-INST1(csel,    "csel",   0, 0, IF_DR_3D,  0x1A800000)
+INST1(csel,        "csel",         0,      IF_DR_3D,  0x1A800000)
                                    //  csel    Rd,Rn,Rm,cond        DR_3D  X0011010100mmmmm cccc00nnnnnddddd   1A80 0000   cond
 
-INST1(csinc,   "csinc",  0, 0, IF_DR_3D,  0x1A800400)
+INST1(csinc,       "csinc",        0,      IF_DR_3D,  0x1A800400)
                                    //  csinc   Rd,Rn,Rm,cond        DR_3D  X0011010100mmmmm cccc01nnnnnddddd   1A80 0400   cond
 
-INST1(csinv,   "csinv",  0, 0, IF_DR_3D,  0x5A800000)
+INST1(csinv,       "csinv",        0,      IF_DR_3D,  0x5A800000)
                                    //  csinv   Rd,Rn,Rm,cond        DR_3D  X1011010100mmmmm cccc00nnnnnddddd   5A80 0000   cond
 
-INST1(csneg,   "csneg",  0, 0, IF_DR_3D,  0x5A800400)
+INST1(csneg,       "csneg",        0,      IF_DR_3D,  0x5A800400)
                                    //  csneg   Rd,Rn,Rm,cond        DR_3D  X1011010100mmmmm cccc01nnnnnddddd   5A80 0400   cond
 
-INST1(cinc,    "cinc",   0, 0, IF_DR_2D,  0x1A800400)
+INST1(cinc,        "cinc",         0,      IF_DR_2D,  0x1A800400)
                                    //  cinc    Rd,Rn,cond           DR_2D  X0011010100nnnnn cccc01nnnnnddddd   1A80 0400   cond
 
-INST1(cinv,    "cinv",   0, 0, IF_DR_2D,  0x5A800000)
+INST1(cinv,        "cinv",         0,      IF_DR_2D,  0x5A800000)
                                    //  cinv    Rd,Rn,cond           DR_2D  X1011010100nnnnn cccc00nnnnnddddd   5A80 0000   cond
 
-INST1(cneg,    "cneg",   0, 0, IF_DR_2D,  0x5A800400)
+INST1(cneg,        "cneg",         0,      IF_DR_2D,  0x5A800400)
                                    //  cneg    Rd,Rn,cond           DR_2D  X1011010100nnnnn cccc01nnnnnddddd   5A80 0400   cond
 
-INST1(cset,    "cset",   0, 0, IF_DR_1D,  0x1A9F07E0)
+INST1(cset,        "cset",         0,      IF_DR_1D,  0x1A9F07E0)
                                    //  cset    Rd,cond              DR_1D  X001101010011111 cccc0111111ddddd   1A9F 07E0   Rd cond
 
-INST1(csetm,   "csetm",  0, 0, IF_DR_1D,  0x5A9F03E0)
+INST1(csetm,       "csetm",        0,      IF_DR_1D,  0x5A9F03E0)
                                    //  csetm   Rd,cond              DR_1D  X101101010011111 cccc0011111ddddd   5A9F 03E0   Rd cond
 
-INST1(aese,    "aese",   0, 0, IF_DV_2P,  0x4E284800)
+INST1(aese,        "aese",         0,      IF_DV_2P,  0x4E284800)
                                    //  aese   Vd.16B,Vn.16B         DV_2P  0100111000101000 010010nnnnnddddd   4E28 4800   Vd.16B Vn.16B  (vector)
 
-INST1(aesd,    "aesd",   0, 0, IF_DV_2P,  0x4E285800)
+INST1(aesd,        "aesd",         0,      IF_DV_2P,  0x4E285800)
                                    //  aesd   Vd.16B,Vn.16B         DV_2P  0100111000101000 010110nnnnnddddd   4E28 5800   Vd.16B Vn.16B  (vector)
 
-INST1(aesmc,   "aesmc",  0, 0, IF_DV_2P,  0x4E286800)
+INST1(aesmc,       "aesmc",        0,      IF_DV_2P,  0x4E286800)
                                    //  aesmc  Vd.16B,Vn.16B         DV_2P  0100111000101000 011010nnnnnddddd   4E28 6800   Vd.16B Vn.16B  (vector)
 
-INST1(aesimc,  "aesimc", 0, 0, IF_DV_2P,  0x4E287800)
+INST1(aesimc,      "aesimc",       0,      IF_DV_2P,  0x4E287800)
                                    //  aesimc Vd.16B,Vn.16B         DV_2P  0100111000101000 011110nnnnnddddd   4E28 7800   Vd.16B Vn.16B  (vector)
 
-INST1(rev,     "rev",    0, 0, IF_DR_2G,  0x5AC00800)
+INST1(rev,         "rev",          0,      IF_DR_2G,  0x5AC00800)
                                    //  rev     Rd,Rm                DR_2G  X101101011000000 00001Xnnnnnddddd   5AC0 0800   Rd Rn
 
-INST1(rev64,   "rev64",  0, 0, IF_DV_2M,  0x0E200800)
+INST1(rev64,       "rev64",        0,      IF_DV_2M,  0x0E200800)
                                    //  rev64   Vd,Vn                DV_2M  0Q001110XX100000 000010nnnnnddddd   0E20 0800   Vd,Vn    (vector)
 
-INST1(adc,     "adc",    0, 0, IF_DR_3A,  0x1A000000)
+INST1(adc,         "adc",          0,      IF_DR_3A,  0x1A000000)
                                    //  adc     Rd,Rn,Rm             DR_3A  X0011010000mmmmm 000000nnnnnddddd   1A00 0000
 
-INST1(adcs,    "adcs",   0, 0, IF_DR_3A,  0x3A000000)
+INST1(adcs,        "adcs",         0,      IF_DR_3A,  0x3A000000)
                                    //  adcs    Rd,Rn,Rm             DR_3A  X0111010000mmmmm 000000nnnnnddddd   3A00 0000
 
-INST1(sbc,     "sbc",    0, 0, IF_DR_3A,  0x5A000000)
+INST1(sbc,         "sbc",          0,      IF_DR_3A,  0x5A000000)
                                    //  sbc     Rd,Rn,Rm             DR_3A  X1011010000mmmmm 000000nnnnnddddd   5A00 0000
 
-INST1(sbcs,    "sbcs",   0, 0, IF_DR_3A,  0x7A000000)
+INST1(sbcs,        "sbcs",         0,      IF_DR_3A,  0x7A000000)
                                    //  sbcs    Rd,Rn,Rm             DR_3A  X1111010000mmmmm 000000nnnnnddddd   7A00 0000
 
-INST1(udiv,    "udiv",   0, 0, IF_DR_3A,  0x1AC00800)
+INST1(udiv,        "udiv",         0,      IF_DR_3A,  0x1AC00800)
                                    //  udiv    Rd,Rn,Rm             DR_3A  X0011010110mmmmm 000010nnnnnddddd   1AC0 0800
 
-INST1(sdiv,    "sdiv",   0, 0, IF_DR_3A,  0x1AC00C00)
+INST1(sdiv,        "sdiv",         0,      IF_DR_3A,  0x1AC00C00)
                                    //  sdiv    Rd,Rn,Rm             DR_3A  X0011010110mmmmm 000011nnnnnddddd   1AC0 0C00
 
-INST1(mneg,    "mneg",   0, 0, IF_DR_3A,  0x1B00FC00)
+INST1(mneg,        "mneg",         0,      IF_DR_3A,  0x1B00FC00)
                                    //  mneg    Rd,Rn,Rm             DR_3A  X0011011000mmmmm 111111nnnnnddddd   1B00 FC00
 
-INST1(madd,    "madd",   0, 0, IF_DR_4A,  0x1B000000)
+INST1(madd,        "madd",         0,      IF_DR_4A,  0x1B000000)
                                    //  madd    Rd,Rn,Rm,Ra          DR_4A  X0011011000mmmmm 0aaaaannnnnddddd   1B00 0000
 
-INST1(msub,    "msub",   0, 0, IF_DR_4A,  0x1B008000)
+INST1(msub,        "msub",         0,      IF_DR_4A,  0x1B008000)
                                    //  msub    Rd,Rn,Rm,Ra          DR_4A  X0011011000mmmmm 1aaaaannnnnddddd   1B00 8000
 
-INST1(smaddl,  "smaddl", 0, 0, IF_DR_4A,  0x9B200000)
+INST1(smaddl,      "smaddl",       0,      IF_DR_4A,  0x9B200000)
                                    //  smaddl  Rd,Rn,Rm,Ra          DR_4A  10011011001mmmmm 0aaaaannnnnddddd   9B20 0000
 
-INST1(smnegl,  "smnegl", 0, 0, IF_DR_3A,  0x9B20FC00)
+INST1(smnegl,      "smnegl",       0,      IF_DR_3A,  0x9B20FC00)
                                    //  smnegl  Rd,Rn,Rm             DR_3A  10011011001mmmmm 111111nnnnnddddd   9B20 FC00
 
-INST1(smsubl,  "smsubl", 0, 0, IF_DR_4A,  0x9B208000)
+INST1(smsubl,      "smsubl",       0,      IF_DR_4A,  0x9B208000)
                                    //  smsubl  Rd,Rn,Rm,Ra          DR_4A  10011011001mmmmm 1aaaaannnnnddddd   9B20 8000
 
-INST1(smulh,   "smulh",  0, 0, IF_DR_3A,  0x9B407C00)
+INST1(smulh,       "smulh",        0,      IF_DR_3A,  0x9B407C00)
                                    //  smulh   Rd,Rn,Rm             DR_3A  10011011010mmmmm 011111nnnnnddddd   9B40 7C00
 
-INST1(umaddl,  "umaddl", 0, 0, IF_DR_4A,  0x9BA00000)
+INST1(umaddl,      "umaddl",       0,      IF_DR_4A,  0x9BA00000)
                                    //  umaddl  Rd,Rn,Rm,Ra          DR_4A  10011011101mmmmm 0aaaaannnnnddddd   9BA0 0000
 
-INST1(umnegl,  "umnegl", 0, 0, IF_DR_3A,  0x9BA0FC00)
+INST1(umnegl,      "umnegl",       0,      IF_DR_3A,  0x9BA0FC00)
                                    //  umnegl  Rd,Rn,Rm             DR_3A  10011011101mmmmm 111111nnnnnddddd   9BA0 FC00
 
-INST1(umsubl,  "umsubl", 0, 0, IF_DR_4A,  0x9BA08000)
+INST1(umsubl,      "umsubl",       0,      IF_DR_4A,  0x9BA08000)
                                    //  umsubl  Rd,Rn,Rm,Ra          DR_4A  10011011101mmmmm 1aaaaannnnnddddd   9BA0 8000
 
-INST1(umulh,   "umulh",  0, 0, IF_DR_3A,  0x9BC07C00)
+INST1(umulh,       "umulh",        0,      IF_DR_3A,  0x9BC07C00)
                                    //  umulh   Rd,Rn,Rm             DR_3A  10011011110mmmmm 011111nnnnnddddd   9BC0 7C00
 
-INST1(extr,    "extr",   0, 0, IF_DR_3E,  0x13800000)
+INST1(extr,        "extr",         0,      IF_DR_3E,  0x13800000)
                                    //  extr    Rd,Rn,Rm,imm6        DR_3E  X00100111X0mmmmm ssssssnnnnnddddd   1380 0000   imm(0-63)
 
-INST1(lslv,    "lslv",   0, 0, IF_DR_3A,  0x1AC02000)
+INST1(lslv,        "lslv",         0,      IF_DR_3A,  0x1AC02000)
                                    //  lslv    Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001000nnnnnddddd   1AC0 2000
 
-INST1(lsrv,    "lsrv",   0, 0, IF_DR_3A,  0x1AC02400)
+INST1(lsrv,        "lsrv",         0,      IF_DR_3A,  0x1AC02400)
                                    //  lsrv    Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001001nnnnnddddd   1AC0 2400
 
-INST1(asrv,    "asrv",   0, 0, IF_DR_3A,  0x1AC02800)
+INST1(asrv,        "asrv",         0,      IF_DR_3A,  0x1AC02800)
                                    //  asrv    Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001010nnnnnddddd   1AC0 2800
 
-INST1(rorv,    "rorv",   0, 0, IF_DR_3A,  0x1AC02C00)
+INST1(rorv,        "rorv",         0,      IF_DR_3A,  0x1AC02C00)
                                    //  rorv    Rd,Rn,Rm             DR_3A  X0011010110mmmmm 001011nnnnnddddd   1AC0 2C00
 
-INST1(crc32b,  "crc32b", 0, 0, IF_DR_3A,  0x1AC04000)
+INST1(crc32b,      "crc32b",       0,      IF_DR_3A,  0x1AC04000)
                                    //  crc32b  Rd,Rn,Rm             DR_3A  00011010110mmmmm 010000nnnnnddddd   1AC0 4000
 
-INST1(crc32h,  "crc32h", 0, 0, IF_DR_3A,  0x1AC04400)
+INST1(crc32h,      "crc32h",       0,      IF_DR_3A,  0x1AC04400)
                                    //  crc32h  Rd,Rn,Rm             DR_3A  00011010110mmmmm 010001nnnnnddddd   1AC0 4400
 
-INST1(crc32w,  "crc32w", 0, 0, IF_DR_3A,  0x1AC04800)
+INST1(crc32w,      "crc32w",       0,      IF_DR_3A,  0x1AC04800)
                                    //  crc32w  Rd,Rn,Rm             DR_3A  00011010110mmmmm 010010nnnnnddddd   1AC0 4800
 
-INST1(crc32x,  "crc32x", 0, 0, IF_DR_3A,  0x9AC04C00)
+INST1(crc32x,      "crc32x",       0,      IF_DR_3A,  0x9AC04C00)
                                    //  crc32x  Rd,Rn,Xm             DR_3A  10011010110mmmmm 010011nnnnnddddd   9AC0 4C00
 
-INST1(crc32cb, "crc32cb",0, 0, IF_DR_3A,  0x1AC05000)
+INST1(crc32cb,     "crc32cb",      0,      IF_DR_3A,  0x1AC05000)
                                    //  crc32cb Rd,Rn,Rm             DR_3A  00011010110mmmmm 010100nnnnnddddd   1AC0 5000
 
-INST1(crc32ch, "crc32ch",0, 0, IF_DR_3A,  0x1AC05400)
+INST1(crc32ch,     "crc32ch",      0,      IF_DR_3A,  0x1AC05400)
                                    //  crc32ch Rd,Rn,Rm             DR_3A  00011010110mmmmm 010101nnnnnddddd   1AC0 5400
 
-INST1(crc32cw, "crc32cw",0, 0, IF_DR_3A,  0x1AC05800)
+INST1(crc32cw,     "crc32cw",      0,      IF_DR_3A,  0x1AC05800)
                                    //  crc32cw Rd,Rn,Rm             DR_3A  00011010110mmmmm 010110nnnnnddddd   1AC0 5800
 
-INST1(crc32cx, "crc32cx",0, 0, IF_DR_3A,  0x9AC05C00)
+INST1(crc32cx,     "crc32cx",      0,      IF_DR_3A,  0x9AC05C00)
                                    //  crc32cx Rd,Rn,Xm             DR_3A  10011010110mmmmm 010111nnnnnddddd   9AC0 5C00
 
-INST1(sha1c,   "sha1c",  0, 0, IF_DV_3F,   0x5E000000)
+INST1(sha1c,       "sha1c",        0,      IF_DV_3F,  0x5E000000)
                                    //  sha1c   Qd, Sn Vm.4S         DV_3F  01011110000mmmmm 000000nnnnnddddd   5E00 0000   Qd Sn Vm.4S   (vector)
 
-INST1(sha1m,   "sha1m",  0, 0, IF_DV_3F,   0x5E002000)
+INST1(sha1m,       "sha1m",        0,      IF_DV_3F,  0x5E002000)
                                    //  sha1m   Qd, Sn Vm.4S         DV_3F  01011110000mmmmm 001000nnnnnddddd   5E00 2000   Qd Sn Vm.4S   (vector)
 
-INST1(sha1p,   "sha1p",  0, 0, IF_DV_3F,   0x5E001000)
+INST1(sha1p,       "sha1p",        0,      IF_DV_3F,  0x5E001000)
                                    //  sha1p   Qd, Sn Vm.4S         DV_3F  01011110000mmmmm 000100nnnnnddddd   5E00 1000   Qd Sn Vm.4S   (vector)
 
-INST1(sha1h,   "sha1h",  0, 0, IF_DV_2U,   0x5E280800)
+INST1(sha1h,       "sha1h",        0,      IF_DV_2U,  0x5E280800)
                                    //  sha1h   Sd, Sn               DV_2U  0101111000101000 000010nnnnnddddd   5E28 0800   Sd Sn
 
-INST1(sha1su0, "sha1su0",  0, 0, IF_DV_3F,  0x5E003000)
+INST1(sha1su0,     "sha1su0",      0,      IF_DV_3F,  0x5E003000)
                                    //  sha1su0 Vd.4S,Vn.4S,Vm.4S    DV_3F  01011110000mmmmm 001100nnnnnddddd   5E00 3000   Vd.4S Vn.4S Vm.4S  (vector)
 
-INST1(sha1su1, "sha1su1",  0, 0, IF_DV_2P,  0x5E281800)
+INST1(sha1su1,     "sha1su1",      0,      IF_DV_2P,  0x5E281800)
                                    //  sha1su1 Vd.4S, Vn.4S         DV_2P  0101111000101000 000110nnnnnddddd   5E28 1800   Vd.4S Vn.4S   (vector)
 
-INST1(sha256h, "sha256h",  0, 0, IF_DV_3F,  0x5E004000)
+INST1(sha256h,     "sha256h",      0,      IF_DV_3F,  0x5E004000)
                                    //  sha256h  Qd,Qn,Vm.4S         DV_3F  01011110000mmmmm 010000nnnnnddddd   5E00 4000   Qd Qn Vm.4S   (vector)
 
-INST1(sha256h2, "sha256h2",  0, 0, IF_DV_3F,  0x5E005000)
+INST1(sha256h2,    "sha256h2",     0,      IF_DV_3F,  0x5E005000)
                                    //  sha256h2 Qd,Qn,Vm.4S         DV_3F  01011110000mmmmm 010100nnnnnddddd   5E00 5000   Qd Qn Vm.4S   (vector)
 
-INST1(sha256su0, "sha256su0",  0, 0, IF_DV_2P,  0x5E282800)
+INST1(sha256su0,   "sha256su0",    0,      IF_DV_2P,  0x5E282800)
                                    // sha256su0  Vd.4S,Vn.4S        DV_2P  0101111000101000 001010nnnnnddddd   5E28 2800   Vd.4S Vn.4S   (vector)
 
-INST1(sha256su1, "sha256su1",  0, 0, IF_DV_3F,  0x5E006000)
+INST1(sha256su1,   "sha256su1",    0,      IF_DV_3F,  0x5E006000)
                                    // sha256su1  Vd.4S,Vn.4S,Vm.4S  DV_3F  01011110000mmmmm 011000nnnnnddddd   5E00 6000   Vd.4S Vn.4S Vm.4S  (vector)
 
-INST1(ext,     "ext",    0, 0, IF_DV_3G,  0x2E000000)
-                                   // C7.2.36 EXT
+INST1(ext,         "ext",          0,      IF_DV_3G,  0x2E000000)
                                    //  ext     Vd,Vn,Vm,index       DV_3G  0Q101110000mmmmm 0iiii0nnnnnddddd   2E00 0000   Vd Vn Vm index  (vector)
 
-INST1(sbfm,    "sbfm",   0, 0, IF_DI_2D,  0x13000000)
+INST1(sbfm,        "sbfm",         0,      IF_DI_2D,  0x13000000)
                                    //  sbfm    Rd,Rn,imr,ims        DI_2D  X00100110Nrrrrrr ssssssnnnnnddddd   1300 0000   imr, ims
 
-INST1(bfm,     "bfm",    0, 0, IF_DI_2D,  0x33000000)
+INST1(bfm,         "bfm",          0,      IF_DI_2D,  0x33000000)
                                    //  bfm     Rd,Rn,imr,ims        DI_2D  X01100110Nrrrrrr ssssssnnnnnddddd   3300 0000   imr, ims
 
-INST1(ubfm,    "ubfm",   0, 0, IF_DI_2D,  0x53000000)
+INST1(ubfm,        "ubfm",         0,      IF_DI_2D,  0x53000000)
                                    //  ubfm    Rd,Rn,imr,ims        DI_2D  X10100110Nrrrrrr ssssssnnnnnddddd   5300 0000   imr, ims
 
-INST1(sbfiz,   "sbfiz",  0, 0, IF_DI_2D,  0x13000000)
+INST1(sbfiz,       "sbfiz",        0,      IF_DI_2D,  0x13000000)
                                    //  sbfiz   Rd,Rn,lsb,width      DI_2D  X00100110Nrrrrrr ssssssnnnnnddddd   1300 0000   imr, ims
 
-INST1(bfi,     "bfi",    0, 0, IF_DI_2D,  0x33000000)
+INST1(bfi,         "bfi",          0,      IF_DI_2D,  0x33000000)
                                    //  bfi     Rd,Rn,lsb,width      DI_2D  X01100110Nrrrrrr ssssssnnnnnddddd   3300 0000   imr, ims
 
-INST1(ubfiz,   "ubfiz",  0, 0, IF_DI_2D,  0x53000000)
+INST1(ubfiz,       "ubfiz",        0,      IF_DI_2D,  0x53000000)
                                    //  ubfiz   Rd,Rn,lsb,width      DI_2D  X10100110Nrrrrrr ssssssnnnnnddddd   5300 0000   imr, ims
 
-INST1(sbfx,    "sbfx",   0, 0, IF_DI_2D,  0x13000000)
+INST1(sbfx,        "sbfx",         0,      IF_DI_2D,  0x13000000)
                                    //  sbfx    Rd,Rn,lsb,width      DI_2D  X00100110Nrrrrrr ssssssnnnnnddddd   1300 0000   imr, ims
 
-INST1(bfxil,   "bfxil",  0, 0, IF_DI_2D,  0x33000000)
+INST1(bfxil,       "bfxil",        0,      IF_DI_2D,  0x33000000)
                                    //  bfxil   Rd,Rn,lsb,width      DI_2D  X01100110Nrrrrrr ssssssnnnnnddddd   3300 0000   imr, ims
 
-INST1(ubfx,    "ubfx",   0, 0, IF_DI_2D,  0x53000000)
+INST1(ubfx,        "ubfx",         0,      IF_DI_2D,  0x53000000)
                                    //  ubfx    Rd,Rn,lsb,width      DI_2D  X10100110Nrrrrrr ssssssnnnnnddddd   5300 0000   imr, ims
 
-INST1(sxtb,    "sxtb",   0, 0, IF_DR_2H,  0x13001C00)
+INST1(sxtb,        "sxtb",         0,      IF_DR_2H,  0x13001C00)
                                    //  sxtb    Rd,Rn                DR_2H  X00100110X000000 000111nnnnnddddd   1300 1C00
 
-INST1(sxth,    "sxth",   0, 0, IF_DR_2H,  0x13003C00)
+INST1(sxth,        "sxth",         0,      IF_DR_2H,  0x13003C00)
                                    //  sxth    Rd,Rn                DR_2H  X00100110X000000 001111nnnnnddddd   1300 3C00
 
-INST1(sxtw,    "sxtw",   0, 0, IF_DR_2H,  0x13007C00)
+INST1(sxtw,        "sxtw",         0,      IF_DR_2H,  0x13007C00)
                                    //  sxtw    Rd,Rn                DR_2H  X00100110X000000 011111nnnnnddddd   1300 7C00
 
-INST1(uxtb,    "uxtb",   0, 0, IF_DR_2H,  0x53001C00)
+INST1(uxtb,        "uxtb",         0,      IF_DR_2H,  0x53001C00)
                                    //  uxtb    Rd,Rn                DR_2H  0101001100000000 000111nnnnnddddd   5300 1C00
 
-INST1(uxth,    "uxth",   0, 0, IF_DR_2H,  0x53003C00)
+INST1(uxth,        "uxth",         0,      IF_DR_2H,  0x53003C00)
                                    //  uxth    Rd,Rn                DR_2H  0101001100000000 001111nnnnnddddd   5300 3C00
 
-INST1(nop,     "nop",    0, 0, IF_SN_0A,  0xD503201F)
+INST1(nop,         "nop",          0,      IF_SN_0A,  0xD503201F)
                                    //  nop                          SN_0A  1101010100000011 0010000000011111   D503 201F
 
-INST1(bkpt,    "bkpt",   0, 0, IF_SN_0A,  0xD43E0000)
+INST1(bkpt,        "bkpt",         0,      IF_SN_0A,  0xD43E0000)
                                    //  bkpt                         SN_0A  1101010000111110 0000000000000000   D43E 0000   0xF000
 
-INST1(brk,     "brk",    0, 0, IF_SI_0A,  0xD4200000)
+INST1(brk,         "brk",          0,      IF_SI_0A,  0xD4200000)
                                    //  brk     imm16                SI_0A  11010100001iiiii iiiiiiiiiii00000   D420 0000   imm16
 
-INST1(dsb,     "dsb",    0, 0, IF_SI_0B,  0xD503309F)
+INST1(dsb,         "dsb",          0,      IF_SI_0B,  0xD503309F)
                                    //  dsb     barrierKind          SI_0B  1101010100000011 0011bbbb10011111   D503 309F   imm4 - barrier kind
 
-INST1(dmb,     "dmb",    0, 0, IF_SI_0B,  0xD50330BF)
+INST1(dmb,         "dmb",          0,      IF_SI_0B,  0xD50330BF)
                                    //  dmb     barrierKind          SI_0B  1101010100000011 0011bbbb10111111   D503 30BF   imm4 - barrier kind
 
-INST1(isb,     "isb",    0, 0, IF_SI_0B,  0xD50330DF)
+INST1(isb,         "isb",          0,      IF_SI_0B,  0xD50330DF)
                                    //  isb     barrierKind          SI_0B  1101010100000011 0011bbbb11011111   D503 30DF   imm4 - barrier kind
 
-INST1(umov,    "umov",   0, 0, IF_DV_2B,  0x0E003C00)
+INST1(umov,        "umov",         0,      IF_DV_2B,  0x0E003C00)
                                    //  umov    Rd,Vn[]              DV_2B  0Q001110000iiiii 001111nnnnnddddd   0E00 3C00   Rd,Vn[]
 
-INST1(smov,    "smov",   0, 0, IF_DV_2B,  0x0E002C00)
+INST1(smov,        "smov",         0,      IF_DV_2B,  0x0E002C00)
                                    //  smov    Rd,Vn[]              DV_2B  0Q001110000iiiii 001011nnnnnddddd   0E00 2C00   Rd,Vn[]
 
-INST1(movi,    "movi",   0, 0, IF_DV_1B,  0x0F000400)
+INST1(movi,        "movi",         0,      IF_DV_1B,  0x0F000400)
                                    //  movi    Vd,imm8              DV_1B  0QX0111100000iii cmod01iiiiiddddd   0F00 0400   Vd imm8 (immediate vector)
 
-INST1(mvni,    "mvni",   0, 0, IF_DV_1B,  0x2F000400)
+INST1(mvni,        "mvni",         0,      IF_DV_1B,  0x2F000400)
                                    //  mvni    Vd,imm8              DV_1B  0Q10111100000iii cmod01iiiiiddddd   2F00 0400   Vd imm8 (immediate vector)
 
-INST1(urecpe,  "urecpe", 0, 0, IF_DV_2A,  0x0EA1C800)
-                                   // C7.2.372 URECPE
+INST1(urecpe,      "urecpe",       0,      IF_DV_2A,  0x0EA1C800)
                                    //  urecpe  Vd,Vn                DV_2A  0Q0011101X100001 110010nnnnnddddd   0EA1 C800   Vd,Vn      (vector)
 
-INST1(ursqrte, "ursqrte",0, 0, IF_DV_2A,  0x2EA1C800)
-                                   // C7.2.376 URSQRTE
+INST1(ursqrte,     "ursqrte",      0,      IF_DV_2A,  0x2EA1C800)
                                    //  ursqrte Vd,Vn                DV_2A  0Q1011101X100001 110010nnnnnddddd   2EA1 C800   Vd,Vn      (vector)
 
-INST1(bsl,     "bsl",    0, 0, IF_DV_3C,  0x2E601C00)
+INST1(bsl,         "bsl",          0,      IF_DV_3C,  0x2E601C00)
                                    //  bsl     Vd,Vn,Vm             DV_3C  0Q101110011mmmmm 000111nnnnnddddd   2E60 1C00   Vd,Vn,Vm
 
-INST1(bit,     "bit",    0, 0, IF_DV_3C,  0x2EA01C00)
+INST1(bit,         "bit",          0,      IF_DV_3C,  0x2EA01C00)
                                    //  bit     Vd,Vn,Vm             DV_3C  0Q101110101mmmmm 000111nnnnnddddd   2EA0 1C00   Vd,Vn,Vm
 
-INST1(bif,     "bif",    0, 0, IF_DV_3C,  0x2EE01C00)
+INST1(bif,         "bif",          0,      IF_DV_3C,  0x2EE01C00)
                                    //  bif     Vd,Vn,Vm             DV_3C  0Q101110111mmmmm 000111nnnnnddddd   2EE0 1C00   Vd,Vn,Vm
 
-INST1(addv,    "addv",   0, 0, IF_DV_2T,  0x0E31B800)
+INST1(addv,        "addv",         0,      IF_DV_2T,  0x0E31B800)
                                    //  addv    Vd,Vn                DV_2T  0Q001110XX110001 101110nnnnnddddd   0E31 B800   Vd,Vn      (vector)
 
-INST1(cnt,     "cnt",    0, 0, IF_DV_2M,  0x0E205800)
+INST1(cnt,         "cnt",          0,      IF_DV_2M,  0x0E205800)
                                    //  cnt     Vd,Vn                DV_2M  0Q00111000100000 010110nnnnnddddd   0E20 5800   Vd,Vn      (vector)
 
-INST1(not,     "not",    0, 0, IF_DV_2M,  0x2E205800)
+INST1(not,         "not",          0,      IF_DV_2M,  0x2E205800)
                                    //  not     Vd,Vn                DV_2M  0Q10111000100000 010110nnnnnddddd   2E20 5800   Vd,Vn      (vector)
 
-INST1(saddlv,  "saddlv", 0, 0, IF_DV_2T,  0x0E303800)
+INST1(saddlv,      "saddlv",       0,      IF_DV_2T,  0x0E303800)
                                    //  saddlv  Vd,Vn                DV_2T  0Q001110XX110000 001110nnnnnddddd   0E30 3800   Vd,Vn      (vector)
 
-INST1(smaxv,   "smaxv",  0, 0, IF_DV_2T,  0x0E30A800)
+INST1(smaxv,       "smaxv",        0,      IF_DV_2T,  0x0E30A800)
                                    //  smaxv   Vd,Vn                DV_2T  0Q001110XX110000 101010nnnnnddddd   0E30 A800   Vd,Vn      (vector)
 
-INST1(sminv,   "sminv",  0, 0, IF_DV_2T,  0x0E31A800)
+INST1(sminv,       "sminv",        0,      IF_DV_2T,  0x0E31A800)
                                    //  sminv   Vd,Vn                DV_2T  0Q001110XX110001 101010nnnnnddddd   0E31 A800   Vd,Vn      (vector)
 
-INST1(uaddlv,  "uaddlv", 0, 0, IF_DV_2T,  0x2E303800)
+INST1(uaddlv,      "uaddlv",       0,      IF_DV_2T,  0x2E303800)
                                    //  uaddlv  Vd,Vn                DV_2T  0Q101110XX110000 001110nnnnnddddd   2E30 3800   Vd,Vn      (vector)
 
-INST1(umaxv,   "umaxv",  0, 0, IF_DV_2T,  0x2E30A800)
+INST1(umaxv,       "umaxv",        0,      IF_DV_2T,  0x2E30A800)
                                    //  umaxv   Vd,Vn                DV_2T  0Q101110XX110000 101010nnnnnddddd   2E30 A800   Vd,Vn      (vector)
 
-INST1(uminv,   "uminv",  0, 0, IF_DV_2T,  0x2E31A800)
+INST1(uminv,       "uminv",        0,      IF_DV_2T,  0x2E31A800)
                                    //  uminv   Vd,Vn                DV_2T  0Q101110XX110001 101010nnnnnddddd   2E31 A800   Vd,Vn      (vector)
 
-INST1(fmaxnmv, "fmaxnmv",0, 0, IF_DV_2R,  0x2E30C800)
+INST1(fmaxnmv,     "fmaxnmv",      0,      IF_DV_2R,  0x2E30C800)
                                    //  fmaxnmv Vd,Vn                DV_2R  0Q1011100X110000 110010nnnnnddddd   2E30 C800   Vd,Vn      (vector)
 
-INST1(fmaxv,   "fmaxv",  0, 0, IF_DV_2R,  0x2E30F800)
+INST1(fmaxv,       "fmaxv",        0,      IF_DV_2R,  0x2E30F800)
                                    //  fmaxv   Vd,Vn                DV_2R  0Q1011100X110000 111110nnnnnddddd   2E30 F800   Vd,Vn      (vector)
 
-INST1(fminnmv, "fminnmv",0, 0, IF_DV_2R,  0x2EB0C800)
+INST1(fminnmv,     "fminnmv",      0,      IF_DV_2R,  0x2EB0C800)
                                    //  fminnmv Vd,Vn                DV_2R  0Q1011101X110000 110010nnnnnddddd   2EB0 C800   Vd,Vn      (vector)
 
-INST1(fminv,   "fminv",  0, 0, IF_DV_2R,  0x2EB0F800)
+INST1(fminv,       "fminv",        0,      IF_DV_2R,  0x2EB0F800)
                                    //  fminv   Vd,Vn                DV_2R  0Q1011101X110000 111110nnnnnddddd   2EB0 F800   Vd,Vn      (vector)
 
-INST1(uzp1,    "uzp1",   0, 0, IF_DV_3A,  0x0E001800)
+INST1(uzp1,        "uzp1",         0,      IF_DV_3A,  0x0E001800)
                                    //  uzp1    Vd,Vn,Vm             DV_3A  0Q001110XX0mmmmm 000110nnnnnddddd   0E00 1800   Vd,Vn,Vm  (vector)
 
-INST1(uzp2,    "uzp2",   0, 0, IF_DV_3A,  0x0E005800)
+INST1(uzp2,        "uzp2",         0,      IF_DV_3A,  0x0E005800)
                                    //  uzp2    Vd,Vn,Vm             DV_3A  0Q001110XX0mmmmm 010110nnnnnddddd   0E00 5800   Vd,Vn,Vm  (vector)
 
-INST1(zip1,    "zip1",   0, 0, IF_DV_3A,  0x0E003800)
+INST1(zip1,        "zip1",         0,      IF_DV_3A,  0x0E003800)
                                    //  zip1    Vd,Vn,Vm             DV_3A  0Q001110XX0mmmmm 001110nnnnnddddd   0E00 3800   Vd,Vn,Vm  (vector)
 
-INST1(zip2,    "zip2",   0, 0, IF_DV_3A,  0x0E007800)
+INST1(zip2,        "zip2",         0,      IF_DV_3A,  0x0E007800)
                                    //  zip2    Vd,Vn,Vm             DV_3A  0Q001110XX0mmmmm 011110nnnnnddddd   0E00 7800   Vd,Vn,Vm  (vector)
 
-INST1(trn1,    "trn1",   0, 0, IF_DV_3A,  0x0E002800)
+INST1(trn1,        "trn1",         0,      IF_DV_3A,  0x0E002800)
                                    //  trn1    Vd,Vn,Vm             DV_3A  0Q001110XX0mmmmm 001010nnnnnddddd   0E00 2800   Vd,Vn,Vm  (vector)
 
-INST1(trn2,    "trn2",   0, 0, IF_DV_3A,  0x0E006800)
+INST1(trn2,        "trn2",         0,      IF_DV_3A,  0x0E006800)
                                    //  trn2    Vd,Vn,Vm             DV_3A  0Q001110XX0mmmmm 011010nnnnnddddd   0E00 6800   Vd,Vn,Vm  (vector)
 
-INST1(xtn,     "xtn",    0, 0, IF_DV_2M,  0x0E212800)
+INST1(xtn,         "xtn",          0,      IF_DV_2M,  0x0E212800)
                                    //  xtn     Vd,Vn                DV_2M  00001110XX100001 001010nnnnnddddd   0E21 2800   Vd,Vn      (vector)
 
-INST1(xtn2,    "xtn2",   0, 0, IF_DV_2M,  0x4E212800)
+INST1(xtn2,        "xtn2",         0,      IF_DV_2M,  0x4E212800)
                                    //  xtn2    Vd,Vn                DV_2M  01001110XX100001 001010nnnnnddddd   4E21 2800   Vd,Vn      (vector)
 
-INST1(fnmul,   "fnmul",  0, 0, IF_DV_3D,  0x1E208800)
+INST1(fnmul,       "fnmul",        0,      IF_DV_3D,  0x1E208800)
                                    //  fnmul   Vd,Vn,Vm             DV_3D  000111100X1mmmmm 100010nnnnnddddd   1E20 8800   Vd,Vn,Vm   (scalar)
 
-INST1(fmadd,   "fmadd",  0, 0, IF_DV_4A,  0x1F000000)
+INST1(fmadd,       "fmadd",        0,      IF_DV_4A,  0x1F000000)
                                    //  fmadd   Vd,Va,Vn,Vm          DV_4A  000111110X0mmmmm 0aaaaannnnnddddd   1F00 0000   Vd Vn Vm Va (scalar)
 
-INST1(fmsub,   "fmsub",  0, 0, IF_DV_4A,  0x1F008000)
+INST1(fmsub,       "fmsub",        0,      IF_DV_4A,  0x1F008000)
                                    //  fmsub   Vd,Va,Vn,Vm          DV_4A  000111110X0mmmmm 1aaaaannnnnddddd   1F00 8000   Vd Vn Vm Va (scalar)
 
-INST1(fnmadd,  "fnmadd", 0, 0, IF_DV_4A,  0x1F200000)
+INST1(fnmadd,      "fnmadd",       0,      IF_DV_4A,  0x1F200000)
                                    //  fnmadd  Vd,Va,Vn,Vm          DV_4A  000111110X1mmmmm 0aaaaannnnnddddd   1F20 0000   Vd Vn Vm Va (scalar)
 
-INST1(fnmsub,  "fnmsub", 0, 0, IF_DV_4A,  0x1F208000)
+INST1(fnmsub,      "fnmsub",       0,      IF_DV_4A,  0x1F208000)
                                    //  fnmsub  Vd,Va,Vn,Vm          DV_4A  000111110X1mmmmm 1aaaaannnnnddddd   1F20 8000   Vd Vn Vm Va (scalar)
 
-INST1(fcvt,    "fcvt",   0, 0, IF_DV_2J,  0x1E224000)
+INST1(fcvt,        "fcvt",         0,      IF_DV_2J,  0x1E224000)
                                    //  fcvt    Vd,Vn                DV_2J  00011110SS10001D D10000nnnnnddddd   1E22 4000   Vd,Vn
 
-INST1(pmul,    "pmul",   0, 0, IF_DV_3A,  0x2E209C00)
+INST1(pmul,        "pmul",         0,      IF_DV_3A,  0x2E209C00)
                                    //  pmul    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 100111nnnnnddddd   2E20 9C00   Vd,Vn,Vm  (vector)
 
-INST1(saba,    "saba",   0, 0, IF_DV_3A,  0x0E207C00)
+INST1(saba,        "saba",         0,      IF_DV_3A,  0x0E207C00)
                                    //  saba    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 011111nnnnnddddd   0E20 7C00   Vd,Vn,Vm  (vector)
 
-INST1(sabd,    "sabd",   0, 0, IF_DV_3A,  0x0E207400)
+INST1(sabd,        "sabd",         0,      IF_DV_3A,  0x0E207400)
                                    //  sabd    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 011101nnnnnddddd   0E20 7400   Vd,Vn,Vm  (vector)
 
-INST1(smax,    "smax",   0, 0, IF_DV_3A,  0x0E206400)
+INST1(smax,        "smax",         0,      IF_DV_3A,  0x0E206400)
                                    //  smax    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 011001nnnnnddddd   0E20 6400   Vd,Vn,Vm  (vector)
 
-INST1(smaxp,   "smaxp",  0, 0, IF_DV_3A,  0x0E20A400)
+INST1(smaxp,       "smaxp",        0,      IF_DV_3A,  0x0E20A400)
                                    //  smaxp   Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 101001nnnnnddddd   0E20 A400   Vd,Vn,Vm  (vector)
 
-INST1(smin,    "smin",   0, 0, IF_DV_3A,  0x0E206C00)
+INST1(smin,        "smin",         0,      IF_DV_3A,  0x0E206C00)
                                    //  smin    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 011011nnnnnddddd   0E20 6C00   Vd,Vn,Vm  (vector)
 
-INST1(sminp,   "sminp",  0, 0, IF_DV_3A,  0x0E20AC00)
+INST1(sminp,       "sminp",        0,      IF_DV_3A,  0x0E20AC00)
                                    //  sminp   Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 101011nnnnnddddd   0E20 AC00   Vd,Vn,Vm  (vector)
 
-INST1(uaba,    "uaba",   0, 0, IF_DV_3A,  0x2E207C00)
+INST1(uaba,        "uaba",         0,      IF_DV_3A,  0x2E207C00)
                                    //  uaba    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 011111nnnnnddddd   2E20 7C00   Vd,Vn,Vm  (vector)
 
-INST1(uabd,    "uabd",   0, 0, IF_DV_3A,  0x2E207400)
+INST1(uabd,        "uabd",         0,      IF_DV_3A,  0x2E207400)
                                    //  uabd    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 011101nnnnnddddd   2E20 7400   Vd,Vn,Vm  (vector)
 
-INST1(umax,    "umax",   0, 0, IF_DV_3A,  0x2E206400)
+INST1(umax,        "umax",         0,      IF_DV_3A,  0x2E206400)
                                    //  umax    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 011001nnnnnddddd   2E20 6400   Vd,Vn,Vm  (vector)
 
-INST1(umaxp,   "umaxp",  0, 0, IF_DV_3A,  0x2E20A400)
+INST1(umaxp,       "umaxp",        0,      IF_DV_3A,  0x2E20A400)
                                    //  umaxp   Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 101001nnnnnddddd   2E20 A400   Vd,Vn,Vm  (vector)
 
-INST1(umin,    "umin",   0, 0, IF_DV_3A,  0x2E206C00)
+INST1(umin,        "umin",         0,      IF_DV_3A,  0x2E206C00)
                                    //  umin    Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 011011nnnnnddddd   2E20 6C00   Vd,Vn,Vm  (vector)
 
-INST1(uminp,   "uminp",  0, 0, IF_DV_3A,  0x2E20AC00)
+INST1(uminp,       "uminp",        0,      IF_DV_3A,  0x2E20AC00)
                                    //  uminp   Vd,Vn,Vm             DV_3A  0Q101110XX1mmmmm 101011nnnnnddddd   2E20 AC00   Vd,Vn,Vm  (vector)
 
-INST1(fcvtl,   "fcvtl",  0, 0, IF_DV_2G,  0x0E217800)
-                                   //  fcvtl   Vd,Vn                DV_2G  000011100X100001 011110nnnnnddddd   0E21 7800   Vd,Vn    (scalar)
+INST1(fcvtl,       "fcvtl",        0,      IF_DV_2A,  0x0E217800)
+                                   //  fcvtl   Vd,Vn                DV_2A  000011100X100001 011110nnnnnddddd   0E21 7800   Vd,Vn    (vector)
 
-INST1(fcvtl2,  "fcvtl2", 0, 0, IF_DV_2G,  0x4E217800)
-                                   //  fcvtl2  Vd,Vn                DV_2G  040011100X100001 011110nnnnnddddd   4E21 7800   Vd,Vn    (scalar)
+INST1(fcvtl2,      "fcvtl2",       0,      IF_DV_2A,  0x4E217800)
+                                   //  fcvtl2  Vd,Vn                DV_2A  010011100X100001 011110nnnnnddddd   4E21 7800   Vd,Vn    (vector)
 
-INST1(fcvtn,   "fcvtn",  0, 0, IF_DV_2G,  0x0E216800)
-                                   //  fcvtn   Vd,Vn                DV_2G  000011100X100001 011010nnnnnddddd   0E21 6800   Vd,Vn    (scalar)
+INST1(fcvtn,       "fcvtn",        0,      IF_DV_2A,  0x0E216800)
+                                   //  fcvtn   Vd,Vn                DV_2A  000011100X100001 011010nnnnnddddd   0E21 6800   Vd,Vn    (vector)
 
-INST1(fcvtn2,  "fcvtn2", 0, 0, IF_DV_2G,  0x4E216800)
-                                   //  fcvtn2  Vd,Vn                DV_2G  040011100X100001 011010nnnnnddddd   4E21 6800   Vd,Vn    (scalar)
+INST1(fcvtn2,      "fcvtn2",       0,      IF_DV_2A,  0x4E216800)
+                                   //  fcvtn2  Vd,Vn                DV_2A  010011100X100001 011010nnnnnddddd   4E21 6800   Vd,Vn    (vector)
 
-INST1(frecpx,  "frecpx", 0, 0, IF_DV_2G,  0x5EA1F800)
-                                   // C7.2.139 FRECPX
+INST1(frecpx,      "frecpx",       0,      IF_DV_2G,  0x5EA1F800)
                                    //  frecpx  Vd,Vn                DV_2G  010111101X100001 111110nnnnnddddd   5EA1 F800   Vd,Vn    (scalar)
 
-INST1(addhn,   "addhn",  0, 0, IF_DV_3H,  0x0E204000)
-                                   // C7.2.3 ADDHN, ADDHN2
+INST1(addhn,       "addhn",        0,      IF_DV_3H,  0x0E204000)
                                    //  addhn   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 010000nnnnnddddd   0E20 4000   Vd,Vn,Vm (vector)
 
-INST1(addhn2,  "addhn2", 0, 0, IF_DV_3H,  0x4E204000)
-                                   // C7.2.3 ADDHN, ADDHN2
+INST1(addhn2,      "addhn2",       0,      IF_DV_3H,  0x4E204000)
                                    //  addhn2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 010000nnnnnddddd   4E20 4000   Vd,Vn,Vm (vector)
 
-INST1(pmull,   "pmull",  0, 0, IF_DV_3H,  0x0E20E000)
-                                   // C7.2.208 PMULL, PMULL2
+INST1(pmull,       "pmull",        0,      IF_DV_3H,  0x0E20E000)
                                    //  pmull   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 111000nnnnnddddd   0E20 E000   Vd,Vn,Vm (vector)
 
-INST1(pmull2,  "pmull2", 0, 0, IF_DV_3H,  0x4E20E000)
-                                   // C7.2.208 PMULL, PMULL2
+INST1(pmull2,      "pmull2",       0,      IF_DV_3H,  0x4E20E000)
                                    //  pmull2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 111000nnnnnddddd   4E20 E000   Vd,Vn,Vm (vector)
 
-INST1(raddhn,  "raddhn", 0, 0, IF_DV_3H,  0x2E204000)
-                                   // C7.2.209 RADDHN, RADDHN2
+INST1(raddhn,      "raddhn",       0,      IF_DV_3H,  0x2E204000)
                                    //  raddhn   Vd,Vn,Vm            DV_3H  00101110XX1mmmmm 010000nnnnnddddd   2E20 4000   Vd,Vn,Vm (vector)
 
-INST1(raddhn2, "raddhn2",0, 0, IF_DV_3H,  0x6E204000)
-                                   // C7.2.209 RADDHN, RADDHN2
+INST1(raddhn2,     "raddhn2",      0,      IF_DV_3H,  0x6E204000)
                                    //  raddhn2  Vd,Vn,Vm            DV_3H  01101110XX1mmmmm 010000nnnnnddddd   6E20 4000   Vd,Vn,Vm (vector)
 
-INST1(rsubhn,  "rsubhn", 0, 0, IF_DV_3H,  0x2E206000)
-                                   // C7.2.216 RSUBHN, RSUBHN2
+INST1(rsubhn,      "rsubhn",       0,      IF_DV_3H,  0x2E206000)
                                    //  rsubhn   Vd,Vn,Vm            DV_3H  00101110XX1mmmmm 011000nnnnnddddd   2E20 6000   Vd,Vn,Vm (vector)
 
-INST1(rsubhn2, "rsubhn2",0, 0, IF_DV_3H,  0x6E206000)
-                                   // C7.2.216 RSUBHN, RSUBHN2
+INST1(rsubhn2,     "rsubhn2",      0,      IF_DV_3H,  0x6E206000)
                                    //  rsubhn2  Vd,Vn,Vm            DV_3H  01101110XX1mmmmm 011000nnnnnddddd   6E20 6000   Vd,Vn,Vm (vector)
 
-INST1(sabal,   "sabal",  0, 0, IF_DV_3H,  0x0E205000)
-                                   // C7.2.218 SABAL, SABAL2
+INST1(sabal,       "sabal",        0,      IF_DV_3H,  0x0E205000)
                                    //  sabal   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 010100nnnnnddddd   0E20 5000   Vd,Vn,Vm (vector)
 
-INST1(sabal2,  "sabal2", 0, 0, IF_DV_3H,  0x4E205000)
-                                   // C7.2.218 SABAL, SABAL2
+INST1(sabal2,      "sabal2",       0,      IF_DV_3H,  0x4E205000)
                                    //  sabal2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 010100nnnnnddddd   4E20 5000   Vd,Vn,Vm (vector)
 
-INST1(sabdl,   "sabdl",  0, 0, IF_DV_3H,  0x0E207000)
-                                   // C7.2.220 SABDL, SABDL2
+INST1(sabdl,       "sabdl",        0,      IF_DV_3H,  0x0E207000)
                                    //  sabdl   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 011100nnnnnddddd   0E20 7000   Vd,Vn,Vm (vector)
 
-INST1(sabdl2,  "sabdl2", 0, 0, IF_DV_3H,  0x4E207000)
-                                   // C7.2.220 SABDL, SABDL2
+INST1(sabdl2,      "sabdl2",       0,      IF_DV_3H,  0x4E207000)
                                    //  sabdl2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 011100nnnnnddddd   4E20 7000   Vd,Vn,Vm (vector)
 
-INST1(sadalp,  "sadalp", 0, 0, IF_DV_2T,  0x0E206800)
-                                   // C7.2.221 SADALP
+INST1(sadalp,      "sadalp",       0,      IF_DV_2T,  0x0E206800)
                                    //  sadalp  Vd,Vn                DV_2T  0Q001110XX100000 011010nnnnnddddd   0E20 6800   Vd,Vn    (vector)
 
-INST1(saddl,   "saddl",  0, 0, IF_DV_3H,  0x0E200000)
-                                   // C7.2.222 SADDL, SADDL2
+INST1(saddl,       "saddl",        0,      IF_DV_3H,  0x0E200000)
                                    //  saddl   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 000000nnnnnddddd   0E20 0000   Vd,Vn,Vm (vector)
 
-INST1(saddl2,  "saddl2", 0, 0, IF_DV_3H,  0x4E200000)
-                                   // C7.2.222 SADDL, SADDL2
+INST1(saddl2,      "saddl2",       0,      IF_DV_3H,  0x4E200000)
                                    //  saddl2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 000000nnnnnddddd   4E20 0000   Vd,Vn,Vm (vector)
 
-INST1(saddlp,  "saddlp", 0, 0, IF_DV_2T, 0x0E202800)
-                                   // C7.2.223 SADDLP
+INST1(saddlp,      "saddlp",       0,      IF_DV_2T,  0x0E202800)
                                    //  saddlp  Vd,Vn                DV_2T  0Q001110XX100000 001010nnnnnddddd   0E20 2800   Vd,Vn    (vector)
 
-INST1(saddw,   "saddw",  0, 0, IF_DV_3H,  0x0E201000)
-                                   // C7.2.225 SADDW, SADDW2
+INST1(saddw,       "saddw",        0,      IF_DV_3H,  0x0E201000)
                                    //  saddw   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 000100nnnnnddddd   0E20 1000   Vd,Vn,Vm (vector)
 
-INST1(saddw2,  "saddw2", 0, 0, IF_DV_3H,  0x4E201000)
-                                   // C7.2.225 SADDW, SADDW2
+INST1(saddw2,      "saddw2",       0,      IF_DV_3H,  0x4E201000)
                                    //  saddw2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 000100nnnnnddddd   4E20 1000   Vd,Vn,Vm (vector)
 
-INST1(shadd,   "shadd", 0, 0,  IF_DV_3A,  0x0E200400)
-                                   // C7.2.246 SHADD
+INST1(shadd,       "shadd",        0,      IF_DV_3A,  0x0E200400)
                                    //  shadd  Vd,Vn,Vm              DV_3A  0Q001110XX1mmmmm 000001nnnnnddddd   0E20 0400   Vd,Vn,Vm (vector)
 
-INST1(shsub,   "shsub", 0, 0,  IF_DV_3A,  0x0E202400)
-                                   // C7.2.250 SHSUB
+INST1(shsub,       "shsub",        0,      IF_DV_3A,  0x0E202400)
                                    //  shsub  Vd,Vn,Vm              DV_3A  0Q001110XX1mmmmm 001001nnnnnddddd   0E20 2400   Vd,Vn,Vm (vector)
 
-INST1(srhadd,  "srhadd", 0, 0, IF_DV_3A,  0x0E201400)
-                                   // C7.2.302 SRHADD
+INST1(srhadd,      "srhadd",       0,      IF_DV_3A,  0x0E201400)
                                    //  srhadd Vd,Vn,Vm              DV_3A  0Q001110XX1mmmmm 000101nnnnnddddd   0E20 1400   Vd,Vn,Vm (vector)
 
-INST1(ssubl,   "ssubl",  0, 0, IF_DV_3H,  0x0E202000)
-                                   // C7.2.311 SSUBL, SSUBL2
+INST1(ssubl,       "ssubl",        0,      IF_DV_3H,  0x0E202000)
                                    //  ssubl   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 001000nnnnnddddd   0E20 2000   Vd,Vn,Vm (vector)
 
-INST1(ssubl2,  "ssubl2", 0, 0, IF_DV_3H,  0x4E202000)
-                                   // C7.2.311 SSUBL, SSUBL2
+INST1(ssubl2,      "ssubl2",       0,      IF_DV_3H,  0x4E202000)
                                    //  ssubl2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 001000nnnnnddddd   4E20 2000   Vd,Vn,Vm (vector)
 
-INST1(ssubw,   "ssubw",  0, 0, IF_DV_3H,  0x0E203000)
-                                   // C7.2.312 SSUBW, SSUBW2
+INST1(ssubw,       "ssubw",        0,      IF_DV_3H,  0x0E203000)
                                    //  ssubw   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 001100nnnnnddddd   0E20 3000   Vd,Vn,Vm (vector)
 
-INST1(ssubw2,  "ssubw2", 0, 0, IF_DV_3H,  0x4E203000)
-                                   // C7.2.312 SSUBW, SSUBW2
+INST1(ssubw2,      "ssubw2",       0,      IF_DV_3H,  0x4E203000)
                                    //  ssubw2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 001100nnnnnddddd   4E20 3000   Vd,Vn,Vm (vector)
 
-INST1(subhn,   "subhn", 0, 0,  IF_DV_3H,  0x0E206000)
-                                   // C7.2.327 SUBHN, SUBHN2
+INST1(subhn,       "subhn",        0,      IF_DV_3H,  0x0E206000)
                                    //  subhn   Vd,Vn,Vm             DV_3H  00001110XX1mmmmm 011000nnnnnddddd   0E20 6000   Vd,Vn,Vm (vector)
 
-INST1(subhn2,  "subhn2",0, 0,  IF_DV_3H,  0x4E206000)
-                                   // C7.2.327 SUBHN, SUBHN2
+INST1(subhn2,      "subhn2",       0,      IF_DV_3H,  0x4E206000)
                                    //  subhn2  Vd,Vn,Vm             DV_3H  01001110XX1mmmmm 011000nnnnnddddd   4E20 6000   Vd,Vn,Vm (vector)
 
-INST1(uabal,   "uabal",  0, 0, IF_DV_3H,  0x2E205000)
-                                   // C7.2.335 UABAL, UABAL2
+INST1(uabal,       "uabal",        0,      IF_DV_3H,  0x2E205000)
                                    //  uabal   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 010100nnnnnddddd   2E20 5000   Vd,Vn,Vm (vector)
 
-INST1(uabal2,  "uabal2", 0, 0, IF_DV_3H,  0x6E205000)
-                                   // C7.2.335 UABAL, UABAL2
+INST1(uabal2,      "uabal2",       0,      IF_DV_3H,  0x6E205000)
                                    //  uabal2  Vd,Vn,Vm             DV_3H  01101110XX1mmmmm 010100nnnnnddddd   6E20 5000   Vd,Vn,Vm (vector)
 
-INST1(uabdl,   "uabdl",  0, 0, IF_DV_3H,  0x2E207000)
-                                   // C7.2.337 UABDL, UABDL2
+INST1(uabdl,       "uabdl",        0,      IF_DV_3H,  0x2E207000)
                                    //  uabdl   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 011100nnnnnddddd   2E20 7000   Vd,Vn,Vm (vector)
 
-INST1(uabdl2,  "uabdl2", 0, 0, IF_DV_3H,  0x6E207000)
-                                   // C7.2.337 UABDL, UABDL2
+INST1(uabdl2,      "uabdl2",       0,      IF_DV_3H,  0x6E207000)
                                    //  uabdl2  Vd,Vn,Vm             DV_3H  01101110XX1mmmmm 011100nnnnnddddd   6E20 7000   Vd,Vn,Vm (vector)
 
-INST1(uadalp,  "uadalp", 0, 0, IF_DV_2T, 0x2E206800)
-                                   // C7.2.338 UADALP
+INST1(uadalp,      "uadalp",       0,      IF_DV_2T,  0x2E206800)
                                    //  uadalp  Vd,Vn                DV_2T  0Q101110XX100000 011010nnnnnddddd   2E20 6800   Vd,Vn    (vector)
 
-INST1(uaddl,   "uaddl",  0, 0, IF_DV_3H,  0x2E200000)
-                                   // C7.2.339 UADDL, UADDL2
+INST1(uaddl,       "uaddl",        0,      IF_DV_3H,  0x2E200000)
                                    //  uaddl   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 000000nnnnnddddd   2E20 0000   Vd,Vn,Vm (vector)
 
-INST1(uaddl2,  "uaddl2", 0, 0, IF_DV_3H,  0x6E200000)
-                                   // C7.2.339 UADDL, UADDL2
+INST1(uaddl2,      "uaddl2",       0,      IF_DV_3H,  0x6E200000)
                                    //  uaddl2  Vd,Vn,Vm             DV_3H  01101110XX1mmmmm 000000nnnnnddddd   6E20 0000   Vd,Vn,Vm (vector)
 
-INST1(uaddlp,  "uaddlp", 0, 0, IF_DV_2T, 0x2E202800)
-                                   // C7.2.340 UADDLP
+INST1(uaddlp,      "uaddlp",       0,      IF_DV_2T,  0x2E202800)
                                    //  uaddlp  Vd,Vn                DV_2T  0Q101110XX100000 001010nnnnnddddd   2E20 2800   Vd,Vn    (vector)
 
-INST1(uaddw,   "uaddw",  0, 0, IF_DV_3H,  0x2E201000)
-                                   // C7.2.342 UADDW, UADDW2
+INST1(uaddw,       "uaddw",        0,      IF_DV_3H,  0x2E201000)
                                    //  uaddw   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 000100nnnnnddddd   2E20 1000   Vd,Vn,Vm (vector)
 
-INST1(uaddw2,  "uaddw2", 0, 0, IF_DV_3H,  0x6E201000)
-                                   // C7.2.342 UADDW, UADDW2
+INST1(uaddw2,      "uaddw2",       0,      IF_DV_3H,  0x6E201000)
                                    //  uaddw2  Vd,Vn,Vm             DV_3H  01101110XX1mmmmm 000100nnnnnddddd   6E20 1000   Vd,Vn,Vm (vector)
 
-INST1(uhadd,   "uhadd", 0, 0,  IF_DV_3A,  0x2E200400)
-                                   // C7.2.349 UHADD
+INST1(uhadd,       "uhadd",        0,      IF_DV_3A,  0x2E200400)
                                    //  uhadd  Vd,Vn,Vm              DV_3A  0Q101110XX1mmmmm 000001nnnnnddddd   2E20 0400   Vd,Vn,Vm (vector)
 
-INST1(uhsub,   "uhsub", 0, 0,  IF_DV_3A,  0x2E202400)
-                                   // C7.2.350 UHSUB
+INST1(uhsub,       "uhsub",        0,      IF_DV_3A,  0x2E202400)
                                    //  uhsub  Vd,Vn,Vm              DV_3A  0Q101110XX1mmmmm 001001nnnnnddddd   2E20 2400   Vd,Vn,Vm (vector)
 
-INST1(urhadd,  "urhadd", 0, 0, IF_DV_3A,  0x2E201400)
-                                   // C7.2.373 URHADD
+INST1(urhadd,      "urhadd",       0,      IF_DV_3A,  0x2E201400)
                                    //  urhadd Vd,Vn,Vm              DV_3A  0Q101110XX1mmmmm 000101nnnnnddddd   2E20 1400   Vd,Vn,Vm (vector)
 
-INST1(usubl,   "usubl",  0, 0, IF_DV_3H,  0x2E202000)
-                                   // C7.2.383 USUBL, USUBL2
+INST1(usubl,       "usubl",        0,      IF_DV_3H,  0x2E202000)
                                    //  usubl   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 001000nnnnnddddd   2E20 2000   Vd,Vn,Vm (vector)
 
-INST1(usubl2,  "usubl2", 0, 0, IF_DV_3H,  0x6E202000)
-                                   // C7.2.383 USUBL, USUBL2
+INST1(usubl2,      "usubl2",       0,      IF_DV_3H,  0x6E202000)
                                    //  usubl2  Vd,Vn,Vm             DV_3H  01101110XX1mmmmm 001000nnnnnddddd   6E20 2000   Vd,Vn,Vm (vector)
 
-INST1(usubw,   "usubw",  0, 0, IF_DV_3H,  0x2E203000)
-                                   // C7.2.384 USUBW, USUBW2
+INST1(usubw,       "usubw",        0,      IF_DV_3H,  0x2E203000)
                                    //  usubw   Vd,Vn,Vm             DV_3H  00101110XX1mmmmm 001100nnnnnddddd   2E20 3000   Vd,Vn,Vm (vector)
 
-INST1(usubw2,  "usubw2", 0, 0, IF_DV_3H,  0x6E203000)
-                                   // C7.2.384 USUBW, USUBW2
+INST1(usubw2,      "usubw2",       0,      IF_DV_3H,  0x6E203000)
                                    //  usubw2  Vd,Vn,Vm             DV_3H  01101110XX1mmmmm 001100nnnnnddddd   6E20 3000   Vd,Vn,Vm (vector)
 
-INST1(shll,    "shll",   0, 0, IF_DV_2M,  0x2F00A400)
+INST1(shll,        "shll",         0,      IF_DV_2M,  0x2E213800)
                                    //  shll    Vd,Vn,imm            DV_2M  0Q101110XX100001 001110nnnnnddddd   2E21 3800   Vd,Vn, {8/16/32}
 
-INST1(shll2,   "shll2",  0, 0, IF_DV_2M,  0x6F00A400)
+INST1(shll2,       "shll2",        0,      IF_DV_2M,  0x6E213800)
                                    //  shll    Vd,Vn,imm            DV_2M  0Q101110XX100001 001110nnnnnddddd   2E21 3800   Vd,Vn, {8/16/32}
 
-INST1(sshll,   "sshll",  0, 0, IF_DV_2O,  0x0F00A400)
-                                   //  sshll   Vd,Vn,imm            DV_2O  000011110iiiiiii 101001nnnnnddddd   0F00 A400   Vd,Vn imm  (shift - vector)
+INST1(sshll,       "sshll",        0,      IF_DV_2O,  0x0F00A400)
+                                   //  sshll   Vd,Vn,imm            DV_2O  000011110iiiiiii 101001nnnnnddddd   0F00 A400   Vd,Vn imm  (left shift - vector)
+
+INST1(sshll2,      "sshll2",       0,      IF_DV_2O,  0x4F00A400)
+                                   //  sshll2  Vd,Vn,imm            DV_2O  010011110iiiiiii 101001nnnnnddddd   4F00 A400   Vd,Vn imm  (left shift - vector)
+
+INST1(ushll,       "ushll",        0,      IF_DV_2O,  0x2F00A400)
+                                   //  ushll   Vd,Vn,imm            DV_2O  001011110iiiiiii 101001nnnnnddddd   2F00 A400   Vd,Vn imm  (left shift - vector)
+
+INST1(ushll2,      "ushll2",       0,      IF_DV_2O,  0x6F00A400)
+                                   //  ushll2  Vd,Vn,imm            DV_2O  011011110iiiiiii 101001nnnnnddddd   6F00 A400   Vd,Vn imm  (left shift - vector)
+
+INST1(shrn,        "shrn",         RSH,    IF_DV_2O,  0x0F008400)
+                                   //  shrn    Vd,Vn,imm            DV_2O  000011110iiiiiii 100001nnnnnddddd   0F00 8400   Vd,Vn imm  (right shift - vector)
+
+INST1(shrn2,       "shrn2",        RSH,    IF_DV_2O,  0x4F008400)
+                                   //  shrn2   Vd,Vn,imm            DV_2O  010011110iiiiiii 100001nnnnnddddd   4F00 8400   Vd,Vn imm  (right shift - vector)
+
+INST1(rshrn,       "rshrn",        RSH,    IF_DV_2O,  0x0F008C00)
+                                   //  rshrn   Vd,Vn,imm            DV_2O  000011110iiiiiii 100011nnnnnddddd   0F00 8C00   Vd,Vn imm  (right shift - vector)
 
-INST1(sshll2,  "sshll2", 0, 0, IF_DV_2O,  0x4F00A400)
-                                   //  sshll2  Vd,Vn,imm            DV_2O  010011110iiiiiii 101001nnnnnddddd   4F00 A400   Vd,Vn imm  (shift - vector)
+INST1(rshrn2,      "rshrn2",       RSH,    IF_DV_2O,  0x4F008C00)
+                                   //  rshrn2  Vd,Vn,imm            DV_2O  010011110iiiiiii 100011nnnnnddddd   4F00 8C00   Vd,Vn imm  (right shift - vector)
 
-INST1(ushll,   "ushll",  0, 0, IF_DV_2O,  0x2F00A400)
-                                   //  ushll   Vd,Vn,imm            DV_2O  001011110iiiiiii 101001nnnnnddddd   2F00 A400   Vd,Vn imm  (shift - vector)
+INST1(sqrshrn2,    "sqrshrn2",     RSH,    IF_DV_2O,  0x0F009C00)
+                                   //  sqrshrn2 Vd,Vn,imm           DV_2O  0Q0011110iiiiiii 100111nnnnnddddd   0F00 9C00   Vd Vn imm  (right shift - vector)
 
-INST1(ushll2,  "ushll2", 0, 0, IF_DV_2O,  0x6F00A400)
-                                   //  ushll2  Vd,Vn,imm            DV_2O  011011110iiiiiii 101001nnnnnddddd   6F00 A400   Vd,Vn imm  (shift - vector)
+INST1(sqrshrun2,   "sqrshrun2",    RSH,    IF_DV_2O,  0x2F008C00)
+                                   //  sqrshrun2 Vd,Vn,imm          DV_2O  0Q1011110iiiiiii 100011nnnnnddddd   2F00 8C00   Vd Vn imm  (right shift - vector)
 
-INST1(shrn,    "shrn",   0, 0, IF_DV_2O,  0x0F008400)
-                                   //  shrn    Vd,Vn,imm            DV_2O  000011110iiiiiii 100001nnnnnddddd   0F00 8400   Vd,Vn imm  (shift - vector)
+INST1(sqshrn2,     "sqshrn2",      RSH,    IF_DV_2O,  0x0F009400)
+                                   //  sqshrn2 Vd,Vn,imm            DV_2O  0Q0011110iiiiiii 100101nnnnnddddd   0F00 9400   Vd Vn imm  (right shift - vector)
 
-INST1(shrn2,   "shrn2",  0, 0, IF_DV_2O,  0x4F008400)
-                                   //  shrn2   Vd,Vn,imm            DV_2O  010011110iiiiiii 100001nnnnnddddd   4F00 8400   Vd,Vn imm  (shift - vector)
+INST1(sqshrun2,    "sqshrun2",     RSH,    IF_DV_2O,  0x2F008400)
+                                   //  sqshrun2 Vd,Vn,imm           DV_2O  0Q1011110iiiiiii 100001nnnnnddddd   2F00 8400   Vd Vn imm  (right shift - vector)
 
-INST1(rshrn,   "rshrn",  0, 0, IF_DV_2O,  0x0F008C00)
-                                   //  rshrn   Vd,Vn,imm            DV_2O  000011110iiiiiii 100011nnnnnddddd   0F00 8C00   Vd,Vn imm  (shift - vector)
+INST1(uqrshrn2,    "uqrshrn2",     RSH,    IF_DV_2O,  0x2F009C00)
+                                   //  uqrshrn2 Vd,Vn,imm           DV_2O  0Q1011110iiiiiii 100111nnnnnddddd   2F00 9C00   Vd Vn imm  (right shift - vector)
 
-INST1(rshrn2,  "rshrn2", 0, 0, IF_DV_2O,  0x4F008C00)
-                                   //  rshrn2  Vd,Vn,imm            DV_2O  010011110iiiiiii 100011nnnnnddddd   4F00 8C00   Vd,Vn imm  (shift - vector)
+INST1(uqshrn2,     "uqshrn2",      RSH,    IF_DV_2O,  0x2F009400)
+                                   //  uqshrn2 Vd,Vn,imm            DV_2O  0Q1011110iiiiiii 100101nnnnnddddd   2F00 9400   Vd Vn imm  (right shift - vector)
 
-INST1(sxtl,    "sxtl",   0, 0, IF_DV_2O,  0x0F00A400)
-                                   //  sxtl    Vd,Vn                DV_2O  000011110iiiiiii 101001nnnnnddddd   0F00 A400   Vd,Vn      (shift - vector)
+INST1(sxtl,        "sxtl",         0,      IF_DV_2O,  0x0F00A400)
+                                   //  sxtl    Vd,Vn                DV_2O  000011110iiiiiii 101001nnnnnddddd   0F00 A400   Vd,Vn      (left shift - vector)
 
-INST1(sxtl2,   "sxtl2",  0, 0, IF_DV_2O,  0x4F00A400)
-                                   //  sxtl2   Vd,Vn                DV_2O  010011110iiiiiii 101001nnnnnddddd   4F00 A400   Vd,Vn      (shift - vector)
+INST1(sxtl2,       "sxtl2",        0,      IF_DV_2O,  0x4F00A400)
+                                   //  sxtl2   Vd,Vn                DV_2O  010011110iiiiiii 101001nnnnnddddd   4F00 A400   Vd,Vn      (left shift - vector)
 
-INST1(uxtl,    "uxtl",   0, 0, IF_DV_2O,  0x2F00A400)
-                                   //  uxtl    Vd,Vn                DV_2O  001011110iiiiiii 101001nnnnnddddd   2F00 A400   Vd,Vn      (shift - vector)
+INST1(uxtl,        "uxtl",         0,      IF_DV_2O,  0x2F00A400)
+                                   //  uxtl    Vd,Vn                DV_2O  001011110iiiiiii 101001nnnnnddddd   2F00 A400   Vd,Vn      (left shift - vector)
 
-INST1(uxtl2,   "uxtl2",  0, 0, IF_DV_2O,  0x6F00A400)
-                                   //  uxtl2   Vd,Vn                DV_2O  011011110iiiiiii 101001nnnnnddddd   6F00 A400   Vd,Vn      (shift - vector)
+INST1(uxtl2,       "uxtl2",        0,      IF_DV_2O,  0x6F00A400)
+                                   //  uxtl2   Vd,Vn                DV_2O  011011110iiiiiii 101001nnnnnddddd   6F00 A400   Vd,Vn      (left shift - vector)
 
-INST1(tbl,       "tbl",   0, 0, IF_DV_3C,   0x0E000000)
+INST1(tbl,         "tbl",          0,      IF_DV_3C,  0x0E000000)
                                    //  tbl    Vd,{Vn},Vm                DV_3C  0Q001110000mmmmm 000000nnnnnddddd   0E00 0000   Vd,Vn,Vm   (vector)
 
-INST1(tbl_2regs, "tbl",   0, 0, IF_DV_3C,   0x0E002000)
+INST1(tbl_2regs,   "tbl",          0,      IF_DV_3C,  0x0E002000)
                                    //  tbl    Vd,{Vn,Vn+1},Vm           DV_3C  0Q001110000mmmmm 001000nnnnnddddd   0E00 2000   Vd,Vn,Vm   (vector)
 
-INST1(tbl_3regs, "tbl",   0, 0, IF_DV_3C,   0x0E004000)
+INST1(tbl_3regs,   "tbl",          0,      IF_DV_3C,  0x0E004000)
                                    //  tbl    Vd,{Vn,Vn+1,Vn+2},Vm      DV_3C  0Q001110000mmmmm 010000nnnnnddddd   0E00 4000   Vd,Vn,Vm   (vector)
 
-INST1(tbl_4regs, "tbl",   0, 0, IF_DV_3C,   0x0E006000)
+INST1(tbl_4regs,   "tbl",          0,      IF_DV_3C,  0x0E006000)
                                    //  tbl    Vd,{Vn,Vn+1,Vn+2,Vn+3},Vm DV_3C  0Q001110000mmmmm 011000nnnnnddddd   0E00 6000   Vd,Vn,Vm   (vector)
 
-INST1(tbx,       "tbx",   0, 0, IF_DV_3C,   0x0E001000)
+INST1(tbx,         "tbx",          0,      IF_DV_3C,  0x0E001000)
                                    //  tbx    Vd,{Vn},Vm                DV_3C  0Q001110000mmmmm 000100nnnnnddddd   0E00 1000   Vd,Vn,Vm   (vector)
 
-INST1(tbx_2regs, "tbx",   0, 0, IF_DV_3C,   0x0E003000)
+INST1(tbx_2regs,   "tbx",          0,      IF_DV_3C,  0x0E003000)
                                    //  tbx    Vd,{Vn,Vn+1},Vm           DV_3C  0Q001110000mmmmm 001100nnnnnddddd   0E00 3000   Vd,Vn,Vm   (vector)
 
-INST1(tbx_3regs, "tbx",   0, 0, IF_DV_3C,   0x0E005000)
+INST1(tbx_3regs,   "tbx",          0,      IF_DV_3C,  0x0E005000)
                                    //  tbx    Vd,{Vn,Vn+1,Vn+2},Vm      DV_3C  0Q001110000mmmmm 010100nnnnnddddd   0E00 5000   Vd,Vn,Vm   (vector)
 
-INST1(tbx_4regs, "tbx",   0, 0, IF_DV_3C,   0x0E007000)
+INST1(tbx_4regs,   "tbx",          0,      IF_DV_3C,  0x0E007000)
                                    //  tbx    Vd,{Vn,Vn+1,Vn+2,Vn+3},Vm DV_3C  0Q001110000mmmmm 011100nnnnnddddd   0E00 7000   Vd,Vn,Vm   (vector)
 
 // clang-format on