return;
}
- if (!compiler->opts.altJit)
- {
- // No point doing this in a "real" JIT.
- return;
- }
-
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// Mark the "fake" instructions in the output.
printf("*************** In genArm64EmitterUnitTests()\n");
emitter* theEmitter = GetEmitter();
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// We use this:
#endif
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ // sadalp vector
+ theEmitter->emitIns_R_R(INS_sadalp, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_sadalp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_sadalp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_sadalp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_sadalp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_sadalp, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+ // saddlp vector
+ theEmitter->emitIns_R_R(INS_saddlp, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_saddlp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_saddlp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_saddlp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_saddlp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_saddlp, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+ // uadalp vector
+ theEmitter->emitIns_R_R(INS_uadalp, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_uadalp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_uadalp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_uadalp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_uadalp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_uadalp, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+ // uaddlp vector
+ theEmitter->emitIns_R_R(INS_uaddlp, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_uaddlp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_uaddlp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_uaddlp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_uaddlp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_uaddlp, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point round to int, one dest, one source
//
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ // addhn vector
+ theEmitter->emitIns_R_R_R(INS_addhn, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_addhn, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_addhn, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // addhn2 vector
+ theEmitter->emitIns_R_R_R(INS_addhn2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_addhn2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_addhn2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // raddhn vector
+ theEmitter->emitIns_R_R_R(INS_raddhn, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_raddhn, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_raddhn, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // raddhn2 vector
+ theEmitter->emitIns_R_R_R(INS_raddhn2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_raddhn2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_raddhn2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // rsubhn vector
+ theEmitter->emitIns_R_R_R(INS_rsubhn, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_rsubhn, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_rsubhn, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // rsubhn2 vector
+ theEmitter->emitIns_R_R_R(INS_rsubhn2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_rsubhn2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_rsubhn2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sabal vector
+ theEmitter->emitIns_R_R_R(INS_sabal, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sabal, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sabal, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // sabal2 vector
+ theEmitter->emitIns_R_R_R(INS_sabal2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sabal2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sabal2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sabdl vector
+ theEmitter->emitIns_R_R_R(INS_sabdl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sabdl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sabdl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // sabdl2 vector
+ theEmitter->emitIns_R_R_R(INS_sabdl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sabdl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sabdl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // saddl vector
+ theEmitter->emitIns_R_R_R(INS_saddl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_saddl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_saddl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // saddl2 vector
+ theEmitter->emitIns_R_R_R(INS_saddl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_saddl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_saddl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // saddw vector
+ theEmitter->emitIns_R_R_R(INS_saddw, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_saddw, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_saddw, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // saddw2 vector
+ theEmitter->emitIns_R_R_R(INS_saddw2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_saddw2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_saddw2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // shadd vector
+ theEmitter->emitIns_R_R_R(INS_shadd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_shadd, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_shadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_shadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_shadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_shadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // shsub vector
+ theEmitter->emitIns_R_R_R(INS_shsub, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_shsub, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_shsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_shsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_shsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_shsub, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sqadd scalar
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+ // sqadd vector
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sqadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sqsub scalar
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+ // sqsub vector
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sqsub, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // srhadd vector
+ theEmitter->emitIns_R_R_R(INS_srhadd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_srhadd, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_srhadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_srhadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_srhadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_srhadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // ssubl vector
+ theEmitter->emitIns_R_R_R(INS_ssubl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_ssubl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_ssubl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // ssubl2 vector
+ theEmitter->emitIns_R_R_R(INS_ssubl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_ssubl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_ssubl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // ssubw vector
+ theEmitter->emitIns_R_R_R(INS_ssubw, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_ssubw, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_ssubw, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // ssubw2 vector
+ theEmitter->emitIns_R_R_R(INS_ssubw2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_ssubw2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_ssubw2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // subhn vector
+ theEmitter->emitIns_R_R_R(INS_subhn, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_subhn, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_subhn, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // subhn2 vector
+ theEmitter->emitIns_R_R_R(INS_subhn2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_subhn2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_subhn2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uabal vector
+ theEmitter->emitIns_R_R_R(INS_uabal, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uabal, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uabal, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // uabal2 vector
+ theEmitter->emitIns_R_R_R(INS_uabal2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uabal2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uabal2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uabdl vector
+ theEmitter->emitIns_R_R_R(INS_uabdl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uabdl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uabdl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // uabdl2 vector
+ theEmitter->emitIns_R_R_R(INS_uabdl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uabdl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uabdl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uaddl vector
+ theEmitter->emitIns_R_R_R(INS_uaddl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uaddl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uaddl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // uaddl2 vector
+ theEmitter->emitIns_R_R_R(INS_uaddl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uaddl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uaddl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uaddw vector
+ theEmitter->emitIns_R_R_R(INS_uaddw, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uaddw, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uaddw, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // uaddw2 vector
+ theEmitter->emitIns_R_R_R(INS_uaddw2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uaddw2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uaddw2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uhadd vector
+ theEmitter->emitIns_R_R_R(INS_uhadd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uhadd, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uhadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uhadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uhadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uhadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uhsub vector
+ theEmitter->emitIns_R_R_R(INS_uhsub, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uhsub, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uhsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uhsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uhsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uhsub, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uqadd scalar
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+ // uqadd vector
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uqadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uqsub scalar
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_1BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_2BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_4BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_NONE);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_NONE);
+
+ // uqsub vector
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uqsub, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // urhadd vector
+ theEmitter->emitIns_R_R_R(INS_urhadd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_urhadd, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_urhadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_urhadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_urhadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_urhadd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // usubl vector
+ theEmitter->emitIns_R_R_R(INS_usubl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_usubl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_usubl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // usubl2 vector
+ theEmitter->emitIns_R_R_R(INS_usubl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_usubl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_usubl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // usubw vector
+ theEmitter->emitIns_R_R_R(INS_usubw, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_usubw, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_usubw, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // usubw2 vector
+ theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R vector multiply
//
theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ // pmull vector
+ theEmitter->emitIns_R_R_R(INS_pmull, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_pmull, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_1D);
+
+ // pmull2 vector
+ theEmitter->emitIns_R_R_R(INS_pmull2, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_pmull2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2D);
+
+ // smlal vector
+ theEmitter->emitIns_R_R_R(INS_smlal, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_smlal, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_smlal, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // smlal2 vector
+ theEmitter->emitIns_R_R_R(INS_smlal2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_smlal2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_smlal2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // smlsl vector
+ theEmitter->emitIns_R_R_R(INS_smlsl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_smlsl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_smlsl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // smlsl2 vector
+ theEmitter->emitIns_R_R_R(INS_smlsl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_smlsl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_smlsl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // smull vector
+ theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // smull2 vector
+ theEmitter->emitIns_R_R_R(INS_smull2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_smull2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_smull2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // umlal vector
+ theEmitter->emitIns_R_R_R(INS_umlal, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_umlal, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_umlal, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // umlal2 vector
+ theEmitter->emitIns_R_R_R(INS_umlal2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_umlal2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_umlal2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // umlsl vector
+ theEmitter->emitIns_R_R_R(INS_umlsl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_umlsl, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_umlsl, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // umlsl2 vector
+ theEmitter->emitIns_R_R_R(INS_umlsl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_umlsl2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_umlsl2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // umull vector
+ theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+
+ // umull2 vector
+ theEmitter->emitIns_R_R_R(INS_umull2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_umull2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_umull2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // smlal vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_smlal, EA_8BYTE, REG_V0, REG_V1, REG_V2, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_smlal, EA_8BYTE, REG_V3, REG_V4, REG_V5, 1, INS_OPTS_2S);
+
+ // smlal2 vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_smlal2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_smlal2, EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S);
+
+ // smlsl vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_smlsl, EA_8BYTE, REG_V0, REG_V1, REG_V2, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_smlsl, EA_8BYTE, REG_V3, REG_V4, REG_V5, 1, INS_OPTS_2S);
+
+ // smlsl2 vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_smlsl2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_smlsl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S);
+
+ // smull vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_smull, EA_8BYTE, REG_V0, REG_V1, REG_V2, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_smull, EA_8BYTE, REG_V3, REG_V4, REG_V5, 1, INS_OPTS_2S);
+
+ // smull2 vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_smull2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_smull2, EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S);
+
+ // umlsl2 vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_umlsl2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_umlsl2, EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S);
+
+ // umull vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_umull, EA_8BYTE, REG_V0, REG_V1, REG_V2, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_umull, EA_8BYTE, REG_V3, REG_V4, REG_V5, 1, INS_OPTS_2S);
+
+ // umull2 vector, by element
+ theEmitter->emitIns_R_R_R_I(INS_umull2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_umull2, EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
printf("*************** End of genArm64EmitterUnitTests()\n");
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
}
#endif // defined(DEBUG)
case IF_DV_2T: // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv,
// umaxv, uminv)
assert(isValidVectorDatasize(id->idOpSize()));
- elemsize = optGetElemsize(id->idInsOpt());
- assert((elemsize != EA_8BYTE) && (id->idInsOpt() != INS_OPTS_2S)); // can't use 2D or 1D or 2S
assert(isVectorRegister(id->idReg1()));
assert(isVectorRegister(id->idReg2()));
break;
break;
case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ case IF_DV_3HI: // DV_3HI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (smlal{2}, umlal{2} by element)
assert(isValidVectorDatasize(id->idOpSize()));
assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
assert(isVectorRegister(id->idReg1()));
assert(isValidVectorIndex(EA_16BYTE, elemsize, emitGetInsSC(id)));
break;
- case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
assert(insOptsNone(id->idInsOpt()));
- assert(id->idOpSize() == EA_8BYTE);
assert(isVectorRegister(id->idReg1()));
assert(isVectorRegister(id->idReg2()));
assert(isVectorRegister(id->idReg3()));
assert(isVectorRegister(id->idReg3()));
break;
+ case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (addhn{2}, raddhn{2}, rsubhn{2},
+ // subhn{2}, pmull{2})
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
case IF_DV_4A: // DR_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar)
assert(isValidGeneralDatasize(id->idOpSize()));
assert(isVectorRegister(id->idReg1()));
case IF_DR_3C: // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
- case IF_DV_3F: // DV_3F ...........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - Vd both source and dest
case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
- case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3F: // DV_3F .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
+ case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (addhn{2}, raddhn{2}, rsubhn{2},
+ // subhn{2}, pmull{2})
+ case IF_DV_3HI: // DV_3HI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (smlal{2}, smlsl{2}, smull{2},
+ // umlal{2}, umlsl{2}, umull{2} vector by elem)
case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
// Tracked GC pointers cannot be placed into the SIMD registers.
return false;
const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI};
const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M};
const static insFormat formatEncode3J[3] = {IF_LS_2D, IF_LS_3F, IF_LS_2E};
+ const static insFormat formatEncode3K[3] = {IF_DR_3A, IF_DV_3H, IF_DV_3HI};
const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F};
const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B};
const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D};
const static insFormat formatEncode2O[2] = {IF_DV_3E, IF_DV_3A};
const static insFormat formatEncode2P[2] = {IF_DV_2Q, IF_DV_3B};
const static insFormat formatEncode2Q[2] = {IF_DV_2S, IF_DV_3A};
+ const static insFormat formatEncode2R[2] = {IF_DV_3H, IF_DV_3HI};
code_t code = BAD_CODE;
insFormat insFmt = emitInsFormat(ins);
}
break;
+ case IF_EN3K:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3K[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
case IF_EN2A:
for (index = 0; index < 2; index++)
{
}
break;
- case IF_BI_0A:
- case IF_BI_0B:
- case IF_BI_0C:
- case IF_BI_1A:
- case IF_BI_1B:
- case IF_BR_1A:
- case IF_BR_1B:
- case IF_LS_1A:
- case IF_LS_2A:
- case IF_LS_2B:
- case IF_LS_2C:
- case IF_LS_3A:
- case IF_LS_3B:
- case IF_LS_3C:
- case IF_LS_3D:
- case IF_LS_3E:
- case IF_DI_1A:
- case IF_DI_1B:
- case IF_DI_1C:
- case IF_DI_1D:
- case IF_DI_1E:
- case IF_DI_1F:
- case IF_DI_2A:
- case IF_DI_2B:
- case IF_DI_2C:
- case IF_DI_2D:
- case IF_DR_1D:
- case IF_DR_2A:
- case IF_DR_2B:
- case IF_DR_2C:
- case IF_DR_2D:
- case IF_DR_2E:
- case IF_DR_2F:
- case IF_DR_2G:
- case IF_DR_2H:
- case IF_DR_2I:
- case IF_DR_3A:
- case IF_DR_3B:
- case IF_DR_3C:
- case IF_DR_3D:
- case IF_DR_3E:
- case IF_DR_4A:
- case IF_DV_1A:
- case IF_DV_1B:
- case IF_DV_1C:
- case IF_DV_2A:
- case IF_DV_2B:
- case IF_DV_2C:
- case IF_DV_2D:
- case IF_DV_2E:
- case IF_DV_2F:
- case IF_DV_2G:
- case IF_DV_2H:
- case IF_DV_2I:
- case IF_DV_2J:
- case IF_DV_2K:
- case IF_DV_2L:
- case IF_DV_2M:
- case IF_DV_2N:
- case IF_DV_2O:
- case IF_DV_2P:
- case IF_DV_2R:
- case IF_DV_2T:
- case IF_DV_2U:
- case IF_DV_3A:
- case IF_DV_3AI:
- case IF_DV_3B:
- case IF_DV_3BI:
- case IF_DV_3C:
- case IF_DV_3D:
- case IF_DV_3DI:
- case IF_DV_3E:
- case IF_DV_3F:
- case IF_DV_3G:
- case IF_DV_4A:
- case IF_SN_0A:
- case IF_SI_0A:
- case IF_SI_0B:
-
- index = 0;
- encoding_found = true;
+ case IF_EN2R:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2R[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
break;
default:
-
- encoding_found = false;
+ if (fmt == insFmt)
+ {
+ encoding_found = true;
+ index = 0;
+ }
+ else
+ {
+ encoding_found = false;
+ }
break;
}
}
}
+// For the given 'srcArrangement' returns the "widen" 'dstArrangement' specifying the destination vector register
+// arrangement
+// asserts and returns INS_OPTS_NONE if an invalid 'srcArrangement' value is passed
+//
+/*static*/ insOpts emitter::optWidenDstArrangement(insOpts srcArrangement)
+{
+ insOpts dstArrangement = INS_OPTS_NONE;
+
+ switch (srcArrangement)
+ {
+ case INS_OPTS_8B:
+ dstArrangement = INS_OPTS_4H;
+ break;
+
+ case INS_OPTS_16B:
+ dstArrangement = INS_OPTS_8H;
+ break;
+
+ case INS_OPTS_4H:
+ dstArrangement = INS_OPTS_2S;
+ break;
+
+ case INS_OPTS_8H:
+ dstArrangement = INS_OPTS_4S;
+ break;
+
+ case INS_OPTS_2S:
+ dstArrangement = INS_OPTS_1D;
+ break;
+
+ case INS_OPTS_4S:
+ dstArrangement = INS_OPTS_2D;
+ break;
+
+ default:
+ assert(!" invalid 'srcArrangement' value");
+ break;
+ }
+
+ return dstArrangement;
+}
+
// For the given 'conversion' returns the 'dstsize' specified by the conversion option
/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion)
{
fmt = IF_DV_2G;
break;
+ case INS_sadalp:
+ case INS_saddlp:
+ case INS_uadalp:
+ case INS_uaddlp:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidArrangement(size, opt));
+ assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
+ fmt = IF_DV_2T;
+ break;
+
default:
unreached();
break;
/* Figure out the encoding format of the instruction */
switch (ins)
{
+ case INS_mul:
+ case INS_smull:
+ case INS_umull:
+ if (insOptsAnyArrangement(opt))
+ {
+ // ASIMD instruction
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidArrangement(size, opt));
+ assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
+ if (ins == INS_mul)
+ {
+ fmt = IF_DV_3A;
+ }
+ else
+ {
+ fmt = IF_DV_3H;
+ }
+ break;
+ }
+ // Base instruction
+ __fallthrough;
+
case INS_lsl:
case INS_lsr:
case INS_asr:
case INS_udiv:
case INS_sdiv:
case INS_mneg:
- case INS_smull:
case INS_smnegl:
case INS_smulh:
- case INS_umull:
case INS_umnegl:
case INS_umulh:
case INS_lslv:
fmt = IF_DR_3A;
break;
- case INS_mul:
- if (insOptsNone(opt))
- {
- // general register
- assert(isValidGeneralDatasize(size));
- assert(isGeneralRegister(reg1));
- assert(isGeneralRegister(reg2));
- assert(isGeneralRegister(reg3));
- fmt = IF_DR_3A;
- break;
- }
- __fallthrough;
-
- case INS_mla:
- case INS_mls:
- case INS_pmul:
- assert(insOptsAnyArrangement(opt));
- assert(isVectorRegister(reg1));
- assert(isVectorRegister(reg2));
- assert(isVectorRegister(reg3));
- assert(isValidVectorDatasize(size));
- assert(isValidArrangement(size, opt));
- elemsize = optGetElemsize(opt);
- if (ins == INS_pmul)
- {
- assert(elemsize == EA_1BYTE); // only supports 8B or 16B
- }
- else // INS_mul, INS_mla, INS_mls
- {
- assert(elemsize != EA_8BYTE); // can't use 2D or 1D
- }
- fmt = IF_DV_3A;
- break;
-
case INS_add:
case INS_sub:
if (isVectorRegister(reg1))
{
+ // ASIMD instruction
assert(isVectorRegister(reg2));
assert(isVectorRegister(reg3));
}
break;
}
+ // Base instruction
__fallthrough;
case INS_adds:
if (insOptsAnyArrangement(opt))
{
// Vector operation
- assert(isValidVectorDatasize(size));
assert(isValidArrangement(size, opt));
- elemsize = optGetElemsize(opt);
- assert(opt != INS_OPTS_1D); // Reserved encoding
+ assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
fmt = IF_DV_3A;
}
else
}
break;
+ case INS_sqadd:
+ case INS_sqsub:
+ case INS_uqadd:
+ case INS_uqsub:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidArrangement(size, opt));
+ assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
+ fmt = IF_DV_3A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ fmt = IF_DV_3E;
+ }
+ break;
+
case INS_fcmeq:
case INS_fcmge:
case INS_fcmgt:
}
break;
+ case INS_mla:
+ case INS_mls:
case INS_saba:
case INS_sabd:
+ case INS_shadd:
+ case INS_shsub:
case INS_smax:
case INS_smaxp:
case INS_smin:
case INS_sminp:
+ case INS_srhadd:
case INS_uaba:
case INS_uabd:
+ case INS_uhadd:
+ case INS_uhsub:
case INS_umax:
case INS_umaxp:
case INS_umin:
case INS_uminp:
- assert(elemsize != EA_8BYTE); // can't use 2D or 1D
- __fallthrough;
+ case INS_urhadd:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidArrangement(size, opt));
+ assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
+ fmt = IF_DV_3A;
+ break;
case INS_addp:
case INS_uzp1:
assert(isVectorRegister(reg1));
assert(isVectorRegister(reg2));
assert(isVectorRegister(reg3));
- assert(insOptsAnyArrangement(opt));
-
- // Vector operation
- assert(isValidVectorDatasize(size));
assert(isValidArrangement(size, opt));
- elemsize = optGetElemsize(opt);
-
+ assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
fmt = IF_DV_3A;
break;
fmt = IF_LS_3F;
break;
+ case INS_addhn:
+ case INS_raddhn:
+ case INS_rsubhn:
+ case INS_subhn:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_8BYTE);
+ assert(isValidArrangement(size, opt));
+ assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved.
+ fmt = IF_DV_3H;
+ break;
+
+ case INS_addhn2:
+ case INS_raddhn2:
+ case INS_rsubhn2:
+ case INS_subhn2:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_16BYTE);
+ assert(isValidArrangement(size, opt));
+ assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved.
+ fmt = IF_DV_3H;
+ break;
+
+ case INS_sabal:
+ case INS_sabdl:
+ case INS_saddl:
+ case INS_saddw:
+ case INS_smlal:
+ case INS_smlsl:
+ case INS_ssubl:
+ case INS_ssubw:
+ case INS_uabal:
+ case INS_uabdl:
+ case INS_uaddl:
+ case INS_uaddw:
+ case INS_umlal:
+ case INS_umlsl:
+ case INS_usubl:
+ case INS_usubw:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_8BYTE);
+ assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+ fmt = IF_DV_3H;
+ break;
+
+ case INS_sabal2:
+ case INS_sabdl2:
+ case INS_saddl2:
+ case INS_saddw2:
+ case INS_smlal2:
+ case INS_smlsl2:
+ case INS_ssubl2:
+ case INS_ssubw2:
+ case INS_umlal2:
+ case INS_umlsl2:
+ case INS_smull2:
+ case INS_uabal2:
+ case INS_uabdl2:
+ case INS_uaddl2:
+ case INS_uaddw2:
+ case INS_usubl2:
+ case INS_umull2:
+ case INS_usubw2:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_16BYTE);
+ assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+ fmt = IF_DV_3H;
+ break;
+
+ case INS_pmul:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidArrangement(size, opt));
+ assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B));
+ fmt = IF_DV_3A;
+ break;
+
+ case INS_pmull:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_8BYTE);
+ assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_1D));
+ fmt = IF_DV_3H;
+ break;
+
+ case INS_pmull2:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_16BYTE);
+ assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_2D));
+ fmt = IF_DV_3H;
+ break;
+
default:
unreached();
break;
fmt = IF_DV_3G;
break;
+ case INS_smlal:
+ case INS_smlsl:
+ case INS_smull:
+ case INS_umlal:
+ case INS_umlsl:
+ case INS_umull:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_8BYTE);
+ assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_3HI;
+ break;
+
+ case INS_smlal2:
+ case INS_smlsl2:
+ case INS_smull2:
+ case INS_umlal2:
+ case INS_umlsl2:
+ case INS_umull2:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(size == EA_16BYTE);
+ assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_3HI;
+ break;
+
default:
unreached();
break;
dst += emitOutput_Instr(dst, code);
break;
- case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ elemsize = id->idOpSize();
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
case IF_DV_3F: // DV_3F ...........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - source dest regs overlap
code = emitInsCode(ins, fmt);
code |= insEncodeReg_Vd(id->idReg1()); // ddddd
dst += emitOutput_Instr(dst, code);
break;
+ case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (addhn{2}, raddhn{2}, rsubhn{2},
+ // subhn{2}, pmull{2})
+ code = emitInsCode(ins, fmt);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3HI: // DV_3HI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (smlal{2}, umlal{2} by element)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
code = emitInsCode(ins, fmt);
elemsize = id->idOpSize();
break;
case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
- emitDispReg(id->idReg1(), size, true);
- emitDispReg(id->idReg2(), size, false);
+ if ((ins == INS_uxtb) || (ins == INS_uxth))
+ {
+ // There is no 64-bit variant of uxtb and uxth
+ // However, we allow idOpSize() to have EA_8BYTE value for these instruction
+ emitDispReg(id->idReg1(), EA_4BYTE, true);
+ emitDispReg(id->idReg2(), EA_4BYTE, false);
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), size, true);
+            // sxtb, sxth and sxtw always operate on a 32-bit source register
+ emitDispReg(id->idReg2(), EA_4BYTE, false);
+ }
break;
case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar)
case IF_DV_2T: // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv,
// umaxv, uminv)
- elemsize = optGetElemsize(id->idInsOpt());
- emitDispReg(id->idReg1(), elemsize, true);
- emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+ if ((ins == INS_sadalp) || (ins == INS_saddlp) || (ins == INS_uadalp) || (ins == INS_uaddlp))
+ {
+ emitDispVectorReg(id->idReg1(), optWidenDstArrangement(id->idInsOpt()), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+ }
+ else
+ {
+ elemsize = optGetElemsize(id->idInsOpt());
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+ }
break;
case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
break;
case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
- case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
emitDispReg(id->idReg1(), size, true);
emitDispReg(id->idReg2(), size, true);
emitDispReg(id->idReg3(), size, false);
emitDispImm(emitGetInsSC(id), false);
break;
+ case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (addhn{2}, raddhn{2}, rsubhn{2},
+ // subhn{2}, pmull{2})
+ if ((ins == INS_addhn) || (ins == INS_addhn2) || (ins == INS_raddhn) || (ins == INS_raddhn2) ||
+ (ins == INS_subhn) || (ins == INS_subhn2) || (ins == INS_rsubhn) || (ins == INS_rsubhn2))
+ {
+            // These are "high narrow" instructions, i.e. their source registers are "wider" than the destination
+ // register.
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), optWidenElemsize(id->idInsOpt()), true);
+ emitDispVectorReg(id->idReg3(), optWidenElemsize(id->idInsOpt()), false);
+ }
+ else
+ {
+ if (((ins == INS_pmull) && (id->idInsOpt() == INS_OPTS_1D)) ||
+ (ins == (INS_pmull2) && (id->idInsOpt() == INS_OPTS_2D)))
+ {
+ // PMULL Vd.1Q, Vn.1D, Vm.1D
+ // PMULL2 Vd.1Q, Vn.2D, Vm.2D
+ printf("%s.1q, ", emitVectorRegName(id->idReg1()));
+ }
+ else
+ {
+ emitDispVectorReg(id->idReg1(), optWidenElemsize(id->idInsOpt()), true);
+ }
+
+ if ((ins == INS_saddw) || (ins == INS_saddw2) || (ins == INS_uaddw) || (ins == INS_uaddw2) ||
+ (ins == INS_ssubw) || (ins == INS_ssubw2) || (ins == INS_usubw) || (ins == INS_usubw2))
+ {
+ emitDispVectorReg(id->idReg2(), optWidenElemsize(id->idInsOpt()), true);
+ }
+ else
+ {
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ }
+
+ emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+ }
+ break;
+
+ case IF_DV_3HI:
+ emitDispVectorReg(id->idReg1(), optWidenElemsize(id->idInsOpt()), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ elemsize = optGetElemsize(id->idInsOpt());
+ emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
+ break;
+
case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
emitDispReg(id->idReg1(), size, true);
emitDispReg(id->idReg2(), size, true);
result.insLatency = PERFSCORE_LATENCY_1C;
break;
+ case INS_smaddl:
+ case INS_smsubl:
+ case INS_smnegl:
+ case INS_umaddl:
+ case INS_umsubl:
+ case INS_umnegl:
+ result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ break;
+
default:
// all other instructions
perfScoreUnhandledInstruction(id, &result);
case INS_cmgt:
case INS_cmhi:
case INS_cmhs:
+ case INS_shadd:
+ case INS_shsub:
+ case INS_srhadd:
case INS_smax:
case INS_smaxp:
case INS_smin:
case INS_umaxp:
case INS_umin:
case INS_uminp:
+ case INS_uhadd:
+ case INS_uhsub:
+ case INS_urhadd:
case INS_uzp1:
case INS_uzp2:
case INS_zip1:
case INS_cmtst:
case INS_pmul:
case INS_sabd:
+ case INS_sqadd:
+ case INS_sqsub:
case INS_uabd:
+ case INS_uqadd:
+ case INS_uqsub:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_3C;
break;
case INS_rshrn:
case INS_rshrn2:
+ case INS_ssra:
case INS_srshr:
case INS_urshr:
- result.insThroughput = PERFSCORE_THROUGHPUT_2X;
- result.insLatency = PERFSCORE_LATENCY_3C;
+ case INS_usra:
+ if (id->idOpSize() == EA_16BYTE)
+ {
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ }
+ else
+ {
+ result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ }
+ break;
+
+ case INS_srsra:
+ case INS_ursra:
+ result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+ result.insLatency = PERFSCORE_LATENCY_4C;
break;
default:
}
break;
+    case IF_DV_3H: // addhn{2}, raddhn{2}, rsubhn{2}, subhn{2}, sabal{2}, sabdl{2}, saddl{2}, saddw{2}, ssubl{2},
+                    // ssubw{2}, uabal{2}, uabdl{2}, uaddl{2}, uaddw{2}, usubl{2}, usubw{2}, pmull{2}
+    case IF_DV_3HI: // smlal{2}, smlsl{2}, smull{2}, umlal{2}, umlsl{2}, umull{2} (by element)
+ switch (ins)
+ {
+ case INS_addhn:
+ case INS_addhn2:
+ case INS_sabdl:
+ case INS_sabdl2:
+ case INS_saddl:
+ case INS_saddl2:
+ case INS_saddw:
+ case INS_saddw2:
+ case INS_ssubl:
+ case INS_ssubl2:
+ case INS_ssubw:
+ case INS_ssubw2:
+ case INS_subhn:
+ case INS_subhn2:
+ case INS_uabdl:
+ case INS_uabdl2:
+ case INS_uaddl:
+ case INS_uaddl2:
+ case INS_uaddw:
+ case INS_uaddw2:
+ case INS_usubl:
+ case INS_usubl2:
+ case INS_usubw:
+ case INS_usubw2:
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ break;
+
+ case INS_raddhn:
+ case INS_raddhn2:
+ case INS_rsubhn:
+ case INS_rsubhn2:
+ case INS_sabal:
+ case INS_sabal2:
+ case INS_uabal:
+ case INS_uabal2:
+ result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+ result.insLatency = PERFSCORE_LATENCY_4C;
+ break;
+
+ case INS_smlal:
+ case INS_smlal2:
+ case INS_smlsl:
+ case INS_smlsl2:
+ case INS_smull:
+ case INS_smull2:
+ case INS_umlal:
+ case INS_umlal2:
+ case INS_umlsl:
+ case INS_umlsl2:
+ case INS_umull:
+ case INS_umull2:
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency = PERFSCORE_LATENCY_4C;
+ break;
+
+ case INS_pmull:
+ case INS_pmull2:
+ if ((id->idInsOpt() == INS_OPTS_8B) || (id->idInsOpt() == INS_OPTS_16B))
+ {
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ }
+ else
+ {
+ // Crypto polynomial (64x64) multiply long
+ assert((id->idInsOpt() == INS_OPTS_1D) || (id->idInsOpt() == INS_OPTS_2D));
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ result.insLatency = PERFSCORE_LATENCY_2C;
+ }
+ break;
+
+ default:
+ // all other instructions
+ perfScoreUnhandledInstruction(id, &result);
+ break;
+ }
+ break;
+
case IF_SI_0A: // brk imm16
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_1C;
result.insLatency = PERFSCORE_LATENCY_4C;
break;
+ case INS_sadalp:
+ case INS_uadalp:
+ result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+ result.insLatency = PERFSCORE_LATENCY_4C;
+ break;
+
+ case INS_saddlp:
+ case INS_uaddlp:
+ result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+ result.insLatency = PERFSCORE_LATENCY_3C;
+ break;
+
default:
// all other instructions
perfScoreUnhandledInstruction(id, &result);
// For the given 'arrangement' returns the 'widen-arrangement' specified by the vector register arrangement
static insOpts optWidenElemsize(insOpts arrangement);
+// For the given 'srcArrangement' returns the "widen" 'dstArrangement' specifying the destination vector register
+// arrangement
+// of Long Pairwise instructions. Note that the destination vector elements are twice as long as the source vector elements.
+static insOpts optWidenDstArrangement(insOpts srcArrangement);
+
// For the given 'conversion' returns the 'dstsize' specified by the conversion option
static emitAttr optGetDstsize(insOpts conversion);
IF_DEF(EN3H, IS_NONE, NONE) // Instruction has 3 possible encoding types, type H
IF_DEF(EN3I, IS_NONE, NONE) // Instruction has 3 possible encoding types, type I
IF_DEF(EN3J, IS_NONE, NONE) // Instruction has 3 possible encoding types, type J
+IF_DEF(EN3K, IS_NONE, NONE) // Instruction has 3 possible encoding types, type K
IF_DEF(EN2A, IS_NONE, NONE) // Instruction has 2 possible encoding types, type A
IF_DEF(EN2B, IS_NONE, NONE) // Instruction has 2 possible encoding types, type B
IF_DEF(EN2C, IS_NONE, NONE) // Instruction has 2 possible encoding types, type C
IF_DEF(EN2O, IS_NONE, NONE) // Instruction has 2 possible encoding types, type O
IF_DEF(EN2P, IS_NONE, NONE) // Instruction has 2 possible encoding types, type P
IF_DEF(EN2Q, IS_NONE, NONE) // Instruction has 2 possible encoding types, type Q
+IF_DEF(EN2R, IS_NONE, NONE) // Instruction has 2 possible encoding types, type R
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
IF_DEF(DV_2T, IS_NONE, NONE) // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv)
IF_DEF(DV_2U, IS_NONE, NONE) // DV_2U ................ ......nnnnnddddd Sd Sn (sha1h)
-IF_DEF(DV_3A, IS_NONE, NONE) // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
-IF_DEF(DV_3AI, IS_NONE, NONE) // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
-IF_DEF(DV_3B, IS_NONE, NONE) // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
-IF_DEF(DV_3BI, IS_NONE, NONE) // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
-IF_DEF(DV_3C, IS_NONE, NONE) // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
-IF_DEF(DV_3D, IS_NONE, NONE) // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
-IF_DEF(DV_3DI, IS_NONE, NONE) // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
-IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
-IF_DEF(DV_3F, IS_NONE, NONE) // DV_3F ...........mmmmm ......nnnnnddddd Qd Sn Vm (Qd used as both source and destination)
-IF_DEF(DV_3G, IS_NONE, NONE) // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
-
-IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar)
+IF_DEF(DV_3A, IS_NONE, NONE) // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3AI, IS_NONE, NONE) // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+IF_DEF(DV_3B, IS_NONE, NONE) // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3BI, IS_NONE, NONE) // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+IF_DEF(DV_3C, IS_NONE, NONE) // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3D, IS_NONE, NONE) // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+IF_DEF(DV_3DI, IS_NONE, NONE) // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+IF_DEF(DV_3F, IS_NONE, NONE) // DV_3F ...........mmmmm ......nnnnnddddd Qd Sn Vm (Qd used as both source and destination)
+IF_DEF(DV_3G, IS_NONE, NONE) // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
+IF_DEF(DV_3H, IS_NONE, NONE) // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (addhn{2}, raddhn{2}, rsubhn{2}, pmull{2}, smlal{2}, subhn{2}, umlal{2} vector)
+IF_DEF(DV_3HI, IS_NONE, NONE) // DV_3HI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (smlal{2}, smlsl{2}, smull{2}, umlal{2}, umlsl{2}, umull{2} vector by elem)
+
+IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar)
IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................
IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16
// ld4r {Vt-Vt4},[Xn],Xm LS_3F 0Q001101111mmmmm 1110ssnnnnnttttt 0DE0 E000 post-indexed by a register
// ld4r {Vt-Vt4},[Xn],#8 LS_2E 0Q00110111111111 1110ssnnnnnttttt 0DFF E000 post-indexed by an immediate
+INST3(smull, "smull", 0, 0, IF_EN3K, 0x9B207C00, 0x0E20C000, 0x0F00A000)
+ // C6.2.243 SMULL
+ // C7.2.272 SMULL, SMULL2 (by element)
+ // C7.2.273 SMULL, SMULL2 (vector)
+ // smull Rd,Rn,Rm DR_3A 10011011001mmmmm 011111nnnnnddddd 9B20 7C00
+ // smull Vd,Vn,Vm DV_3H 0000111000100000 1100000000000000 0E20 C000 Vd,Vn,Vm (vector)
+ // smull Vd,Vn,Vm[] DV_3HI 00001111XXLMmmmm 1010H0nnnnnddddd 0F00 A000 Vd,Vn,Vm[] (vector by elem)
+
+INST3(umull, "umull", 0, 0, IF_EN3K, 0x9BA07C00, 0x2E20C000, 0x2F00A000)
+ // C6.2.340 UMULL
+ // C7.2.362 UMULL, UMULL2 (by element)
+ // C7.2.363 UMULL, UMULL2 (vector)
+ // umull Rd,Rn,Rm DR_3A 10011011101mmmmm 011111nnnnnddddd 9BA0 7C00
+ // umull Vd,Vn,Vm DV_3H 00101110XX1mmmmm 110000nnnnnddddd 2E20 C000 Vd,Vn,Vm (vector)
+ // umull Vd,Vn,Vm[] DV_3HI 00101111XXLMmmmm 1010H0nnnnnddddd 2F00 A000 Vd,Vn,Vm[] (vector by elem)
+
// enum name FP LD/ST DR_2E DR_2F
INST2(negs, "negs", 0, 0, IF_EN2A, 0x6B0003E0, 0x6B0003E0)
// negs Rd,Rm DR_2E X1101011000mmmmm 00000011111ddddd 6B00 03E0
// mls Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100101nnnnnddddd 2E20 9400 Vd,Vn,Vm (vector)
// mls Vd,Vn,Vm[] DV_3AI 0Q101111XXLMmmmm 0100H0nnnnnddddd 2F00 4000 Vd,Vn,Vm[] (vector by elem)
+INST2(smlal, "smlal", 0, 0, IF_EN2R, 0x0E208000, 0x0F002000)
+ // C7.2.267 SMLAL, SMLAL2 (by element)
+ // C7.2.268 SMLAL, SMLAL2 (vector)
+ // smlal Vd,Vn,Vm DV_3H 00001110XX1mmmmm 100000nnnnnddddd 0E20 8000 Vd,Vn,Vm (vector)
+ // smlal Vd,Vn,Vm[] DV_3HI 00001111XXLMmmmm 0010H0nnnnnddddd 0F00 2000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(smlal2, "smlal2", 0, 0, IF_EN2R, 0x4E208000, 0x4F002000)
+ // C7.2.267 SMLAL, SMLAL2 (by element)
+ // C7.2.268 SMLAL, SMLAL2 (vector)
+ // smlal2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 100000nnnnnddddd 4E20 8000 Vd,Vn,Vm (vector)
+ // smlal2 Vd,Vn,Vm[] DV_3HI 01001111XXLMmmmm 0010H0nnnnnddddd 4F00 2000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(smlsl, "smlsl", 0, 0, IF_EN2R, 0x0E20A000, 0x0F006000)
+ // C7.2.269 SMLSL, SMLSL2 (by element)
+ // C7.2.270 SMLSL, SMLSL2 (vector)
+ // smlsl Vd,Vn,Vm DV_3H 00001110XX1mmmmm 101000nnnnnddddd 0E20 A000 Vd,Vn,Vm (vector)
+ // smlsl Vd,Vn,Vm[] DV_3HI 00001111XXLMmmmm 0110H0nnnnnddddd 0F00 6000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(smlsl2, "smlsl2", 0, 0, IF_EN2R, 0x4E20A000, 0x4F006000)
+ // C7.2.269 SMLSL, SMLSL2 (by element)
+ // C7.2.270 SMLSL, SMLSL2 (vector)
+ // smlsl2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 101000nnnnnddddd 4E20 A000 Vd,Vn,Vm (vector)
+ // smlsl2 Vd,Vn,Vm[] DV_3HI 01001111XXLMmmmm 0110H0nnnnnddddd 4F00 6000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(smull2, "smull2", 0, 0, IF_EN2R, 0x4E20C000, 0x4F00A000)
+ // C7.2.272 SMULL, SMULL2 (by element)
+ // C7.2.273 SMULL, SMULL2 (vector)
+ // smull2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 110000nnnnnddddd 4E20 C000 Vd,Vn,Vm (vector)
+ // smull2 Vd,Vn,Vm[] DV_3HI 01001111XXLMmmmm 1010H0nnnnnddddd 4F00 A000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(umlal, "umlal", 0, 0, IF_EN2R, 0x2E208000, 0x2F002000)
+ // C7.2.357 UMLAL, UMLAL2 (by element)
+ // C7.2.358 UMLAL, UMLAL2 (vector)
+ // umlal Vd,Vn,Vm DV_3H 00101110XX1mmmmm 100000nnnnnddddd 2E20 8000 Vd,Vn,Vm (vector)
+ // umlal Vd,Vn,Vm[] DV_3HI 00101111XXLMmmmm 0010H0nnnnnddddd 2F00 2000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(umlal2, "umlal2", 0, 0, IF_EN2R, 0x6E208000, 0x6F002000)
+ // C7.2.357 UMLAL, UMLAL2 (by element)
+ // C7.2.358 UMLAL, UMLAL2 (vector)
+ // umlal2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 100000nnnnnddddd 6E20 8000 Vd,Vn,Vm (vector)
+ // umlal2 Vd,Vn,Vm[] DV_3HI 01101111XXLMmmmm 0010H0nnnnnddddd 6F00 2000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(umlsl, "umlsl", 0, 0, IF_EN2R, 0x2E20A000, 0x2F006000)
+ // C7.2.359 UMLSL, UMLSL2 (by element)
+ // C7.2.360 UMLSL, UMLSL2 (vector)
+ // umlsl Vd,Vn,Vm DV_3H 00101110XX1mmmmm 101000nnnnnddddd 2E20 A000 Vd,Vn,Vm (vector)
+ // umlsl Vd,Vn,Vm[] DV_3HI 00101111XXLMmmmm 0110H0nnnnnddddd 2F00 6000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(umlsl2, "umlsl2", 0, 0, IF_EN2R, 0x6E20A000, 0x6F006000)
+ // C7.2.359 UMLSL, UMLSL2 (by element)
+ // C7.2.360 UMLSL, UMLSL2 (vector)
+ // umlsl2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 101000nnnnnddddd 6E20 A000 Vd,Vn,Vm (vector)
+ // umlsl2 Vd,Vn,Vm[] DV_3HI 01101111XXLMmmmm 0110H0nnnnnddddd 6F00 6000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(umull2, "umull2", 0, 0, IF_EN2R, 0x6E20C000, 0x6F00A000)
+ // C7.2.362 UMULL, UMULL2 (by element)
+ // C7.2.363 UMULL, UMULL2 (vector)
+ // umull2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 110000nnnnnddddd 6E20 C000 Vd,Vn,Vm (vector)
+ // umull2 Vd,Vn,Vm[] DV_3HI 01101111XXLMmmmm 1010H0nnnnnddddd 6F00 A000 Vd,Vn,Vm[] (vector by elem)
+
// enum name FP LD/ST DV_2N DV_2O
INST2(sshr, "sshr", 0, 0, IF_EN2N, 0x5F000400, 0x0F000400)
// sshr Vd,Vn,imm DV_2N 010111110iiiiiii 000001nnnnnddddd 5F00 0400 Vd Vn imm (shift - scalar)
// cmtst Vd,Vn,Vm DV_3E 01011110111mmmmm 100011nnnnnddddd 5EE0 8C00 Vd,Vn,Vm (scalar)
// cmtst Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100011nnnnnddddd 0E20 8C00 Vd,Vn,Vm (vector)
+INST2(sqadd, "sqadd", 0, 0, IF_EN2O, 0x5E200C00, 0x0E200C00)
+ // C7.2.275 SQADD
+ // sqadd Vd,Vn,Vm DV_3E 01011110XX1mmmmm 000011nnnnnddddd 5E20 0C00 Vd,Vn,Vm (scalar)
+ // sqadd Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 000011nnnnnddddd 0E20 0C00 Vd,Vn,Vm (vector)
+
+INST2(sqsub, "sqsub", 0, 0, IF_EN2O, 0x5E202C00, 0x0E202C00)
+ // C7.2.299 SQSUB
+ // sqsub Vd,Vn,Vm DV_3E 01011110XX1mmmmm 001011nnnnnddddd 5E20 2C00 Vd,Vn,Vm (scalar)
+ // sqsub Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 001011nnnnnddddd 0E20 2C00 Vd,Vn,Vm (vector)
+
+INST2(uqadd, "uqadd", 0, 0, IF_EN2O, 0x7E200C00, 0x2E200C00)
+ // C7.2.364 UQADD
+ // uqadd Vd,Vn,Vm DV_3E 01111110XX1mmmmm 000011nnnnnddddd 7E20 0C00 Vd,Vn,Vm (scalar)
+ // uqadd Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 000011nnnnnddddd 2E20 0C00 Vd,Vn,Vm (vector)
+
+INST2(uqsub, "uqsub", 0, 0, IF_EN2O, 0x7E202C00, 0x2E202C00)
+ // C7.2.370 UQSUB
+ // uqsub Vd,Vn,Vm DV_3E 01111110XX1mmmmm 001011nnnnnddddd 7E20 2C00 Vd,Vn,Vm (scalar)
+ // uqsub Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 001011nnnnnddddd 2E20 2C00 Vd,Vn,Vm (vector)
+
// enum name FP LD/ST DV_2Q DV_3B
INST2(faddp, "faddp", 0, 0, IF_EN2P, 0x7E30D800, 0x2E20D400)
// faddp Vd,Vn DV_2Q 011111100X110000 110110nnnnnddddd 7E30 D800 Vd,Vn (scalar)
INST1(msub, "msub", 0, 0, IF_DR_4A, 0x1B008000)
// msub Rd,Rn,Rm,Ra DR_4A X0011011000mmmmm 1aaaaannnnnddddd 1B00 8000
-INST1(smull, "smull", 0, 0, IF_DR_3A, 0x9B207C00)
- // smull Rd,Rn,Rm DR_3A 10011011001mmmmm 011111nnnnnddddd 9B20 7C00
-
INST1(smaddl, "smaddl", 0, 0, IF_DR_4A, 0x9B200000)
// smaddl Rd,Rn,Rm,Ra DR_4A 10011011001mmmmm 0aaaaannnnnddddd 9B20 0000
INST1(smulh, "smulh", 0, 0, IF_DR_3A, 0x9B407C00)
// smulh Rd,Rn,Rm DR_3A 10011011010mmmmm 011111nnnnnddddd 9B40 7C00
-INST1(umull, "umull", 0, 0, IF_DR_3A, 0x9BA07C00)
- // umull Rd,Rn,Rm DR_3A 10011011101mmmmm 011111nnnnnddddd 9BA0 7C00
-
INST1(umaddl, "umaddl", 0, 0, IF_DR_4A, 0x9BA00000)
// umaddl Rd,Rn,Rm,Ra DR_4A 10011011101mmmmm 0aaaaannnnnddddd 9BA0 0000
// C7.2.139 FRECPX
// frecpx Vd,Vn DV_2G 010111101X100001 111110nnnnnddddd 5EA1 F800 Vd,Vn (scalar)
+// enum name FP LD/ST DV_3H
+INST1(addhn, "addhn", 0, 0, IF_DV_3H, 0x0E204000)
+ // C7.2.3 ADDHN, ADDHN2
+ // addhn Vd,Vn,Vm DV_3H 00001110XX1mmmmm 010000nnnnnddddd 0E20 4000 Vd,Vn,Vm (vector)
+
+INST1(addhn2, "addhn2", 0, 0, IF_DV_3H, 0x4E204000)
+ // C7.2.3 ADDHN, ADDHN2
+ // addhn2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 010000nnnnnddddd 4E20 4000 Vd,Vn,Vm (vector)
+
+INST1(pmull, "pmull", 0, 0, IF_DV_3H, 0x0E20E000)
+ // C7.2.208 PMULL, PMULL2
+ // pmull Vd,Vn,Vm DV_3H 00001110XX1mmmmm 111000nnnnnddddd 0E20 E000 Vd,Vn,Vm (vector)
+
+INST1(pmull2, "pmull2", 0, 0, IF_DV_3H, 0x4E20E000)
+ // C7.2.208 PMULL, PMULL2
+ // pmull2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 111000nnnnnddddd 4E20 E000 Vd,Vn,Vm (vector)
+
+INST1(raddhn, "raddhn", 0, 0, IF_DV_3H, 0x2E204000)
+ // C7.2.209 RADDHN, RADDHN2
+ // raddhn Vd,Vn,Vm DV_3H 00101110XX1mmmmm 010000nnnnnddddd 2E20 4000 Vd,Vn,Vm (vector)
+
+INST1(raddhn2, "raddhn2", 0, 0, IF_DV_3H, 0x6E204000)
+ // C7.2.209 RADDHN, RADDHN2
+ // raddhn2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 010000nnnnnddddd 6E20 4000 Vd,Vn,Vm (vector)
+
+INST1(rsubhn, "rsubhn", 0, 0, IF_DV_3H, 0x2E206000)
+ // C7.2.216 RSUBHN, RSUBHN2
+ // rsubhn Vd,Vn,Vm DV_3H 00101110XX1mmmmm 011000nnnnnddddd 2E20 6000 Vd,Vn,Vm (vector)
+
+INST1(rsubhn2, "rsubhn2", 0, 0, IF_DV_3H, 0x6E206000)
+ // C7.2.216 RSUBHN, RSUBHN2
+ // rsubhn2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 011000nnnnnddddd 6E20 6000 Vd,Vn,Vm (vector)
+
+// Signed vector widening/narrowing, pairwise, and halving arithmetic; the matching
+// unsigned u* forms appear further below.
+INST1(sabal, "sabal", 0, 0, IF_DV_3H, 0x0E205000)
+ // C7.2.218 SABAL, SABAL2
+ // sabal Vd,Vn,Vm DV_3H 00001110XX1mmmmm 010100nnnnnddddd 0E20 5000 Vd,Vn,Vm (vector)
+
+INST1(sabal2, "sabal2", 0, 0, IF_DV_3H, 0x4E205000)
+ // C7.2.218 SABAL, SABAL2
+ // sabal2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 010100nnnnnddddd 4E20 5000 Vd,Vn,Vm (vector)
+
+INST1(sabdl, "sabdl", 0, 0, IF_DV_3H, 0x0E207000)
+ // C7.2.220 SABDL, SABDL2
+ // sabdl Vd,Vn,Vm DV_3H 00001110XX1mmmmm 011100nnnnnddddd 0E20 7000 Vd,Vn,Vm (vector)
+
+INST1(sabdl2, "sabdl2", 0, 0, IF_DV_3H, 0x4E207000)
+ // C7.2.220 SABDL, SABDL2
+ // sabdl2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 011100nnnnnddddd 4E20 7000 Vd,Vn,Vm (vector)
+
+INST1(sadalp, "sadalp", 0, 0, IF_DV_2T, 0x0E206800)
+ // C7.2.221 SADALP
+ // sadalp Vd,Vn DV_2T 0Q001110XX100000 011010nnnnnddddd 0E20 6800 Vd,Vn (vector)
+
+INST1(saddl, "saddl", 0, 0, IF_DV_3H, 0x0E200000)
+ // C7.2.222 SADDL, SADDL2
+ // saddl Vd,Vn,Vm DV_3H 00001110XX1mmmmm 000000nnnnnddddd 0E20 0000 Vd,Vn,Vm (vector)
+
+INST1(saddl2, "saddl2", 0, 0, IF_DV_3H, 0x4E200000)
+ // C7.2.222 SADDL, SADDL2
+ // saddl2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 000000nnnnnddddd 4E20 0000 Vd,Vn,Vm (vector)
+
+INST1(saddlp, "saddlp", 0, 0, IF_DV_2T, 0x0E202800)
+ // C7.2.223 SADDLP
+ // saddlp Vd,Vn DV_2T 0Q001110XX100000 001010nnnnnddddd 0E20 2800 Vd,Vn (vector)
+
+INST1(saddw, "saddw", 0, 0, IF_DV_3H, 0x0E201000)
+ // C7.2.225 SADDW, SADDW2
+ // saddw Vd,Vn,Vm DV_3H 00001110XX1mmmmm 000100nnnnnddddd 0E20 1000 Vd,Vn,Vm (vector)
+
+INST1(saddw2, "saddw2", 0, 0, IF_DV_3H, 0x4E201000)
+ // C7.2.225 SADDW, SADDW2
+ // saddw2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 000100nnnnnddddd 4E20 1000 Vd,Vn,Vm (vector)
+
+INST1(shadd, "shadd", 0, 0, IF_DV_3A, 0x0E200400)
+ // C7.2.246 SHADD
+ // shadd Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 000001nnnnnddddd 0E20 0400 Vd,Vn,Vm (vector)
+
+INST1(shsub, "shsub", 0, 0, IF_DV_3A, 0x0E202400)
+ // C7.2.250 SHSUB
+ // shsub Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 001001nnnnnddddd 0E20 2400 Vd,Vn,Vm (vector)
+
+INST1(srhadd, "srhadd", 0, 0, IF_DV_3A, 0x0E201400)
+ // C7.2.302 SRHADD
+ // srhadd Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 000101nnnnnddddd 0E20 1400 Vd,Vn,Vm (vector)
+
+INST1(ssubl, "ssubl", 0, 0, IF_DV_3H, 0x0E202000)
+ // C7.2.311 SSUBL, SSUBL2
+ // ssubl Vd,Vn,Vm DV_3H 00001110XX1mmmmm 001000nnnnnddddd 0E20 2000 Vd,Vn,Vm (vector)
+
+INST1(ssubl2, "ssubl2", 0, 0, IF_DV_3H, 0x4E202000)
+ // C7.2.311 SSUBL, SSUBL2
+ // ssubl2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 001000nnnnnddddd 4E20 2000 Vd,Vn,Vm (vector)
+
+INST1(ssubw, "ssubw", 0, 0, IF_DV_3H, 0x0E203000)
+ // C7.2.312 SSUBW, SSUBW2
+ // ssubw Vd,Vn,Vm DV_3H 00001110XX1mmmmm 001100nnnnnddddd 0E20 3000 Vd,Vn,Vm (vector)
+
+INST1(ssubw2, "ssubw2", 0, 0, IF_DV_3H, 0x4E203000)
+ // C7.2.312 SSUBW, SSUBW2
+ // ssubw2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 001100nnnnnddddd 4E20 3000 Vd,Vn,Vm (vector)
+
+INST1(subhn, "subhn", 0, 0, IF_DV_3H, 0x0E206000)
+ // C7.2.327 SUBHN, SUBHN2
+ // subhn Vd,Vn,Vm DV_3H 00001110XX1mmmmm 011000nnnnnddddd 0E20 6000 Vd,Vn,Vm (vector)
+
+INST1(subhn2, "subhn2", 0, 0, IF_DV_3H, 0x4E206000)
+ // C7.2.327 SUBHN, SUBHN2
+ // subhn2 Vd,Vn,Vm DV_3H 01001110XX1mmmmm 011000nnnnnddddd 4E20 6000 Vd,Vn,Vm (vector)
+
+// Unsigned counterparts (u*) of the signed vector forms above.
+INST1(uabal, "uabal", 0, 0, IF_DV_3H, 0x2E205000)
+ // C7.2.335 UABAL, UABAL2
+ // uabal Vd,Vn,Vm DV_3H 00101110XX1mmmmm 010100nnnnnddddd 2E20 5000 Vd,Vn,Vm (vector)
+
+INST1(uabal2, "uabal2", 0, 0, IF_DV_3H, 0x6E205000)
+ // C7.2.335 UABAL, UABAL2
+ // uabal2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 010100nnnnnddddd 6E20 5000 Vd,Vn,Vm (vector)
+
+INST1(uabdl, "uabdl", 0, 0, IF_DV_3H, 0x2E207000)
+ // C7.2.337 UABDL, UABDL2
+ // uabdl Vd,Vn,Vm DV_3H 00101110XX1mmmmm 011100nnnnnddddd 2E20 7000 Vd,Vn,Vm (vector)
+
+INST1(uabdl2, "uabdl2", 0, 0, IF_DV_3H, 0x6E207000)
+ // C7.2.337 UABDL, UABDL2
+ // uabdl2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 011100nnnnnddddd 6E20 7000 Vd,Vn,Vm (vector)
+
+INST1(uadalp, "uadalp", 0, 0, IF_DV_2T, 0x2E206800)
+ // C7.2.338 UADALP
+ // uadalp Vd,Vn DV_2T 0Q101110XX100000 011010nnnnnddddd 2E20 6800 Vd,Vn (vector)
+
+INST1(uaddl, "uaddl", 0, 0, IF_DV_3H, 0x2E200000)
+ // C7.2.339 UADDL, UADDL2
+ // uaddl Vd,Vn,Vm DV_3H 00101110XX1mmmmm 000000nnnnnddddd 2E20 0000 Vd,Vn,Vm (vector)
+
+INST1(uaddl2, "uaddl2", 0, 0, IF_DV_3H, 0x6E200000)
+ // C7.2.339 UADDL, UADDL2
+ // uaddl2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 000000nnnnnddddd 6E20 0000 Vd,Vn,Vm (vector)
+
+INST1(uaddlp, "uaddlp", 0, 0, IF_DV_2T, 0x2E202800)
+ // C7.2.340 UADDLP
+ // uaddlp Vd,Vn DV_2T 0Q101110XX100000 001010nnnnnddddd 2E20 2800 Vd,Vn (vector)
+
+INST1(uaddw, "uaddw", 0, 0, IF_DV_3H, 0x2E201000)
+ // C7.2.342 UADDW, UADDW2
+ // uaddw Vd,Vn,Vm DV_3H 00101110XX1mmmmm 000100nnnnnddddd 2E20 1000 Vd,Vn,Vm (vector)
+
+INST1(uaddw2, "uaddw2", 0, 0, IF_DV_3H, 0x6E201000)
+ // C7.2.342 UADDW, UADDW2
+ // uaddw2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 000100nnnnnddddd 6E20 1000 Vd,Vn,Vm (vector)
+
+INST1(uhadd, "uhadd", 0, 0, IF_DV_3A, 0x2E200400)
+ // C7.2.349 UHADD
+ // uhadd Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 000001nnnnnddddd 2E20 0400 Vd,Vn,Vm (vector)
+
+INST1(uhsub, "uhsub", 0, 0, IF_DV_3A, 0x2E202400)
+ // C7.2.350 UHSUB
+ // uhsub Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 001001nnnnnddddd 2E20 2400 Vd,Vn,Vm (vector)
+
+INST1(urhadd, "urhadd", 0, 0, IF_DV_3A, 0x2E201400)
+ // C7.2.373 URHADD
+ // urhadd Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 000101nnnnnddddd 2E20 1400 Vd,Vn,Vm (vector)
+
+INST1(usubl, "usubl", 0, 0, IF_DV_3H, 0x2E202000)
+ // C7.2.383 USUBL, USUBL2
+ // usubl Vd,Vn,Vm DV_3H 00101110XX1mmmmm 001000nnnnnddddd 2E20 2000 Vd,Vn,Vm (vector)
+
+INST1(usubl2, "usubl2", 0, 0, IF_DV_3H, 0x6E202000)
+ // C7.2.383 USUBL, USUBL2
+ // usubl2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 001000nnnnnddddd 6E20 2000 Vd,Vn,Vm (vector)
+
+INST1(usubw, "usubw", 0, 0, IF_DV_3H, 0x2E203000)
+ // C7.2.384 USUBW, USUBW2
+ // usubw Vd,Vn,Vm DV_3H 00101110XX1mmmmm 001100nnnnnddddd 2E20 3000 Vd,Vn,Vm (vector)
+
+INST1(usubw2, "usubw2", 0, 0, IF_DV_3H, 0x6E203000)
+ // C7.2.384 USUBW, USUBW2
+ // usubw2 Vd,Vn,Vm DV_3H 01101110XX1mmmmm 001100nnnnnddddd 6E20 3000 Vd,Vn,Vm (vector)
+
INST1(shll, "shll", 0, 0, IF_DV_2M, 0x2F00A400)
// NOTE(review): the hex encoding 0x2F00A400 does not match the bit pattern documented
// below (2E21 3800); 0x2F00A400 is the ushll shift-by-immediate base encoding.
// TODO: confirm against Arm ARM C7.2 SHLL and correct whichever side is wrong.
// shll Vd,Vn,imm DV_2M 0Q101110XX100001 001110nnnnnddddd 2E21 3800 Vd,Vn, {8/16/32}