uint32_t hwcaps = 0;
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
+ GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
set_feature(env, ARM_FEATURE_V8_AES);
set_feature(env, ARM_FEATURE_V8_SHA1);
set_feature(env, ARM_FEATURE_V8_SHA256);
+ set_feature(env, ARM_FEATURE_V8_PMULL);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA);
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
};
static inline int arm_feature(CPUARMState *env, int feature)
return result;
}
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- for (bitnum = 0; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 << bitnum;
- }
- }
- return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- /* bit 0 of op1 can't influence the high 64 bits at all */
- for (bitnum = 1; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 >> (64 - bitnum);
- }
- }
- return res;
-}
-
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
env->vfp.regs[rm] = make_float64(m0);
env->vfp.regs[rd] = make_float64(d0);
}
+
+/* Helper function for 64 bit polynomial multiply case:
+ * perform PolynomialMult(op1, op2) and return either the top or
+ * bottom half of the 128 bit result.
+ */
+uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ for (bitnum = 0; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 << bitnum;
+ }
+ }
+ return res;
+}
+uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ /* bit 0 of op1 can't influence the high 64 bits at all */
+ for (bitnum = 1; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 >> (64 - bitnum);
+ }
+ }
+ return res;
+}
{0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
{0, 0, 0, 1}, /* VQDMULL */
- {0, 0, 0, 15}, /* Polynomial VMULL */
+ {0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};
return 1;
}
+ /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+ * outside the loop below as it only performs a single pass.
+ */
+ if (op == 14 && size == 2) {
+ TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
+
+ if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+ return 1;
+ }
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rm = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ neon_load_reg64(tcg_rn, rn);
+ neon_load_reg64(tcg_rm, rm);
+ gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
+ neon_store_reg64(tcg_rd, rd);
+ gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
+ neon_store_reg64(tcg_rd, rd + 1);
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rm);
+ tcg_temp_free_i64(tcg_rd);
+ return 0;
+ }
+
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */