From 91c75504d6e545311ca97699419787ec1bccec1f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 23 Dec 2017 20:38:29 -0800 Subject: [PATCH] Adding support for the SSE Reciprocal, ReciprocalSqrt, and Sqrt intrinsics --- src/jit/emitxarch.cpp | 7 +++++++ src/jit/emitxarch.h | 1 + src/jit/hwintrinsiccodegenxarch.cpp | 18 ++++++++++++++++++ src/jit/hwintrinsicxarch.cpp | 9 +++++++++ src/jit/instrsxarch.h | 6 ++++++ 5 files changed, 41 insertions(+) diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 7282baa..e2fb1da 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -196,8 +196,10 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) case INS_movlpd: case INS_movlps: case INS_movss: + case INS_rcpss: case INS_roundsd: case INS_roundss: + case INS_rsqrtss: case INS_sqrtsd: case INS_sqrtss: return IsAVXInstruction(ins); @@ -5077,6 +5079,11 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu } #if FEATURE_HW_INTRINSICS +void emitter::emitIns_SIMD_R_R(instruction ins, regNumber reg, regNumber reg1, var_types simdtype) +{ + emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg1); +} + void emitter::emitIns_SIMD_R_R_A( instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype) { diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index e8097a4..5bcba76 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -441,6 +441,7 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp); #if FEATURE_HW_INTRINSICS +void emitIns_SIMD_R_R(instruction ins, regNumber reg, regNumber reg1, var_types simdtype); void emitIns_SIMD_R_R_A(instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype); void emitIns_SIMD_R_R_C( instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE 
fldHnd, int offs, var_types simdtype); diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 64769b9..2b43608 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -320,6 +320,24 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R_R(INS_orps, targetReg, op1Reg, op2Reg, TYP_SIMD16); break; + case NI_SSE_Reciprocal: + assert(baseType == TYP_FLOAT); + assert(op2 == nullptr); + emit->emitIns_SIMD_R_R(INS_rcpps, targetReg, op1Reg, TYP_SIMD16); + break; + + case NI_SSE_ReciprocalSqrt: + assert(baseType == TYP_FLOAT); + assert(op2 == nullptr); + emit->emitIns_SIMD_R_R(INS_rsqrtps, targetReg, op1Reg, TYP_SIMD16); + break; + + case NI_SSE_Sqrt: + assert(baseType == TYP_FLOAT); + assert(op2 == nullptr); + emit->emitIns_SIMD_R_R(INS_sqrtps, targetReg, op1Reg, TYP_SIMD16); + break; + case NI_SSE_Subtract: assert(baseType == TYP_FLOAT); op2Reg = op2->gtRegNum; diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index fd1c41a..61b3d8f 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -485,6 +485,15 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16); break; + case NI_SSE_Reciprocal: + case NI_SSE_ReciprocalSqrt: + case NI_SSE_Sqrt: + assert(sig->numArgs == 1); + assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT); + op1 = impSIMDPopStack(TYP_SIMD16); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16); + break; + default: JITDUMP("Not implemented hardware intrinsic"); break; diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 4110b2a..ee5f5c0 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -252,6 +252,12 @@ INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) / INST3( orpd, "orpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x56)) // Or 
packed doubles INST3( haddpd, "haddpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7C)) // Horizontal add packed doubles +// SSE 2 approx arith +INST3( rcpps, "rcpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x53)) // Reciprocal of packed singles +INST3( rcpss, "rcpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x53)) // Reciprocal of scalar single +INST3( rsqrtps, "rsqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x52)) // Reciprocal Sqrt of packed singles +INST3( rsqrtss, "rsqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x52)) // Reciprocal Sqrt of scalar single + // SSE2 conversions INST3( cvtpi2ps, "cvtpi2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2A)) // cvt packed DWORDs to singles INST3( cvtsi2ss, "cvtsi2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2A)) // cvt DWORD to scalar single -- 2.7.4