From fdfb33d93a0b9baca2992745c419e7281d960311 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 28 Feb 2018 15:16:45 -0800 Subject: [PATCH] Implementing the SSE2 MaskMove intrinsic --- src/jit/hwintrinsiccodegenxarch.cpp | 12 ++++++++++++ src/jit/hwintrinsiclistxarch.h | 1 + src/jit/instrsxarch.h | 1 + src/jit/lsraxarch.cpp | 8 ++++++++ 4 files changed, 22 insertions(+) diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 24829ea..a05cc88 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -184,6 +184,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase); } } + else if (category == HW_Category_MemoryStore) + { + assert(intrinsicID == NI_SSE2_MaskMove); + assert(targetReg == REG_NA); + + // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI + if (op3Reg != REG_EDI) + { + emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg); + } + emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg); + } else { emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg); diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index cc4710a..49b5c6c 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -192,6 +192,7 @@ HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAligne HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_MaskMove, "MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_MaxScalar, "MaxScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index ae8adff..bed6bc8 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -214,6 +214,7 @@ INST3( movlhps, "movlhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCK INST3( movmskps, "movmskps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x50)) INST3( unpckhps, "unpckhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x15)) INST3( unpcklps, "unpcklps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x14)) +INST3( maskmovdqu, "maskmovdqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF7)) INST3( shufps, "shufps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC6)) INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC6)) diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index c2c1992..d56abed 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2362,6 +2362,14 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) useList.Last()->info.isTgtPref = true; break; + case NI_SSE2_MaskMove: + { + // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI + LocationInfoListNode* op3Info = useList.Begin()->Next()->Next(); + op3Info->info.setSrcCandidates(this, RBM_EDI); + break; + } + case NI_SSE41_BlendVariable: if (!compiler->canUseVexEncoding()) { -- 2.7.4