VEC_OP(simd_w<vectorNum>, simd_w<vectorNum>, scalar_w, AND_S16, _mm_and_ps, ID, ID, ID);
VEC_OP(simd_w<vectorNum>, simd_w<vectorNum>, scalar_w, OR_S16, _mm_or_ps, ID, ID, ID);
VEC_OP(simd_w<vectorNum>, simd_w<vectorNum>, scalar_w, XOR_S16, _mm_xor_ps, ID, ID, ID);
+VEC_OP(simd_m<vectorNum>, simd_m<vectorNum>, scalar_m, AND_M, _mm_and_ps, ID, ID, ID);
#undef VEC_OP
/* Vector integer operations that we can get by switching argument order */
/*! Update the UIP vector according for the lanes alive in mask */
template <uint32_t vectorNum>
-INLINE void updateUIP(simd_w<vectorNum> &uipVec, const simd_m<vectorNum> mask, uint16_t uip) {
+void updateUIP(simd_w<vectorNum> &uipVec, const simd_m<vectorNum> mask, uint16_t uip) {
union { float f; uint32_t u; } x;
x.u = uip;
__m128 v = _mm_load1_ps(&x.f);
uipVec.m[i] = _mm_blendv_ps(uipVec.m[i], v, mask.m[i]);
}
+/*! Conditionally overwrite lanes of the UIP vector with the scalar uip.
+ *  The two selectors mask and cond are ANDed together and the result is used
+ *  as the _mm_blendv_ps selector, so a lane receives uip only when it is
+ *  selected by BOTH of them; every other lane keeps its previous value.
+ *  NOTE(review): _mm_blendv_ps only looks at the sign bit of each selector
+ *  element; presumably simd_m lanes are all-ones / all-zeros - confirm.
+ *  \param uipVec per-lane UIP values, updated in place
+ *  \param mask   first lane selector (the SIM_*_BRA_C macros pass EMASK)
+ *  \param cond   second lane selector (the SIM_*_BRA_C macros pass COND)
+ *  \param uip    16-bit value written to the selected lanes
+ */
+template <uint32_t vectorNum>
+void updateUIPC(simd_w<vectorNum> &uipVec,
+ const simd_m<vectorNum> mask,
+ const simd_m<vectorNum> cond,
+ uint16_t uip) {
+ union { float f; uint32_t u; } x; // view the integer bits as a float for the ps load
+ x.u = uip; // widen the 16-bit value to 32 bits
+ __m128 v = _mm_load1_ps(&x.f); // same 32-bit pattern in all four elements
+ for (uint32_t i = 0; i < vectorNum; ++i)
+ uipVec.m[i] = _mm_blendv_ps(uipVec.m[i], v, _mm_and_ps(cond.m[i], mask.m[i]));
+}
+
/*! Update the execution mask based on block IP and UIP values */
template <uint32_t vectorNum>
-INLINE void updateMask(simd_m<vectorNum> &mask, const simd_w<vectorNum> &uipVec, uint16_t ip) {
+void updateMask(simd_m<vectorNum> &mask, const simd_w<vectorNum> &uipVec, uint16_t ip) {
const simd_w<vectorNum> ipv(ip);
LE_U16(mask, uipVec, ipv);
}
/*! Based on the condition jump to block JIP */
#define SIM_FWD_BRA_C(UIPVEC, EMASK, COND, JIP, UIP) \
do { \
- updateUIP(UIPVEC, COND, UIP); \
+ updateUIPC(UIPVEC, EMASK, COND, UIP); \
typeof(COND) jumpCond; \
scalar_w jipScalar(uint16_t(JIP)); \
LT_U16(jumpCond, UIPVEC, uint16_t(JIP)); \
} while (0)
/*! Conditional backward jump is taken if the condition is non-null */
-#define SIM_BWD_BRA_C(UIPVEC, COND, JIP) \
+#define SIM_BWD_BRA_C(UIPVEC, EMASK, COND, JIP) \
do { \
- updateUIP(UIPVEC, COND, JIP); \
- if (mask(COND) != 0) goto label##JIP; \
+ updateUIPC(UIPVEC, EMASK, COND, JIP); \
+ typeof(COND) JUMP_MASK; \
+ AND_M(JUMP_MASK, COND, EMASK); \
+ if (mask(JUMP_MASK) != 0) goto label##JIP; \
} while (0)
/*! JOIN: reactivates lanes */
"VEC_OP(simd_w<vectorNum>, simd_w<vectorNum>, scalar_w, AND_S16, _mm_and_ps, ID, ID, ID);\n"
"VEC_OP(simd_w<vectorNum>, simd_w<vectorNum>, scalar_w, OR_S16, _mm_or_ps, ID, ID, ID);\n"
"VEC_OP(simd_w<vectorNum>, simd_w<vectorNum>, scalar_w, XOR_S16, _mm_xor_ps, ID, ID, ID);\n"
+"VEC_OP(simd_m<vectorNum>, simd_m<vectorNum>, scalar_m, AND_M, _mm_and_ps, ID, ID, ID);\n"
"#undef VEC_OP\n"
"\n"
"/* Vector integer operations that we can get by switching argument order */\n"
"\n"
"/*! Update the UIP vector according for the lanes alive in mask */\n"
"template <uint32_t vectorNum>\n"
-"INLINE void updateUIP(simd_w<vectorNum> &uipVec, const simd_m<vectorNum> mask, uint16_t uip) {\n"
+"void updateUIP(simd_w<vectorNum> &uipVec, const simd_m<vectorNum> mask, uint16_t uip) {\n"
" union { float f; uint32_t u; } x;\n"
" x.u = uip;\n"
" __m128 v = _mm_load1_ps(&x.f);\n"
" uipVec.m[i] = _mm_blendv_ps(uipVec.m[i], v, mask.m[i]);\n"
"}\n"
"\n"
+"/*! Update the UIP vector according for the lanes alive in mask */\n"
+"template <uint32_t vectorNum>\n"
+"void updateUIPC(simd_w<vectorNum> &uipVec,\n"
+" const simd_m<vectorNum> mask,\n"
+" const simd_m<vectorNum> cond,\n"
+" uint16_t uip) {\n"
+" union { float f; uint32_t u; } x;\n"
+" x.u = uip;\n"
+" __m128 v = _mm_load1_ps(&x.f);\n"
+" for (uint32_t i = 0; i < vectorNum; ++i)\n"
+" uipVec.m[i] = _mm_blendv_ps(uipVec.m[i], v, _mm_and_ps(cond.m[i], mask.m[i]));\n"
+"}\n"
+"\n"
"/*! Update the execution mask based on block IP and UIP values */\n"
"template <uint32_t vectorNum>\n"
-"INLINE void updateMask(simd_m<vectorNum> &mask, const simd_w<vectorNum> &uipVec, uint16_t ip) {\n"
+"void updateMask(simd_m<vectorNum> &mask, const simd_w<vectorNum> &uipVec, uint16_t ip) {\n"
" const simd_w<vectorNum> ipv(ip);\n"
" LE_U16(mask, uipVec, ipv);\n"
"}\n"
"/*! Based on the condition jump to block JIP */\n"
"#define SIM_FWD_BRA_C(UIPVEC, EMASK, COND, JIP, UIP) \\\n"
" do { \\\n"
-" updateUIP(UIPVEC, COND, UIP); \\\n"
+" updateUIPC(UIPVEC, EMASK, COND, UIP); \\\n"
" typeof(COND) jumpCond; \\\n"
" scalar_w jipScalar(uint16_t(JIP)); \\\n"
" LT_U16(jumpCond, UIPVEC, uint16_t(JIP)); \\\n"
" } while (0)\n"
"\n"
"/*! Conditional backward jump is taken if the condition is non-null */\n"
-"#define SIM_BWD_BRA_C(UIPVEC, COND, JIP) \\\n"
+"#define SIM_BWD_BRA_C(UIPVEC, EMASK, COND, JIP) \\\n"
" do { \\\n"
-" updateUIP(UIPVEC, COND, JIP); \\\n"
-" if (mask(COND) != 0) goto label##JIP; \\\n"
+" updateUIPC(UIPVEC, EMASK, COND, JIP); \\\n"
+" typeof(COND) JUMP_MASK; \\\n"
+" AND_M(JUMP_MASK, COND, EMASK); \\\n"
+" if (mask(JUMP_MASK) != 0) goto label##JIP; \\\n"
" } while (0)\n"
"\n"
"/*! JOIN: reactivates lanes */\n"
#undef DECL_INSN
}
if (opcode == OP_LABEL) {
- const LabelInstruction labelInsn = cast<LabelInstruction>(insn);
+ const LabelInstruction &labelInsn = cast<LabelInstruction>(insn);
const LabelIndex index = labelInsn.getLabelIndex();
+ const bool byPassed = JIPs.contains(&labelInsn);
o << "\n";
- if (usedLabels.contains(index) == false) o << "// ";
+ if (byPassed == false && usedLabels.contains(index) == false) o << "// ";
o << "label" << index << ":\n";
- o << "SIM_JOIN(uip, emask, " << uint32_t(index) << ");\n";
+ if (byPassed == false)
+ o << "SIM_JOIN(uip, emask, " << uint32_t(index) << ");\n";
+ else {
+ //GBE_ASSERT(false);
+ const LabelIndex jip = JIPs.find(&labelInsn)->second;
+ o << "SIM_JOIN_JUMP(uip, emask, " <<
+ uint32_t(index) << ", " <<
+ uint32_t(jip) << ");\n";
+ }
return;
} else if (opcode == OP_BRA) {
// Get the label of the block
- const BranchInstruction bra = cast<BranchInstruction>(insn);
+ const BranchInstruction &bra = cast<BranchInstruction>(insn);
const BasicBlock *bb = insn.getParent();
const Instruction *label = bb->getFirstInstruction();
GBE_ASSERT(label->isMemberOf<LabelInstruction>() == true);
const LabelIndex srcIndex = cast<LabelInstruction>(label)->getLabelIndex();
- const LabelIndex dstIndex = bra.getLabelIndex();
+ const LabelIndex uip = bra.getLabelIndex();
const bool isPredicated = bra.isPredicated();
- if (uint32_t(dstIndex) > uint32_t(srcIndex)) { // FWD jump here
+ if (uint32_t(uip) > uint32_t(srcIndex)) { // FWD jump here
+ GBE_ASSERT(JIPs.contains(&bra) == true);
+ const LabelIndex jip = JIPs.find(&bra)->second;
if (isPredicated) {
const Register pred = bra.getPredicateIndex();
o << "SIM_FWD_BRA_C(uip, emask, " << "_" << pred
- << ", " << uint32_t(dstIndex) << ", " << uint32_t(dstIndex)
+ << ", " << uint32_t(jip) << ", " << uint32_t(uip)
<< ");\n";
} else {
o << "SIM_FWD_BRA(uip, emask, "
- << uint32_t(dstIndex) << ", " << uint32_t(dstIndex)
+ << uint32_t(jip) << ", " << uint32_t(uip)
<< ");\n";
}
} else { // BWD jump
if (isPredicated) {
const Register pred = bra.getPredicateIndex();
- o << "SIM_BWD_BRA_C(uip, _" << pred
- << ", " << uint32_t(dstIndex) << ");\n";
+ o << "SIM_BWD_BRA_C(uip, emask, _" << pred
+ << ", " << uint32_t(uip) << ");\n";
} else
- o << "SIM_BWD_BRA(uip, emask, " << uint32_t(dstIndex) << ");\n";
+ o << "SIM_BWD_BRA(uip, emask, " << uint32_t(uip) << ");\n";
}
return;
} else if (opcode == OP_RET) {
return cond ? src0 : src1;
}
+typedef unsigned int uint; // scalar unsigned alias next to the uintN vector typedefs below
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
-typedef int uint2 __attribute__((ext_vector_type(2)));
+typedef unsigned int uint2 __attribute__((ext_vector_type(2))); // unsigned elements, same as uint3/uint4
typedef unsigned uint3 __attribute__((ext_vector_type(3)));
typedef unsigned uint4 __attribute__((ext_vector_type(4)));
typedef bool bool2 __attribute__((ext_vector_type(2)));
" return cond ? src0 : src1;\n"
"}\n"
"\n"
+"typedef unsigned int uint;\n"
"typedef float float2 __attribute__((ext_vector_type(2)));\n"
"typedef float float3 __attribute__((ext_vector_type(3)));\n"
"typedef float float4 __attribute__((ext_vector_type(4)));\n"
"typedef int int2 __attribute__((ext_vector_type(2)));\n"
"typedef int int3 __attribute__((ext_vector_type(3)));\n"
"typedef int int4 __attribute__((ext_vector_type(4)));\n"
-"typedef int uint2 __attribute__((ext_vector_type(2)));\n"
+"typedef unsigned int uint2 __attribute__((ext_vector_type(2)));\n"
"typedef unsigned uint3 __attribute__((ext_vector_type(3)));\n"
"typedef unsigned uint4 __attribute__((ext_vector_type(4)));\n"
"typedef bool bool2 __attribute__((ext_vector_type(2)));\n"