dEQP-VK.glsl.builtin.precision.mod.mediump.vec2
dEQP-VK.glsl.builtin.precision.mod.mediump.vec3
dEQP-VK.glsl.builtin.precision.mod.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.mediump.scalar
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec2
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec3
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.highp.scalar
+dEQP-VK.glsl.builtin.precision.frem.highp.vec2
+dEQP-VK.glsl.builtin.precision.frem.highp.vec3
+dEQP-VK.glsl.builtin.precision.frem.highp.vec4
dEQP-VK.glsl.builtin.precision.modf.mediump
dEQP-VK.glsl.builtin.precision.modf.highp
dEQP-VK.glsl.builtin.precision.min.mediump.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec3
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec4
dEQP-VK.glsl.builtin.precision_fp16_storage16b.modf.compute
dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec3
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec4
dEQP-VK.glsl.builtin.precision_fp16_storage32b.modf.compute
dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.vec2
//! Index of output parameter, or -1 if none of the parameters is output.
virtual int getOutParamIndex (void) const { return -1; }
+ virtual SpirVCaseT getSpirvCase (void) const { return SPIRV_CASETYPE_NONE; }
+
void printDefinition (ostream& os) const
{
doPrintDefinition(os);
class Comparison : public InfixOperator < T >
{
public:
- string getName(void) const { return "comparison"; }
- string getSymbol(void) const { return ""; }
+ string getName (void) const { return "comparison"; }
+ string getSymbol (void) const { return ""; }
+
+ SpirVCaseT getSpirvCase () const { return SPIRV_CASETYPE_COMPARE; }
- Interval doApply(const EvalContext& ctx,
- const Signature<int, float, float>::IArgs& iargs) const
+ Interval doApply (const EvalContext& ctx,
+ const typename Comparison<T>::IArgs& iargs) const
{
DE_UNREF(ctx);
if (iargs.a.hasNaN() || iargs.b.hasNaN())
return app<CLASS>(arg0, arg1); \
}
-#define DEFINE_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION) \
+#define DEFINE_CASED_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION, SPIRVCASE) \
class CLASS : public DerivedFunc<Signature<TRET, T0, T1> > /* NOLINT(CLASS) */ \
{ \
public: \
- string getName (void) const { return #NAME; } \
+ string getName (void) const { return #NAME; } \
+ \
+ SpirVCaseT getSpirvCase(void) const { return SPIRVCASE; } \
\
protected: \
ExprP<TRET> doExpand (ExpandContext&, const ArgExprs& args_) const \
}; \
DEFINE_CONSTRUCTOR2(CLASS, TRET, NAME, T0, T1)
+#define DEFINE_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION) \
+ DEFINE_CASED_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION, SPIRV_CASETYPE_NONE)
+
#define DEFINE_DERIVED_FLOAT2(CLASS, NAME, Arg0, Arg1, EXPANSION) \
DEFINE_DERIVED2(CLASS, float, NAME, float, Arg0, float, Arg1, EXPANSION)
#define DEFINE_DERIVED_FLOAT2_16BIT(CLASS, NAME, Arg0, Arg1, EXPANSION) \
DEFINE_DERIVED2(CLASS, deFloat16, NAME, deFloat16, Arg0, deFloat16, Arg1, EXPANSION)
+#define DEFINE_CASED_DERIVED_FLOAT2(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
+ DEFINE_CASED_DERIVED2(CLASS, float, NAME, float, Arg0, float, Arg1, EXPANSION, SPIRVCASE)
+
+#define DEFINE_CASED_DERIVED_FLOAT2_16BIT(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
+ DEFINE_CASED_DERIVED2(CLASS, deFloat16, NAME, deFloat16, Arg0, deFloat16, Arg1, EXPANSION, SPIRVCASE)
+
+#define DEFINE_CASED_DERIVED_DOUBLE2(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
+ DEFINE_CASED_DERIVED2(CLASS, double, NAME, double, Arg0, double, Arg1, EXPANSION, SPIRVCASE)
+
#define DEFINE_CONSTRUCTOR3(CLASS, TRET, NAME, T0, T1, T2) \
ExprP<TRET> NAME (const ExprP<T0>& arg0, const ExprP<T1>& arg1, const ExprP<T2>& arg2) \
{ \
typedef InverseSqrt< Signature<deFloat16, deFloat16> > InverseSqrt16Bit;
typedef InverseSqrt< Signature<float, float> > InverseSqrt32Bit;
-DEFINE_DERIVED_FLOAT1(Sqrt, sqrt, x, constant(1.0f) / app<InverseSqrt32Bit>(x));
+DEFINE_DERIVED_FLOAT1(Sqrt32Bit, sqrt, x, constant(1.0f) / app<InverseSqrt32Bit>(x));
DEFINE_DERIVED_FLOAT1_16BIT(Sqrt16Bit, sqrt, x, constant((deFloat16)FLOAT16_1_0) / app<InverseSqrt16Bit>(x));
DEFINE_DERIVED_FLOAT2(Pow, pow, x, y, exp2<float>(y * log2(x)));
DEFINE_DERIVED_FLOAT2_16BIT(Pow16, pow, x, y, exp2<deFloat16>(y * log2(x)));
typedef Floor< Signature<float, float> > Floor32Bit;
typedef Floor< Signature<deFloat16, deFloat16> > Floor16Bit;
+typedef Trunc< Signature<float, float> > Trunc32Bit;
+typedef Trunc< Signature<deFloat16, deFloat16> > Trunc16Bit;
+
DEFINE_DERIVED_FLOAT1(Fract, fract, x, x - app<Floor32Bit>(x));
DEFINE_DERIVED_FLOAT1_16BIT(Fract16Bit, fract, x, x - app<Floor16Bit>(x));
double precision (const EvalContext&, double, double, double) const { return 0.0; }
};
-DEFINE_DERIVED_FLOAT2(Mod, mod, x, y, x - y * app<Floor32Bit>(x / y));
+DEFINE_DERIVED_FLOAT2(Mod32Bit, mod, x, y, x - y * app<Floor32Bit>(x / y));
DEFINE_DERIVED_FLOAT2_16BIT(Mod16Bit, mod, x, y, x - y * app<Floor16Bit>(x / y));
+DEFINE_CASED_DERIVED_FLOAT2(FRem32Bit, frem, x, y, x - y * app<Trunc32Bit>(x / y), SPIRV_CASETYPE_FREM);
+DEFINE_CASED_DERIVED_FLOAT2_16BIT(FRem16Bit, frem, x, y, x - y * app<Trunc16Bit>(x / y), SPIRV_CASETYPE_FREM);
+
template <class T>
class Modf : public PrimitiveFunc<T>
{
GenFunc (const Func<Sig_>& scalarFunc) : m_func (scalarFunc) {}
+ SpirVCaseT getSpirvCase (void) const
+ {
+ return m_func.getSpirvCase();
+ }
+
string getName (void) const
{
return m_func.getName();
return this->doGetScalarFunc().getName();
}
+ SpirVCaseT getSpirvCase (void) const
+ {
+ return this->doGetScalarFunc().getSpirvCase();
+ }
+
protected:
void doPrint (ostream& os, const BaseArgExprs& args) const
{
if (outCount > 0)
{
- if (m_executor->isSpirVShader())
+ if (m_executor->spirvCase() == SPIRV_CASETYPE_COMPARE)
{
builder << "Output:\n"
<< comparisonMessage(outputs.out0[valueNdx])
const FloatFormat& getFormat (void) const { return m_ctx.floatFormat; }
template <typename In, typename Out>
- void testStatement (const Variables<In, Out>& variables, const Statement& stmt);
+ void testStatement (const Variables<In, Out>& variables, const Statement& stmt, SpirVCaseT spirvCase);
template<typename T>
Symbol makeSymbol (const Variable<T>& variable)
};
template <typename In, typename Out>
-void PrecisionCase::testStatement (const Variables<In, Out>& variables, const Statement& stmt)
+void PrecisionCase::testStatement (const Variables<In, Out>& variables, const Statement& stmt, SpirVCaseT spirvCase)
{
const int inCount = numInputs<In>();
const int outCount = numOutputs<Out>();
}
m_spec.source = de::toString(stmt);
- m_spec.spirVShader = isInteger<typename Out::Out0>();
+ m_spec.spirvCase = spirvCase;
}
template <typename T>
ExprP<Ret> expr = applyVar(m_func, m_variables.in0, m_variables.in1, m_variables.in2, m_variables.in3);
m_stmt = variableAssignment(m_variables.out0, expr);
- this->testStatement(m_variables, *m_stmt);
+ this->testStatement(m_variables, *m_stmt, m_func.getSpirvCase());
}
}
ExprP<Ret> expr = applyVar(m_func, m_variables.in0, m_variables.out1, m_variables.in1, m_variables.in2);
m_stmt = variableAssignment(m_variables.out0, expr);
- this->testStatement(m_variables, *m_stmt);
+ this->testStatement(m_variables, *m_stmt, m_func.getSpirvCase());
}
}
addScalarFactory<Log< Signature<float, float> > >(*funcs);
addScalarFactory<Exp2<Signature<float, float> > >(*funcs);
addScalarFactory<Log2< Signature<float, float> > >(*funcs);
- addScalarFactory<Sqrt>(*funcs);
+ addScalarFactory<Sqrt32Bit>(*funcs);
addScalarFactory<InverseSqrt< Signature<float, float> > >(*funcs);
addScalarFactory<Abs< Signature<float, float> > >(*funcs);
addScalarFactory<Sign< Signature<float, float> > >(*funcs);
addScalarFactory<Floor32Bit>(*funcs);
- addScalarFactory<Trunc< Signature<float, float> > >(*funcs);
+ addScalarFactory<Trunc32Bit>(*funcs);
addScalarFactory<Round< Signature<float, float> > >(*funcs);
addScalarFactory<RoundEven< Signature<float, float> > >(*funcs);
addScalarFactory<Ceil< Signature<float, float> > >(*funcs);
addScalarFactory<Fract>(*funcs);
- addScalarFactory<Mod>(*funcs);
+ addScalarFactory<Mod32Bit>(*funcs);
+ addScalarFactory<FRem32Bit>(*funcs);
funcs->addFactory(createSimpleFuncCaseFactory<Modf32Bit>());
addScalarFactory<Min< Signature<float, float, float> > >(*funcs);
addScalarFactory<Abs< Signature<deFloat16, deFloat16> > >(*funcs);
addScalarFactory<Sign< Signature<deFloat16, deFloat16> > >(*funcs);
addScalarFactory<Floor16Bit>(*funcs);
- addScalarFactory<Trunc< Signature<deFloat16, deFloat16> > >(*funcs);
+ addScalarFactory<Trunc16Bit>(*funcs);
addScalarFactory<Round< Signature<deFloat16, deFloat16> > >(*funcs);
addScalarFactory<RoundEven< Signature<deFloat16, deFloat16> > >(*funcs);
addScalarFactory<Ceil< Signature<deFloat16, deFloat16> > >(*funcs);
addScalarFactory<Fract16Bit>(*funcs);
addScalarFactory<Mod16Bit>(*funcs);
+ addScalarFactory<FRem16Bit>(*funcs);
funcs->addFactory(createSimpleFuncCaseFactory<Modf16Bit>());
addScalarFactory<Min< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
const int scalarSize = glu::getDataTypeScalarSize(basicType);
const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
const int numComps = scalarSize / numVecs;
+ const int size = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
{
for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
{
- const int size = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
const int dstOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
return src.str();
}
-std::string sclarComparison(const std::string opeartion, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
+std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
{
std::ostringstream src;
std::string boolType;
case glu::TYPE_FLOAT16:
case glu::TYPE_FLOAT:
src << "\n"
- << "%operation_result_" << operationNdx << " = " << opeartion << " %bool %in0_val %in1_val\n"
+ << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
<< "OpSelectionMerge %IF_" << operationNdx << " None\n"
<< "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
<< "%label_IF_" << operationNdx << " = OpLabel\n"
}
src << "\n"
- << "%operation_result_" << operationNdx << " = " << opeartion << " " << boolType << " %in0_val %in1_val\n"
+ << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
<< "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool isMediump)
{
- const int operationAmount = 10;
- int moveBitNdx = 0;
- const std::string inputType1 = getTypeSpirv(spec.inputs[0].varType.getBasicType(), spec.packFloat16Bit);
- const std::string inputType2 = getTypeSpirv(spec.inputs[1].varType.getBasicType(), spec.packFloat16Bit);
- const std::string outputType = getTypeSpirv(spec.outputs[0].varType.getBasicType());
- const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
-
- std::string opeartions[operationAmount] =
+ static const std::string COMPARE_OPERATIONS[] =
{
"OpFOrdEqual",
"OpFOrdGreaterThan",
"OpFUnordLessThanEqual"
};
+ int moveBitNdx = 0;
+ const std::string inputType1 = getTypeSpirv(spec.inputs[0].varType.getBasicType(), spec.packFloat16Bit);
+ const std::string inputType2 = getTypeSpirv(spec.inputs[1].varType.getBasicType(), spec.packFloat16Bit);
+ const std::string outputType = getTypeSpirv(spec.outputs[0].varType.getBasicType(), spec.packFloat16Bit);
+ const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
+
+ const bool floatResult = glu::isDataTypeFloatType(spec.outputs[0].varType.getBasicType());
+ const bool packFloatRes = (floatResult && spec.packFloat16Bit);
+ const bool useF32Types = (!are16Bit);
+ const bool useF16Types = (spec.packFloat16Bit || are16Bit);
+
+ if (floatResult)
+ DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_FREM);
+
std::ostringstream src;
src << "; SPIR-V\n"
"; Version: 1.0\n"
"; Schema: 0\n"
"OpCapability Shader\n";
- if (spec.packFloat16Bit || are16Bit)
+ if (useF16Types)
src << "OpCapability Float16\n";
if (are16Bit)
{
src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
++ndx;
- offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
+ const int scalarSize = symIter->varType.getScalarSize();
+ offset += (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
}
src << "OpDecorate %up_SSB0_IN ArrayStride "<< offset << "\n";
}
"OpDecorate %in0_val RelaxedPrecision\n"
"OpDecorate %in1_val RelaxedPrecision\n"
"OpMemberDecorate %SSB0_OUT 0 RelaxedPrecision\n";
+
+ if (floatResult)
+ {
+ src <<
+ "OpDecorate %out RelaxedPrecision\n"
+ "OpDecorate %frem_result RelaxedPrecision\n"
+ "OpDecorate %out_val_final RelaxedPrecision\n";
+ }
}
//output offset
{
src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
++ndx;
- offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
+ const int scalarSize = symIter->varType.getScalarSize();
+ offset += (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
}
src << "OpDecorate %up_SSB0_OUT ArrayStride " << offset << "\n";
}
"%v4bool = OpTypeVector %bool 4\n"
"%u32 = OpTypeInt 32 0\n";
- if (!are16Bit) //f32 is not needed when shader operates only on f16
+ if (useF32Types)
src << "%f32 = OpTypeFloat 32\n"
"%v2f32 = OpTypeVector %f32 2\n"
"%v3f32 = OpTypeVector %f32 3\n"
"%v4f32 = OpTypeVector %f32 4\n";
- if (spec.packFloat16Bit || are16Bit)
+ if (useF16Types)
src << "%f16 = OpTypeFloat 16\n"
"%v2f16 = OpTypeVector %f16 2\n"
"%v3f16 = OpTypeVector %f16 3\n"
"\n"
"%voidf = OpTypeFunction %void\n"
"%fp_u32 = OpTypePointer Function %u32\n"
- "%fp_i32 = OpTypePointer Function " << outputType << "\n"
- "%fp_f32 = OpTypePointer Function " << inputType1 << "\n"
+ "%fp_out = OpTypePointer Function " << outputType << "\n"
+ "%fp_it1 = OpTypePointer Function " << inputType1 << "\n"
"%fp_operation = OpTypePointer Function %i32\n";
if (spec.packFloat16Bit)
src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
"%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
- "%up_i32 = OpTypePointer Uniform " << outputType << "\n"
+ "%up_out = OpTypePointer Uniform " << outputType << "\n"
"\n"
"%c_u32_0 = OpConstant %u32 0\n"
"%c_u32_1 = OpConstant %u32 1\n"
"%c_u32_2 = OpConstant %u32 2\n"
"%c_i32_0 = OpConstant %i32 0\n"
"%c_i32_1 = OpConstant %i32 1\n"
+ "\n";
+
+ if (useF32Types)
+ src <<
+ "%c_f32_0 = OpConstant %f32 0\n"
+ "%c_f32_1 = OpConstant %f32 1\n"
+ ;
+
+ if (useF16Types)
+ src <<
+ "%c_f16_0 = OpConstant %f16 0\n"
+ "%c_f16_1 = OpConstant %f16 1\n"
+ "%c_f16_minus1 = OpConstant %f16 -0x1p+0"
+ ;
+
+ src << "\n"
"%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
"%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
"%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
"%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
"%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
"%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
- "\n"
+ "\n";
+
+ if (useF32Types)
+ src <<
+ "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
+ "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
+ "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
+ "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
+ "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
+ "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
+ ;
+
+ if (useF16Types)
+ src <<
+ "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
+ "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
+ "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
+ "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
+ "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
+ "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
+ ;
+
+ src << "\n"
"%SSB0_IN = OpTypeStruct " << inputType1 << " " << inputType2 << "\n"
"%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
"%ssboIN = OpTypeStruct %up_SSB0_IN\n"
"\n"
"%BP_main = OpFunction %void None %voidf\n"
"%BP_label = OpLabel\n"
- "%invocationNdx = OpVariable %fp_u32 Function\n";
+ "%invocationNdx = OpVariable %fp_u32 Function\n";
if (spec.packFloat16Bit)
src << "%in0 = OpVariable %fp_f16 Function\n"
"%in1 = OpVariable %fp_f16 Function\n";
else
- src << "%in0 = OpVariable %fp_f32 Function\n"
- "%in1 = OpVariable %fp_f32 Function\n";
+ src << "%in0 = OpVariable %fp_it1 Function\n"
+ "%in1 = OpVariable %fp_it1 Function\n";
+
+ src << "%out = OpVariable " << (packFloatRes ? "%fp_f16" : "%fp_out") << " Function\n";
src << "%operation = OpVariable %fp_operation Function\n"
- "%out = OpVariable %fp_i32 Function\n"
"%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
"%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
"%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
"%src_ptr_0_0 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_0\n"
"%src_val_0_0 = OpLoad " << inputType1 << " %src_ptr_0_0\n";
- if(spec.packFloat16Bit)
+ if (spec.packFloat16Bit)
{
if (spec.inputs[0].varType.getScalarSize() > 1)
{
src << "\n"
"OpStore %operation %c_i32_1\n"
- "OpStore %out %c_" << &outputType[1] << "_0\n"
+ "OpStore %out %c_" << (packFloatRes ? &packType[1] : &outputType[1]) << "_0\n"
"\n";
if (spec.packFloat16Bit)
"%in1_val = OpLoad " << inputType2 << " %in1\n";
src << "\n";
- for(int operationNdx = 0; operationNdx < operationAmount; ++operationNdx)
+
+ switch (spec.spirvCase)
{
- src << sclarComparison (opeartions[operationNdx], operationNdx,
- spec.inputs[0].varType.getBasicType(),
- outputType,
- spec.outputs[0].varType.getScalarSize());
- src << moveBitOperation("%operation", moveBitNdx);
- ++moveBitNdx;
+ case SPIRV_CASETYPE_COMPARE:
+ for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
+ {
+ src << scalarComparison (COMPARE_OPERATIONS[operationNdx], operationNdx,
+ spec.inputs[0].varType.getBasicType(),
+ outputType,
+ spec.outputs[0].varType.getScalarSize());
+ src << moveBitOperation("%operation", moveBitNdx);
+ ++moveBitNdx;
+ }
+ break;
+ case SPIRV_CASETYPE_FREM:
+ src << "%frem_result = OpFRem " << (packFloatRes ? packType : outputType) << " %in0_val %in1_val\n"
+ << "OpStore %out %frem_result\n";
+ break;
+ default:
+ DE_ASSERT(false);
+ break;
+ }
+
+ src << "\n"
+ "%out_val_final = OpLoad " << (packFloatRes ? packType : outputType) << " %out\n"
+ "%ssbo_dst_ptr = OpAccessChain %up_out %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n";
+
+ if (packFloatRes)
+ {
+ if (spec.inputs[0].varType.getScalarSize() > 1)
+ {
+ for (int i = 0; i < spec.inputs[0].varType.getScalarSize(); ++i)
+ {
+ src << "%out_val_final_" << i << " = OpCompositeExtract %f16 %out_val_final " << i << "\n";
+ src << "%out_composite_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << i << " %c_f16_minus1\n";
+ src << "%u32_val_" << i << " = OpBitcast %u32 %out_composite_" << i << "\n";
+ }
+
+ src << "%u32_final_val = OpCompositeConstruct " << outputType;
+ for (int i = 0; i < spec.inputs[0].varType.getScalarSize(); ++i)
+ src << " %u32_val_" << i;
+ src << "\n";
+ src << "OpStore %ssbo_dst_ptr %u32_final_val\n";
+ }
+ else
+ {
+ src <<
+ "%out_composite = OpCompositeConstruct %v2f16 %out_val_final %c_f16_minus1\n"
+ "%out_result = OpBitcast " << outputType << " %out_composite\n"
+ "OpStore %ssbo_dst_ptr %out_result\n";
+ }
+ }
+ else
+ {
+ src << "OpStore %ssbo_dst_ptr %out_val_final\n";
}
src << "\n"
- "%out_val_final = OpLoad " << outputType << " %out\n"
- "%ssbo_dst_ptr = OpAccessChain %up_i32 %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n"
- "OpStore %ssbo_dst_ptr %out_val_final\n"
- "\n"
"OpReturn\n"
"OpFunctionEnd\n";
std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
{
- if(spec.spirVShader)
+ if (spec.spirvCase != SPIRV_CASETYPE_NONE)
{
bool are16Bit = false;
bool isMediump = false;
if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
isMediump = true;
- if(isMediump && are16Bit)
+ if (isMediump && are16Bit)
break;
}
void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
- if(shaderSpec.spirVShader)
+ if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
else
programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
// the shader.
- uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && m_shaderSpec.spirVShader);
+ uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
// Create command pool
cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
Symbol (const std::string& name_, const glu::VarType& varType_) : name(name_), varType(varType_) {}
};
+//! Selects which hand-written SPIR-V assembly path generateSpirv() emits for a case;
+//! SPIRV_CASETYPE_NONE means no custom SPIR-V is used (plain GLSL shader path).
+enum SpirVCaseT
+{
+	SPIRV_CASETYPE_NONE = 0,	//!< Default: shader compiled from GLSL, no special SPIR-V handling.
+	SPIRV_CASETYPE_COMPARE,		//!< Emits the OpFOrd*/OpFUnord* comparison-operation sequence.
+	SPIRV_CASETYPE_FREM,		//!< Emits an OpFRem instruction (frem precision tests).
+	SPIRV_CASETYPE_MAX_ENUM,
+};
+
//! Complete shader specification.
struct ShaderSpec
{
std::string source; //!< Source snippet to be executed.
vk::ShaderBuildOptions buildOptions;
bool packFloat16Bit;
- bool spirVShader;
+ SpirVCaseT spirvCase;
ShaderSpec (void)
: glslVersion (glu::GLSL_VERSION_450)
, packFloat16Bit (false)
- , spirVShader (false)
+ , spirvCase (SPIRV_CASETYPE_NONE)
{}
};
bool areInputs16Bit (void) const;
bool areOutputs16Bit (void) const;
bool isOutput16Bit (const size_t ndx) const;
- bool isSpirVShader (void) {return m_shaderSpec.spirVShader;}
+ bool isSpirVShader (void) { return (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE); }
+ SpirVCaseT spirvCase (void) { return m_shaderSpec.spirvCase; }
protected:
ShaderExecutor (Context& context, const ShaderSpec& shaderSpec)
dEQP-VK.glsl.builtin.precision.mod.highp.vec2
dEQP-VK.glsl.builtin.precision.mod.highp.vec3
dEQP-VK.glsl.builtin.precision.mod.highp.vec4
+dEQP-VK.glsl.builtin.precision.frem.mediump.scalar
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec2
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec3
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.highp.scalar
+dEQP-VK.glsl.builtin.precision.frem.highp.vec2
+dEQP-VK.glsl.builtin.precision.frem.highp.vec3
+dEQP-VK.glsl.builtin.precision.frem.highp.vec4
dEQP-VK.glsl.builtin.precision.modf.mediump
dEQP-VK.glsl.builtin.precision.modf.highp
dEQP-VK.glsl.builtin.precision.min.mediump.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec3
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec4
dEQP-VK.glsl.builtin.precision_fp16_storage16b.modf.compute
dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec3
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec4
dEQP-VK.glsl.builtin.precision_fp16_storage32b.modf.compute
dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.vec2
dEQP-VK.glsl.builtin.precision.mod.highp.vec2
dEQP-VK.glsl.builtin.precision.mod.highp.vec3
dEQP-VK.glsl.builtin.precision.mod.highp.vec4
+dEQP-VK.glsl.builtin.precision.frem.mediump.scalar
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec2
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec3
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.highp.scalar
+dEQP-VK.glsl.builtin.precision.frem.highp.vec2
+dEQP-VK.glsl.builtin.precision.frem.highp.vec3
+dEQP-VK.glsl.builtin.precision.frem.highp.vec4
dEQP-VK.glsl.builtin.precision.modf.mediump
dEQP-VK.glsl.builtin.precision.modf.highp
dEQP-VK.glsl.builtin.precision.min.mediump.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec3
dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec4
dEQP-VK.glsl.builtin.precision_fp16_storage16b.modf.compute
dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec2
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec3
dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec4
dEQP-VK.glsl.builtin.precision_fp16_storage32b.modf.compute
dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.scalar
dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.vec2