Add OpFRem to SPIR-V precision tests
author Ricardo Garcia <rgarcia@igalia.com>
Thu, 15 Aug 2019 07:09:42 +0000 (09:09 +0200)
committer Alexander Galazin <Alexander.Galazin@arm.com>
Tue, 17 Sep 2019 11:01:24 +0000 (07:01 -0400)
Test the precision of OpFRem with scalars and vectors using different
floating point types.

New tests:
dEQP-VK.glsl.builtin.precision*.frem.*

Components: Vulkan
VK-GL-CTS issue: 1928

Change-Id: I3cbba606e8f107105ae860c3ab88804874e3fa27

android/cts/master/vk-master.txt
external/vulkancts/modules/vulkan/shaderexecutor/vktShaderBuiltinPrecisionTests.cpp
external/vulkancts/modules/vulkan/shaderexecutor/vktShaderExecutor.cpp
external/vulkancts/modules/vulkan/shaderexecutor/vktShaderExecutor.hpp
external/vulkancts/mustpass/master/vk-default-no-waivers.txt
external/vulkancts/mustpass/master/vk-default.txt

index b251dcb..91ed73b 100644 (file)
@@ -304783,6 +304783,14 @@ dEQP-VK.glsl.builtin.precision.mod.mediump.scalar
 dEQP-VK.glsl.builtin.precision.mod.mediump.vec2
 dEQP-VK.glsl.builtin.precision.mod.mediump.vec3
 dEQP-VK.glsl.builtin.precision.mod.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.mediump.scalar
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec2
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec3
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.highp.scalar
+dEQP-VK.glsl.builtin.precision.frem.highp.vec2
+dEQP-VK.glsl.builtin.precision.frem.highp.vec3
+dEQP-VK.glsl.builtin.precision.frem.highp.vec4
 dEQP-VK.glsl.builtin.precision.modf.mediump
 dEQP-VK.glsl.builtin.precision.modf.highp
 dEQP-VK.glsl.builtin.precision.min.mediump.scalar
@@ -305089,6 +305097,10 @@ dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec2
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec3
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec4
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.modf.compute
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.vec2
@@ -305319,6 +305331,10 @@ dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec2
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec3
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec4
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.modf.compute
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.vec2
index 27ed87f..cea4ed6 100644 (file)
@@ -1497,6 +1497,8 @@ public:
        //! Index of output parameter, or -1 if none of the parameters is output.
        virtual int                     getOutParamIndex                (void)                                  const { return -1; }
 
+       virtual SpirVCaseT      getSpirvCase                    (void)                                  const { return SPIRV_CASETYPE_NONE; }
+
        void                            printDefinition                 (ostream& os)                   const
        {
                doPrintDefinition(os);
@@ -2302,11 +2304,13 @@ template <class T>
 class Comparison : public InfixOperator < T >
 {
 public:
-       string          getName(void) const { return "comparison"; }
-       string          getSymbol(void) const { return ""; }
+       string          getName                 (void) const    { return "comparison"; }
+       string          getSymbol               (void) const    { return ""; }
+
+       SpirVCaseT      getSpirvCase    () const                { return SPIRV_CASETYPE_COMPARE; }
 
-       Interval        doApply(const EvalContext&      ctx,
-               const Signature<int, float, float>::IArgs&              iargs) const
+       Interval        doApply                 (const EvalContext&                                             ctx,
+                                                                const typename Comparison<T>::IArgs&   iargs) const
        {
                DE_UNREF(ctx);
                if (iargs.a.hasNaN() || iargs.b.hasNaN())
@@ -2704,11 +2708,13 @@ ExprP<TRET> NAME (const ExprP<T0>& arg0, const ExprP<T1>& arg1)         \
        return app<CLASS>(arg0, arg1);                                                                  \
 }
 
-#define DEFINE_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION) \
+#define DEFINE_CASED_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION, SPIRVCASE) \
 class CLASS : public DerivedFunc<Signature<TRET, T0, T1> > /* NOLINT(CLASS) */ \
 {                                                                                                                                              \
 public:                                                                                                                                        \
-       string                  getName         (void) const            { return #NAME; }       \
+       string                  getName         (void) const    { return #NAME; }               \
+                                                                                                                                               \
+       SpirVCaseT              getSpirvCase(void) const        { return SPIRVCASE; }   \
                                                                                                                                                \
 protected:                                                                                                                             \
        ExprP<TRET>             doExpand        (ExpandContext&, const ArgExprs& args_) const \
@@ -2720,12 +2726,24 @@ protected:                                                                                                                              \
 };                                                                                                                                             \
 DEFINE_CONSTRUCTOR2(CLASS, TRET, NAME, T0, T1)
 
+#define DEFINE_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION) \
+       DEFINE_CASED_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION, SPIRV_CASETYPE_NONE)
+
 #define DEFINE_DERIVED_FLOAT2(CLASS, NAME, Arg0, Arg1, EXPANSION)              \
        DEFINE_DERIVED2(CLASS, float, NAME, float, Arg0, float, Arg1, EXPANSION)
 
 #define DEFINE_DERIVED_FLOAT2_16BIT(CLASS, NAME, Arg0, Arg1, EXPANSION)                \
        DEFINE_DERIVED2(CLASS, deFloat16, NAME, deFloat16, Arg0, deFloat16, Arg1, EXPANSION)
 
+#define DEFINE_CASED_DERIVED_FLOAT2(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
+       DEFINE_CASED_DERIVED2(CLASS, float, NAME, float, Arg0, float, Arg1, EXPANSION, SPIRVCASE)
+
+#define DEFINE_CASED_DERIVED_FLOAT2_16BIT(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
+       DEFINE_CASED_DERIVED2(CLASS, deFloat16, NAME, deFloat16, Arg0, deFloat16, Arg1, EXPANSION, SPIRVCASE)
+
+#define DEFINE_CASED_DERIVED_DOUBLE2(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
+       DEFINE_CASED_DERIVED2(CLASS, double, NAME, double, Arg0, double, Arg1, EXPANSION, SPIRVCASE)
+
 #define DEFINE_CONSTRUCTOR3(CLASS, TRET, NAME, T0, T1, T2)                             \
 ExprP<TRET> NAME (const ExprP<T0>& arg0, const ExprP<T1>& arg1, const ExprP<T2>& arg2) \
 {                                                                                                                                              \
@@ -2765,7 +2783,7 @@ ExprP<TRET> NAME (const ExprP<T0>& arg0, const ExprP<T1>& arg1,                   \
 typedef         InverseSqrt< Signature<deFloat16, deFloat16> > InverseSqrt16Bit;
 typedef         InverseSqrt< Signature<float, float> >                 InverseSqrt32Bit;
 
-DEFINE_DERIVED_FLOAT1(Sqrt,                            sqrt,           x,              constant(1.0f) / app<InverseSqrt32Bit>(x));
+DEFINE_DERIVED_FLOAT1(Sqrt32Bit,               sqrt,           x,              constant(1.0f) / app<InverseSqrt32Bit>(x));
 DEFINE_DERIVED_FLOAT1_16BIT(Sqrt16Bit, sqrt,           x,              constant((deFloat16)FLOAT16_1_0) / app<InverseSqrt16Bit>(x));
 DEFINE_DERIVED_FLOAT2(Pow,                             pow,            x,      y,      exp2<float>(y * log2(x)));
 DEFINE_DERIVED_FLOAT2_16BIT(Pow16,             pow,            x,      y,      exp2<deFloat16>(y * log2(x)));
@@ -3929,6 +3947,9 @@ public:
 typedef Floor< Signature<float, float> > Floor32Bit;
 typedef Floor< Signature<deFloat16, deFloat16> > Floor16Bit;
 
+typedef Trunc< Signature<float, float> > Trunc32Bit;
+typedef Trunc< Signature<deFloat16, deFloat16> > Trunc16Bit;
+
 DEFINE_DERIVED_FLOAT1(Fract, fract, x, x - app<Floor32Bit>(x));
 DEFINE_DERIVED_FLOAT1_16BIT(Fract16Bit, fract, x, x - app<Floor16Bit>(x));
 
@@ -3941,9 +3962,12 @@ protected:
        double  precision               (const EvalContext&, double, double, double) const { return 0.0; }
 };
 
-DEFINE_DERIVED_FLOAT2(Mod, mod, x, y, x - y * app<Floor32Bit>(x / y));
+DEFINE_DERIVED_FLOAT2(Mod32Bit, mod, x, y, x - y * app<Floor32Bit>(x / y));
 DEFINE_DERIVED_FLOAT2_16BIT(Mod16Bit, mod, x, y, x - y * app<Floor16Bit>(x / y));
 
+DEFINE_CASED_DERIVED_FLOAT2(FRem32Bit, frem, x, y, x - y * app<Trunc32Bit>(x / y), SPIRV_CASETYPE_FREM);
+DEFINE_CASED_DERIVED_FLOAT2_16BIT(FRem16Bit, frem, x, y, x - y * app<Trunc16Bit>(x / y), SPIRV_CASETYPE_FREM);
+
 template <class T>
 class Modf : public PrimitiveFunc<T>
 {
@@ -4801,6 +4825,11 @@ public:
 
                                GenFunc                                 (const Func<Sig_>&      scalarFunc) : m_func (scalarFunc) {}
 
+       SpirVCaseT      getSpirvCase                    (void) const
+       {
+               return m_func.getSpirvCase();
+       }
+
        string          getName                                 (void) const
        {
                return m_func.getName();
@@ -4885,6 +4914,11 @@ public:
                return this->doGetScalarFunc().getName();
        }
 
+       SpirVCaseT                                      getSpirvCase    (void) const
+       {
+               return this->doGetScalarFunc().getSpirvCase();
+       }
+
 protected:
        void                                            doPrint                 (ostream& os, const BaseArgExprs& args) const
        {
@@ -5729,7 +5763,7 @@ tcu::TestStatus BuiltinPrecisionCaseTestInstance<In, Out>::iterate (void)
 
                        if (outCount > 0)
                        {
-                               if (m_executor->isSpirVShader())
+                               if (m_executor->spirvCase() == SPIRV_CASETYPE_COMPARE)
                                {
                                        builder << "Output:\n"
                                                        << comparisonMessage(outputs.out0[valueNdx])
@@ -5801,7 +5835,7 @@ protected:
        const FloatFormat&      getFormat               (void) const                    { return m_ctx.floatFormat; }
 
        template <typename In, typename Out>
-       void                            testStatement   (const Variables<In, Out>& variables, const Statement& stmt);
+       void                            testStatement   (const Variables<In, Out>& variables, const Statement& stmt, SpirVCaseT spirvCase);
 
        template<typename T>
        Symbol                          makeSymbol              (const Variable<T>& variable)
@@ -5815,7 +5849,7 @@ protected:
 };
 
 template <typename In, typename Out>
-void PrecisionCase::testStatement (const Variables<In, Out>& variables, const Statement& stmt)
+void PrecisionCase::testStatement (const Variables<In, Out>& variables, const Statement& stmt, SpirVCaseT spirvCase)
 {
        const int               inCount         = numInputs<In>();
        const int               outCount        = numOutputs<Out>();
@@ -5874,7 +5908,7 @@ void PrecisionCase::testStatement (const Variables<In, Out>& variables, const St
        }
 
        m_spec.source = de::toString(stmt);
-       m_spec.spirVShader = isInteger<typename Out::Out0>();
+       m_spec.spirvCase = spirvCase;
 }
 
 template <typename T>
@@ -6099,7 +6133,7 @@ void FuncCase<Sig>::buildTest (void)
                ExprP<Ret> expr = applyVar(m_func, m_variables.in0, m_variables.in1, m_variables.in2, m_variables.in3);
                m_stmt                  = variableAssignment(m_variables.out0, expr);
 
-               this->testStatement(m_variables, *m_stmt);
+               this->testStatement(m_variables, *m_stmt, m_func.getSpirvCase());
        }
 }
 
@@ -6153,7 +6187,7 @@ void InOutFuncCase<Sig>::buildTest (void)
                ExprP<Ret> expr = applyVar(m_func, m_variables.in0, m_variables.out1, m_variables.in1, m_variables.in2);
                m_stmt                  = variableAssignment(m_variables.out0, expr);
 
-               this->testStatement(m_variables, *m_stmt);
+               this->testStatement(m_variables, *m_stmt, m_func.getSpirvCase());
        }
 }
 
@@ -6386,19 +6420,20 @@ MovePtr<const CaseFactories> createBuiltinCases (bool is16BitTest = false)
        addScalarFactory<Log< Signature<float, float> > >(*funcs);
        addScalarFactory<Exp2<Signature<float, float> > >(*funcs);
        addScalarFactory<Log2< Signature<float, float> > >(*funcs);
-       addScalarFactory<Sqrt>(*funcs);
+       addScalarFactory<Sqrt32Bit>(*funcs);
        addScalarFactory<InverseSqrt< Signature<float, float> > >(*funcs);
 
        addScalarFactory<Abs< Signature<float, float> > >(*funcs);
        addScalarFactory<Sign< Signature<float, float> > >(*funcs);
        addScalarFactory<Floor32Bit>(*funcs);
-       addScalarFactory<Trunc< Signature<float, float> > >(*funcs);
+       addScalarFactory<Trunc32Bit>(*funcs);
        addScalarFactory<Round< Signature<float, float> > >(*funcs);
        addScalarFactory<RoundEven< Signature<float, float> > >(*funcs);
        addScalarFactory<Ceil< Signature<float, float> > >(*funcs);
        addScalarFactory<Fract>(*funcs);
 
-       addScalarFactory<Mod>(*funcs);
+       addScalarFactory<Mod32Bit>(*funcs);
+       addScalarFactory<FRem32Bit>(*funcs);
 
        funcs->addFactory(createSimpleFuncCaseFactory<Modf32Bit>());
        addScalarFactory<Min< Signature<float, float, float> > >(*funcs);
@@ -6469,13 +6504,14 @@ MovePtr<const CaseFactories> createBuiltinCases16Bit(void)
        addScalarFactory<Abs< Signature<deFloat16, deFloat16> > >(*funcs);
        addScalarFactory<Sign< Signature<deFloat16, deFloat16> > >(*funcs);
        addScalarFactory<Floor16Bit>(*funcs);
-       addScalarFactory<Trunc< Signature<deFloat16, deFloat16> > >(*funcs);
+       addScalarFactory<Trunc16Bit>(*funcs);
        addScalarFactory<Round< Signature<deFloat16, deFloat16> > >(*funcs);
        addScalarFactory<RoundEven< Signature<deFloat16, deFloat16> > >(*funcs);
        addScalarFactory<Ceil< Signature<deFloat16, deFloat16> > >(*funcs);
        addScalarFactory<Fract16Bit>(*funcs);
 
        addScalarFactory<Mod16Bit>(*funcs);
+       addScalarFactory<FRem16Bit>(*funcs);
 
        funcs->addFactory(createSimpleFuncCaseFactory<Modf16Bit>());
        addScalarFactory<Min< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
index ec0bdea..e901dfe 100644 (file)
@@ -1766,12 +1766,12 @@ void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayou
                const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
                const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
                const int                               numComps                = scalarSize / numVecs;
+               const int                               size                    = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
 
                for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
                {
                        for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
                        {
-                               const int               size                    = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
                                const int               srcOffset               = size * (elemNdx * scalarSize + vecNdx * numComps);
                                const int               dstOffset               = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
                                const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
@@ -1991,7 +1991,7 @@ std::string moveBitOperation (std::string variableName, const int operationNdx)
        return src.str();
 }
 
-std::string sclarComparison(const std::string opeartion, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
+std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
 {
        std::ostringstream      src;
        std::string                     boolType;
@@ -2001,7 +2001,7 @@ std::string sclarComparison(const std::string opeartion, const int operationNdx,
        case glu::TYPE_FLOAT16:
        case glu::TYPE_FLOAT:
                src << "\n"
-                       << "%operation_result_" << operationNdx << " = " << opeartion << " %bool %in0_val %in1_val\n"
+                       << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
                        << "OpSelectionMerge %IF_" << operationNdx << " None\n"
                        << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
                        << "%label_IF_" << operationNdx << " = OpLabel\n"
@@ -2031,7 +2031,7 @@ std::string sclarComparison(const std::string opeartion, const int operationNdx,
        }
 
        src << "\n"
-               << "%operation_result_" << operationNdx << " = " << opeartion << " " << boolType << " %in0_val %in1_val\n"
+               << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
                << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
                << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
 
@@ -2051,14 +2051,7 @@ std::string sclarComparison(const std::string opeartion, const int operationNdx,
 
 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool isMediump)
 {
-       const int                       operationAmount = 10;
-       int                                     moveBitNdx              = 0;
-       const std::string       inputType1              = getTypeSpirv(spec.inputs[0].varType.getBasicType(), spec.packFloat16Bit);
-       const std::string       inputType2              = getTypeSpirv(spec.inputs[1].varType.getBasicType(), spec.packFloat16Bit);
-       const std::string       outputType              = getTypeSpirv(spec.outputs[0].varType.getBasicType());
-       const std::string       packType                = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
-
-       std::string     opeartions[operationAmount]     =
+       static const std::string COMPARE_OPERATIONS[] =
        {
                "OpFOrdEqual",
                "OpFOrdGreaterThan",
@@ -2072,6 +2065,20 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                "OpFUnordLessThanEqual"
        };
 
+       int                                     moveBitNdx              = 0;
+       const std::string       inputType1              = getTypeSpirv(spec.inputs[0].varType.getBasicType(), spec.packFloat16Bit);
+       const std::string       inputType2              = getTypeSpirv(spec.inputs[1].varType.getBasicType(), spec.packFloat16Bit);
+       const std::string       outputType              = getTypeSpirv(spec.outputs[0].varType.getBasicType(), spec.packFloat16Bit);
+       const std::string       packType                = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
+
+       const bool                      floatResult             = glu::isDataTypeFloatType(spec.outputs[0].varType.getBasicType());
+       const bool                      packFloatRes    = (floatResult && spec.packFloat16Bit);
+       const bool                      useF32Types             = (!are16Bit);
+       const bool                      useF16Types             = (spec.packFloat16Bit || are16Bit);
+
+       if (floatResult)
+               DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_FREM);
+
        std::ostringstream      src;
        src << "; SPIR-V\n"
                "; Version: 1.0\n"
@@ -2080,7 +2087,7 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                "; Schema: 0\n"
                "OpCapability Shader\n";
 
-       if (spec.packFloat16Bit || are16Bit)
+       if (useF16Types)
                src << "OpCapability Float16\n";
 
        if (are16Bit)
@@ -2105,7 +2112,8 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                {
                        src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
                        ++ndx;
-                       offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
+                       const int scalarSize = symIter->varType.getScalarSize();
+                       offset += (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
                }
                src << "OpDecorate %up_SSB0_IN ArrayStride "<< offset << "\n";
        }
@@ -2131,6 +2139,14 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                        "OpDecorate %in0_val RelaxedPrecision\n"
                        "OpDecorate %in1_val RelaxedPrecision\n"
                        "OpMemberDecorate %SSB0_OUT 0 RelaxedPrecision\n";
+
+                       if (floatResult)
+                       {
+                               src <<
+                                       "OpDecorate %out RelaxedPrecision\n"
+                                       "OpDecorate %frem_result RelaxedPrecision\n"
+                                       "OpDecorate %out_val_final RelaxedPrecision\n";
+                       }
        }
 
        //output offset
@@ -2141,7 +2157,8 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                {
                        src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
                        ++ndx;
-                       offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
+                       const int scalarSize = symIter->varType.getScalarSize();
+                       offset += (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
                }
                src << "OpDecorate %up_SSB0_OUT ArrayStride " << offset << "\n";
        }
@@ -2158,13 +2175,13 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                "%v4bool = OpTypeVector %bool 4\n"
                "%u32   = OpTypeInt 32 0\n";
 
-       if (!are16Bit) //f32 is not needed when shader operates only on f16
+       if (useF32Types)
                src << "%f32   = OpTypeFloat 32\n"
                        "%v2f32 = OpTypeVector %f32 2\n"
                        "%v3f32 = OpTypeVector %f32 3\n"
                        "%v4f32 = OpTypeVector %f32 4\n";
 
-       if (spec.packFloat16Bit || are16Bit)
+       if (useF16Types)
                src << "%f16   = OpTypeFloat 16\n"
                        "%v2f16 = OpTypeVector %f16 2\n"
                        "%v3f16 = OpTypeVector %f16 3\n"
@@ -2184,8 +2201,8 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                "\n"
                "%voidf   = OpTypeFunction %void\n"
                "%fp_u32  = OpTypePointer Function %u32\n"
-               "%fp_i32  = OpTypePointer Function " << outputType << "\n"
-               "%fp_f32  = OpTypePointer Function " << inputType1 << "\n"
+               "%fp_out  = OpTypePointer Function " << outputType << "\n"
+               "%fp_it1  = OpTypePointer Function " << inputType1 << "\n"
                "%fp_operation =  OpTypePointer Function %i32\n";
 
        if (spec.packFloat16Bit)
@@ -2193,20 +2210,58 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
 
        src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
                "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
-               "%up_i32 = OpTypePointer Uniform " << outputType << "\n"
+               "%up_out = OpTypePointer Uniform " << outputType << "\n"
                "\n"
                "%c_u32_0 = OpConstant %u32 0\n"
                "%c_u32_1 = OpConstant %u32 1\n"
                "%c_u32_2 = OpConstant %u32 2\n"
                "%c_i32_0 = OpConstant %i32 0\n"
                "%c_i32_1 = OpConstant %i32 1\n"
+               "\n";
+
+       if (useF32Types)
+               src <<
+                       "%c_f32_0 = OpConstant %f32 0\n"
+                       "%c_f32_1 = OpConstant %f32 1\n"
+                       ;
+
+       if (useF16Types)
+               src <<
+                       "%c_f16_0 = OpConstant %f16 0\n"
+                       "%c_f16_1 = OpConstant %f16 1\n"
+                       "%c_f16_minus1 = OpConstant %f16 -0x1p+0"
+                       ;
+
+       src << "\n"
                "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
                "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
                "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
                "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
                "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
                "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
-               "\n"
+               "\n";
+
+       if (useF32Types)
+               src <<
+                       "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
+                       "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
+                       "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
+                       "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
+                       "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
+                       "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
+                       ;
+
+       if (useF16Types)
+               src <<
+                       "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
+                       "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
+                       "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
+                       "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
+                       "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
+                       "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
+                       ;
+
+       src << "\n"
                "%SSB0_IN    = OpTypeStruct " << inputType1 << " " << inputType2 << "\n"
                "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
                "%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
@@ -2221,17 +2276,18 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                "\n"
                "%BP_main = OpFunction %void None %voidf\n"
                "%BP_label = OpLabel\n"
-               "%invocationNdx = OpVariable  %fp_u32 Function\n";
+               "%invocationNdx = OpVariable %fp_u32 Function\n";
 
        if (spec.packFloat16Bit)
                src << "%in0 = OpVariable %fp_f16 Function\n"
                        "%in1 = OpVariable %fp_f16 Function\n";
        else
-               src << "%in0 = OpVariable %fp_f32 Function\n"
-                       "%in1 = OpVariable %fp_f32 Function\n";
+               src << "%in0 = OpVariable %fp_it1 Function\n"
+                       "%in1 = OpVariable %fp_it1 Function\n";
+
+       src << "%out = OpVariable " << (packFloatRes ? "%fp_f16" : "%fp_out") << " Function\n";
 
        src << "%operation = OpVariable %fp_operation Function\n"
-               "%out = OpVariable %fp_i32 Function\n"
                "%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
                "%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
                "%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
@@ -2254,7 +2310,7 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                "%src_ptr_0_0 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_0\n"
                "%src_val_0_0 = OpLoad " << inputType1 << " %src_ptr_0_0\n";
 
-       if(spec.packFloat16Bit)
+       if (spec.packFloat16Bit)
        {
                if (spec.inputs[0].varType.getScalarSize() > 1)
                {
@@ -2332,7 +2388,7 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
 
        src << "\n"
                "OpStore %operation %c_i32_1\n"
-               "OpStore %out %c_" << &outputType[1] << "_0\n"
+               "OpStore %out %c_" << (packFloatRes ? &packType[1] : &outputType[1]) << "_0\n"
                "\n";
 
        if (spec.packFloat16Bit)
@@ -2343,21 +2399,64 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
                        "%in1_val = OpLoad " << inputType2 << " %in1\n";
 
        src << "\n";
-       for(int operationNdx = 0; operationNdx < operationAmount; ++operationNdx)
+
+       switch (spec.spirvCase)
        {
-               src << sclarComparison  (opeartions[operationNdx], operationNdx,
-                                                               spec.inputs[0].varType.getBasicType(),
-                                                               outputType,
-                                                               spec.outputs[0].varType.getScalarSize());
-               src << moveBitOperation("%operation", moveBitNdx);
-               ++moveBitNdx;
+       case SPIRV_CASETYPE_COMPARE:
+               for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
+               {
+                       src << scalarComparison (COMPARE_OPERATIONS[operationNdx], operationNdx,
+                                                                       spec.inputs[0].varType.getBasicType(),
+                                                                       outputType,
+                                                                       spec.outputs[0].varType.getScalarSize());
+                       src << moveBitOperation("%operation", moveBitNdx);
+                       ++moveBitNdx;
+               }
+               break;
+       case SPIRV_CASETYPE_FREM:
+               src << "%frem_result = OpFRem " << (packFloatRes ? packType : outputType) << " %in0_val %in1_val\n"
+                       << "OpStore %out %frem_result\n";
+               break;
+       default:
+               DE_ASSERT(false);
+               break;
+       }
+
+       src << "\n"
+               "%out_val_final = OpLoad " << (packFloatRes ? packType : outputType) << " %out\n"
+               "%ssbo_dst_ptr = OpAccessChain %up_out %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n";
+
+       if (packFloatRes)
+       {
+               if (spec.inputs[0].varType.getScalarSize() > 1)
+               {
+                       for (int i = 0; i < spec.inputs[0].varType.getScalarSize(); ++i)
+                       {
+                               src << "%out_val_final_" << i << " = OpCompositeExtract %f16 %out_val_final " << i << "\n";
+                               src << "%out_composite_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << i << " %c_f16_minus1\n";
+                               src << "%u32_val_" << i << " = OpBitcast %u32 %out_composite_" << i << "\n";
+                       }
+
+                       src << "%u32_final_val = OpCompositeConstruct " << outputType;
+                       for (int i = 0; i < spec.inputs[0].varType.getScalarSize(); ++i)
+                               src << " %u32_val_" << i;
+                       src << "\n";
+                       src << "OpStore %ssbo_dst_ptr %u32_final_val\n";
+               }
+               else
+               {
+                       src <<
+                               "%out_composite = OpCompositeConstruct %v2f16 %out_val_final %c_f16_minus1\n"
+                               "%out_result = OpBitcast " << outputType << " %out_composite\n"
+                               "OpStore %ssbo_dst_ptr %out_result\n";
+               }
+       }
+       else
+       {
+               src << "OpStore %ssbo_dst_ptr %out_val_final\n";
        }
 
        src << "\n"
-               "%out_val_final = OpLoad " << outputType << " %out\n"
-               "%ssbo_dst_ptr = OpAccessChain %up_i32 %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n"
-               "OpStore %ssbo_dst_ptr %out_val_final\n"
-               "\n"
                "OpReturn\n"
                "OpFunctionEnd\n";
 
@@ -2367,7 +2466,7 @@ std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const boo
 
 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
 {
-       if(spec.spirVShader)
+       if (spec.spirvCase != SPIRV_CASETYPE_NONE)
        {
                bool    are16Bit        = false;
                bool    isMediump       = false;
@@ -2379,7 +2478,7 @@ std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec
                        if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
                                isMediump = true;
 
-                       if(isMediump && are16Bit)
+                       if (isMediump && are16Bit)
                                break;
                }
 
@@ -2413,7 +2512,7 @@ std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec
 
 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
 {
-       if(shaderSpec.spirVShader)
+       if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
                programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
        else
                programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
@@ -2446,7 +2545,7 @@ void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, v
        // For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
        // storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
        // the shader.
-       uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && m_shaderSpec.spirVShader);
+       uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
 
        // Create command pool
        cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
index 9abdc99..ed51780 100644 (file)
@@ -47,6 +47,14 @@ struct Symbol
        Symbol (const std::string& name_, const glu::VarType& varType_) : name(name_), varType(varType_) {}
 };
 
+enum SpirVCaseT // Selects which hand-written SPIR-V assembly test body, if any, a ShaderSpec asks for.
+{
+       SPIRV_CASETYPE_NONE = 0, //!< Not a SPIR-V assembly case (ShaderSpec default); the compute shader is added as GLSL source.
+       SPIRV_CASETYPE_COMPARE, //!< SPIR-V assembly case: one scalarComparison is emitted per COMPARE_OPERATIONS entry.
+       SPIRV_CASETYPE_FREM, //!< SPIR-V assembly case: a single OpFRem of the two inputs is stored to the output.
+       SPIRV_CASETYPE_MAX_ENUM, //!< Presumably an end-of-enum sentinel, not a real case; generateSpirv asserts on any value other than COMPARE/FREM.
+};
+
 //! Complete shader specification.
 struct ShaderSpec
 {
@@ -57,12 +65,12 @@ struct ShaderSpec
        std::string                             source;                         //!< Source snippet to be executed.
        vk::ShaderBuildOptions  buildOptions;
        bool                                    packFloat16Bit;
-       bool                                    spirVShader;
+       SpirVCaseT                              spirvCase;
 
        ShaderSpec (void)
                : glslVersion           (glu::GLSL_VERSION_450)
                , packFloat16Bit        (false)
-               , spirVShader           (false)
+               , spirvCase                     (SPIRV_CASETYPE_NONE)
        {}
 };
 
@@ -83,7 +91,8 @@ public:
        bool                                    areInputs16Bit          (void) const;
        bool                                    areOutputs16Bit         (void) const;
        bool                                    isOutput16Bit           (const size_t ndx) const;
-       bool                                    isSpirVShader           (void) {return m_shaderSpec.spirVShader;}
+       bool                                    isSpirVShader           (void) { return (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE); }
+       SpirVCaseT                              spirvCase                       (void) { return m_shaderSpec.spirvCase; }
 
 protected:
                                                        ShaderExecutor          (Context& context, const ShaderSpec& shaderSpec)
index 8043588..db2e499 100644 (file)
@@ -304768,6 +304768,14 @@ dEQP-VK.glsl.builtin.precision.mod.highp.scalar
 dEQP-VK.glsl.builtin.precision.mod.highp.vec2
 dEQP-VK.glsl.builtin.precision.mod.highp.vec3
 dEQP-VK.glsl.builtin.precision.mod.highp.vec4
+dEQP-VK.glsl.builtin.precision.frem.mediump.scalar
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec2
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec3
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.highp.scalar
+dEQP-VK.glsl.builtin.precision.frem.highp.vec2
+dEQP-VK.glsl.builtin.precision.frem.highp.vec3
+dEQP-VK.glsl.builtin.precision.frem.highp.vec4
 dEQP-VK.glsl.builtin.precision.modf.mediump
 dEQP-VK.glsl.builtin.precision.modf.highp
 dEQP-VK.glsl.builtin.precision.min.mediump.scalar
@@ -305102,6 +305110,10 @@ dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec2
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec3
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec4
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.modf.compute
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.vec2
@@ -305332,6 +305344,10 @@ dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec2
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec3
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec4
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.modf.compute
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.vec2
index 2eea291..fdbe210 100644 (file)
@@ -304730,6 +304730,14 @@ dEQP-VK.glsl.builtin.precision.mod.highp.scalar
 dEQP-VK.glsl.builtin.precision.mod.highp.vec2
 dEQP-VK.glsl.builtin.precision.mod.highp.vec3
 dEQP-VK.glsl.builtin.precision.mod.highp.vec4
+dEQP-VK.glsl.builtin.precision.frem.mediump.scalar
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec2
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec3
+dEQP-VK.glsl.builtin.precision.frem.mediump.vec4
+dEQP-VK.glsl.builtin.precision.frem.highp.scalar
+dEQP-VK.glsl.builtin.precision.frem.highp.vec2
+dEQP-VK.glsl.builtin.precision.frem.highp.vec3
+dEQP-VK.glsl.builtin.precision.frem.highp.vec4
 dEQP-VK.glsl.builtin.precision.modf.mediump
 dEQP-VK.glsl.builtin.precision.modf.highp
 dEQP-VK.glsl.builtin.precision.min.mediump.scalar
@@ -305064,6 +305072,10 @@ dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec2
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec3
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage16b.frem.compute.vec4
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.modf.compute
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage16b.min.compute.vec2
@@ -305294,6 +305306,10 @@ dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec2
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec3
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.mod.compute.vec4
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.scalar
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec2
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec3
+dEQP-VK.glsl.builtin.precision_fp16_storage32b.frem.compute.vec4
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.modf.compute
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.scalar
 dEQP-VK.glsl.builtin.precision_fp16_storage32b.min.compute.vec2