enum AllocationKind
{
- ALLOCATION_KIND_SUBALLOCATED,
+ ALLOCATION_KIND_SUBALLOCATED = 0, // first enumerator now carries an explicit value of 0
ALLOCATION_KIND_DEDICATED,
-};
+ ALLOCATION_KIND_LAST, // NOTE(review): presumably an end-of-list marker rather than a real allocation kind -- confirm against its users
+};
de::MovePtr<Allocation> allocateBuffer (const InstanceInterface& vki,
const DeviceInterface& vkd,
deUint32 x=a;
deUint32 y=b;
- while (x%b != 0)
+ while (x%y != 0)
{
temp = y;
y = x%y;
{
// Only layers in range passed to clear command are cleared
- const deUint32 clearBaseLayer = (imageViewLayerRange ? imageViewLayerRange->baseArrayLayer : 0) + attachmentClearLayerRange->baseArrayLayer;
+ const deUint32 clearBaseLayer = (imageViewLayerRange ? imageViewLayerRange->baseArrayLayer : 0) + attachmentClearLayerRange->baseArrayLayer;
+ const deUint32 clearLayerCount = (attachmentClearLayerRange->layerCount == VK_REMAINING_ARRAY_LAYERS) ? imageViewLayerRange->layerCount : clearBaseLayer + attachmentClearLayerRange->layerCount;
- if ((arrayLayer < clearBaseLayer) || (arrayLayer >= (clearBaseLayer + attachmentClearLayerRange->layerCount)))
+ if ((arrayLayer < clearBaseLayer) || (arrayLayer >= (clearLayerCount)))
{
return false;
}
const ImageLayerParams imageLayerParamsToTest[] =
{
{
- 1u, // imageLayerCount
- {0u, 1u}, // imageViewRange
- {0u, 1u}, // clearLayerRange
- DE_NULL // testName
+ 1u, // imageLayerCount
+ {0u, 1u}, // imageViewRange
+ {0u, 1u}, // clearLayerRange
+ "single_layer" // testName
},
{
- 16u, // imageLayerCount
- {3u, 12u}, // imageViewRange
- {2u, 5u}, // clearLayerRange
- "multiple_layers" // testName
+ 16u, // imageLayerCount
+ {3u, 12u}, // imageViewRange
+ {2u, 5u}, // clearLayerRange
+ "multiple_layers" // testName
},
+ {
+ 16u, // imageLayerCount
+ { 3u, 12u }, // imageViewRange
+ { 8u, VK_REMAINING_ARRAY_LAYERS }, // clearLayerRange
+ "remaining_array_layers" // testName
+ }
};
+ // Include test cases with VK_REMAINING_ARRAY_LAYERS when using vkCmdClearColorImage
const size_t numOfImageLayerParamsToTest = DE_LENGTH_OF_ARRAY(imageLayerParamsToTest);
+ // Exclude test cases with VK_REMAINING_ARRAY_LAYERS when using vkCmdClearAttachments
+ const size_t numOfAttachmentLayerParamsToTest = numOfImageLayerParamsToTest - 1;
+
// Clear color image
{
const VkImageType imageTypesToTest[] =
};
for (size_t imageTypeIndex = 0; imageTypeIndex < numOfImageTypesToTest; ++imageTypeIndex)
- for (size_t imageFormatIndex = 0; imageFormatIndex < numOfColorImageFormatsToTest; ++imageFormatIndex)
- for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfImageLayerParamsToTest; ++imageLayerParamsIndex)
{
+ de::MovePtr<TestCaseGroup> imageTypeGroup(new TestCaseGroup(testCtx, getImageTypeCaseName(imageTypesToTest[imageTypeIndex]), ""));
- if (imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount > 1u && imageTypesToTest[imageTypeIndex] == VK_IMAGE_TYPE_3D)
+ for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfImageLayerParamsToTest; ++imageLayerParamsIndex)
{
// 3D ARRAY images are not supported
- continue;
- }
-
- const VkFormat format = colorImageFormatsToTest[imageFormatIndex];
- const TestParams testParams =
- {
- false, // bool useSingleMipLevel;
- imageTypesToTest[imageTypeIndex], // VkImageType imageType;
- format, // VkFormat imageFormat;
- imageDimensionsByType[imageTypeIndex], // VkExtent3D imageExtent;
- imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
- {
- 0u,
- imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount
- }, // LayerRange imageViewLayerRange;
- makeClearColorValue(format, 0.2f, 0.1f, 0.7f, 0.8f), // VkClearValue initValue;
- {
- makeClearColorValue(format, 0.1f, 0.5f, 0.3f, 0.9f), // VkClearValue clearValue[0];
- makeClearColorValue(format, 0.3f, 0.6f, 0.2f, 0.7f), // VkClearValue clearValue[1];
- },
- imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
- allocationKind // AllocationKind allocationKind;
- };
+ if (imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount > 1u && imageTypesToTest[imageTypeIndex] == VK_IMAGE_TYPE_3D)
+ continue;
- std::ostringstream testCaseName;
- testCaseName << getImageTypeCaseName(testParams.imageType) << "_" << getFormatCaseName(format);
- if (imageLayerParamsToTest[imageLayerParamsIndex].testName != DE_NULL)
- testCaseName << "_" << imageLayerParamsToTest[imageLayerParamsIndex].testName;
+ de::MovePtr<TestCaseGroup> imageLayersGroup(new TestCaseGroup(testCtx, imageLayerParamsToTest[imageLayerParamsIndex].testName, ""));
- colorImageClearTests->addChild(new InstanceFactory1<ClearColorImageTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName.str(), "Clear Color Image", testParams));
+ for (size_t imageFormatIndex = 0; imageFormatIndex < numOfColorImageFormatsToTest; ++imageFormatIndex)
+ {
+ const VkFormat format = colorImageFormatsToTest[imageFormatIndex];
+ const std::string testCaseName = getFormatCaseName(format);
+ const TestParams testParams =
+ {
+ false, // bool useSingleMipLevel;
+ imageTypesToTest[imageTypeIndex], // VkImageType imageType;
+ format, // VkFormat imageFormat;
+ imageDimensionsByType[imageTypeIndex], // VkExtent3D imageExtent;
+ imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
+ {
+ 0u,
+ imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount
+ }, // LayerRange imageViewLayerRange;
+ makeClearColorValue(format, 0.2f, 0.1f, 0.7f, 0.8f), // VkClearValue initValue;
+ {
+ makeClearColorValue(format, 0.1f, 0.5f, 0.3f, 0.9f), // VkClearValue clearValue[0];
+ makeClearColorValue(format, 0.3f, 0.6f, 0.2f, 0.7f), // VkClearValue clearValue[1];
+ },
+ imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
+ allocationKind // AllocationKind allocationKind;
+ };
+
+ imageLayersGroup->addChild(new InstanceFactory1<ClearColorImageTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName, "Clear Color Image", testParams));
+ }
+ imageTypeGroup->addChild(imageLayersGroup.release());
+ }
+ colorImageClearTests->addChild(imageTypeGroup.release());
}
-
imageClearingTests->addChild(colorImageClearTests.release());
}
// Clear depth/stencil image
{
- for (size_t imageFormatIndex = 0; imageFormatIndex < numOfDepthStencilImageFormatsToTest; ++imageFormatIndex)
for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfImageLayerParamsToTest; ++imageLayerParamsIndex)
{
- const TestParams testParams =
+ de::MovePtr<TestCaseGroup> imageLayersGroup(new TestCaseGroup(testCtx, imageLayerParamsToTest[imageLayerParamsIndex].testName, ""));
+
+ for (size_t imageFormatIndex = 0; imageFormatIndex < numOfDepthStencilImageFormatsToTest; ++imageFormatIndex)
{
- true, // bool useSingleMipLevel;
- VK_IMAGE_TYPE_2D, // VkImageType imageType;
- depthStencilImageFormatsToTest[imageFormatIndex], // VkFormat format;
- { 256, 256, 1 }, // VkExtent3D extent;
- imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
- {
- 0u,
- imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount
- }, // LayerRange imageViewLayerRange;
- makeClearValueDepthStencil(0.5f, 0x03), // VkClearValue initValue
+ const VkFormat format = depthStencilImageFormatsToTest[imageFormatIndex];
+ const std::string testCaseName = getFormatCaseName(format);
+ const TestParams testParams =
{
- makeClearValueDepthStencil(0.1f, 0x06), // VkClearValue clearValue[0];
- makeClearValueDepthStencil(0.3f, 0x04), // VkClearValue clearValue[1];
- },
- imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
- allocationKind // AllocationKind allocationKind;
- };
-
- std::ostringstream testCaseName;
- testCaseName << getImageTypeCaseName(testParams.imageType) << "_" << getFormatCaseName(testParams.imageFormat);
- if (imageLayerParamsToTest[imageLayerParamsIndex].testName != DE_NULL)
- testCaseName << "_" << imageLayerParamsToTest[imageLayerParamsIndex].testName;
-
- depthStencilImageClearTests->addChild(new InstanceFactory1<ClearDepthStencilImageTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName.str(), "Clear Depth/Stencil Image", testParams));
+ true, // bool useSingleMipLevel;
+ VK_IMAGE_TYPE_2D, // VkImageType imageType;
+ format, // VkFormat format;
+ { 256, 256, 1 }, // VkExtent3D extent;
+ imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
+ {
+ 0u,
+ imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount
+ }, // LayerRange imageViewLayerRange;
+ makeClearValueDepthStencil(0.5f, 0x03), // VkClearValue initValue
+ {
+ makeClearValueDepthStencil(0.1f, 0x06), // VkClearValue clearValue[0];
+ makeClearValueDepthStencil(0.3f, 0x04), // VkClearValue clearValue[1];
+ },
+ imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
+ allocationKind // AllocationKind allocationKind;
+ };
+
+ imageLayersGroup->addChild(new InstanceFactory1<ClearDepthStencilImageTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName, "Clear Depth/Stencil Image", testParams));
+ }
+ depthStencilImageClearTests->addChild(imageLayersGroup.release());
}
-
imageClearingTests->addChild(depthStencilImageClearTests.release());
}
// Clear color attachment
{
- for (size_t imageFormatIndex = 0; imageFormatIndex < numOfColorImageFormatsToTest; ++imageFormatIndex)
- for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfImageLayerParamsToTest; ++imageLayerParamsIndex)
+ for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfAttachmentLayerParamsToTest; ++imageLayerParamsIndex)
{
- const VkFormat format = colorImageFormatsToTest[imageFormatIndex];
+ de::MovePtr<TestCaseGroup> colorAttachmentClearLayersGroup(new TestCaseGroup(testCtx, imageLayerParamsToTest[imageLayerParamsIndex].testName, ""));
+ de::MovePtr<TestCaseGroup> partialColorAttachmentClearLayersGroup(new TestCaseGroup(testCtx, imageLayerParamsToTest[imageLayerParamsIndex].testName, ""));
- const TestParams testParams =
+ for (size_t imageFormatIndex = 0; imageFormatIndex < numOfColorImageFormatsToTest; ++imageFormatIndex)
{
- true, // bool useSingleMipLevel;
- VK_IMAGE_TYPE_2D, // VkImageType imageType;
- format, // VkFormat format;
- { 256, 256, 1 }, // VkExtent3D extent;
- imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
- imageLayerParamsToTest[imageLayerParamsIndex].imageViewRange, // LayerRange imageViewLayerRange;
- makeClearColorValue(format, 0.2f, 0.1f, 0.7f, 0.8f), // VkClearValue initValue
+ const VkFormat format = colorImageFormatsToTest[imageFormatIndex];
+ const std::string testCaseName = getFormatCaseName(format);
+ const TestParams testParams =
{
- makeClearColorValue(format, 0.1f, 0.5f, 0.3f, 0.9f), // VkClearValue clearValue[0];
- makeClearColorValue(format, 0.3f, 0.6f, 0.2f, 0.7f), // VkClearValue clearValue[1];
- },
- imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
- allocationKind // AllocationKind allocationKind;
- };
-
- std::ostringstream testCaseName;
- testCaseName << getImageTypeCaseName(testParams.imageType) << "_" << getFormatCaseName(format);
- if (imageLayerParamsToTest[imageLayerParamsIndex].testName != DE_NULL)
- testCaseName << "_" << imageLayerParamsToTest[imageLayerParamsIndex].testName;
-
- colorAttachmentClearTests->addChild(new InstanceFactory1<ClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName.str(), "Clear Color Attachment", testParams));
- partialColorAttachmentClearTests->addChild(new InstanceFactory1<PartialClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName.str(), "Partial Clear Color Attachment", testParams));
+ true, // bool useSingleMipLevel;
+ VK_IMAGE_TYPE_2D, // VkImageType imageType;
+ format, // VkFormat format;
+ { 256, 256, 1 }, // VkExtent3D extent;
+ imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
+ imageLayerParamsToTest[imageLayerParamsIndex].imageViewRange, // LayerRange imageViewLayerRange;
+ makeClearColorValue(format, 0.2f, 0.1f, 0.7f, 0.8f), // VkClearValue initValue
+ {
+ makeClearColorValue(format, 0.1f, 0.5f, 0.3f, 0.9f), // VkClearValue clearValue[0];
+ makeClearColorValue(format, 0.3f, 0.6f, 0.2f, 0.7f), // VkClearValue clearValue[1];
+ },
+ imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
+ allocationKind // AllocationKind allocationKind;
+ };
+
+ colorAttachmentClearLayersGroup->addChild(new InstanceFactory1<ClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName, "Clear Color Attachment", testParams));
+ partialColorAttachmentClearLayersGroup->addChild(new InstanceFactory1<PartialClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName, "Partial Clear Color Attachment", testParams));
+ }
+ colorAttachmentClearTests->addChild(colorAttachmentClearLayersGroup.release());
+ partialColorAttachmentClearTests->addChild(partialColorAttachmentClearLayersGroup.release());
}
-
imageClearingTests->addChild(colorAttachmentClearTests.release());
imageClearingTests->addChild(partialColorAttachmentClearTests.release());
}
// Clear depth/stencil attachment
{
- for (size_t imageFormatIndex = 0; imageFormatIndex < numOfDepthStencilImageFormatsToTest; ++imageFormatIndex)
- for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfImageLayerParamsToTest; ++imageLayerParamsIndex)
+ for (size_t imageLayerParamsIndex = 0; imageLayerParamsIndex < numOfAttachmentLayerParamsToTest; ++imageLayerParamsIndex)
{
- const TestParams testParams =
+ de::MovePtr<TestCaseGroup> depthStencilLayersGroup(new TestCaseGroup(testCtx, imageLayerParamsToTest[imageLayerParamsIndex].testName, ""));
+ de::MovePtr<TestCaseGroup> partialDepthStencilLayersGroup(new TestCaseGroup(testCtx, imageLayerParamsToTest[imageLayerParamsIndex].testName, ""));
+
+ for (size_t imageFormatIndex = 0; imageFormatIndex < numOfDepthStencilImageFormatsToTest; ++imageFormatIndex)
{
- true, // bool useSingleMipLevel;
- VK_IMAGE_TYPE_2D, // VkImageType imageType;
- depthStencilImageFormatsToTest[imageFormatIndex], // VkFormat format;
- { 256, 256, 1 }, // VkExtent3D extent;
- imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
- imageLayerParamsToTest[imageLayerParamsIndex].imageViewRange, // LayerRange imageViewLayerRange;
- makeClearValueDepthStencil(0.5f, 0x03), // VkClearValue initValue
+ const VkFormat format = depthStencilImageFormatsToTest[imageFormatIndex];
+ const std::string testCaseName = getFormatCaseName(format);
+ const TestParams testParams =
{
- makeClearValueDepthStencil(0.1f, 0x06), // VkClearValue clearValue[0];
- makeClearValueDepthStencil(0.3f, 0x04), // VkClearValue clearValue[1];
- },
- imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
- allocationKind // AllocationKind allocationKind;
- };
-
- std::ostringstream testCaseName;
- testCaseName << getImageTypeCaseName(testParams.imageType) << "_" << getFormatCaseName(testParams.imageFormat);
- if (imageLayerParamsToTest[imageLayerParamsIndex].testName != DE_NULL)
- testCaseName << "_" << imageLayerParamsToTest[imageLayerParamsIndex].testName;
-
- depthStencilAttachmentClearTests->addChild(new InstanceFactory1<ClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName.str(), "Clear Depth/Stencil Attachment", testParams));
- partialDepthStencilAttachmentClearTests->addChild(new InstanceFactory1<PartialClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName.str(), "Parital Clear Depth/Stencil Attachment", testParams));
+ true, // bool useSingleMipLevel;
+ VK_IMAGE_TYPE_2D, // VkImageType imageType;
+ format, // VkFormat format;
+ { 256, 256, 1 }, // VkExtent3D extent;
+ imageLayerParamsToTest[imageLayerParamsIndex].imageLayerCount, // deUint32 imageLayerCount;
+ imageLayerParamsToTest[imageLayerParamsIndex].imageViewRange, // LayerRange imageViewLayerRange;
+ makeClearValueDepthStencil(0.5f, 0x03), // VkClearValue initValue
+ {
+ makeClearValueDepthStencil(0.1f, 0x06), // VkClearValue clearValue[0];
+ makeClearValueDepthStencil(0.3f, 0x04), // VkClearValue clearValue[1];
+ },
+ imageLayerParamsToTest[imageLayerParamsIndex].clearLayerRange, // LayerRange clearLayerRange;
+ allocationKind // AllocationKind allocationKind;
+ };
+
+ depthStencilLayersGroup->addChild(new InstanceFactory1<ClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName, "Clear Depth/Stencil Attachment", testParams));
+ partialDepthStencilLayersGroup->addChild(new InstanceFactory1<PartialClearAttachmentTestInstance, TestParams>(testCtx, NODETYPE_SELF_VALIDATE, testCaseName, "Parital Clear Depth/Stencil Attachment", testParams));
+ }
+ depthStencilAttachmentClearTests->addChild(depthStencilLayersGroup.release());
+ partialDepthStencilAttachmentClearTests->addChild(partialDepthStencilLayersGroup.release());
}
-
imageClearingTests->addChild(depthStencilAttachmentClearTests.release());
imageClearingTests->addChild(partialDepthStencilAttachmentClearTests.release());
}
#include "tcuStringTemplate.hpp"
#include "tcuTestLog.hpp"
#include "tcuVectorUtil.hpp"
+#include "tcuInterval.hpp"
#include "vkDefs.hpp"
#include "vkDeviceUtil.hpp"
#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
+#include "deMath.h"
#include "tcuStringTemplate.hpp"
#include "vktSpvAsm16bitStorageTests.hpp"
}
}
+// Returns a random signed 64-bit value with a more logarithmic distribution:
+// only a randomly chosen number of low bits (the count comes from
+// rnd.getInt(1, 63)) of a raw 64-bit draw are kept, then the sign is flipped
+// at random. The three draws from rnd happen in this order: raw bits, bit
+// count, sign.
+deInt64 randomInt64LogDistributed (de::Random& rnd)
+{
+	const deUint64	rawBits		= rnd.getUint64();
+	const int		keptBits	= rnd.getInt(1, 63);
+	const deUint64	lowBitMask	= (1ull << keptBits) - 1;
+	const deInt64	magnitude	= static_cast<deInt64>(rawBits & lowBitMask);
+
+	// As long as keptBits <= 63 the mask clears bit 63, so the magnitude is
+	// non-negative and negating it cannot overflow.
+	return rnd.getBool() ? -magnitude : magnitude;
+}
+
+// Overwrites dst[0 .. numValues) with values from randomInt64LogDistributed().
+// dst is indexed directly and never resized here, so the caller must pass a
+// vector that already holds at least numValues elements.
+static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
+{
+	int slot = 0;
+
+	while (slot < numValues)
+	{
+		dst[slot] = randomInt64LogDistributed(rnd);
+		++slot;
+	}
+}
+
+// Filtered variant: overwrites dst[0 .. numValues) with values from
+// randomInt64LogDistributed(), redrawing each value until filter(value)
+// evaluates to true. dst is indexed directly and never resized here.
+// A filter that never accepts makes this loop forever.
+template<typename FilterT>
+static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
+{
+	for (int slot = 0; slot < numValues; ++slot)
+	{
+		deInt64 candidate = randomInt64LogDistributed(rnd);
+
+		// Rejection sampling: keep drawing until the predicate is satisfied.
+		while (!filter(candidate))
+			candidate = randomInt64LogDistributed(rnd);
+
+		dst[slot] = candidate;
+	}
+}
+
+// Predicate: true for zero and for every positive value.
+inline bool filterNonNegative (const deInt64 value)
+{
+	const bool isNegative = (value < 0);
+	return !isNegative;
+}
+
+// Predicate: true only for values strictly greater than zero.
+inline bool filterPositive (const deInt64 value)
+{
+	const bool isZeroOrNegative = (value <= 0);
+	return !isZeroOrNegative;
+}
+
+// Predicate: true for every value except zero.
+inline bool filterNotZero (const deInt64 value)
+{
+	const bool isZero = (value == 0);
+	return !isZero;
+}
+
static void floorAll (vector<float>& values)
{
for (size_t i = 0; i < values.size(); i++)
useStorageBuffer ? "opatomic_storage_buffer" : "opatomic",
"Test the OpAtomic* opcodes"));
de::Random rnd (deStringHash(group->getName()));
- const int numElements = 1000000;
+ const int numElements = 65535;
vector<OpAtomicCase> cases;
const StringTemplate shaderTemplate (
"OpMemberDecorate %sumbuf 0 Coherent\n"
"OpMemberDecorate %sumbuf 0 Offset 0\n"
- "%void = OpTypeVoid\n"
- "%voidf = OpTypeFunction %void\n"
- "%u32 = OpTypeInt 32 0\n"
- "%i32 = OpTypeInt 32 1\n"
- "%uvec3 = OpTypeVector %u32 3\n"
- "%uvec3ptr = OpTypePointer Input %uvec3\n"
- "%i32ptr = OpTypePointer ${BLOCK_POINTER_TYPE} %i32\n"
- "%i32arr = OpTypeRuntimeArray %i32\n"
+ + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
"%buf = OpTypeStruct %i32arr\n"
"%bufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
return group.release();
}
+// Result verifier for the NMin compute test.
+//
+// Compares every float of the single output allocation with the float at the
+// same index of the single expected buffer:
+//  - expected is NaN: the element is accepted whatever the result is (NaN
+//    included), exactly as before;
+//  - expected is not NaN: the result must not be NaN and must lie within
+//    0.00001 of the expected value.
+//
+// Returns false when either container does not hold exactly one entry or as
+// soon as one element is rejected; true otherwise. The first (inputs) and last
+// (log) arguments are unused.
+bool compareNMin (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+	// expectedOutputs[0] is read below, so guard its size as well.
+	if (outputAllocs.size() != 1 || expectedOutputs.size() != 1)
+		return false;
+
+	const BufferSp&		expectedOutput			= expectedOutputs[0];
+	const float* const	expectedOutputAsFloat	= static_cast<const float*>(expectedOutput->data());
+	const float* const	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
+	const size_t		numFloats				= expectedOutput->getNumBytes() / sizeof(float);
+
+	for (size_t idx = 0; idx < numFloats; ++idx)
+	{
+		const float expected	= expectedOutputAsFloat[idx];
+		const float result		= outputAsFloat[idx];
+
+		// For NMin, we accept NaN as output if both inputs were NaN; nothing
+		// stricter is required of an element whose expected value is NaN.
+		if (tcu::Float32(expected).isNaN())
+			continue;
+
+		// Otherwise NaN is the wrong choice, as on architectures that do not
+		// handle NaN, those are huge values. It has to be rejected explicitly:
+		// (NaN - x) is NaN and every ordered comparison against NaN is false,
+		// so the tolerance check alone would silently let a NaN result pass.
+		if (tcu::Float32(result).isNaN() || deFloatAbs(result - expected) > 0.00001f)
+			return false;
+	}
+
+	return true;
+}
+
+// Builds the "opnmin" group holding a single compute case named "all".
+//
+// Two 200-element operand arrays are filled with random floats in
+// [-10000, 10000]. Indices that are multiples of 4 get NaN in the first operand,
+// indices congruent to 2 modulo 4 get NaN in the second operand; in both cases
+// the other operand is the expected result. Odd indices expect the plain
+// minimum. Index 0 has NaN in both operands and a NaN expected value.
+// Results are checked by compareNMin. The caller owns the returned group.
+tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup>	group		(new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
+	de::Random						rnd			(deStringHash(group->getName()));
+	const int						numElements	= 200;
+	vector<float>					operandsA	(numElements, 0.0f);
+	vector<float>					operandsB	(numElements, 0.0f);
+	vector<float>					expected	(numElements, 0.0f);
+
+	fillRandomScalars(rnd, -10000.f, 10000.f, &operandsA[0], numElements);
+	fillRandomScalars(rnd, -10000.f, 10000.f, &operandsB[0], numElements);
+
+	// Make the first case a full-NaN case.
+	operandsA[0] = TCU_NAN;
+	operandsB[0] = TCU_NAN;
+
+	for (int ndx = 0; ndx < numElements; ++ndx)
+	{
+		switch (ndx & 3)
+		{
+			case 0:
+				// NaN in the first operand: the second operand is the answer.
+				expected[ndx]	= operandsB[ndx];
+				operandsA[ndx]	= TCU_NAN;
+				break;
+
+			case 2:
+				// NaN in the second operand: the first operand is the answer.
+				expected[ndx]	= operandsA[ndx];
+				operandsB[ndx]	= TCU_NAN;
+				break;
+
+			default:
+				// Odd indices carry no NaN: pick the smallest.
+				expected[ndx]	= std::min(operandsA[ndx], operandsB[ndx]);
+				break;
+		}
+	}
+
+	// Everything that precedes the shared type declarations.
+	const string preamble =
+		"OpCapability Shader\n"
+		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
+		"OpMemoryModel Logical GLSL450\n"
+		"OpEntryPoint GLCompute %main \"main\" %id\n"
+		"OpExecutionMode %main LocalSize 1 1 1\n"
+
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		"OpDecorate %buf BufferBlock\n"
+		"OpDecorate %indata1 DescriptorSet 0\n"
+		"OpDecorate %indata1 Binding 0\n"
+		"OpDecorate %indata2 DescriptorSet 0\n"
+		"OpDecorate %indata2 Binding 1\n"
+		"OpDecorate %outdata DescriptorSet 0\n"
+		"OpDecorate %outdata Binding 2\n"
+		"OpDecorate %f32arr ArrayStride 4\n"
+		"OpMemberDecorate %buf 0 Offset 0\n";
+
+	// Buffer declarations and the entry point: one element per invocation.
+	const string body =
+		"%buf = OpTypeStruct %f32arr\n"
+		"%bufptr = OpTypePointer Uniform %buf\n"
+		"%indata1 = OpVariable %bufptr Uniform\n"
+		"%indata2 = OpVariable %bufptr Uniform\n"
+		"%outdata = OpVariable %bufptr Uniform\n"
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero = OpConstant %i32 0\n"
+
+		"%main = OpFunction %void None %voidf\n"
+		"%label = OpLabel\n"
+		"%idval = OpLoad %uvec3 %id\n"
+		"%x = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+		"%inval1 = OpLoad %f32 %inloc1\n"
+		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+		"%inval2 = OpLoad %f32 %inloc2\n"
+		"%rem = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
+		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+		" OpStore %outloc %rem\n"
+		" OpReturn\n"
+		" OpFunctionEnd\n";
+
+	ComputeShaderSpec spec;
+
+	spec.assembly		= preamble + string(getComputeAsmCommonTypes()) + body;
+	spec.numWorkGroups	= IVec3(numElements, 1, 1);
+	spec.verifyIO		= &compareNMin;
+
+	// Push order matches the binding numbers declared in the shader.
+	spec.inputs.push_back(BufferSp(new Float32Buffer(operandsA)));
+	spec.inputs.push_back(BufferSp(new Float32Buffer(operandsB)));
+	spec.outputs.push_back(BufferSp(new Float32Buffer(expected)));
+
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+	return group.release();
+}
+
+// Result verifier for the NMax compute test.
+//
+// Walks the single output allocation next to the first expected buffer, float
+// by float. A NaN result is always accepted; any other result is rejected when
+// it differs from the expected value by more than 0.00001.
+// Returns false when there is not exactly one output allocation or on the
+// first rejected element; true otherwise. The first (inputs) and last (log)
+// arguments are unused.
+bool compareNMax (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+	if (outputAllocs.size() != 1)
+		return false;
+
+	const BufferSp&		reference		= expectedOutputs[0];
+	const float* const	referenceValues	= static_cast<const float*>(reference->data());
+	const float* const	resultValues	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
+	const size_t		numFloats		= reference->getNumBytes() / sizeof(float);
+
+	for (size_t idx = 0; idx < numFloats; ++idx)
+	{
+		const float expected	= referenceValues[idx];
+		const float result		= resultValues[idx];
+
+		// For NMax, NaN is considered an acceptable result, since on
+		// architectures that do not handle NaNs, those are huge values.
+		if (tcu::Float32(result).isNaN())
+			continue;
+
+		if (deFloatAbs(result - expected) > 0.00001f)
+			return false;
+	}
+
+	return true;
+}
+
+// Builds the "opnmax" group holding a single compute case named "all".
+//
+// Two 200-element operand arrays are filled with random floats in
+// [-10000, 10000]. Indices that are multiples of 4 get NaN in the first operand,
+// indices congruent to 2 modulo 4 get NaN in the second operand; in both cases
+// the other operand is the expected result. Odd indices expect the plain
+// maximum. Index 0 has NaN in both operands and a NaN expected value.
+// Results are checked by compareNMax. The caller owns the returned group.
+tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup>	group		(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
+	de::Random						rnd			(deStringHash(group->getName()));
+	const int						numElements	= 200;
+	vector<float>					operandsA	(numElements, 0.0f);
+	vector<float>					operandsB	(numElements, 0.0f);
+	vector<float>					expected	(numElements, 0.0f);
+
+	fillRandomScalars(rnd, -10000.f, 10000.f, &operandsA[0], numElements);
+	fillRandomScalars(rnd, -10000.f, 10000.f, &operandsB[0], numElements);
+
+	// Make the first case a full-NaN case.
+	operandsA[0] = TCU_NAN;
+	operandsB[0] = TCU_NAN;
+
+	for (int ndx = 0; ndx < numElements; ++ndx)
+	{
+		switch (ndx & 3)
+		{
+			case 0:
+				// NaN in the first operand: the second operand is the answer.
+				expected[ndx]	= operandsB[ndx];
+				operandsA[ndx]	= TCU_NAN;
+				break;
+
+			case 2:
+				// NaN in the second operand: the first operand is the answer.
+				expected[ndx]	= operandsA[ndx];
+				operandsB[ndx]	= TCU_NAN;
+				break;
+
+			default:
+				// Odd indices carry no NaN: pick the biggest.
+				expected[ndx]	= std::max(operandsA[ndx], operandsB[ndx]);
+				break;
+		}
+	}
+
+	// Everything that precedes the shared type declarations.
+	const string preamble =
+		"OpCapability Shader\n"
+		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
+		"OpMemoryModel Logical GLSL450\n"
+		"OpEntryPoint GLCompute %main \"main\" %id\n"
+		"OpExecutionMode %main LocalSize 1 1 1\n"
+
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		"OpDecorate %buf BufferBlock\n"
+		"OpDecorate %indata1 DescriptorSet 0\n"
+		"OpDecorate %indata1 Binding 0\n"
+		"OpDecorate %indata2 DescriptorSet 0\n"
+		"OpDecorate %indata2 Binding 1\n"
+		"OpDecorate %outdata DescriptorSet 0\n"
+		"OpDecorate %outdata Binding 2\n"
+		"OpDecorate %f32arr ArrayStride 4\n"
+		"OpMemberDecorate %buf 0 Offset 0\n";
+
+	// Buffer declarations and the entry point: one element per invocation.
+	const string body =
+		"%buf = OpTypeStruct %f32arr\n"
+		"%bufptr = OpTypePointer Uniform %buf\n"
+		"%indata1 = OpVariable %bufptr Uniform\n"
+		"%indata2 = OpVariable %bufptr Uniform\n"
+		"%outdata = OpVariable %bufptr Uniform\n"
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero = OpConstant %i32 0\n"
+
+		"%main = OpFunction %void None %voidf\n"
+		"%label = OpLabel\n"
+		"%idval = OpLoad %uvec3 %id\n"
+		"%x = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+		"%inval1 = OpLoad %f32 %inloc1\n"
+		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+		"%inval2 = OpLoad %f32 %inloc2\n"
+		"%rem = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
+		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+		" OpStore %outloc %rem\n"
+		" OpReturn\n"
+		" OpFunctionEnd\n";
+
+	ComputeShaderSpec spec;
+
+	spec.assembly		= preamble + string(getComputeAsmCommonTypes()) + body;
+	spec.numWorkGroups	= IVec3(numElements, 1, 1);
+	spec.verifyIO		= &compareNMax;
+
+	// Push order matches the binding numbers declared in the shader.
+	spec.inputs.push_back(BufferSp(new Float32Buffer(operandsA)));
+	spec.inputs.push_back(BufferSp(new Float32Buffer(operandsB)));
+	spec.outputs.push_back(BufferSp(new Float32Buffer(expected)));
+
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+	return group.release();
+}
+
+// Result verifier for the NClamp compute test.
+//
+// The expected buffer stores two acceptable answers per result, at indices
+// 2*i and 2*i + 1; the output allocation stores one float per result. A result
+// is accepted when it and the first acceptable answer are both NaN, or when it
+// lies within 0.00001 of either acceptable answer.
+// Returns false when there is not exactly one output allocation or on the
+// first rejected result; true otherwise. The first (inputs) and last (log)
+// arguments are unused.
+bool compareNClamp (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+	if (outputAllocs.size() != 1)
+		return false;
+
+	const BufferSp&		reference		= expectedOutputs[0];
+	const float* const	acceptedValues	= static_cast<const float*>(reference->data());
+	const float* const	resultValues	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
+	const size_t		numResults		= reference->getNumBytes() / sizeof(float) / 2;
+
+	for (size_t idx = 0; idx < numResults; ++idx)
+	{
+		const float firstAccepted	= acceptedValues[idx * 2];
+		const float secondAccepted	= acceptedValues[idx * 2 + 1];
+		const float result			= resultValues[idx];
+
+		// For NClamp, we have two possible outcomes based on whether NaNs are
+		// handled or not. If either the min or the max value is NaN, the result
+		// is undefined, so this test doesn't stress those. If the clamped value
+		// is NaN and NaNs are handled, the result is min; if NaNs are not
+		// handled, they are big values that result in max. If all three
+		// parameters are NaN, the result should be NaN.
+		const bool bothNaN			= tcu::Float32(firstAccepted).isNaN() && tcu::Float32(result).isNaN();
+		const bool matchesFirst		= deFloatAbs(firstAccepted - result) < 0.00001f;
+		const bool matchesSecond	= deFloatAbs(secondAccepted - result) < 0.00001f;
+
+		if (!bothNaN && !matchesFirst && !matchesSecond)
+			return false;
+	}
+
+	return true;
+}
+
+// Builds the "opnclamp" group holding a single compute case named "all".
+//
+// Three 200-element arrays (value, clamp minimum, clamp maximum) are filled
+// with random floats in [-10000, 10000]; minimum and maximum are exchanged
+// wherever the minimum is larger. The expected buffer holds two acceptable
+// answers per element. Odd indices get a NaN value to clamp and accept either
+// the minimum or the maximum; even indices accept only the ordinary clamp
+// result. Index 0 is then overwritten to be NaN in all three inputs and in
+// both acceptable answers. Results are checked by compareNClamp.
+// The caller owns the returned group.
+tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
+	de::Random						rnd				(deStringHash(group->getName()));
+	const int						numElements		= 200;
+	vector<float>					valuesToClamp	(numElements, 0.0f);
+	vector<float>					clampMins		(numElements, 0.0f);
+	vector<float>					clampMaxs		(numElements, 0.0f);
+	vector<float>					acceptedResults	(numElements * 2, 0.0f);
+
+	fillRandomScalars(rnd, -10000.f, 10000.f, &valuesToClamp[0], numElements);
+	fillRandomScalars(rnd, -10000.f, 10000.f, &clampMins[0], numElements);
+	fillRandomScalars(rnd, -10000.f, 10000.f, &clampMaxs[0], numElements);
+
+	for (int ndx = 0; ndx < numElements; ++ndx)
+	{
+		float&			value		= valuesToClamp[ndx];
+		float&			lo			= clampMins[ndx];
+		float&			hi			= clampMaxs[ndx];
+		float* const	accepted	= &acceptedResults[ndx * 2];
+
+		// Results are only defined if max value is bigger than min value.
+		if (lo > hi)
+		{
+			const float previousLo = lo;
+			lo = hi;
+			hi = previousLo;
+		}
+
+		// Alternate between the NaN cases
+		if ((ndx & 1) != 0)
+		{
+			value = TCU_NAN;
+
+			// If NaN is handled, the result should be same as the clamp minimum.
+			// If NaN is not handled, the result should clamp to the clamp maximum.
+			accepted[0] = lo;
+			accepted[1] = hi;
+		}
+		else
+		{
+			// Not a NaN case - only one legal result.
+			const float clamped = std::min(std::max(value, lo), hi);
+
+			accepted[0] = clamped;
+			accepted[1] = clamped;
+		}
+	}
+
+	// Make the first case a full-NaN case.
+	valuesToClamp[0]	= TCU_NAN;
+	clampMins[0]		= TCU_NAN;
+	clampMaxs[0]		= TCU_NAN;
+	acceptedResults[0]	= TCU_NAN;
+	acceptedResults[1]	= TCU_NAN;
+
+	// Everything that precedes the shared type declarations.
+	const string preamble =
+		"OpCapability Shader\n"
+		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
+		"OpMemoryModel Logical GLSL450\n"
+		"OpEntryPoint GLCompute %main \"main\" %id\n"
+		"OpExecutionMode %main LocalSize 1 1 1\n"
+
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		"OpDecorate %buf BufferBlock\n"
+		"OpDecorate %indata1 DescriptorSet 0\n"
+		"OpDecorate %indata1 Binding 0\n"
+		"OpDecorate %indata2 DescriptorSet 0\n"
+		"OpDecorate %indata2 Binding 1\n"
+		"OpDecorate %indata3 DescriptorSet 0\n"
+		"OpDecorate %indata3 Binding 2\n"
+		"OpDecorate %outdata DescriptorSet 0\n"
+		"OpDecorate %outdata Binding 3\n"
+		"OpDecorate %f32arr ArrayStride 4\n"
+		"OpMemberDecorate %buf 0 Offset 0\n";
+
+	// Buffer declarations and the entry point: one element per invocation.
+	const string body =
+		"%buf = OpTypeStruct %f32arr\n"
+		"%bufptr = OpTypePointer Uniform %buf\n"
+		"%indata1 = OpVariable %bufptr Uniform\n"
+		"%indata2 = OpVariable %bufptr Uniform\n"
+		"%indata3 = OpVariable %bufptr Uniform\n"
+		"%outdata = OpVariable %bufptr Uniform\n"
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero = OpConstant %i32 0\n"
+
+		"%main = OpFunction %void None %voidf\n"
+		"%label = OpLabel\n"
+		"%idval = OpLoad %uvec3 %id\n"
+		"%x = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+		"%inval1 = OpLoad %f32 %inloc1\n"
+		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+		"%inval2 = OpLoad %f32 %inloc2\n"
+		"%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
+		"%inval3 = OpLoad %f32 %inloc3\n"
+		"%rem = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
+		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+		" OpStore %outloc %rem\n"
+		" OpReturn\n"
+		" OpFunctionEnd\n";
+
+	ComputeShaderSpec spec;
+
+	spec.assembly		= preamble + string(getComputeAsmCommonTypes()) + body;
+	spec.numWorkGroups	= IVec3(numElements, 1, 1);
+	spec.verifyIO		= &compareNClamp;
+
+	// Push order matches the binding numbers declared in the shader.
+	spec.inputs.push_back(BufferSp(new Float32Buffer(valuesToClamp)));
+	spec.inputs.push_back(BufferSp(new Float32Buffer(clampMins)));
+	spec.inputs.push_back(BufferSp(new Float32Buffer(clampMaxs)));
+	spec.outputs.push_back(BufferSp(new Float32Buffer(acceptedResults)));
+
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+	return group.release();
+}
+
+tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+ // Builds the "opsrem" group: one compute case per table entry below, each running OpSRem on numElements random 32-bit operand pairs.
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ int op1Min, op1Max; // operand ranges
+ int op2Min, op2Max;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
+ { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values. The "all" case therefore reports the caller-supplied negFailResult on mismatch.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ ComputeShaderSpec spec;
+ vector<deInt32> inputInts1 (numElements, 0);
+ vector<deInt32> inputInts2 (numElements, 0);
+ vector<deInt32> outputInts (numElements, 0);
+ // Draw both operand buffers from the case's ranges; filterNotZero is passed for the second operand (the divisor), presumably to reject zeros - confirm against its definition.
+ fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
+ fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
+
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ // Reference value: C++ integer '%' truncates toward zero, so a non-zero result takes the sign of the first operand, which is how OpSRem is specified.
+ outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
+ }
+
+ spec.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %i32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i32 %inloc1\n"
+ "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i32 %inloc2\n"
+ "%rem = OpSRem %i32 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
+ }
+
+ return group.release();
+}
+
+tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+ // Builds the "opsrem64" group: one compute case per table entry below, each running OpSRem on numElements random 64-bit operand pairs.
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ bool positive;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
+ { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values. The "all" case therefore reports the caller-supplied negFailResult on mismatch.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ ComputeShaderSpec spec;
+ vector<deInt64> inputInts1 (numElements, 0);
+ vector<deInt64> inputInts2 (numElements, 0);
+ vector<deInt64> outputInts (numElements, 0);
+ // Going by the filter names (confirm against their definitions): "positive" uses non-negative dividends and positive divisors; otherwise only zero divisors are excluded.
+ if (params.positive)
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
+ }
+ else
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
+ }
+
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ // Reference value: C++ integer '%' truncates toward zero, so a non-zero result takes the sign of the first operand, which is how OpSRem is specified.
+ outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
+ }
+
+ spec.assembly =
+ "OpCapability Int64\n"
+
+ + string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i64arr ArrayStride 8\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes())
+ + string(getComputeAsmCommonInt64Types()) +
+
+ "%buf = OpTypeStruct %i64arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i64 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i64 %inloc1\n"
+ "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i64 %inloc2\n"
+ "%rem = OpSRem %i64 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
+ }
+
+ return group.release();
+}
+
+tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+ // Builds the "opsmod" group: one compute case per table entry below, each running OpSMod on numElements random 32-bit operand pairs.
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ int op1Min, op1Max; // operand ranges
+ int op2Min, op2Max;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
+ { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values. The "all" case therefore reports the caller-supplied negFailResult on mismatch.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+
+ ComputeShaderSpec spec;
+ vector<deInt32> inputInts1 (numElements, 0);
+ vector<deInt32> inputInts2 (numElements, 0);
+ vector<deInt32> outputInts (numElements, 0);
+ // Draw both operand buffers from the case's ranges; filterNotZero is passed for the second operand (the divisor), presumably to reject zeros - confirm against its definition.
+ fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
+ fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
+ // Reference values: start from the C++ '%' remainder and adjust it to OpSMod's sign convention.
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
+ if (rem == 0)
+ {
+ outputInts[ndx] = 0;
+ }
+ else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
+ {
+ // They have the same sign
+ outputInts[ndx] = rem;
+ }
+ else
+ {
+ // They have opposite sign. The remainder operation takes the
+ // sign of inputInts1[ndx] but OpSMod is supposed to take the sign
+ // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
+ // the result has the correct sign and that it is still
+ // congruent to inputInts1[ndx] modulo inputInts2[ndx]
+ //
+ // See also http://mathforum.org/library/drmath/view/52343.html
+ outputInts[ndx] = rem + inputInts2[ndx];
+ }
+ }
+
+ spec.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %i32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i32 %inloc1\n"
+ "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i32 %inloc2\n"
+ "%rem = OpSMod %i32 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
+ }
+
+ return group.release();
+}
+
+// Builds the "opsmod64" compute group: one case per table entry below, each
+// running OpSMod on numElements random 64-bit operand pairs and comparing the
+// shader output against a host-computed reference.
+//
+// negFailResult is the status reported when the "all" case (which includes
+// negative operands) produces mismatching output.
+tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ // The description names the 64-bit variant, matching the "opsrem64" group.
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the 64-bit OpSMod instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ bool positive;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
+ { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+ // The "all" case therefore reports the caller-supplied negFailResult on mismatch.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+
+ ComputeShaderSpec spec;
+ vector<deInt64> inputInts1 (numElements, 0);
+ vector<deInt64> inputInts2 (numElements, 0);
+ vector<deInt64> outputInts (numElements, 0);
+
+ // Going by the filter names (confirm against their definitions): "positive"
+ // uses non-negative dividends and positive divisors; otherwise only zero
+ // divisors are excluded.
+ if (params.positive)
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
+ }
+ else
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
+ }
+
+ // Reference values: start from the C++ '%' remainder and adjust it to
+ // OpSMod's sign convention.
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
+ if (rem == 0)
+ {
+ outputInts[ndx] = 0;
+ }
+ else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
+ {
+ // They have the same sign
+ outputInts[ndx] = rem;
+ }
+ else
+ {
+ // They have opposite sign. The remainder operation takes the
+ // sign of inputInts1[ndx] but OpSMod is supposed to take the sign
+ // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
+ // the result has the correct sign and that it is still
+ // congruent to inputInts1[ndx] modulo inputInts2[ndx]
+ //
+ // See also http://mathforum.org/library/drmath/view/52343.html
+ outputInts[ndx] = rem + inputInts2[ndx];
+ }
+ }
+
+ spec.assembly =
+ "OpCapability Int64\n"
+
+ + string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i64arr ArrayStride 8\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes())
+ + string(getComputeAsmCommonInt64Types()) +
+
+ "%buf = OpTypeStruct %i64arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i64 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i64 %inloc1\n"
+ "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i64 %inloc2\n"
+ "%rem = OpSMod %i64 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
+ }
+
+ return group.release();
+}
+
// Copy contents in the input buffer to the output buffer.
tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
{
+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
- "%ivec3 = OpTypeVector %i32 3\n"
- "%buf = OpTypeStruct %i32arr\n"
- "%bufptr = OpTypePointer Uniform %buf\n"
- "%indata = OpVariable %bufptr Uniform\n"
- "%outdata = OpVariable %bufptr Uniform\n"
+ "%ivec3 = OpTypeVector %i32 3\n"
+ "%buf = OpTypeStruct %i32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
- "%id = OpVariable %uvec3ptr Input\n"
- "%zero = OpConstant %i32 0\n"
- "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
+ "%vec3_undef = OpUndef %ivec3\n"
"%sc_0 = OpSpecConstant %i32 0\n"
"%sc_1 = OpSpecConstant %i32 0\n"
"%sc_2 = OpSpecConstant %i32 0\n"
- "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
- "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
- "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
- "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %sc_vec3_1 1 0 4\n" // (0, sc_0, sc_1)
- "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
- "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
- "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
- "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
- "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
- "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
+ "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
+ "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
+ "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
+ "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
+ "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
+ "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
+ "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
+ "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
+ "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
+ "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
+ "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
+ "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
+ "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
return group.release();
}
+
} // anonymous
tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
"OpDecorate %sc_2 SpecId 2\n";
const char typesAndConstants2[] =
- "%v3i32 = OpTypeVector %i32 3\n"
-
- "%sc_0 = OpSpecConstant %i32 0\n"
- "%sc_1 = OpSpecConstant %i32 0\n"
- "%sc_2 = OpSpecConstant %i32 0\n"
-
+ "%v3i32 = OpTypeVector %i32 3\n"
"%vec3_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
- "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
- "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
- "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
- "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %sc_vec3_1 1 0 4\n" // (0, sc_0, sc_1)
- "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
- "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
- "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
- "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
- "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
- "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
+ "%vec3_undef = OpUndef %v3i32\n"
+
+ "%sc_0 = OpSpecConstant %i32 0\n"
+ "%sc_1 = OpSpecConstant %i32 0\n"
+ "%sc_2 = OpSpecConstant %i32 0\n"
+ "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
+ "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
+ "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
+ "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
+ "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
+ "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
+ "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
+ "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
+ "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
+ "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
+ "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
+ "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
+ "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
const char function2[] =
"%test_code = OpFunction %v4f32 None %v4f32_function\n"
return testGroup.release();
}
+// Test for the OpSRem instruction: builds the "srem" graphics group, adding each case in the table below via createTestsForAllStages; negFailResult is the status used when the negative-operand case mismatches.
+tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
+ map<string, string> fragments;
+ // Constants the test function uses to convert between normalized colors and biased integers.
+ fragments["pre_main"] =
+ "%c_f32_255 = OpConstant %f32 255.0\n"
+ "%c_i32_128 = OpConstant %i32 128\n"
+ "%c_i32_255 = OpConstant %i32 255\n"
+ "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
+ "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
+ "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
+
+ // The test does the following.
+ // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
+ // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
+ // return float(result + 128) / 255.0;
+ fragments["testfun"] =
+ "%test_code = OpFunction %v4f32 None %v4f32_function\n"
+ "%param1 = OpFunctionParameter %v4f32\n"
+ "%label_testfun = OpLabel\n"
+ "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
+ "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
+ "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
+ "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
+ "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
+ "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
+ "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
+ "%x_out = OpSRem %i32 %x_in %y_in\n"
+ "%y_out = OpSRem %i32 %y_in %z_in\n"
+ "%z_out = OpSRem %i32 %z_in %x_in\n"
+ "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
+ "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
+ "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
+ "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
+ "OpReturnValue %float_out\n"
+ "OpFunctionEnd\n";
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessageTemplate; // customized status message
+ qpTestResult failResult; // override status on failure
+ int operands[4][3]; // four (x, y, z) vectors of operands
+ int results[4][3]; // four (x, y, z) vectors of results
+ } cases[] =
+ {
+ {
+ "positive",
+ "${reason}",
+ QP_TEST_RESULT_FAIL,
+ { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
+ { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
+ },
+ {
+ "all",
+ "Inconsistent results, but within specification: ${reason}",
+ negFailResult, // negative operands, not required by the spec
+ { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
+ { { 5, 12, -2 }, { 0, -5, 2 }, { 3, 8, -6 }, { 25, -60, 0 } }, // results
+ },
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values. Each results row is (x srem y, y srem z, z srem x) of the matching operands row.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ RGBA inputColors[4];
+ RGBA outputColors[4];
+ // Operands and expected results are biased by +128 when encoded as colors; the shader subtracts 128 on input and adds it back on output.
+ for (int i = 0; i < 4; ++i)
+ {
+ inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
+ outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
+ }
+
+ createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
+ }
+
+ return testGroup.release();
+}
+
+// Test for the OpSMod instruction: builds the "smod" graphics group, adding each case in the table below via createTestsForAllStages; negFailResult is the status used when the negative-operand case mismatches.
+tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
+ map<string, string> fragments;
+ // Constants the test function uses to convert between normalized colors and biased integers.
+ fragments["pre_main"] =
+ "%c_f32_255 = OpConstant %f32 255.0\n"
+ "%c_i32_128 = OpConstant %i32 128\n"
+ "%c_i32_255 = OpConstant %i32 255\n"
+ "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
+ "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
+ "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
+
+ // The test does the following.
+ // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
+ // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
+ // return float(result + 128) / 255.0;
+ fragments["testfun"] =
+ "%test_code = OpFunction %v4f32 None %v4f32_function\n"
+ "%param1 = OpFunctionParameter %v4f32\n"
+ "%label_testfun = OpLabel\n"
+ "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
+ "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
+ "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
+ "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
+ "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
+ "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
+ "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
+ "%x_out = OpSMod %i32 %x_in %y_in\n"
+ "%y_out = OpSMod %i32 %y_in %z_in\n"
+ "%z_out = OpSMod %i32 %z_in %x_in\n"
+ "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
+ "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
+ "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
+ "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
+ "OpReturnValue %float_out\n"
+ "OpFunctionEnd\n";
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessageTemplate; // customized status message
+ qpTestResult failResult; // override status on failure
+ int operands[4][3]; // four (x, y, z) vectors of operands
+ int results[4][3]; // four (x, y, z) vectors of results
+ } cases[] =
+ {
+ {
+ "positive",
+ "${reason}",
+ QP_TEST_RESULT_FAIL,
+ { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
+ { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
+ },
+ {
+ "all",
+ "Inconsistent results, but within specification: ${reason}",
+ negFailResult, // negative operands, not required by the spec
+ { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
+ { { 5, -5, 3 }, { 0, 2, -3 }, { 3, -73, 69 }, { -35, 40, 0 } }, // results
+ },
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values. Each results row is (x smod y, y smod z, z smod x) of the matching operands row, with non-zero results taking the divisor's sign.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ RGBA inputColors[4];
+ RGBA outputColors[4];
+ // Operands and expected results are biased by +128 when encoded as colors; the shader subtracts 128 on input and adds it back on output.
+ for (int i = 0; i < 4; ++i)
+ {
+ inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
+ outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
+ }
+
+ createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
+ }
+
+ return testGroup.release();
+}
+
enum IntegerType
{
INTEGER_TYPE_SIGNED_16,
computeTests->addChild(createOpUnreachableGroup(testCtx));
computeTests ->addChild(createOpQuantizeToF16Group(testCtx));
computeTests ->addChild(createOpFRemGroup(testCtx));
+ computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
+ computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
+ computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
+ computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
computeTests->addChild(createSConvertTests(testCtx));
computeTests->addChild(createUConvertTests(testCtx));
computeTests->addChild(createOpCompositeInsertGroup(testCtx));
computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
+ computeTests->addChild(createOpNMinGroup(testCtx));
+ computeTests->addChild(createOpNMaxGroup(testCtx));
+ computeTests->addChild(createOpNClampGroup(testCtx));
+ {
+ de::MovePtr<tcu::TestCaseGroup> computeAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
+
+ computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+ computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+
+ computeTests->addChild(computeAndroidTests.release());
+ }
+
computeTests->addChild(create16BitStorageComputeGroup(testCtx));
computeTests->addChild(createVariablePointersComputeGroup(testCtx));
graphicsTests->addChild(createOpNopTests(testCtx));
graphicsTests->addChild(createBarrierTests(testCtx));
graphicsTests->addChild(createDecorationGroupTests(testCtx));
graphicsTests->addChild(createFRemTests(testCtx));
+ graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
+ graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
+
+ {
+ de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
+
+ graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+ graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+
+ graphicsTests->addChild(graphicsAndroidTests.release());
+ }
graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));