From: Pyry Haulos Date: Tue, 24 Feb 2015 23:42:31 +0000 (-0800) Subject: Improve depth interpolation in reference renderer X-Git-Tag: upstream/0.1.0~1939^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f4238ec8ad0e9b0995142dc3b0b9a18de167ae3f;p=platform%2Fupstream%2FVK-GL-CTS.git Improve depth interpolation in reference renderer This change reformulates depth value interpolation to exactly preserve constant depth across a triangle. Old interpolation code exhibited serious Z-fighting with two (Z-)overlapping screen-aligned triangles. In addition barycentric precision is slightly improved by actually dividing with the sum instead of sharing pre-computed 1/sum. A simple test for constant depth and varying interpolation is provided. Change-Id: If6229f9652bcf3dbe1d273ed8e131b175c06dfc2 --- diff --git a/framework/referencerenderer/rrRasterizer.cpp b/framework/referencerenderer/rrRasterizer.cpp index 9a8bff1..db222f6 100644 --- a/framework/referencerenderer/rrRasterizer.cpp +++ b/framework/referencerenderer/rrRasterizer.cpp @@ -535,6 +535,13 @@ void TriangleRasterizer::rasterizeSingleSample (FragmentPacket* const fragmentPa const deUint64 halfPixel = 1ll << (RASTERIZER_SUBPIXEL_BITS-1); int packetNdx = 0; + // For depth interpolation; given barycentrics A, B, C = (1 - A - B) + // we can reformulate the usual z = z0*A + z1*B + z2*C into more + // stable equation z = A*(z0 - z2) + B*(z1 - z2) + z2. + const float za = m_v0.z()-m_v2.z(); + const float zb = m_v1.z()-m_v2.z(); + const float zc = m_v2.z(); + while (m_curPos.y() <= m_bboxMax.y() && packetNdx < maxFragmentPackets) { const int x0 = m_curPos.x(); @@ -597,15 +604,14 @@ void TriangleRasterizer::rasterizeSingleSample (FragmentPacket* const fragmentPa // Compute depth values. 
if (depthValues) { - const tcu::Vec4 ooSum = 1.0f / (e01f + e12f + e20f); - const tcu::Vec4 z0 = e12f * ooSum; - const tcu::Vec4 z1 = e20f * ooSum; - const tcu::Vec4 z2 = e01f * ooSum; - - depthValues[packetNdx*4+0] = z0[0]*m_v0.z() + z1[0]*m_v1.z() + z2[0]*m_v2.z(); - depthValues[packetNdx*4+1] = z0[1]*m_v0.z() + z1[1]*m_v1.z() + z2[1]*m_v2.z(); - depthValues[packetNdx*4+2] = z0[2]*m_v0.z() + z1[2]*m_v1.z() + z2[2]*m_v2.z(); - depthValues[packetNdx*4+3] = z0[3]*m_v0.z() + z1[3]*m_v1.z() + z2[3]*m_v2.z(); + const tcu::Vec4 edgeSum = e01f + e12f + e20f; + const tcu::Vec4 z0 = e12f / edgeSum; + const tcu::Vec4 z1 = e20f / edgeSum; + + depthValues[packetNdx*4+0] = z0[0]*za + z1[0]*zb + zc; + depthValues[packetNdx*4+1] = z0[1]*za + z1[1]*zb + zc; + depthValues[packetNdx*4+2] = z0[2]*za + z1[2]*zb + zc; + depthValues[packetNdx*4+3] = z0[3]*za + z1[3]*zb + zc; } // Compute barycentrics and write out fragment packet @@ -615,12 +621,12 @@ void TriangleRasterizer::rasterizeSingleSample (FragmentPacket* const fragmentPa const tcu::Vec4 b0 = e12f * m_v0.w(); const tcu::Vec4 b1 = e20f * m_v1.w(); const tcu::Vec4 b2 = e01f * m_v2.w(); - const tcu::Vec4 ooSum = 1.0f / (b0 + b1 + b2); + const tcu::Vec4 bSum = b0 + b1 + b2; packet.position = tcu::IVec2(x0, y0); packet.coverage = coverage; - packet.barycentric[0] = b0 * ooSum; - packet.barycentric[1] = b1 * ooSum; + packet.barycentric[0] = b0 / bSum; + packet.barycentric[1] = b1 / bSum; packet.barycentric[2] = 1.0f - packet.barycentric[0] - packet.barycentric[1]; packetNdx += 1; @@ -701,6 +707,11 @@ void TriangleRasterizer::rasterizeMultiSample (FragmentPacket* const fragmentPac const deUint64 halfPixel = 1ll << (RASTERIZER_SUBPIXEL_BITS-1); int packetNdx = 0; + // For depth interpolation, see rasterizeSingleSample + const float za = m_v0.z()-m_v2.z(); + const float zb = m_v1.z()-m_v2.z(); + const float zc = m_v2.z(); + switch (NumSamples) { case 2: samplePos = s_samplePos2; break; @@ -784,15 +795,14 @@ void 
TriangleRasterizer::rasterizeMultiSample (FragmentPacket* const fragmentPac const tcu::Vec4& e12f = e12[sampleNdx].asFloat(); const tcu::Vec4& e20f = e20[sampleNdx].asFloat(); - const tcu::Vec4 ooSum = 1.0f / (e01f + e12f + e20f); - const tcu::Vec4 z0 = e12f * ooSum; - const tcu::Vec4 z1 = e20f * ooSum; - const tcu::Vec4 z2 = e01f * ooSum; + const tcu::Vec4 edgeSum = e01f + e12f + e20f; + const tcu::Vec4 z0 = e12f / edgeSum; + const tcu::Vec4 z1 = e20f / edgeSum; - depthValues[(packetNdx*4+0)*NumSamples + sampleNdx] = z0[0]*m_v0.z() + z1[0]*m_v1.z() + z2[0]*m_v2.z(); - depthValues[(packetNdx*4+1)*NumSamples + sampleNdx] = z0[1]*m_v0.z() + z1[1]*m_v1.z() + z2[1]*m_v2.z(); - depthValues[(packetNdx*4+2)*NumSamples + sampleNdx] = z0[2]*m_v0.z() + z1[2]*m_v1.z() + z2[2]*m_v2.z(); - depthValues[(packetNdx*4+3)*NumSamples + sampleNdx] = z0[3]*m_v0.z() + z1[3]*m_v1.z() + z2[3]*m_v2.z(); + depthValues[(packetNdx*4+0)*NumSamples + sampleNdx] = z0[0]*za + z1[0]*zb + zc; + depthValues[(packetNdx*4+1)*NumSamples + sampleNdx] = z0[1]*za + z1[1]*zb + zc; + depthValues[(packetNdx*4+2)*NumSamples + sampleNdx] = z0[2]*za + z1[2]*zb + zc; + depthValues[(packetNdx*4+3)*NumSamples + sampleNdx] = z0[3]*za + z1[3]*zb + zc; } } @@ -816,12 +826,12 @@ void TriangleRasterizer::rasterizeMultiSample (FragmentPacket* const fragmentPac const tcu::Vec4 b0 = e12f * m_v0.w(); const tcu::Vec4 b1 = e20f * m_v1.w(); const tcu::Vec4 b2 = e01f * m_v2.w(); - const tcu::Vec4 ooSum = 1.0f / (b0 + b1 + b2); + const tcu::Vec4 bSum = b0 + b1 + b2; packet.position = tcu::IVec2(x0, y0); packet.coverage = coverage; - packet.barycentric[0] = b0 * ooSum; - packet.barycentric[1] = b1 * ooSum; + packet.barycentric[0] = b0 / bSum; + packet.barycentric[1] = b1 / bSum; packet.barycentric[2] = 1.0f - packet.barycentric[0] - packet.barycentric[1]; packetNdx += 1; diff --git a/framework/referencerenderer/rrVertexAttrib.hpp b/framework/referencerenderer/rrVertexAttrib.hpp index 7e7debf..ab94b94 100644 --- 
a/framework/referencerenderer/rrVertexAttrib.hpp +++ b/framework/referencerenderer/rrVertexAttrib.hpp @@ -123,6 +123,26 @@ struct VertexAttrib , pointer (DE_NULL) { } + + VertexAttrib (VertexAttribType type_, int size_, int stride_, int instanceDivisor_, const void* pointer_) + : type (type_) + , size (size_) + , stride (stride_) + , instanceDivisor (instanceDivisor_) + , pointer (pointer_) + { + } + + template<typename ScalarType> + explicit VertexAttrib (const tcu::Vector<ScalarType, 4>& generic_) + : type (VERTEXATTRIBTYPE_DONT_CARE) + , size (0) + , stride (0) + , instanceDivisor (0) + , pointer (DE_NULL) + , generic (generic_) + { + } }; bool isValidVertexAttrib (const VertexAttrib& vertexAttrib); diff --git a/modules/internal/CMakeLists.txt b/modules/internal/CMakeLists.txt index c226c0c..d14a271 100644 --- a/modules/internal/CMakeLists.txt +++ b/modules/internal/CMakeLists.txt @@ -23,6 +23,7 @@ set(DE_INTERNAL_TESTS_SRCS set(DE_INTERNAL_TESTS_LIBS tcutil + referencerenderer ) add_deqp_module(de-internal-tests "${DE_INTERNAL_TESTS_SRCS}" "${DE_INTERNAL_TESTS_LIBS}" ditTestPackageEntry.cpp) diff --git a/modules/internal/ditFrameworkTests.cpp b/modules/internal/ditFrameworkTests.cpp index ec8f241..d9f17d6 100644 --- a/modules/internal/ditFrameworkTests.cpp +++ b/modules/internal/ditFrameworkTests.cpp @@ -27,6 +27,12 @@ #include "tcuTestLog.hpp" #include "tcuCommandLine.hpp" +#include "rrRenderer.hpp" +#include "tcuTextureUtil.hpp" +#include "tcuVectorUtil.hpp" +#include "deRandom.hpp" +#include "tcuFloat.hpp" + namespace dit { @@ -545,6 +551,315 @@ public: } }; +inline deUint32 ulpDiff (float a, float b) +{ + const deUint32 ab = tcu::Float32(a).bits(); + const deUint32 bb = tcu::Float32(b).bits(); + return de::max(ab, bb) - de::min(ab, bb); +} + +template<int Size> +inline tcu::Vector<deUint32, Size> ulpDiff (const tcu::Vector<float, Size>& a, const tcu::Vector<float, Size>& b) +{ + tcu::Vector<deUint32, Size> res; + for (int ndx = 0; ndx < Size; ndx++) + res[ndx] = ulpDiff(a[ndx], b[ndx]); + return res; +} + +class ConstantInterpolationTest : public 
tcu::TestCase +{ +public: + ConstantInterpolationTest (tcu::TestContext& testCtx) + : tcu::TestCase(testCtx, "const_interpolation", "Constant value interpolation") + { + const int supportedMsaaLevels[] = {1, 2, 4, 8, 16}; + + for (int msaaNdx = 0; msaaNdx < DE_LENGTH_OF_ARRAY(supportedMsaaLevels); msaaNdx++) + { + const int numSamples = supportedMsaaLevels[msaaNdx]; + { + SubCase c; + c.rtSize = tcu::IVec3(128, 128, numSamples); + c.vtx[0] = tcu::Vec4(-1.0f, -1.0f, 0.5f, 1.0f); + c.vtx[1] = tcu::Vec4(-1.0f, +1.0f, 0.5f, 1.0f); + c.vtx[2] = tcu::Vec4(+1.0f, -1.0f, 0.5f, 1.0f); + c.varying = tcu::Vec4(0.0f, 1.0f, 8.0f, -8.0f); + m_cases.push_back(c); + } + + { + SubCase c; + c.rtSize = tcu::IVec3(128, 128, numSamples); + c.vtx[0] = tcu::Vec4(-1.0f, +1.0f, 0.5f, 1.0f); + c.vtx[1] = tcu::Vec4(+1.0f, -1.0f, 0.5f, 1.0f); + c.vtx[2] = tcu::Vec4(+1.0f, +1.0f, 0.5f, 1.0f); + c.varying = tcu::Vec4(0.0f, 1.0f, 8.0f, -8.0f); + m_cases.push_back(c); + } + { + SubCase c; + c.rtSize = tcu::IVec3(129, 113, numSamples); + c.vtx[0] = tcu::Vec4(-1.0f, -1.0f, 0.5f, 1.0f); + c.vtx[1] = tcu::Vec4(-1.0f, +1.0f, 0.5f, 1.0f); + c.vtx[2] = tcu::Vec4(+1.0f, -1.0f, 0.5f, 1.0f); + c.varying = tcu::Vec4(0.0f, 1.0f, 8.0f, -8.0f); + m_cases.push_back(c); + } + { + SubCase c; + c.rtSize = tcu::IVec3(107, 131, numSamples); + c.vtx[0] = tcu::Vec4(-1.0f, +1.0f, 0.5f, 1.0f); + c.vtx[1] = tcu::Vec4(+1.0f, -1.0f, 0.5f, 1.0f); + c.vtx[2] = tcu::Vec4(+1.0f, +1.0f, 0.5f, 1.0f); + c.varying = tcu::Vec4(0.0f, 1.0f, 8.0f, -8.0f); + m_cases.push_back(c); + } + } + + { + de::Random rnd(0x89423f); + for (int ndx = 0; ndx < 25; ndx++) + { + const float depth = rnd.getFloat()*2.0f - 1.0f; + SubCase c; + + c.rtSize.x() = rnd.getInt(16, 256); + c.rtSize.y() = rnd.getInt(16, 256); + c.rtSize.z() = rnd.choose(DE_ARRAY_BEGIN(supportedMsaaLevels), DE_ARRAY_END(supportedMsaaLevels)); + + for (int vtxNdx = 0; vtxNdx < DE_LENGTH_OF_ARRAY(c.vtx); vtxNdx++) + { + c.vtx[vtxNdx].x() = rnd.getFloat()*2.0f - 1.0f; + 
c.vtx[vtxNdx].y() = rnd.getFloat()*2.0f - 1.0f; + c.vtx[vtxNdx].z() = depth; + c.vtx[vtxNdx].w() = 1.0f; + } + + for (int compNdx = 0; compNdx < 4; compNdx++) + { + float v; + do + { + v = tcu::Float32(rnd.getUint32()).asFloat(); + } while (deFloatIsInf(v) || deFloatIsNaN(v)); + c.varying[compNdx] = v; + } + m_cases.push_back(c); + } + } + } + + void init (void) + { + m_caseIter = m_cases.begin(); + m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "All iterations passed"); + } + + IterateResult iterate (void) + { + { + tcu::ScopedLogSection section(m_testCtx.getLog(), "SubCase", ""); + runCase(*m_caseIter); + } + return (++m_caseIter != m_cases.end()) ? CONTINUE : STOP; + } + +protected: + struct SubCase + { + tcu::IVec3 rtSize; // (width, height, samples) + tcu::Vec4 vtx[3]; + tcu::Vec4 varying; + }; + + void runCase (const SubCase& subCase) + { + using namespace tcu; + + const deUint32 maxColorUlpDiff = 2; + const deUint32 maxDepthUlpDiff = 0; + + const int width = subCase.rtSize.x(); + const int height = subCase.rtSize.y(); + const int numSamples = subCase.rtSize.z(); + const float zn = 0.0f; + const float zf = 1.0f; + + TextureLevel interpolated (TextureFormat(TextureFormat::RGBA, TextureFormat::FLOAT), numSamples, width, height); + TextureLevel depthStencil (TextureFormat(TextureFormat::DS, TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV), numSamples, width, height); + + m_testCtx.getLog() << TestLog::Message + << "RT size (w, h, #samples) = " << subCase.rtSize << "\n" + << "vtx[0] = " << subCase.vtx[0] << "\n" + << "vtx[1] = " << subCase.vtx[1] << "\n" + << "vtx[2] = " << subCase.vtx[2] << "\n" + << "color = " << subCase.varying + << TestLog::EndMessage; + + clear (interpolated.getAccess(), subCase.varying - Vec4(0.0f, 0.0f, 0.0f, 1.0f)); + clearDepth (depthStencil.getAccess(), 0.0f); + clearStencil (depthStencil.getAccess(), 0); + + { + class VtxShader : public rr::VertexShader + { + public: + VtxShader (void) + : rr::VertexShader(2, 1) + { + m_inputs[0].type = 
rr::GENERICVECTYPE_FLOAT; + m_inputs[1].type = rr::GENERICVECTYPE_FLOAT; + m_outputs[0].type = rr::GENERICVECTYPE_FLOAT; + } + + void shadeVertices (const rr::VertexAttrib* inputs, rr::VertexPacket* const* packets, const int numPackets) const + { + for (int packetNdx = 0; packetNdx < numPackets; packetNdx++) + { + rr::readVertexAttrib(packets[packetNdx]->position, inputs[0], packets[packetNdx]->instanceNdx, packets[packetNdx]->vertexNdx); + packets[packetNdx]->outputs[0] = rr::readVertexAttribFloat(inputs[1], packets[packetNdx]->instanceNdx, packets[packetNdx]->vertexNdx); + } + } + } vtxShader; + + class FragShader : public rr::FragmentShader + { + public: + FragShader (void) + : rr::FragmentShader(1, 1) + { + m_inputs[0].type = rr::GENERICVECTYPE_FLOAT; + m_outputs[0].type = rr::GENERICVECTYPE_FLOAT; + } + + void shadeFragments (rr::FragmentPacket* packets, const int numPackets, const rr::FragmentShadingContext& context) const + { + for (int packetNdx = 0; packetNdx < numPackets; packetNdx++) + { + for (int fragNdx = 0; fragNdx < rr::NUM_FRAGMENTS_PER_PACKET; fragNdx++) + { + const tcu::Vec4 interp = rr::readTriangleVarying(packets[packetNdx], context, 0, fragNdx); + rr::writeFragmentOutput(context, packetNdx, fragNdx, 0, interp); + } + } + } + } fragShader; + + const rr::Program program (&vtxShader, &fragShader); + + const rr::MultisamplePixelBufferAccess colorAccess = rr::MultisamplePixelBufferAccess::fromMultisampleAccess(interpolated.getAccess()); + const rr::MultisamplePixelBufferAccess dsAccess = rr::MultisamplePixelBufferAccess::fromMultisampleAccess(depthStencil.getAccess()); + const rr::RenderTarget renderTarget (colorAccess, dsAccess, dsAccess); + const rr::VertexAttrib vertexAttribs[] = + { + rr::VertexAttrib(rr::VERTEXATTRIBTYPE_FLOAT, 4, 0, 0, subCase.vtx), + rr::VertexAttrib(subCase.varying) + }; + rr::ViewportState viewport (colorAccess); + rr::RenderState state (viewport); + const rr::DrawCommand drawCmd (state, renderTarget, program, 
DE_LENGTH_OF_ARRAY(vertexAttribs), vertexAttribs, rr::PrimitiveList(rr::PRIMITIVETYPE_TRIANGLES, 3, 0)); + const rr::Renderer renderer; + + viewport.zn = zn; + viewport.zf = zf; + + state.fragOps.depthTestEnabled = true; + state.fragOps.depthFunc = rr::TESTFUNC_ALWAYS; + state.fragOps.stencilTestEnabled = true; + state.fragOps.stencilStates[rr::FACETYPE_BACK].func = rr::TESTFUNC_ALWAYS; + state.fragOps.stencilStates[rr::FACETYPE_BACK].dpPass = rr::STENCILOP_INCR; + state.fragOps.stencilStates[rr::FACETYPE_FRONT] = state.fragOps.stencilStates[rr::FACETYPE_BACK]; + + renderer.draw(drawCmd); + } + + // Verify interpolated values + { + TextureLevel resolvedColor (interpolated.getFormat(), width, height); // For debugging + TextureLevel resolvedDepthStencil (depthStencil.getFormat(), width, height); // For debugging + TextureLevel errorMask (TextureFormat(TextureFormat::RGB, TextureFormat::UNORM_INT8), width, height); + const ConstPixelBufferAccess interpAccess = interpolated.getAccess(); + const ConstPixelBufferAccess dsAccess = depthStencil.getAccess(); + const PixelBufferAccess errorAccess = errorMask.getAccess(); + int numCoveredSamples = 0; + int numFailedColorSamples = 0; + int numFailedDepthSamples = 0; + const bool verifyDepth = (subCase.vtx[0].z() == subCase.vtx[1].z()) && + (subCase.vtx[1].z() == subCase.vtx[2].z()); + const float refDepth = subCase.vtx[0].z()*(zf - zn)/2.0f + (zn + zf)/2.0f; + + rr::resolveMultisampleColorBuffer(resolvedColor.getAccess(), rr::MultisampleConstPixelBufferAccess::fromMultisampleAccess(interpolated.getAccess())); + rr::resolveMultisampleColorBuffer(resolvedDepthStencil.getAccess(), rr::MultisampleConstPixelBufferAccess::fromMultisampleAccess(depthStencil.getAccess())); + clear(errorAccess, Vec4(0.0f, 1.0f, 0.0f, 1.0f)); + + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + for (int sampleNdx = 0; sampleNdx < numSamples; sampleNdx++) + { + if (dsAccess.getPixStencil(sampleNdx, x, y) != 0) + { + const 
Vec4 color = interpAccess.getPixel(sampleNdx, x, y); + const UVec4 colorDiff = ulpDiff(color, subCase.varying); + const bool colorOk = boolAll(lessThanEqual(colorDiff, tcu::UVec4(maxColorUlpDiff))); + + const float depth = dsAccess.getPixDepth(sampleNdx, x, y); + const deUint32 depthDiff = ulpDiff(depth, refDepth); + const bool depthOk = verifyDepth && (depthDiff <= maxDepthUlpDiff); + + const int maxMsgs = 10; + + numCoveredSamples += 1; + + if (!colorOk) + { + numFailedColorSamples += 1; + + if (numFailedColorSamples <= maxMsgs) + m_testCtx.getLog() << TestLog::Message + << "FAIL: " << tcu::IVec3(x, y, sampleNdx) + << " color ulp diff = " << colorDiff + << TestLog::EndMessage; + } + + if (!depthOk) + numFailedDepthSamples += 1; + + if (!colorOk || !depthOk) + errorAccess.setPixel(errorAccess.getPixel(x, y) + Vec4(1.0f, -1.0f, 0.0f, 0.0f) / float(numSamples-1), x, y); + } + } + } + } + + m_testCtx.getLog() << TestLog::Image("ResolvedColor", "Resolved colorbuffer", resolvedColor) + << TestLog::Image("ResolvedDepthStencil", "Resolved depth- & stencilbuffer", resolvedDepthStencil); + + if (numFailedColorSamples != 0 || numFailedDepthSamples != 0) + { + m_testCtx.getLog() << TestLog::Image("ErrorMask", "Error mask", errorMask); + + if (numFailedColorSamples != 0) + m_testCtx.getLog() << TestLog::Message << "FAIL: Found " << numFailedColorSamples << " invalid color samples!" << TestLog::EndMessage; + + if (numFailedDepthSamples != 0) + m_testCtx.getLog() << TestLog::Message << "FAIL: Found " << numFailedDepthSamples << " invalid depth samples!" 
<< TestLog::EndMessage; + + if (m_testCtx.getTestResult() == QP_TEST_RESULT_PASS) + m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid samples found"); + } + + m_testCtx.getLog() << TestLog::Message << (numCoveredSamples-numFailedColorSamples) << " / " << numCoveredSamples << " color samples passed" << TestLog::EndMessage; + m_testCtx.getLog() << TestLog::Message << (numCoveredSamples-numFailedDepthSamples) << " / " << numCoveredSamples << " depth samples passed" << TestLog::EndMessage; + } + } + + vector<SubCase> m_cases; + vector<SubCase>::const_iterator m_caseIter; +}; + class CommonFrameworkTests : public tcu::TestCaseGroup { public: @@ -562,6 +877,20 @@ public: } }; +class ReferenceRendererTests : public tcu::TestCaseGroup +{ +public: + ReferenceRendererTests (tcu::TestContext& testCtx) + : tcu::TestCaseGroup(testCtx, "reference_renderer", "Reference renderer tests") + { + } + + void init (void) + { + addChild(new ConstantInterpolationTest(m_testCtx)); + } +}; + } // anonymous FrameworkTests::FrameworkTests (tcu::TestContext& testCtx) @@ -577,6 +906,7 @@ void FrameworkTests::init (void) { addChild(new CommonFrameworkTests (m_testCtx)); addChild(new CaseListParserTests (m_testCtx)); + addChild(new ReferenceRendererTests (m_testCtx)); } }