From 29340067e919854db7d50bb8c0b666117b229f5e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mika=20Isoj=C3=A4rvi?= Date: Tue, 3 Feb 2015 14:34:22 -0800 Subject: [PATCH] Improve tcuTexture performance by caching pixel size. Cache pixel size of the texture format to improve performance. On N6, N7, N9 and my host machine this improves dEQP-GLES3.functional.texture tests runtime by ~1-2%. Change-Id: Ib804cfde7e24fa89a88b7ad907502d0ddded2cea --- framework/common/tcuTexture.cpp | 174 +++++++++++++++++++++------------------- framework/common/tcuTexture.hpp | 48 ++++++----- 2 files changed, 116 insertions(+), 106 deletions(-) diff --git a/framework/common/tcuTexture.cpp b/framework/common/tcuTexture.cpp index efc6f84..735ffc5 100644 --- a/framework/common/tcuTexture.cpp +++ b/framework/common/tcuTexture.cpp @@ -403,6 +403,15 @@ tcu::Vec4 unpackRGB999E5 (deUint32 color) } // anonymous +IVec3 calculatePackedPitch (const TextureFormat& format, const IVec3& size) +{ + const int pixelSize = format.getPixelSize(); + const int rowPitch = pixelSize * size.x(); + const int slicePitch = rowPitch * size.y(); + + return IVec3(pixelSize, rowPitch, slicePitch); +} + /** Get pixel size in bytes. 
*/ int TextureFormat::getPixelSize (void) const { @@ -492,44 +501,49 @@ int TextureFormat::getPixelSize (void) const } ConstPixelBufferAccess::ConstPixelBufferAccess (void) - : m_width (0) - , m_height (0) - , m_depth (0) - , m_rowPitch (0) - , m_slicePitch (0) + : m_size (0) + , m_pitch (0) , m_data (DE_NULL) { } ConstPixelBufferAccess::ConstPixelBufferAccess (const TextureFormat& format, int width, int height, int depth, const void* data) : m_format (format) - , m_width (width) - , m_height (height) - , m_depth (depth) - , m_rowPitch (width*format.getPixelSize()) - , m_slicePitch (m_rowPitch*height) + , m_size (width, height, depth) + , m_pitch (calculatePackedPitch(m_format, m_size)) + , m_data ((void*)data) +{ +} + +ConstPixelBufferAccess::ConstPixelBufferAccess (const TextureFormat& format, const IVec3& size, const void* data) + : m_format (format) + , m_size (size) + , m_pitch (calculatePackedPitch(m_format, m_size)) , m_data ((void*)data) { } ConstPixelBufferAccess::ConstPixelBufferAccess (const TextureFormat& format, int width, int height, int depth, int rowPitch, int slicePitch, const void* data) : m_format (format) - , m_width (width) - , m_height (height) - , m_depth (depth) - , m_rowPitch (rowPitch) - , m_slicePitch (slicePitch) + , m_size (width, height, depth) + , m_pitch (format.getPixelSize(), rowPitch, slicePitch) + , m_data ((void*)data) +{ +} + +ConstPixelBufferAccess::ConstPixelBufferAccess (const TextureFormat& format, const IVec3& size, const IVec3& pitch, const void* data) + : m_format (format) + , m_size (size) + , m_pitch (pitch) , m_data ((void*)data) { + DE_ASSERT(m_format.getPixelSize() <= m_pitch.x()); } ConstPixelBufferAccess::ConstPixelBufferAccess (const TextureLevel& level) : m_format (level.getFormat()) - , m_width (level.getWidth()) - , m_height (level.getHeight()) - , m_depth (level.getDepth()) - , m_rowPitch (m_width*m_format.getPixelSize()) - , m_slicePitch (m_rowPitch*m_height) + , m_size (level.getSize()) + , m_pitch 
(calculatePackedPitch(m_format, m_size)) , m_data ((void*)level.getPtr()) { } @@ -539,11 +553,21 @@ PixelBufferAccess::PixelBufferAccess (const TextureFormat& format, int width, in { } +PixelBufferAccess::PixelBufferAccess (const TextureFormat& format, const IVec3& size, void* data) + : ConstPixelBufferAccess(format, size, data) +{ +} + PixelBufferAccess::PixelBufferAccess (const TextureFormat& format, int width, int height, int depth, int rowPitch, int slicePitch, void* data) : ConstPixelBufferAccess(format, width, height, depth, rowPitch, slicePitch, data) { } +PixelBufferAccess::PixelBufferAccess (const TextureFormat& format, const IVec3& size, const IVec3& pitch, void* data) + : ConstPixelBufferAccess(format, size, pitch, data) +{ +} + PixelBufferAccess::PixelBufferAccess (TextureLevel& level) : ConstPixelBufferAccess(level) { @@ -557,22 +581,21 @@ void PixelBufferAccess::setPixels (const void* buf, int bufSize) const Vec4 ConstPixelBufferAccess::getPixel (int x, int y, int z) const { - DE_ASSERT(de::inBounds(x, 0, m_width)); - DE_ASSERT(de::inBounds(y, 0, m_height)); - DE_ASSERT(de::inBounds(z, 0, m_depth)); + DE_ASSERT(de::inBounds(x, 0, m_size.x())); + DE_ASSERT(de::inBounds(y, 0, m_size.y())); + DE_ASSERT(de::inBounds(z, 0, m_size.z())); + + const deUint8* pixelPtr = (const deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); // Optimized fomats. 
if (m_format.type == TextureFormat::UNORM_INT8) { if (m_format.order == TextureFormat::RGBA) - return readRGBA8888Float((const deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*4); + return readRGBA8888Float(pixelPtr); else if (m_format.order == TextureFormat::RGB) - return readRGB888Float((const deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*3); + return readRGB888Float(pixelPtr); } - int pixelSize = m_format.getPixelSize(); - const deUint8* pixelPtr = (const deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; - #define UB16(OFFS, COUNT) ((*((const deUint16*)pixelPtr) >> (OFFS)) & ((1<<(COUNT))-1)) #define UB32(OFFS, COUNT) ((*((const deUint32*)pixelPtr) >> (OFFS)) & ((1<<(COUNT))-1)) #define NB16(OFFS, COUNT) channelToNormFloat(UB16(OFFS, COUNT), (COUNT)) @@ -642,12 +665,11 @@ Vec4 ConstPixelBufferAccess::getPixel (int x, int y, int z) const IVec4 ConstPixelBufferAccess::getPixelInt (int x, int y, int z) const { - DE_ASSERT(de::inBounds(x, 0, m_width)); - DE_ASSERT(de::inBounds(y, 0, m_height)); - DE_ASSERT(de::inBounds(z, 0, m_depth)); + DE_ASSERT(de::inBounds(x, 0, m_size.x())); + DE_ASSERT(de::inBounds(y, 0, m_size.y())); + DE_ASSERT(de::inBounds(z, 0, m_size.z())); - int pixelSize = m_format.getPixelSize(); - const deUint8* pixelPtr = (const deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; + const deUint8* pixelPtr = (const deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); IVec4 result; // Optimized fomats. 
@@ -738,8 +760,7 @@ float ConstPixelBufferAccess::getPixDepth (int x, int y, int z) const DE_ASSERT(de::inBounds(y, 0, getHeight())); DE_ASSERT(de::inBounds(z, 0, getDepth())); - int pixelSize = m_format.getPixelSize(); - deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; + deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); #define UB32(OFFS, COUNT) ((*((const deUint32*)pixelPtr) >> (OFFS)) & ((1<<(COUNT))-1)) #define NB32(OFFS, COUNT) channelToNormFloat(UB32(OFFS, COUNT), (COUNT)) @@ -778,8 +799,7 @@ int ConstPixelBufferAccess::getPixStencil (int x, int y, int z) const DE_ASSERT(de::inBounds(y, 0, getHeight())); DE_ASSERT(de::inBounds(z, 0, getDepth())); - int pixelSize = m_format.getPixelSize(); - deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; + deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); switch (m_format.type) { @@ -818,26 +838,23 @@ void PixelBufferAccess::setPixel (const Vec4& color, int x, int y, int z) const DE_ASSERT(de::inBounds(y, 0, getHeight())); DE_ASSERT(de::inBounds(z, 0, getDepth())); + deUint8* const pixelPtr = (deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); + // Optimized fomats. 
if (m_format.type == TextureFormat::UNORM_INT8) { if (m_format.order == TextureFormat::RGBA) { - deUint8* const ptr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*4; - writeRGBA8888Float(ptr, color); + writeRGBA8888Float(pixelPtr, color); return; } else if (m_format.order == TextureFormat::RGB) { - deUint8* const ptr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*3; - writeRGB888Float(ptr, color); + writeRGB888Float(pixelPtr, color); return; } } - const int pixelSize = m_format.getPixelSize(); - deUint8* const pixelPtr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; - #define PN(VAL, OFFS, BITS) (normFloatToChannel((VAL), (BITS)) << (OFFS)) #define PU(VAL, OFFS, BITS) (uintToChannel((VAL), (BITS)) << (OFFS)) @@ -914,8 +931,7 @@ void PixelBufferAccess::setPixel (const IVec4& color, int x, int y, int z) const DE_ASSERT(de::inBounds(y, 0, getHeight())); DE_ASSERT(de::inBounds(z, 0, getDepth())); - int pixelSize = m_format.getPixelSize(); - deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; + deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); // Optimized fomats. 
if (m_format.type == TextureFormat::UNORM_INT8) @@ -976,8 +992,7 @@ void PixelBufferAccess::setPixDepth (float depth, int x, int y, int z) const DE_ASSERT(de::inBounds(y, 0, getHeight())); DE_ASSERT(de::inBounds(z, 0, getDepth())); - int pixelSize = m_format.getPixelSize(); - deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; + deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); #define PN(VAL, OFFS, BITS) (normFloatToChannel((VAL), (BITS)) << (OFFS)) @@ -1013,8 +1028,7 @@ void PixelBufferAccess::setPixStencil (int stencil, int x, int y, int z) const DE_ASSERT(de::inBounds(y, 0, getHeight())); DE_ASSERT(de::inBounds(z, 0, getDepth())); - int pixelSize = m_format.getPixelSize(); - deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_slicePitch + y*m_rowPitch + x*pixelSize; + deUint8* pixelPtr = (deUint8*)getDataPtr() + z*m_pitch.z() + y*m_pitch.y() + x*m_pitch.x(); #define PU(VAL, OFFS, BITS) (uintToChannel((deUint32)(VAL), (BITS)) << (OFFS)) @@ -1600,13 +1614,13 @@ static Vec4 sampleLinear3D (const ConstPixelBufferAccess& access, const Sampler& Vec4 ConstPixelBufferAccess::sample1D (const Sampler& sampler, Sampler::FilterMode filter, float s, int level) const { - DE_ASSERT(de::inBounds(level, 0, m_height)); + DE_ASSERT(de::inBounds(level, 0, m_size.y())); // Non-normalized coordinates. float u = s; if (sampler.normalizedCoords) - u = unnormalize(sampler.wrapS, s, m_width); + u = unnormalize(sampler.wrapS, s, m_size.x()); switch (filter) { @@ -1620,7 +1634,7 @@ Vec4 ConstPixelBufferAccess::sample1D (const Sampler& sampler, Sampler::FilterMo Vec4 ConstPixelBufferAccess::sample2D (const Sampler& sampler, Sampler::FilterMode filter, float s, float t, int depth) const { - DE_ASSERT(de::inBounds(depth, 0, m_depth)); + DE_ASSERT(de::inBounds(depth, 0, m_size.z())); // Non-normalized coordinates. 
float u = s; @@ -1628,8 +1642,8 @@ Vec4 ConstPixelBufferAccess::sample2D (const Sampler& sampler, Sampler::FilterMo if (sampler.normalizedCoords) { - u = unnormalize(sampler.wrapS, s, m_width); - v = unnormalize(sampler.wrapT, t, m_height); + u = unnormalize(sampler.wrapS, s, m_size.x()); + v = unnormalize(sampler.wrapT, t, m_size.y()); } switch (filter) @@ -1644,13 +1658,13 @@ Vec4 ConstPixelBufferAccess::sample2D (const Sampler& sampler, Sampler::FilterMo Vec4 ConstPixelBufferAccess::sample1DOffset (const Sampler& sampler, Sampler::FilterMode filter, float s, const IVec2& offset) const { - DE_ASSERT(de::inBounds(offset.y(), 0, m_width)); + DE_ASSERT(de::inBounds(offset.y(), 0, m_size.y())); // Non-normalized coordinates. float u = s; if (sampler.normalizedCoords) - u = unnormalize(sampler.wrapS, s, m_width); + u = unnormalize(sampler.wrapS, s, m_size.x()); switch (filter) { @@ -1664,7 +1678,7 @@ Vec4 ConstPixelBufferAccess::sample1DOffset (const Sampler& sampler, Sampler::Fi Vec4 ConstPixelBufferAccess::sample2DOffset (const Sampler& sampler, Sampler::FilterMode filter, float s, float t, const IVec3& offset) const { - DE_ASSERT(de::inBounds(offset.z(), 0, m_depth)); + DE_ASSERT(de::inBounds(offset.z(), 0, m_size.z())); // Non-normalized coordinates. 
float u = s; @@ -1672,8 +1686,8 @@ Vec4 ConstPixelBufferAccess::sample2DOffset (const Sampler& sampler, Sampler::Fi if (sampler.normalizedCoords) { - u = unnormalize(sampler.wrapS, s, m_width); - v = unnormalize(sampler.wrapT, t, m_height); + u = unnormalize(sampler.wrapS, s, m_size.x()); + v = unnormalize(sampler.wrapT, t, m_size.y()); } switch (filter) @@ -1688,7 +1702,7 @@ Vec4 ConstPixelBufferAccess::sample2DOffset (const Sampler& sampler, Sampler::Fi float ConstPixelBufferAccess::sample1DCompare (const Sampler& sampler, Sampler::FilterMode filter, float ref, float s, const IVec2& offset) const { - DE_ASSERT(de::inBounds(offset.y(), 0, m_height)); + DE_ASSERT(de::inBounds(offset.y(), 0, m_size.y())); // Format information for comparison function const bool isFixedPointDepth = isFixedPointDepthTextureFormat(m_format); @@ -1697,7 +1711,7 @@ float ConstPixelBufferAccess::sample1DCompare (const Sampler& sampler, Sampler:: float u = s; if (sampler.normalizedCoords) - u = unnormalize(sampler.wrapS, s, m_width); + u = unnormalize(sampler.wrapS, s, m_size.x()); switch (filter) { @@ -1711,7 +1725,7 @@ float ConstPixelBufferAccess::sample1DCompare (const Sampler& sampler, Sampler:: float ConstPixelBufferAccess::sample2DCompare (const Sampler& sampler, Sampler::FilterMode filter, float ref, float s, float t, const IVec3& offset) const { - DE_ASSERT(de::inBounds(offset.z(), 0, m_depth)); + DE_ASSERT(de::inBounds(offset.z(), 0, m_size.z())); // Format information for comparison function const bool isFixedPointDepth = isFixedPointDepthTextureFormat(m_format); @@ -1722,8 +1736,8 @@ float ConstPixelBufferAccess::sample2DCompare (const Sampler& sampler, Sampler:: if (sampler.normalizedCoords) { - u = unnormalize(sampler.wrapS, s, m_width); - v = unnormalize(sampler.wrapT, t, m_height); + u = unnormalize(sampler.wrapS, s, m_size.x()); + v = unnormalize(sampler.wrapT, t, m_size.y()); } switch (filter) @@ -1745,9 +1759,9 @@ Vec4 ConstPixelBufferAccess::sample3D (const Sampler& 
sampler, Sampler::FilterMo if (sampler.normalizedCoords) { - u = unnormalize(sampler.wrapS, s, m_width); - v = unnormalize(sampler.wrapT, t, m_height); - w = unnormalize(sampler.wrapR, r, m_depth); + u = unnormalize(sampler.wrapS, s, m_size.x()); + v = unnormalize(sampler.wrapT, t, m_size.y()); + w = unnormalize(sampler.wrapR, r, m_size.z()); } switch (filter) @@ -1769,9 +1783,9 @@ Vec4 ConstPixelBufferAccess::sample3DOffset (const Sampler& sampler, Sampler::Fi if (sampler.normalizedCoords) { - u = unnormalize(sampler.wrapS, s, m_width); - v = unnormalize(sampler.wrapT, t, m_height); - w = unnormalize(sampler.wrapR, r, m_depth); + u = unnormalize(sampler.wrapS, s, m_size.x()); + v = unnormalize(sampler.wrapT, t, m_size.y()); + w = unnormalize(sampler.wrapR, r, m_size.z()); } switch (filter) @@ -1786,25 +1800,19 @@ Vec4 ConstPixelBufferAccess::sample3DOffset (const Sampler& sampler, Sampler::Fi TextureLevel::TextureLevel (void) : m_format () - , m_width (0) - , m_height (0) - , m_depth (0) + , m_size (0) { } TextureLevel::TextureLevel (const TextureFormat& format) : m_format (format) - , m_width (0) - , m_height (0) - , m_depth (0) + , m_size (0) { } TextureLevel::TextureLevel (const TextureFormat& format, int width, int height, int depth) : m_format (format) - , m_width (0) - , m_height (0) - , m_depth (0) + , m_size (0) { setSize(width, height, depth); } @@ -1823,11 +1831,9 @@ void TextureLevel::setSize (int width, int height, int depth) { int pixelSize = m_format.getPixelSize(); - m_width = width; - m_height = height; - m_depth = depth; + m_size = IVec3(width, height, depth); - m_data.setStorage(m_width*m_height*m_depth*pixelSize); + m_data.setStorage(m_size.x() * m_size.y() * m_size.z() * pixelSize); } Vec4 sampleLevelArray1D (const ConstPixelBufferAccess* levels, int numLevels, const Sampler& sampler, float s, int depth, float lod) diff --git a/framework/common/tcuTexture.hpp b/framework/common/tcuTexture.hpp index c4b6e85..fd69111 100644 --- 
a/framework/common/tcuTexture.hpp +++ b/framework/common/tcuTexture.hpp @@ -228,6 +228,9 @@ public: } }; +// Calculate pitches for pixel data with no padding. +IVec3 calculatePackedPitch (const TextureFormat& format, const IVec3& size); + class TextureLevel; /*--------------------------------------------------------------------*//*! @@ -246,17 +249,20 @@ public: ConstPixelBufferAccess (void); ConstPixelBufferAccess (const TextureLevel& level); ConstPixelBufferAccess (const TextureFormat& format, int width, int height, int depth, const void* data); + ConstPixelBufferAccess (const TextureFormat& format, const IVec3& size, const void* data); ConstPixelBufferAccess (const TextureFormat& format, int width, int height, int depth, int rowPitch, int slicePitch, const void* data); + ConstPixelBufferAccess (const TextureFormat& format, const IVec3& size, const IVec3& pitch, const void* data); - const TextureFormat& getFormat (void) const { return m_format; } - int getWidth (void) const { return m_width; } - int getHeight (void) const { return m_height; } - int getDepth (void) const { return m_depth; } - int getRowPitch (void) const { return m_rowPitch; } - int getSlicePitch (void) const { return m_slicePitch; } + const TextureFormat& getFormat (void) const { return m_format; } + const IVec3& getSize (void) const { return m_size; } + int getWidth (void) const { return m_size.x(); } + int getHeight (void) const { return m_size.y(); } + int getDepth (void) const { return m_size.z(); } + int getRowPitch (void) const { return m_pitch.y(); } + int getSlicePitch (void) const { return m_pitch.z(); } - const void* getDataPtr (void) const { return m_data; } - int getDataSize (void) const { return m_depth*m_slicePitch; } + const void* getDataPtr (void) const { return m_data; } + int getDataSize (void) const { return m_size.z()*m_pitch.z(); } Vec4 getPixel (int x, int y, int z = 0) const; IVec4 getPixelInt (int x, int y, int z = 0) const; @@ -281,11 +287,8 @@ public: protected: 
TextureFormat m_format; - int m_width; - int m_height; - int m_depth; - int m_rowPitch; - int m_slicePitch; + IVec3 m_size; + IVec3 m_pitch; //!< (pixelPitch, rowPitch, slicePitch) mutable void* m_data; }; @@ -304,7 +307,9 @@ public: PixelBufferAccess (void) {} PixelBufferAccess (TextureLevel& level); PixelBufferAccess (const TextureFormat& format, int width, int height, int depth, void* data); + PixelBufferAccess (const TextureFormat& format, const IVec3& size, void* data); PixelBufferAccess (const TextureFormat& format, int width, int height, int depth, int rowPitch, int slicePitch, void* data); + PixelBufferAccess (const TextureFormat& format, const IVec3& size, const IVec3& pitch, void* data); void* getDataPtr (void) const { return m_data; } @@ -332,26 +337,25 @@ public: TextureLevel (const TextureFormat& format, int width, int height, int depth = 1); ~TextureLevel (void); - int getWidth (void) const { return m_width; } - int getHeight (void) const { return m_height; } - int getDepth (void) const { return m_depth; } - bool isEmpty (void) const { return m_width == 0 || m_height == 0 || m_depth == 0; } + const IVec3& getSize (void) const { return m_size; } + int getWidth (void) const { return m_size.x(); } + int getHeight (void) const { return m_size.y(); } + int getDepth (void) const { return m_size.z(); } + bool isEmpty (void) const { return m_size.x() == 0 || m_size.y() == 0 || m_size.z() == 0; } const TextureFormat getFormat (void) const { return m_format; } void setStorage (const TextureFormat& format, int width, int heigth, int depth = 1); void setSize (int width, int height, int depth = 1); - PixelBufferAccess getAccess (void) { return PixelBufferAccess(m_format, m_width, m_height, m_depth, getPtr()); } - ConstPixelBufferAccess getAccess (void) const { return ConstPixelBufferAccess(m_format, m_width, m_height, m_depth, getPtr()); } + PixelBufferAccess getAccess (void) { return PixelBufferAccess(m_format, m_size, calculatePackedPitch(m_format, m_size), getPtr()); } + 
ConstPixelBufferAccess getAccess (void) const { return ConstPixelBufferAccess(m_format, m_size, calculatePackedPitch(m_format, m_size), getPtr()); } private: void* getPtr (void) { return m_data.getPtr(); } const void* getPtr (void) const { return m_data.getPtr(); } TextureFormat m_format; - int m_width; - int m_height; - int m_depth; + IVec3 m_size; de::ArrayBuffer m_data; friend class ConstPixelBufferAccess; -- 2.7.4