a57559c92e529898490a7ce54ced63b9c66349d9
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / framework / vulkan / vkRayTracingUtil.cpp
1 /*-------------------------------------------------------------------------
2  * Vulkan CTS Framework
3  * --------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Utilities for creating commonly used Vulkan objects
22  *//*--------------------------------------------------------------------*/
23
24 #include "vkRayTracingUtil.hpp"
25
26 #include "vkRefUtil.hpp"
27 #include "vkQueryUtil.hpp"
28 #include "vkObjUtil.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkCmdUtil.hpp"
31
32 #include "deStringUtil.hpp"
33 #include "deSTLUtil.hpp"
34
35 #include <vector>
36 #include <string>
37 #include <thread>
38 #include <limits>
39 #include <type_traits>
40 #include <map>
41
42 namespace vk
43 {
44
45 #ifndef CTS_USES_VULKANSC
46
47 struct DeferredThreadParams
48 {
49         const DeviceInterface&  vk;
50         VkDevice                                device;
51         VkDeferredOperationKHR  deferredOperation;
52         VkResult                                result;
53 };
54
55 std::string getFormatSimpleName (vk::VkFormat format)
56 {
57         constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
58         return de::toLower(de::toString(format).substr(kPrefixLen));
59 }
60
61 bool pointInTriangle2D(const tcu::Vec3& p, const tcu::Vec3& p0, const tcu::Vec3& p1, const tcu::Vec3& p2)
62 {
63         float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
64         float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
65
66         if ((s < 0) != (t < 0))
67                 return false;
68
69         float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
70
71         return a < 0 ?
72                 (s <= 0 && s + t >= a) :
73                 (s >= 0 && s + t <= a);
74 }
75
76 // Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
77 static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
78 {
79         bool mandatory = false;
80
81         switch (format)
82         {
83     case VK_FORMAT_R32G32_SFLOAT:
84     case VK_FORMAT_R32G32B32_SFLOAT:
85     case VK_FORMAT_R16G16_SFLOAT:
86     case VK_FORMAT_R16G16B16A16_SFLOAT:
87     case VK_FORMAT_R16G16_SNORM:
88     case VK_FORMAT_R16G16B16A16_SNORM:
89                 mandatory = true;
90                 break;
91         default:
92                 break;
93         }
94
95         return mandatory;
96 }
97
98 void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
99 {
100         const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
101
102         if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
103         {
104                 const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
105                 if (isMandatoryAccelerationStructureVertexBufferFormat(format))
106                         TCU_FAIL(errorMsg);
107                 TCU_THROW(NotSupportedError, errorMsg);
108         }
109 }
110
// Returns the GLSL source of a ray generation shader that traces one ray per launch invocation.
// The ray starts at the center of the invocation's cell in the [0,1]x[0,1] launch grid (z = 0),
// points along -Z and is traced against the acceleration structure bound at set 0, binding 1.
std::string getCommonRayGenerationShader (void)
{
	// One entry per source line; each entry carries its own trailing newline.
	static const char* const shaderLines[] =
	{
		"#version 460 core\n",
		"#extension GL_EXT_ray_tracing : require\n",
		"layout(location = 0) rayPayloadEXT vec3 hitValue;\n",
		"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n",
		"\n",
		"void main()\n",
		"{\n",
		"  uint  rayFlags = 0;\n",
		"  uint  cullMask = 0xFF;\n",
		"  float tmin     = 0.0;\n",
		"  float tmax     = 9.0;\n",
		"  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n",
		"  vec3  direct   = vec3(0.0, 0.0, -1.0);\n",
		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n",
		"}\n",
	};

	std::string source;

	for (const char* line : shaderLines)
		source += line;

	return source;
}
130
131 RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
132         : m_geometryType        (geometryType)
133         , m_vertexFormat        (vertexFormat)
134         , m_indexType           (indexType)
135         , m_geometryFlags       ((VkGeometryFlagsKHR)0u)
136         , m_hasOpacityMicromap (false)
137 {
138         if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
139                 DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
140 }
141
142 RaytracedGeometryBase::~RaytracedGeometryBase ()
143 {
144 }
145
146 struct GeometryBuilderParams
147 {
148         VkGeometryTypeKHR       geometryType;
149         bool                            usePadding;
150 };
151
152 template <typename V, typename I>
153 RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
154 {
155         return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
156 }
157
158 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
159 {
160         const GeometryBuilderParams builderParams { geometryType, padVertices };
161
162         switch (vertexFormat)
163         {
164                 case VK_FORMAT_R32G32_SFLOAT:
165                         switch (indexType)
166                         {
167                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
168                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
169                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
170                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
171                         }
172                 case VK_FORMAT_R32G32B32_SFLOAT:
173                         switch (indexType)
174                         {
175                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
176                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
177                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
178                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
179                         }
180                 case VK_FORMAT_R32G32B32A32_SFLOAT:
181                         switch (indexType)
182                         {
183                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
184                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
185                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
186                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
187                         }
188                 case VK_FORMAT_R16G16_SFLOAT:
189                         switch (indexType)
190                         {
191                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
192                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
193                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
194                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
195                         }
196                 case VK_FORMAT_R16G16B16_SFLOAT:
197                         switch (indexType)
198                         {
199                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
200                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
201                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
202                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
203                         }
204                 case VK_FORMAT_R16G16B16A16_SFLOAT:
205                         switch (indexType)
206                         {
207                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
208                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
209                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
210                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
211                         }
212                 case VK_FORMAT_R16G16_SNORM:
213                         switch (indexType)
214                         {
215                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
216                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
217                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
218                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
219                         }
220                 case VK_FORMAT_R16G16B16_SNORM:
221                         switch (indexType)
222                         {
223                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
224                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
225                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
226                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
227                         }
228                 case VK_FORMAT_R16G16B16A16_SNORM:
229                         switch (indexType)
230                         {
231                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
232                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
233                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
234                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
235                         }
236                 case VK_FORMAT_R64G64_SFLOAT:
237                         switch (indexType)
238                         {
239                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
240                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
241                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
242                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
243                         }
244                 case VK_FORMAT_R64G64B64_SFLOAT:
245                         switch (indexType)
246                         {
247                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
248                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
249                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
250                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
251                         }
252                 case VK_FORMAT_R64G64B64A64_SFLOAT:
253                         switch (indexType)
254                         {
255                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
256                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
257                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
258                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
259                         }
260                 case VK_FORMAT_R8G8_SNORM:
261                         switch (indexType)
262                         {
263                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
264                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
265                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
266                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
267                         }
268                 case VK_FORMAT_R8G8B8_SNORM:
269                         switch (indexType)
270                         {
271                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
272                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
273                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
274                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
275                         }
276                 case VK_FORMAT_R8G8B8A8_SNORM:
277                         switch (indexType)
278                         {
279                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
280                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
281                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
282                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
283                         }
284                 default:
285                         TCU_THROW(InternalError, "Wrong vertex format");
286         }
287
288 }
289
290 VkDeviceAddress getBufferDeviceAddress ( const DeviceInterface& vk,
291                                                                                  const VkDevice                 device,
292                                                                                  const VkBuffer                 buffer,
293                                                                                  VkDeviceSize                   offset )
294 {
295
296         if (buffer == DE_NULL)
297                 return 0;
298
299         VkBufferDeviceAddressInfo deviceAddressInfo
300         {
301                 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,           // VkStructureType    sType
302                 DE_NULL,                                                                                        // const void*        pNext
303                 buffer                                                                                          // VkBuffer           buffer;
304         };
305         return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
306 }
307
308
309 static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&           vk,
310                                                                                            const VkDevice                               device,
311                                                                                            const VkQueryType                    queryType,
312                                                                                            deUint32                                     queryCount)
313 {
314         const VkQueryPoolCreateInfo                             queryPoolCreateInfo =
315         {
316                 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,               // sType
317                 DE_NULL,                                                                                // pNext
318                 (VkQueryPoolCreateFlags)0,                                              // flags
319                 queryType,                                                                              // queryType
320                 queryCount,                                                                             // queryCount
321                 0u,                                                                                             // pipelineStatistics
322         };
323         return createQueryPool(vk, device, &queryPoolCreateInfo);
324 }
325
326 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
327 {
328         VkAccelerationStructureGeometryDataKHR result;
329
330         deMemset(&result, 0, sizeof(result));
331
332         result.triangles = triangles;
333
334         return result;
335 }
336
337 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
338 {
339         VkAccelerationStructureGeometryDataKHR result;
340
341         deMemset(&result, 0, sizeof(result));
342
343         result.aabbs = aabbs;
344
345         return result;
346 }
347
348 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
349 {
350         VkAccelerationStructureGeometryDataKHR result;
351
352         deMemset(&result, 0, sizeof(result));
353
354         result.instances = instances;
355
356         return result;
357 }
358
359 static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&                    transform,
360                                                                                                                                                                                  deUint32                                                               instanceCustomIndex,
361                                                                                                                                                                                  deUint32                                                               mask,
362                                                                                                                                                                                  deUint32                                                               instanceShaderBindingTableRecordOffset,
363                                                                                                                                                                                  VkGeometryInstanceFlagsKHR                             flags,
364                                                                                                                                                                                  deUint64                                                               accelerationStructureReference)
365 {
366         VkAccelerationStructureInstanceKHR instance             = { transform, 0, 0, 0, 0, accelerationStructureReference };
367         instance.instanceCustomIndex                                    = instanceCustomIndex & 0xFFFFFF;
368         instance.mask                                                                   = mask & 0xFF;
369         instance.instanceShaderBindingTableRecordOffset = instanceShaderBindingTableRecordOffset & 0xFFFFFF;
370         instance.flags                                                                  = flags & 0xFF;
371         return instance;
372 }
373
374 VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface&             vk,
375                                                                                          const VkDevice                         device,
376                                                                                          const VkPipeline                       pipeline,
377                                                                                          const deUint32                         firstGroup,
378                                                                                          const deUint32                         groupCount,
379                                                                                          const deUintptr                        dataSize,
380                                                                                          void*                                          pData)
381 {
382         return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
383 }
384
385 VkResult getRayTracingShaderGroupHandles (const DeviceInterface&                vk,
386                                                                                   const VkDevice                                device,
387                                                                                   const VkPipeline                              pipeline,
388                                                                                   const deUint32                                firstGroup,
389                                                                                   const deUint32                                groupCount,
390                                                                                   const deUintptr                               dataSize,
391                                                                                   void*                                                 pData)
392 {
393         return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
394 }
395
396 VkResult finishDeferredOperation (const DeviceInterface&        vk,
397                                                                   VkDevice                                      device,
398                                                                   VkDeferredOperationKHR        deferredOperation)
399 {
400         VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);
401
402         while (result == VK_THREAD_IDLE_KHR)
403         {
404                 std::this_thread::yield();
405                 result = vk.deferredOperationJoinKHR(device, deferredOperation);
406         }
407
408         switch( result )
409         {
410                 case VK_SUCCESS:
411                 {
412                         // Deferred operation has finished. Query its result
413                         result = vk.getDeferredOperationResultKHR(device, deferredOperation);
414
415                         break;
416                 }
417
418                 case VK_THREAD_DONE_KHR:
419                 {
420                         // Deferred operation is being wrapped up by another thread
421                         // wait for that thread to finish
422                         do
423                         {
424                                 std::this_thread::yield();
425                                 result = vk.getDeferredOperationResultKHR(device, deferredOperation);
426                         } while (result == VK_NOT_READY);
427
428                         break;
429                 }
430
431                 default:
432                 {
433                         DE_ASSERT(false);
434
435                         break;
436                 }
437         }
438
439         return result;
440 }
441
442 void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
443 {
444         deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
445 }
446
447 void finishDeferredOperation (const DeviceInterface&    vk,
448                                                           VkDevice                                      device,
449                                                           VkDeferredOperationKHR        deferredOperation,
450                                                           const deUint32                        workerThreadCount,
451                                                           const bool                            operationNotDeferred)
452 {
453
454         if (operationNotDeferred)
455         {
456                 // when the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
457                 // the deferred operation should act as if no command was deferred
458                 VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));
459
460
461                 // there is not need to join any threads to the deferred operation,
462                 // so below can be skipped.
463                 return;
464         }
465
466         if (workerThreadCount == 0)
467         {
468                 VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
469         }
470         else
471         {
472                 const deUint32                                                  maxThreadCountSupported = deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
473                 const deUint32                                                  requestedThreadCount    = workerThreadCount;
474                 const deUint32                                                  testThreadCount                 = requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;
475
476                 if (maxThreadCountSupported == 0)
477                         TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");
478
479                 const DeferredThreadParams                              deferredThreadParams    =
480                 {
481                         vk,                                     //  const DeviceInterface&      vk;
482                         device,                         //  VkDevice                            device;
483                         deferredOperation,      //  VkDeferredOperationKHR      deferredOperation;
484                         VK_RESULT_MAX_ENUM,     //  VResult                                     result;
485                 };
486                 std::vector<DeferredThreadParams>               threadParams    (testThreadCount, deferredThreadParams);
487                 std::vector<de::MovePtr<std::thread> >  threads                 (testThreadCount);
488                 bool                                                                    executionResult = false;
489
490                 DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);
491
492                 for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
493                         threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));
494
495                 for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
496                         threads[threadNdx]->join();
497
498                 for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
499                         if (threadParams[threadNdx].result == VK_SUCCESS)
500                                 executionResult = true;
501
502                 if (!executionResult)
503                         TCU_FAIL("Neither reported VK_SUCCESS");
504         }
505 }
506
507 SerialStorage::SerialStorage (const DeviceInterface&                                                                    vk,
508                                                           const VkDevice                                                                                        device,
509                                                           Allocator&                                                                                            allocator,
510                                                           const VkAccelerationStructureBuildTypeKHR                                     buildType,
511                                                           const VkDeviceSize                                                                            storageSize)
512         : m_buildType           (buildType)
513         , m_storageSize         (storageSize)
514         , m_serialInfo          ()
515 {
516         const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
517         try
518         {
519                 m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
520         }
521         catch (const tcu::NotSupportedError&)
522         {
523                 // retry without Cached flag
524                 m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
525         }
526 }
527
528 SerialStorage::SerialStorage (const DeviceInterface&                                            vk,
529                                                           const VkDevice                                                                device,
530                                                           Allocator&                                                                    allocator,
531                                                           const VkAccelerationStructureBuildTypeKHR             buildType,
532                                                           const SerialInfo&                                                             serialInfo)
533         : m_buildType           (buildType)
534         , m_storageSize         (serialInfo.sizes()[0]) // raise assertion if serialInfo is empty
535         , m_serialInfo          (serialInfo)
536 {
537         DE_ASSERT(serialInfo.sizes().size() >= 2u);
538
539         // create buffer for top-level acceleration structure
540         {
541                 const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
542                 m_buffer                                                                                = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
543         }
544
545         // create buffers for bottom-level acceleration structures
546         {
547                 std::vector<deUint64>   addrs;
548
549                 for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
550                 {
551                         const deUint64& lookAddr = serialInfo.addresses()[i];
552                         auto end = addrs.end();
553                         auto match = std::find_if(addrs.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
554                         if (match == end)
555                         {
556                                 addrs.emplace_back(lookAddr);
557                                 m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
558                         }
559                 }
560         }
561 }
562
563 VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&                                              vk,
564                                                                                                         const VkDevice                                                          device,
565                                                                                                         const VkAccelerationStructureBuildTypeKHR       buildType)
566 {
567         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
568                 return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
569         else
570                 return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
571 }
572
573 SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
574 {
575         return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
576 }
577
578 bool SerialStorage::hasDeepFormat () const
579 {
580         return (m_serialInfo.sizes().size() >= 2u);
581 }
582
583 de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
584 {
585         return m_bottoms[index];
586 }
587
588 VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
589 {
590         DE_ASSERT(offset < m_storageSize);
591         return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
592 }
593
594 VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
595 {
596         return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
597 }
598
599 VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&                                    vk,
600                                                                                                                           const VkDevice                                                        device,
601                                                                                                                           const VkAccelerationStructureBuildTypeKHR     buildType)
602 {
603         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
604                 return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
605         else
606                 return getHostAddressConst();
607 }
608
609 inline VkDeviceSize SerialStorage::getStorageSize () const
610 {
611         return m_storageSize;
612 }
613
614 inline const SerialInfo& SerialStorage::getSerialInfo () const
615 {
616         return m_serialInfo;
617 }
618
619 deUint64 SerialStorage::getDeserializedSize ()
620 {
621         deUint64                result          = 0;
622         const deUint8*  startPtr        = static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());
623
624         DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);
625
626         deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));
627
628         return result;
629 }
630
631 BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
632 {
633 }
634
635 BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
636         : m_structureSize               (0u)
637         , m_updateScratchSize   (0u)
638         , m_buildScratchSize    (0u)
639 {
640 }
641
642 void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&   geometryData,
643                                                                                                                 const bool                                              triangles,
644                                                                                                                 const VkGeometryFlagsKHR                geometryFlags)
645 {
646         if (triangles)
647                 DE_ASSERT((geometryData.size() % 3) == 0);
648         else
649                 DE_ASSERT((geometryData.size() % 2) == 0);
650
651         setGeometryCount(1u);
652
653         addGeometry(geometryData, triangles, geometryFlags);
654 }
655
656 void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits      testStage,
657                                                                                                                            const VkGeometryFlagsKHR             geometryFlags)
658 {
659         bool                                    trianglesData   = false;
660         float                                   z                               = 0.0f;
661         std::vector<tcu::Vec3>  geometryData;
662
663         switch (testStage)
664         {
665                 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:            z = -1.0f; trianglesData = true;        break;
666                 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:           z = -1.0f; trianglesData = true;        break;
667                 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:       z = -1.0f; trianglesData = true;        break;
668                 case VK_SHADER_STAGE_MISS_BIT_KHR:                      z = -9.9f; trianglesData = true;        break;
669                 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:      z = -1.0f; trianglesData = false;       break;
670                 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:          z = -1.0f; trianglesData = true;        break;
671                 default:                                                                        TCU_THROW(InternalError, "Unacceptable stage");
672         }
673
674         if (trianglesData)
675         {
676                 geometryData.reserve(6);
677
678                 geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
679                 geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
680                 geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
681                 geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
682                 geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
683                 geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
684         }
685         else
686         {
687                 geometryData.reserve(2);
688
689                 geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
690                 geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
691         }
692
693         setGeometryCount(1u);
694
695         addGeometry(geometryData, trianglesData, geometryFlags);
696 }
697
698 void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
699 {
700         m_geometriesData.clear();
701
702         m_geometriesData.reserve(geometryCount);
703 }
704
705 void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>&               raytracedGeometry)
706 {
707         m_geometriesData.push_back(raytracedGeometry);
708 }
709
710 void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&       geometryData,
711                                                                                                         const bool                                              triangles,
712                                                                                                         const VkGeometryFlagsKHR                geometryFlags,
713                                                                                                         const VkAccelerationStructureTrianglesOpacityMicromapEXT* opacityGeometryMicromap)
714 {
715         DE_ASSERT(geometryData.size() > 0);
716         DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));
717
718         if (!triangles)
719                 for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
720                 {
721                         DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
722                         DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
723                         DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
724                 }
725
726         de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
727         for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
728                 geometry->addVertex(*it);
729
730         geometry->setGeometryFlags(geometryFlags);
731         if (opacityGeometryMicromap)
732                 geometry->setOpacityMicromap(opacityGeometryMicromap);
733         addGeometry(geometry);
734 }
735
736 VkAccelerationStructureBuildSizesInfoKHR BottomLevelAccelerationStructure::getStructureBuildSizes () const
737 {
738         return
739         {
740                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
741                 DE_NULL,                                                                                                                //  const void*         pNext;
742                 m_structureSize,                                                                                                //  VkDeviceSize        accelerationStructureSize;
743                 m_updateScratchSize,                                                                                    //  VkDeviceSize        updateScratchSize;
744                 m_buildScratchSize                                                                                              //  VkDeviceSize        buildScratchSize;
745         };
746 };
747
748 VkDeviceSize getVertexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&      geometriesData)
749 {
750         DE_ASSERT(geometriesData.size() != 0);
751         VkDeviceSize                                    bufferSizeBytes = 0;
752         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
753                 bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(),8);
754         return bufferSizeBytes;
755 }
756
757 BufferWithMemory* createVertexBuffer (const DeviceInterface&    vk,
758                                                                           const VkDevice                        device,
759                                                                           Allocator&                            allocator,
760                                                                           const VkDeviceSize            bufferSizeBytes)
761 {
762         const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
763         return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
764 }
765
766 BufferWithMemory* createVertexBuffer (const DeviceInterface&                                                                    vk,
767                                                                           const VkDevice                                                                                        device,
768                                                                           Allocator&                                                                                            allocator,
769                                                                           const std::vector<de::SharedPtr<RaytracedGeometryBase>>&      geometriesData)
770 {
771         return createVertexBuffer(vk, device, allocator, getVertexBufferSize(geometriesData));
772 }
773
774 void updateVertexBuffer (const DeviceInterface&                                                                         vk,
775                                                  const VkDevice                                                                                         device,
776                                                  const std::vector<de::SharedPtr<RaytracedGeometryBase>>&       geometriesData,
777                                                  BufferWithMemory*                                                                                      vertexBuffer,
778                                                  VkDeviceSize                                                                                           geometriesOffset = 0)
779 {
780         const Allocation&                               geometryAlloc           = vertexBuffer->getAllocation();
781         deUint8*                                                bufferStart                     = static_cast<deUint8*>(geometryAlloc.getHostPtr());
782         VkDeviceSize                                    bufferOffset            = geometriesOffset;
783
784         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
785         {
786                 const void*                                     geometryPtr                     = geometriesData[geometryNdx]->getVertexPointer();
787                 const size_t                            geometryPtrSize         = geometriesData[geometryNdx]->getVertexByteSize();
788
789                 deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);
790
791                 bufferOffset += deAlignSize(geometryPtrSize,8);
792         }
793
794         // Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
795         // align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
796         // for the vertex and index buffers, so flushing is actually not needed.
797         flushAlloc(vk, device, geometryAlloc);
798 }
799
800 VkDeviceSize getIndexBufferSize (const std::vector<de::SharedPtr<RaytracedGeometryBase>>&       geometriesData)
801 {
802         DE_ASSERT(!geometriesData.empty());
803
804         VkDeviceSize    bufferSizeBytes = 0;
805         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
806                 if(geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
807                         bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(),8);
808         return bufferSizeBytes;
809 }
810
811 BufferWithMemory* createIndexBuffer (const DeviceInterface&             vk,
812                                                                          const VkDevice                         device,
813                                                                          Allocator&                                     allocator,
814                                                                          const VkDeviceSize                     bufferSizeBytes)
815 {
816         DE_ASSERT(bufferSizeBytes);
817         const VkBufferCreateInfo                bufferCreateInfo        = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
818         return  new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
819 }
820
821 BufferWithMemory* createIndexBuffer (const DeviceInterface&                                                                             vk,
822                                                                          const VkDevice                                                                                         device,
823                                                                          Allocator&                                                                                                     allocator,
824                                                                          const std::vector<de::SharedPtr<RaytracedGeometryBase>>&       geometriesData)
825 {
826
827
828         const VkDeviceSize bufferSizeBytes = getIndexBufferSize(geometriesData);
829         return bufferSizeBytes ? createIndexBuffer(vk, device, allocator, bufferSizeBytes) : nullptr;
830 }
831
832 void updateIndexBuffer (const DeviceInterface&                                                                          vk,
833                                                 const VkDevice                                                                                          device,
834                                                 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&        geometriesData,
835                                                 BufferWithMemory*                                                                                       indexBuffer,
836                                                 VkDeviceSize                                                                                            geometriesOffset)
837 {
838         const Allocation&                               indexAlloc                      = indexBuffer->getAllocation();
839         deUint8*                                                bufferStart                     = static_cast<deUint8*>(indexAlloc.getHostPtr());
840         VkDeviceSize                                    bufferOffset            = geometriesOffset;
841
842         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
843         {
844                 if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
845                 {
846                         const void*                                     indexPtr                = geometriesData[geometryNdx]->getIndexPointer();
847                         const size_t                            indexPtrSize    = geometriesData[geometryNdx]->getIndexByteSize();
848
849                         deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);
850
851                         bufferOffset += deAlignSize(indexPtrSize, 8);
852                 }
853         }
854
855         // Flush the whole allocation. We could flush only the interesting range, but we'd need to be sure both the offset and size
856         // align to VkPhysicalDeviceLimits::nonCoherentAtomSize, which we are not considering. Also note most code uses Coherent memory
857         // for the vertex and index buffers, so flushing is actually not needed.
858         flushAlloc(vk, device, indexAlloc);
859 }
860
861 class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
862 {
863 public:
864         static deUint32                                                                                 getRequiredAllocationCount                                              (void);
865
866                                                                                                                         BottomLevelAccelerationStructureKHR                             ();
867                                                                                                                         BottomLevelAccelerationStructureKHR                             (const BottomLevelAccelerationStructureKHR&             other) = delete;
868         virtual                                                                                                 ~BottomLevelAccelerationStructureKHR                    ();
869
870         void                                                                                                    setBuildType                                                                    (const VkAccelerationStructureBuildTypeKHR              buildType) override;
871         VkAccelerationStructureBuildTypeKHR                                             getBuildType                                                                    () const override;
872         void                                                                                                    setCreateFlags                                                                  (const VkAccelerationStructureCreateFlagsKHR    createFlags) override;
873         void                                                                                                    setCreateGeneric                                                                (bool                                                                                   createGeneric) override;
874         void                                                                                                    setBuildFlags                                                                   (const VkBuildAccelerationStructureFlagsKHR             buildFlags) override;
875         void                                                                                                    setBuildWithoutGeometries                                               (bool                                                                                   buildWithoutGeometries) override;
876         void                                                                                                    setBuildWithoutPrimitives                                               (bool                                                                                   buildWithoutPrimitives) override;
877         void                                                                                                    setDeferredOperation                                                    (const bool                                                                             deferredOperation,
878                                                                                                                                                                                                                          const deUint32                                                                 workerThreadCount) override;
879         void                                                                                                    setUseArrayOfPointers                                                   (const bool                                                                             useArrayOfPointers) override;
880         void                                                                                                    setIndirectBuildParameters                                              (const VkBuffer                                                                 indirectBuffer,
881                                                                                                                                                                                                                          const VkDeviceSize                                                             indirectBufferOffset,
882                                                                                                                                                                                                                          const deUint32                                                                 indirectBufferStride) override;
883         VkBuildAccelerationStructureFlagsKHR                                    getBuildFlags                                                                   () const override;
884
885         void                                                                                                    create                                                                                  (const DeviceInterface&                                                 vk,
886                                                                                                                                                                                                                          const VkDevice                                                                 device,
887                                                                                                                                                                                                                          Allocator&                                                                             allocator,
888                                                                                                                                                                                                                          VkDeviceSize                                                                   structureSize,
889                                                                                                                                                                                                                          VkDeviceAddress                                                                deviceAddress                   = 0u,
890                                                                                                                                                                                                                          const void*                                                                    pNext                                   = DE_NULL,
891                                                                                                                                                                                                                          const MemoryRequirement&                                               addMemoryRequirement    = MemoryRequirement::Any) override;
892         void                                                                                                    build                                                                                   (const DeviceInterface&                                                 vk,
893                                                                                                                                                                                                                          const VkDevice                                                                 device,
894                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer) override;
895         void                                                                                                    copyFrom                                                                                (const DeviceInterface&                                                 vk,
896                                                                                                                                                                                                                          const VkDevice                                                                 device,
897                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer,
898                                                                                                                                                                                                                          BottomLevelAccelerationStructure*                              accelerationStructure,
899                                                                                                                                                                                                                          bool                                                                                   compactCopy) override;
900
901         void                                                                                                    serialize                                                                               (const DeviceInterface&                                                 vk,
902                                                                                                                                                                                                                          const VkDevice                                                                 device,
903                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer,
904                                                                                                                                                                                                                          SerialStorage*                                                                 storage) override;
905         void                                                                                                    deserialize                                                                             (const DeviceInterface&                                                 vk,
906                                                                                                                                                                                                                          const VkDevice                                                                 device,
907                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer,
908                                                                                                                                                                                                                          SerialStorage*                                                                 storage) override;
909
910         const VkAccelerationStructureKHR*                                               getPtr                                                                                  (void) const override;
911
912 protected:
913         VkAccelerationStructureBuildTypeKHR                                             m_buildType;
914         VkAccelerationStructureCreateFlagsKHR                                   m_createFlags;
915         bool                                                                                                    m_createGeneric;
916         VkBuildAccelerationStructureFlagsKHR                                    m_buildFlags;
917         bool                                                                                                    m_buildWithoutGeometries;
918         bool                                                                                                    m_buildWithoutPrimitives;
919         bool                                                                                                    m_deferredOperation;
920         deUint32                                                                                                m_workerThreadCount;
921         bool                                                                                                    m_useArrayOfPointers;
922         de::MovePtr<BufferWithMemory>                                                   m_accelerationStructureBuffer;
923         de::MovePtr<BufferWithMemory>                                                   m_vertexBuffer;
924         de::MovePtr<BufferWithMemory>                                                   m_indexBuffer;
925         de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
926         de::UniquePtr<std::vector<deUint8>>                                             m_hostScratchBuffer;
927         Move<VkAccelerationStructureKHR>                                                m_accelerationStructureKHR;
928         VkBuffer                                                                                                m_indirectBuffer;
929         VkDeviceSize                                                                                    m_indirectBufferOffset;
930         deUint32                                                                                                m_indirectBufferStride;
931
932         void                                                                                                    prepareGeometries                                                               (const DeviceInterface&                                                                                         vk,
933                                                                                                                                                                                                                          const VkDevice                                                                                                         device,
934                                                                                                                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR>&                                       accelerationStructureGeometriesKHR,
935                                                                                                                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR*>&                                      accelerationStructureGeometriesKHRPointers,
936                                                                                                                                                                                                                          std::vector<VkAccelerationStructureBuildRangeInfoKHR>&                         accelerationStructureBuildRangeInfoKHR,
937                                                                                                                                                                                                                          std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&       accelerationStructureGeometryMicromapsEXT,
938                                                                                                                                                                                                                          std::vector<deUint32>&                                                                                         maxPrimitiveCounts,
939                                                                                                                                                                                                                          VkDeviceSize                                                                                                           vertexBufferOffset = 0,
940                                                                                                                                                                                                                          VkDeviceSize                                                                                                           indexBufferOffset = 0) const;
941
942         virtual BufferWithMemory*                                                               getAccelerationStructureBuffer                                  () const { return m_accelerationStructureBuffer.get(); }
943         virtual BufferWithMemory*                                                               getDeviceScratchBuffer                                                  () const { return m_deviceScratchBuffer.get(); }
944         virtual std::vector<deUint8>*                                                   getHostScratchBuffer                                                    () const { return m_hostScratchBuffer.get(); }
945         virtual BufferWithMemory*                                                               getVertexBuffer                                                                 () const { return m_vertexBuffer.get(); }
946         virtual BufferWithMemory*                                                               getIndexBuffer                                                                  () const { return m_indexBuffer.get(); }
947
948         virtual VkDeviceSize                                                                    getAccelerationStructureBufferOffset                    () const { return 0; }
949         virtual VkDeviceSize                                                                    getDeviceScratchBufferOffset                                    () const { return 0; }
950         virtual VkDeviceSize                                                                    getVertexBufferOffset                                                   () const { return 0; }
951         virtual VkDeviceSize                                                                    getIndexBufferOffset                                                    () const { return 0; }
952 };
953
954 deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
955 {
956         /*
957                 de::MovePtr<BufferWithMemory>                                                   m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
958                 de::MovePtr<Allocation>                                                                 m_accelerationStructureAlloc;
959                 de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
960         */
961         return 3u;
962 }
963
964 BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
965 {
966 }
967
968 BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
969         : BottomLevelAccelerationStructure      ()
970         , m_buildType                                           (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
971         , m_createFlags                                         (0u)
972         , m_createGeneric                                       (false)
973         , m_buildFlags                                          (0u)
974         , m_buildWithoutGeometries                      (false)
975         , m_buildWithoutPrimitives                      (false)
976         , m_deferredOperation                           (false)
977         , m_workerThreadCount                           (0)
978         , m_useArrayOfPointers                          (false)
979         , m_accelerationStructureBuffer         (DE_NULL)
980         , m_vertexBuffer                                        (DE_NULL)
981         , m_indexBuffer                                         (DE_NULL)
982         , m_deviceScratchBuffer                         (DE_NULL)
983         , m_hostScratchBuffer                           (new std::vector<deUint8>)
984         , m_accelerationStructureKHR            ()
985         , m_indirectBuffer                                      (DE_NULL)
986         , m_indirectBufferOffset                        (0)
987         , m_indirectBufferStride                        (0)
988 {
989 }
990
991 void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR       buildType)
992 {
993         m_buildType = buildType;
994 }
995
996 VkAccelerationStructureBuildTypeKHR BottomLevelAccelerationStructureKHR::getBuildType () const
997 {
998         return m_buildType;
999 }
1000
1001 void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR   createFlags)
1002 {
1003         m_createFlags = createFlags;
1004 }
1005
1006 void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
1007 {
1008         m_createGeneric = createGeneric;
1009 }
1010
1011 void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR     buildFlags)
1012 {
1013         m_buildFlags = buildFlags;
1014 }
1015
1016 void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
1017 {
1018         m_buildWithoutGeometries = buildWithoutGeometries;
1019 }
1020
1021 void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
1022 {
1023         m_buildWithoutPrimitives = buildWithoutPrimitives;
1024 }
1025
1026 void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool              deferredOperation,
1027                                                                                                                                 const deUint32  workerThreadCount)
1028 {
1029         m_deferredOperation = deferredOperation;
1030         m_workerThreadCount = workerThreadCount;
1031 }
1032
1033 void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool     useArrayOfPointers)
1034 {
1035         m_useArrayOfPointers = useArrayOfPointers;
1036 }
1037
1038 void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer            indirectBuffer,
1039                                                                                                                                           const VkDeviceSize    indirectBufferOffset,
1040                                                                                                                                           const deUint32                indirectBufferStride)
1041 {
1042         m_indirectBuffer                = indirectBuffer;
1043         m_indirectBufferOffset  = indirectBufferOffset;
1044         m_indirectBufferStride  = indirectBufferStride;
1045 }
1046
1047 VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
1048 {
1049         return m_buildFlags;
1050 }
1051
1052 void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&                                vk,
1053                                                                                                   const VkDevice                                                device,
1054                                                                                                   Allocator&                                                    allocator,
1055                                                                                                   VkDeviceSize                                                  structureSize,
1056                                                                                                   VkDeviceAddress                                               deviceAddress,
1057                                                                                                   const void*                                                   pNext,
1058                                                                                                   const MemoryRequirement&                              addMemoryRequirement)
1059 {
1060         // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR
1061         // or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
1062         DE_ASSERT(!m_geometriesData.empty() !=  !(structureSize == 0)); // logical xor
1063
1064         if (structureSize == 0)
1065         {
1066                 std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
1067                 std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
1068                 std::vector<VkAccelerationStructureBuildRangeInfoKHR>   accelerationStructureBuildRangeInfoKHR;
1069                 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1070                 std::vector<deUint32>                                                                   maxPrimitiveCounts;
1071                 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);
1072
1073                 const VkAccelerationStructureGeometryKHR*                               accelerationStructureGeometriesKHRPointer       = accelerationStructureGeometriesKHR.data();
1074                 const VkAccelerationStructureGeometryKHR* const*                accelerationStructureGeometry                           = accelerationStructureGeometriesKHRPointers.data();
1075
1076                 VkAccelerationStructureBuildGeometryInfoKHR     accelerationStructureBuildGeometryInfoKHR       =
1077                 {
1078                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                       //  VkStructureType                                                                             sType;
1079                         DE_NULL,                                                                                                                                        //  const void*                                                                                 pNext;
1080                         VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                                                        //  VkAccelerationStructureTypeKHR                                              type;
1081                         m_buildFlags,                                                                                                                           //  VkBuildAccelerationStructureFlagsKHR                                flags;
1082                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                         //  VkBuildAccelerationStructureModeKHR                                 mode;
1083                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
1084                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
1085                         static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),                       //  deUint32                                                                                    geometryCount;
1086                         m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,     //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
1087                         m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,                         //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
1088                         makeDeviceOrHostAddressKHR(DE_NULL)                                                                                     //  VkDeviceOrHostAddressKHR                                                    scratchData;
1089                 };
1090                 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
1091                 {
1092                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
1093                         DE_NULL,                                                                                                                //  const void*         pNext;
1094                         0,                                                                                                                              //  VkDeviceSize        accelerationStructureSize;
1095                         0,                                                                                                                              //  VkDeviceSize        updateScratchSize;
1096                         0                                                                                                                               //  VkDeviceSize        buildScratchSize;
1097                 };
1098
1099                 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
1100
1101                 m_structureSize         = sizeInfo.accelerationStructureSize;
1102                 m_updateScratchSize     = sizeInfo.updateScratchSize;
1103                 m_buildScratchSize      = sizeInfo.buildScratchSize;
1104         }
1105         else
1106         {
1107                 m_structureSize         = structureSize;
1108                 m_updateScratchSize     = 0u;
1109                 m_buildScratchSize      = 0u;
1110         }
1111
1112         {
1113                 const VkBufferCreateInfo                bufferCreateInfo                = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1114                 const MemoryRequirement                 memoryRequirement               = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
1115
1116                 try
1117                 {
1118                         m_accelerationStructureBuffer   = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
1119                 }
1120                 catch (const tcu::NotSupportedError&)
1121                 {
1122                         // retry without Cached flag
1123                         m_accelerationStructureBuffer   = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
1124                 }
1125         }
1126
1127         {
1128                 const VkAccelerationStructureTypeKHR            structureType                                           = (m_createGeneric
1129                                                                                                                                                                                    ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
1130                                                                                                                                                                                    : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
1131                 const VkAccelerationStructureCreateInfoKHR      accelerationStructureCreateInfoKHR
1132                 {
1133                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,                                               //  VkStructureType                                                                                     sType;
1134                         pNext,                                                                                                                                                  //  const void*                                                                                         pNext;
1135                         m_createFlags,                                                                                                                                  //  VkAccelerationStructureCreateFlagsKHR                                       createFlags;
1136                         getAccelerationStructureBuffer()->get(),                                                                                //  VkBuffer                                                                                            buffer;
1137                         getAccelerationStructureBufferOffset(),                                                                                 //  VkDeviceSize                                                                                        offset;
1138                         m_structureSize,                                                                                                                                //  VkDeviceSize                                                                                        size;
1139                         structureType,                                                                                                                                  //  VkAccelerationStructureTypeKHR                                                      type;
1140                         deviceAddress                                                                                                                                   //  VkDeviceAddress                                                                                     deviceAddress;
1141                 };
1142
1143                 m_accelerationStructureKHR      = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
1144         }
1145
1146         if (m_buildScratchSize > 0u)
1147         {
1148                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1149                 {
1150                         const VkBufferCreateInfo                bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1151                         m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1152                 }
1153                 else
1154                 {
1155                         m_hostScratchBuffer->resize(static_cast<size_t>(m_buildScratchSize));
1156                 }
1157         }
1158
1159         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
1160         {
1161                 m_vertexBuffer  = de::MovePtr<BufferWithMemory>(createVertexBuffer(vk, device, allocator, m_geometriesData));
1162                 m_indexBuffer   = de::MovePtr<BufferWithMemory>(createIndexBuffer(vk, device, allocator, m_geometriesData));
1163         }
1164 }
1165
1166 void BottomLevelAccelerationStructureKHR::build (const DeviceInterface&                                         vk,
1167                                                                                                  const VkDevice                                                         device,
1168                                                                                                  const VkCommandBuffer                                          cmdBuffer)
1169 {
1170         DE_ASSERT(!m_geometriesData.empty());
1171         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1172         DE_ASSERT(m_buildScratchSize != 0);
1173
1174         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1175         {
1176                 updateVertexBuffer(vk, device, m_geometriesData,  getVertexBuffer(), getVertexBufferOffset());
1177                 if(getIndexBuffer() != DE_NULL)
1178                         updateIndexBuffer(vk, device, m_geometriesData, getIndexBuffer(), getIndexBufferOffset());
1179         }
1180
1181         {
1182                 std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
1183                 std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
1184                 std::vector<VkAccelerationStructureBuildRangeInfoKHR>   accelerationStructureBuildRangeInfoKHR;
1185                 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
1186                 std::vector<deUint32>                                                                   maxPrimitiveCounts;
1187
1188                 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers,
1189                                                   accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts, getVertexBufferOffset(), getIndexBufferOffset());
1190
1191                 const VkAccelerationStructureGeometryKHR*                       accelerationStructureGeometriesKHRPointer       = accelerationStructureGeometriesKHR.data();
1192                 const VkAccelerationStructureGeometryKHR* const*        accelerationStructureGeometry                           = accelerationStructureGeometriesKHRPointers.data();
1193                 VkDeviceOrHostAddressKHR                                                        scratchData                                                                     = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1194                                                                                                                                                                                                                 ? makeDeviceOrHostAddressKHR(vk, device, getDeviceScratchBuffer()->get(), getDeviceScratchBufferOffset())
1195                                                                                                                                                                                                                 : makeDeviceOrHostAddressKHR(getHostScratchBuffer()->data());
1196                 const deUint32                                                                          geometryCount                                                           = (m_buildWithoutGeometries
1197                                                                                                                                                                                                                 ? 0u
1198                                                                                                                                                                                                                 : static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
1199
1200                 VkAccelerationStructureBuildGeometryInfoKHR     accelerationStructureBuildGeometryInfoKHR       =
1201                 {
1202                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                       //  VkStructureType                                                                             sType;
1203                         DE_NULL,                                                                                                                                        //  const void*                                                                                 pNext;
1204                         VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                                                        //  VkAccelerationStructureTypeKHR                                              type;
1205                         m_buildFlags,                                                                                                                           //  VkBuildAccelerationStructureFlagsKHR                                flags;
1206                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                         //  VkBuildAccelerationStructureModeKHR                                 mode;
1207                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
1208                         m_accelerationStructureKHR.get(),                                                                                       //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
1209                         geometryCount,                                                                                                                          //  deUint32                                                                                    geometryCount;
1210                         m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,     //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
1211                         m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,                         //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
1212                         scratchData                                                                                                                                     //  VkDeviceOrHostAddressKHR                                                    scratchData;
1213                 };
1214
1215                 VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr     = accelerationStructureBuildRangeInfoKHR.data();
1216
1217                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1218                 {
1219                         if (m_indirectBuffer == DE_NULL)
1220                                 vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1221                         else
1222                         {
1223                                 VkDeviceAddress indirectDeviceAddress   = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
1224                                 deUint32*               pMaxPrimitiveCounts             = maxPrimitiveCounts.data();
1225                                 vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
1226                         }
1227                 }
1228                 else if (!m_deferredOperation)
1229                 {
1230                         VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
1231                 }
1232                 else
1233                 {
1234                         const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1235                         const auto deferredOperation    = deferredOperationPtr.get();
1236
1237                         VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1238
1239                         DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1240
1241                         finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1242                 }
1243         }
1244
1245         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1246         {
1247                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1248                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
1249
1250                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1251         }
1252 }
1253
1254 void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&                                              vk,
1255                                                                                                         const VkDevice                                                          device,
1256                                                                                                         const VkCommandBuffer                                           cmdBuffer,
1257                                                                                                         BottomLevelAccelerationStructure*                       accelerationStructure,
1258                                                                                                         bool                                                                            compactCopy)
1259 {
1260         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1261         DE_ASSERT(accelerationStructure != DE_NULL);
1262
1263         VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1264         {
1265                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,                                                                                                                 // VkStructureType                                              sType;
1266                 DE_NULL,                                                                                                                                                                                                                // const void*                                                  pNext;
1267                 *(accelerationStructure->getPtr()),                                                                                                                                                             // VkAccelerationStructureKHR                   src;
1268                 *(getPtr()),                                                                                                                                                                                                    // VkAccelerationStructureKHR                   dst;
1269                 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR   // VkCopyAccelerationStructureModeKHR   mode;
1270         };
1271
1272         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1273         {
1274                 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1275         }
1276         else if (!m_deferredOperation)
1277         {
1278                 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1279         }
1280         else
1281         {
1282                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1283                 const auto deferredOperation    = deferredOperationPtr.get();
1284
1285                 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1286
1287                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1288
1289                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1290         }
1291
1292         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1293         {
1294                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1295                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
1296
1297                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1298         }
1299 }
1300
1301 void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface&             vk,
1302                                                                                                          const VkDevice                         device,
1303                                                                                                          const VkCommandBuffer          cmdBuffer,
1304                                                                                                          SerialStorage*                         storage)
1305 {
1306         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1307         DE_ASSERT(storage != DE_NULL);
1308
1309         const VkCopyAccelerationStructureToMemoryInfoKHR        copyAccelerationStructureInfo   =
1310         {
1311                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,       // VkStructureType                                              sType;
1312                 DE_NULL,                                                                                                                        // const void*                                                  pNext;
1313                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                   src;
1314                 storage->getAddress(vk, device, m_buildType),                                           // VkDeviceOrHostAddressKHR                             dst;
1315                 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR                                       // VkCopyAccelerationStructureModeKHR   mode;
1316         };
1317
1318         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1319         {
1320                 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
1321         }
1322         else if (!m_deferredOperation)
1323         {
1324                 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1325         }
1326         else
1327         {
1328                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1329                 const auto deferredOperation    = deferredOperationPtr.get();
1330
1331                 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1332
1333                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1334
1335                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1336         }
1337 }
1338
1339 void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface&   vk,
1340                                                                                                            const VkDevice                       device,
1341                                                                                                            const VkCommandBuffer        cmdBuffer,
1342                                                                                                            SerialStorage*                       storage)
1343 {
1344         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1345         DE_ASSERT(storage != DE_NULL);
1346
1347         const VkCopyMemoryToAccelerationStructureInfoKHR        copyAccelerationStructureInfo   =
1348         {
1349                 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,       // VkStructureType                                                      sType;
1350                 DE_NULL,                                                                                                                        // const void*                                                          pNext;
1351                 storage->getAddressConst(vk, device, m_buildType),                                      // VkDeviceOrHostAddressConstKHR                        src;
1352                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                           dst;
1353                 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR                                     // VkCopyAccelerationStructureModeKHR           mode;
1354         };
1355
1356         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1357         {
1358                 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1359         }
1360         else if (!m_deferredOperation)
1361         {
1362                 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1363         }
1364         else
1365         {
1366                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1367                 const auto deferredOperation    = deferredOperationPtr.get();
1368
1369                 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1370
1371                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1372
1373                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1374         }
1375
1376         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1377         {
1378                 const VkAccessFlags             accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1379                 const VkMemoryBarrier   memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1380
1381                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1382         }
1383 }
1384
1385 const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
1386 {
1387         return &m_accelerationStructureKHR.get();
1388 }
1389
1390 void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface&                                                                                             vk,
1391                                                                                                                          const VkDevice                                                                                                         device,
1392                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR>&                                       accelerationStructureGeometriesKHR,
1393                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR*>&                                      accelerationStructureGeometriesKHRPointers,
1394                                                                                                                          std::vector<VkAccelerationStructureBuildRangeInfoKHR>&                         accelerationStructureBuildRangeInfoKHR,
1395                                                                                                                          std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT>&       accelerationStructureGeometryMicromapsEXT,
1396                                                                                                                          std::vector<deUint32>&                                                                                         maxPrimitiveCounts,
1397                                                                                                                          VkDeviceSize                                                                                                           vertexBufferOffset,
1398                                                                                                                          VkDeviceSize                                                                                                           indexBufferOffset) const
1399 {
1400         accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
1401         accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
1402         accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
1403         accelerationStructureGeometryMicromapsEXT.resize(m_geometriesData.size());
1404         maxPrimitiveCounts.resize(m_geometriesData.size());
1405
1406         for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
1407         {
1408                 const de::SharedPtr<RaytracedGeometryBase>&                             geometryData = m_geometriesData[geometryNdx];
1409                 VkDeviceOrHostAddressConstKHR                                                   vertexData, indexData;
1410                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1411                 {
1412                         if (getVertexBuffer() != DE_NULL)
1413                         {
1414                                 vertexData                      = makeDeviceOrHostAddressConstKHR(vk, device, getVertexBuffer()->get(), vertexBufferOffset);
1415                                 if (m_indirectBuffer == DE_NULL )
1416                                 {
1417                                         vertexBufferOffset      += deAlignSize(geometryData->getVertexByteSize(), 8);
1418                                 }
1419                         }
1420                         else
1421                                 vertexData                      = makeDeviceOrHostAddressConstKHR(DE_NULL);
1422
1423                         if (getIndexBuffer() != DE_NULL &&  geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1424                         {
1425                                 indexData                       = makeDeviceOrHostAddressConstKHR(vk, device, getIndexBuffer()->get(), indexBufferOffset);
1426                                 indexBufferOffset       += deAlignSize(geometryData->getIndexByteSize(), 8);
1427                         }
1428                         else
1429                                 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1430                 }
1431                 else
1432                 {
1433                         vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
1434                         if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1435                                 indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
1436                         else
1437                                 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1438                 }
1439
1440                 VkAccelerationStructureGeometryTrianglesDataKHR accelerationStructureGeometryTrianglesDataKHR =
1441                 {
1442                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,   //  VkStructureType                                     sType;
1443                         DE_NULL,                                                                                                                                //  const void*                                         pNext;
1444                         geometryData->getVertexFormat(),                                                                                //  VkFormat                                            vertexFormat;
1445                         vertexData,                                                                                                                             //  VkDeviceOrHostAddressConstKHR       vertexData;
1446                         geometryData->getVertexStride(),                                                                                //  VkDeviceSize                                        vertexStride;
1447                         static_cast<deUint32>(geometryData->getVertexCount()),                                  //  uint32_t                                            maxVertex;
1448                         geometryData->getIndexType(),                                                                                   //  VkIndexType                                         indexType;
1449                         indexData,                                                                                                                              //  VkDeviceOrHostAddressConstKHR       indexData;
1450                         makeDeviceOrHostAddressConstKHR(DE_NULL),                                                               //  VkDeviceOrHostAddressConstKHR       transformData;
1451                 };
1452
1453                 if (geometryData->getHasOpacityMicromap())
1454                         accelerationStructureGeometryTrianglesDataKHR.pNext = &geometryData->getOpacityMicromap();
1455
1456                 const VkAccelerationStructureGeometryAabbsDataKHR               accelerationStructureGeometryAabbsDataKHR =
1457                 {
1458                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR,       //  VkStructureType                                     sType;
1459                         DE_NULL,                                                                                                                        //  const void*                                         pNext;
1460                         vertexData,                                                                                                                     //  VkDeviceOrHostAddressConstKHR       data;
1461                         geometryData->getAABBStride()                                                                           //  VkDeviceSize                                        stride;
1462                 };
1463                 const VkAccelerationStructureGeometryDataKHR                    geometry = (geometryData->isTrianglesType())
1464                                                                                                                                                  ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
1465                                                                                                                                                  : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
1466                 const VkAccelerationStructureGeometryKHR                                accelerationStructureGeometryKHR =
1467                 {
1468                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,  //  VkStructureType                                                     sType;
1469                         DE_NULL,                                                                                                //  const void*                                                         pNext;
1470                         geometryData->getGeometryType(),                                                //  VkGeometryTypeKHR                                           geometryType;
1471                         geometry,                                                                                               //  VkAccelerationStructureGeometryDataKHR      geometry;
1472                         geometryData->getGeometryFlags()                                                //  VkGeometryFlagsKHR                                          flags;
1473                 };
1474
1475                 const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());
1476
1477                 const VkAccelerationStructureBuildRangeInfoKHR                  accelerationStructureBuildRangeInfosKHR =
1478                 {
1479                         primitiveCount, //  deUint32    primitiveCount;
1480                         0,                              //  deUint32    primitiveOffset;
1481                         0,                              //  deUint32    firstVertex;
1482                         0                               //  deUint32    firstTransform;
1483                 };
1484
1485                 accelerationStructureGeometriesKHR[geometryNdx]                 = accelerationStructureGeometryKHR;
1486                 accelerationStructureGeometriesKHRPointers[geometryNdx] = &accelerationStructureGeometriesKHR[geometryNdx];
1487                 accelerationStructureBuildRangeInfoKHR[geometryNdx]             = accelerationStructureBuildRangeInfosKHR;
1488                 maxPrimitiveCounts[geometryNdx]                                                 = geometryData->getPrimitiveCount();
1489         }
1490 }
1491
1492 deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
1493 {
1494         return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
1495 }
1496
1497 void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface&   vk,
1498                                                                                                            const VkDevice                       device,
1499                                                                                                            const VkCommandBuffer        cmdBuffer,
1500                                                                                                            Allocator&                           allocator,
1501                                                                                                            VkDeviceAddress                      deviceAddress)
1502 {
1503         create(vk, device, allocator, 0u, deviceAddress);
1504         build(vk, device, cmdBuffer);
1505 }
1506
1507 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&                                vk,
1508                                                                                                                   const VkDevice                                                device,
1509                                                                                                                   const VkCommandBuffer                                 cmdBuffer,
1510                                                                                                                   Allocator&                                                    allocator,
1511                                                                                                                   BottomLevelAccelerationStructure*             accelerationStructure,
1512                                                                                                                   VkDeviceSize                                                  compactCopySize,
1513                                                                                                                   VkDeviceAddress                                               deviceAddress)
1514 {
1515         DE_ASSERT(accelerationStructure != NULL);
1516         VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
1517         DE_ASSERT(copiedSize != 0u);
1518
1519         create(vk, device, allocator, copiedSize, deviceAddress);
1520         copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1521 }
1522
1523 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1524                                                                                                                                  const VkDevice                                                         device,
1525                                                                                                                                  const VkCommandBuffer                                          cmdBuffer,
1526                                                                                                                                  Allocator&                                                                     allocator,
1527                                                                                                                                  SerialStorage*                                                         storage,
1528                                                                                                                                  VkDeviceAddress                                                        deviceAddress )
1529 {
1530         DE_ASSERT(storage != NULL);
1531         DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1532         create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1533         deserialize(vk, device, cmdBuffer, storage);
1534 }
1535
1536 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
1537 {
1538         return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
1539 }
1540
1541 // Forward declaration
1542 struct BottomLevelAccelerationStructurePoolImpl;
1543
1544 class BottomLevelAccelerationStructurePoolMember : public BottomLevelAccelerationStructureKHR
1545 {
1546 public:
1547         friend class BottomLevelAccelerationStructurePool;
1548
1549                                                                 BottomLevelAccelerationStructurePoolMember      (BottomLevelAccelerationStructurePoolImpl& pool);
1550                                                                 BottomLevelAccelerationStructurePoolMember      (const BottomLevelAccelerationStructurePoolMember&) = delete;
1551                                                                 BottomLevelAccelerationStructurePoolMember      (BottomLevelAccelerationStructurePoolMember&&) = delete;
1552         virtual                                         ~BottomLevelAccelerationStructurePoolMember     () = default;
1553
1554         virtual void                            create                                                                          (const DeviceInterface&,
1555                                                                                                                                                          const VkDevice,
1556                                                                                                                                                          Allocator&,
1557                                                                                                                                                          VkDeviceSize,
1558                                                                                                                                                          VkDeviceAddress,
1559                                                                                                                                                          const void*,
1560                                                                                                                                                          const MemoryRequirement&) override
1561                                                                 {
1562                                                                         DE_ASSERT(0); // Silent this method
1563                                                                 }
1564         virtual auto                            computeBuildSize                                                        (const DeviceInterface& vk,
1565                                                                                                                                                          const VkDevice                 device,
1566                                                                                                                                                          const VkDeviceSize             strSize) const
1567                                                                                                                                                          //              accStrSize,updateScratch, buildScratch, vertexSize,   indexSize
1568                                                                                                                                                          -> std::tuple<VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize, VkDeviceSize>;
1569 protected:
1570         struct Info;
1571         virtual void                            preCreateSetSizesAndOffsets                                     (const Info&                    info,
1572                                                                                                                                                          const VkDeviceSize             accStrSize,
1573                                                                                                                                                          const VkDeviceSize             updateScratchSize,
1574                                                                                                                                                          const VkDeviceSize             buildScratchSize);
1575         virtual void                            createAccellerationStructure                            (const DeviceInterface& vk,
1576                                                                                                                                                          const VkDevice                 device,
1577                                                                                                                                                          VkDeviceAddress                deviceAddress);
1578
1579         virtual BufferWithMemory*       getAccelerationStructureBuffer                          () const override;
1580         virtual BufferWithMemory*       getDeviceScratchBuffer                                          () const override;
1581         virtual std::vector<deUint8>*   getHostScratchBuffer                                    () const override;
1582         virtual BufferWithMemory*       getVertexBuffer                                                         () const override;
1583         virtual BufferWithMemory*       getIndexBuffer                                                          () const override;
1584
1585         virtual VkDeviceSize            getAccelerationStructureBufferOffset            () const override { return m_info.accStrOffset; }
1586         virtual VkDeviceSize            getDeviceScratchBufferOffset                            () const override { return m_info.buildScratchBuffOffset; }
1587         virtual VkDeviceSize            getVertexBufferOffset                                           () const override { return m_info.vertBuffOffset; }
1588         virtual VkDeviceSize            getIndexBufferOffset                                            () const override { return m_info.indexBuffOffset; }
1589
1590         BottomLevelAccelerationStructurePoolImpl&       m_pool;
1591
1592         struct Info
1593         {
1594                 deUint32                                accStrIndex;
1595                 VkDeviceSize                    accStrOffset;
1596                 deUint32                                vertBuffIndex;
1597                 VkDeviceSize                    vertBuffOffset;
1598                 deUint32                                indexBuffIndex;
1599                 VkDeviceSize                    indexBuffOffset;
1600                 deUint32                                buildScratchBuffIndex;
1601                 VkDeviceSize                    buildScratchBuffOffset;
1602         }                                                                                       m_info;
1603 };
1604
// Returns the value of type X with every bit set (the complement of zero).
// The argument is used only to deduce X; its value is ignored.
template<class X> inline X negz (const X&)
{
	const X zero = static_cast<X>(0);
	return static_cast<X>(~zero);
}

// Tells whether x equals the all-bits-set value of its own type.
template<class X> inline bool isnegz (const X& x)
{
	const X allBitsSet = negz(x);
	return allBitsSet == x;
}
// Converts an integral value to the unsigned type of the same width
// (value-level counterpart of std::make_unsigned).
template<class Y> inline auto make_unsigned(const Y& y) -> typename std::make_unsigned<Y>::type
{
	using UnsignedY = typename std::make_unsigned<Y>::type;
	return static_cast<UnsignedY>(y);
}
1617
1618 BottomLevelAccelerationStructurePoolMember::BottomLevelAccelerationStructurePoolMember  (BottomLevelAccelerationStructurePoolImpl& pool)
1619         : m_pool        (pool)
1620         , m_info        {}
1621 {
1622 }
1623
1624 struct BottomLevelAccelerationStructurePoolImpl
1625 {
1626         BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePoolImpl&&) = delete;
1627         BottomLevelAccelerationStructurePoolImpl (const BottomLevelAccelerationStructurePoolImpl&) = delete;
1628         BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool);
1629
1630         BottomLevelAccelerationStructurePool&                   m_pool;
1631         std::vector<de::SharedPtr<BufferWithMemory>>    m_accellerationStructureBuffers;
1632         de::SharedPtr<BufferWithMemory>                                 m_deviceScratchBuffer;
1633         de::UniquePtr<std::vector<deUint8>>                             m_hostScratchBuffer;
1634         std::vector<de::SharedPtr<BufferWithMemory>>    m_vertexBuffers;
1635         std::vector<de::SharedPtr<BufferWithMemory>>    m_indexBuffers;
1636 };
1637 BottomLevelAccelerationStructurePoolImpl::BottomLevelAccelerationStructurePoolImpl (BottomLevelAccelerationStructurePool& pool)
1638         : m_pool                                                        (pool)
1639         , m_accellerationStructureBuffers       ()
1640         , m_deviceScratchBuffer                         ()
1641         , m_hostScratchBuffer                           (new std::vector<deUint8>)
1642         , m_vertexBuffers                                       ()
1643         , m_indexBuffers                                        ()
1644 {
1645 }
1646 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getAccelerationStructureBuffer () const
1647 {
1648         BufferWithMemory* result = nullptr;
1649         if (m_pool.m_accellerationStructureBuffers.size())
1650         {
1651                 DE_ASSERT(!isnegz(m_info.accStrIndex));
1652                 result = m_pool.m_accellerationStructureBuffers[m_info.accStrIndex].get();
1653         }
1654         return result;
1655 }
1656 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getDeviceScratchBuffer () const
1657 {
1658         DE_ASSERT(m_info.buildScratchBuffIndex == 0);
1659         return m_pool.m_deviceScratchBuffer.get();
1660 }
1661 std::vector<deUint8>* BottomLevelAccelerationStructurePoolMember::getHostScratchBuffer () const
1662 {
1663         return this->m_buildScratchSize ? m_pool.m_hostScratchBuffer.get() : nullptr;
1664 }
1665
1666 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getVertexBuffer () const
1667 {
1668         BufferWithMemory* result = nullptr;
1669         if (m_pool.m_vertexBuffers.size())
1670         {
1671                 DE_ASSERT(!isnegz(m_info.vertBuffIndex));
1672                 result = m_pool.m_vertexBuffers[m_info.vertBuffIndex].get();
1673         }
1674         return result;
1675 }
1676 BufferWithMemory* BottomLevelAccelerationStructurePoolMember::getIndexBuffer () const
1677 {
1678         BufferWithMemory* result = nullptr;
1679         if (m_pool.m_indexBuffers.size())
1680         {
1681                 DE_ASSERT(!isnegz(m_info.indexBuffIndex));
1682                 result = m_pool.m_indexBuffers[m_info.indexBuffIndex].get();
1683         }
1684         return result;
1685 }
1686
1687 struct BottomLevelAccelerationStructurePool::Impl : BottomLevelAccelerationStructurePoolImpl
1688 {
1689         friend class BottomLevelAccelerationStructurePool;
1690         friend class BottomLevelAccelerationStructurePoolMember;
1691
1692         Impl (BottomLevelAccelerationStructurePool& pool)
1693                 : BottomLevelAccelerationStructurePoolImpl(pool) { }
1694 };
1695
1696 BottomLevelAccelerationStructurePool::BottomLevelAccelerationStructurePool ()
1697         : m_batchStructCount    (4)
1698         , m_batchGeomCount              (0)
1699         , m_infos                               ()
1700         , m_structs                             ()
1701         , m_createOnce                  (false)
1702         , m_tryCachedMemory             (true)
1703         , m_structsBuffSize             (0)
1704         , m_updatesScratchSize  (0)
1705         , m_buildsScratchSize   (0)
1706         , m_verticesSize                (0)
1707         , m_indicesSize                 (0)
1708         , m_impl                                (new Impl(*this))
1709 {
1710 }
1711
1712 BottomLevelAccelerationStructurePool::~BottomLevelAccelerationStructurePool()
1713 {
1714         delete m_impl;
1715 }
1716
1717 void BottomLevelAccelerationStructurePool::batchStructCount (const deUint32& value)
1718 {
1719         DE_ASSERT(value >= 1); m_batchStructCount = value;
1720 }
1721
1722 auto BottomLevelAccelerationStructurePool::add (VkDeviceSize            structureSize,
1723                                                                                                 VkDeviceAddress         deviceAddress) -> BottomLevelAccelerationStructurePool::BlasPtr
1724 {
1725         // Prevent a programmer from calling this method after batchCreate(...) method has been called.
1726         if (m_createOnce) DE_ASSERT(0);
1727
1728         auto blas = new BottomLevelAccelerationStructurePoolMember(*m_impl);
1729         m_infos.push_back({structureSize, deviceAddress});
1730         m_structs.emplace_back(blas);
1731         return m_structs.back();
1732 }
1733
1734 void adjustBatchCount (const DeviceInterface&           vkd,
1735                                            const VkDevice                               device,
1736                                            const std::vector<BottomLevelAccelerationStructurePool::BlasPtr>& structs,
1737                                            const std::vector<BottomLevelAccelerationStructurePool::BlasInfo>& infos,
1738                                            const VkDeviceSize                   maxBufferSize,
1739                                            deUint32                                             (&result)[4])
1740 {
1741         tcu::Vector<VkDeviceSize, 4>    sizes(0);
1742         tcu::Vector<VkDeviceSize, 4>    sums(0);
1743         tcu::Vector<deUint32, 4>                tmps(0);
1744         tcu::Vector<deUint32, 4>                batches(0);
1745
1746         VkDeviceSize    updateScratchSize = 0;  static_cast<void>(updateScratchSize);   // not used yet, disabled for future implementation
1747
1748         auto updateIf = [&](deUint32 c)
1749         {
1750                 if (sums[c] + sizes[c] <= maxBufferSize)
1751                 {
1752                         sums[c] += sizes[c];
1753                         tmps[c] += 1;
1754
1755                         batches[c] = std::max(tmps[c], batches[c]);
1756                 }
1757                 else
1758                 {
1759                         sums[c] = 0;
1760                         tmps[c] = 0;
1761                 }
1762         };
1763
1764         const deUint32  maxIter = static_cast<deUint32>(structs.size());
1765         for (deUint32 i = 0; i < maxIter; ++i)
1766         {
1767                 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(structs[i].get());
1768                 std::tie(sizes[0], updateScratchSize, sizes[1], sizes[2], sizes[3]) = str.computeBuildSize(vkd, device, infos[i].structureSize);
1769
1770                 updateIf(0);
1771                 updateIf(1);
1772                 updateIf(2);
1773                 updateIf(3);
1774         }
1775
1776         result[0] = std::max(batches[0], 1u);
1777         result[1] = std::max(batches[1], 1u);
1778         result[2] = std::max(batches[2], 1u);
1779         result[3] = std::max(batches[3], 1u);
1780 }
1781
1782 size_t BottomLevelAccelerationStructurePool::getAllocationCount () const
1783 {
1784         return m_impl->m_accellerationStructureBuffers.size()
1785                         + m_impl->m_vertexBuffers.size()
1786                         + m_impl->m_indexBuffers.size()
1787                         + 1 /* for scratch buffer */;
1788 }
1789
1790 size_t BottomLevelAccelerationStructurePool::getAllocationCount (const DeviceInterface&         vk,
1791                                                                                                                                  const VkDevice                         device,
1792                                                                                                                                  const VkDeviceSize                     maxBufferSize) const
1793 {
1794         DE_ASSERT(m_structs.size() != 0);
1795
1796         std::map<deUint32, VkDeviceSize>        accStrSizes;
1797         std::map<deUint32, VkDeviceSize>        vertBuffSizes;
1798         std::map<deUint32, VkDeviceSize>        indexBuffSizes;
1799         std::map<deUint32, VkDeviceSize>        scratchBuffSizes;
1800
1801         const deUint32  allStructsCount         = structCount();
1802
1803         deUint32                batchStructCount        = m_batchStructCount;
1804         deUint32                batchScratchCount       = m_batchStructCount;
1805         deUint32                batchVertexCount        = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
1806         deUint32                batchIndexCount         = batchVertexCount;
1807
1808         if (!isnegz(maxBufferSize))
1809         {
1810                 deUint32        batches[4];
1811                 adjustBatchCount(vk, device, m_structs, m_infos, maxBufferSize, batches);
1812                 batchStructCount        = batches[0];
1813                 batchScratchCount       = batches[1];
1814                 batchVertexCount        = batches[2];
1815                 batchIndexCount         = batches[3];
1816         }
1817
1818         deUint32                iStr                            = 0;
1819         deUint32                iScratch                        = 0;
1820         deUint32                iVertex                         = 0;
1821         deUint32                iIndex                          = 0;
1822
1823         VkDeviceSize    strSize                         = 0;
1824         VkDeviceSize    updateScratchSize       = 0;
1825         VkDeviceSize    buildScratchSize        = 0;
1826         VkDeviceSize    vertexSize                      = 0;
1827         VkDeviceSize    indexSize                       = 0;
1828
1829         for (; iStr < allStructsCount; ++iStr)
1830         {
1831                 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
1832                 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[iStr].structureSize);
1833
1834                 {
1835                         const VkDeviceSize      alignedStrSize  = deAlign64(strSize, 256);
1836                         const deUint32          accStrIndex             = (iStr / batchStructCount);
1837                         accStrSizes[accStrIndex]        += alignedStrSize;
1838                 }
1839
1840                 if (buildScratchSize != 0)
1841                 {
1842                         const VkDeviceSize      alignedBuilsScratchSize = deAlign64(buildScratchSize, 256);
1843                         const deUint32          scratchBuffIndex                = (iScratch/ batchScratchCount);
1844                         scratchBuffSizes[scratchBuffIndex]      += alignedBuilsScratchSize;
1845                         iScratch                                                        += 1;
1846                 }
1847
1848                 if (vertexSize != 0)
1849                 {
1850                         const VkDeviceSize      alignedVertBuffSize     = deAlign64(vertexSize, 8);
1851                         const deUint32          vertBuffIndex           = (iVertex / batchVertexCount);
1852                         vertBuffSizes[vertBuffIndex]    += alignedVertBuffSize;
1853                         iVertex                                                 += 1;
1854                 }
1855
1856                 if (indexSize != 0)
1857                 {
1858                         const VkDeviceSize      alignedIndexBuffSize    = deAlign64(indexSize, 8);
1859                         const deUint32          indexBuffIndex                  = (iIndex / batchIndexCount);
1860                         indexBuffSizes[indexBuffIndex]  += alignedIndexBuffSize;
1861                         iIndex                                                  += 1;
1862                 }
1863         }
1864
1865         return accStrSizes.size()
1866                         + vertBuffSizes.size()
1867                         + indexBuffSizes.size()
1868                         + scratchBuffSizes.size();
1869 }
1870
1871 tcu::Vector<VkDeviceSize, 4> BottomLevelAccelerationStructurePool::getAllocationSizes (const DeviceInterface&           vk,
1872                                                                                                                                                                            const VkDevice                               device) const
1873 {
1874         if (m_structsBuffSize)
1875         {
1876                 return tcu::Vector<VkDeviceSize, 4>(m_structsBuffSize, m_buildsScratchSize, m_verticesSize, m_indicesSize);
1877         }
1878
1879         VkDeviceSize strSize                            = 0;
1880         VkDeviceSize updateScratchSize          = 0;    static_cast<void>(updateScratchSize);           // not used yet, disabled for future implementation
1881         VkDeviceSize buildScratchSize           = 0;
1882         VkDeviceSize vertexSize                         = 0;
1883         VkDeviceSize indexSize                          = 0;
1884         VkDeviceSize sumStrSize                         = 0;
1885         VkDeviceSize sumUpdateScratchSize       = 0;    static_cast<void>(sumUpdateScratchSize);        // not used yet, disabled for future implementation
1886         VkDeviceSize sumBuildScratchSize        = 0;
1887         VkDeviceSize sumVertexSize                      = 0;
1888         VkDeviceSize sumIndexSize                       = 0;
1889         for (size_t i = 0; i < structCount(); ++i)
1890         {
1891                 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[i].get());
1892                 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vk, device, m_infos[i].structureSize);
1893                 sumStrSize                              += deAlign64(strSize, 256);
1894                 //sumUpdateScratchSize  += deAlign64(updateScratchSize, 256);   not used yet, disabled for future implementation
1895                 sumBuildScratchSize             += deAlign64(buildScratchSize, 256);
1896                 sumVertexSize                   += deAlign64(vertexSize, 8);
1897                 sumIndexSize                    += deAlign64(indexSize, 8);
1898         }
1899         return tcu::Vector<VkDeviceSize, 4>(sumStrSize, sumBuildScratchSize, sumVertexSize, sumIndexSize);
1900 }
1901
1902 void BottomLevelAccelerationStructurePool::batchCreate (const DeviceInterface&          vkd,
1903                                                                                                                 const VkDevice                          device,
1904                                                                                                                 Allocator&                                      allocator)
1905 {
1906         batchCreateAdjust(vkd, device, allocator, negz<VkDeviceSize>(0));
1907 }
1908
1909 void BottomLevelAccelerationStructurePool::batchCreateAdjust (const DeviceInterface&    vkd,
1910                                                                                                                           const VkDevice                        device,
1911                                                                                                                           Allocator&                            allocator,
1912                                                                                                                           const VkDeviceSize            maxBufferSize)
1913 {
1914         // Prevent a programmer from calling this method more than once.
1915         if (m_createOnce) DE_ASSERT(0);
1916
1917         m_createOnce = true;
1918         DE_ASSERT(m_structs.size() != 0);
1919
1920         auto createAccellerationStructureBuffer = [&](VkDeviceSize bufferSize) -> typename std::add_pointer<BufferWithMemory>::type
1921         {
1922                 BufferWithMemory* res = nullptr;
1923                 const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1924
1925                 if (m_tryCachedMemory) try
1926                 {
1927                         res = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
1928                 }
1929                 catch (const tcu::NotSupportedError&)
1930                 {
1931                         res = nullptr;
1932                 }
1933
1934                 return (nullptr != res)
1935                                 ? res
1936                                 : (new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1937         };
1938
1939         auto createDeviceScratchBuffer = [&](VkDeviceSize bufferSize) -> de::SharedPtr<BufferWithMemory>
1940         {
1941                 const VkBufferCreateInfo bci = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1942                 BufferWithMemory* p = new BufferWithMemory(vkd, device, allocator, bci, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
1943                 return de::SharedPtr<BufferWithMemory>(p);
1944         };
1945
1946         std::map<deUint32, VkDeviceSize>        accStrSizes;
1947         std::map<deUint32, VkDeviceSize>        vertBuffSizes;
1948         std::map<deUint32, VkDeviceSize>        indexBuffSizes;
1949
1950         const deUint32  allStructsCount         = structCount();
1951         deUint32                iterKey                         = 0;
1952
1953         deUint32                batchStructCount        = m_batchStructCount;
1954         deUint32                batchVertexCount        = m_batchGeomCount ? m_batchGeomCount : m_batchStructCount;
1955         deUint32                batchIndexCount         = batchVertexCount;
1956
1957         if (!isnegz(maxBufferSize))
1958         {
1959                 deUint32        batches[4];
1960                 adjustBatchCount(vkd, device, m_structs, m_infos, maxBufferSize, batches);
1961                 batchStructCount        = batches[0];
1962                 // batches[1]: batchScratchCount
1963                 batchVertexCount        = batches[2];
1964                 batchIndexCount         = batches[3];
1965         }
1966
1967         deUint32                iStr                            = 0;
1968         deUint32                iVertex                         = 0;
1969         deUint32                iIndex                          = 0;
1970
1971         VkDeviceSize    strSize                         = 0;
1972         VkDeviceSize    updateScratchSize       = 0;
1973         VkDeviceSize    buildScratchSize        = 0;
1974         VkDeviceSize    maxBuildScratchSize     = 0;
1975         VkDeviceSize    vertexSize                      = 0;
1976         VkDeviceSize    indexSize                       = 0;
1977
1978         VkDeviceSize    strOffset                       = 0;
1979         VkDeviceSize    vertexOffset            = 0;
1980         VkDeviceSize    indexOffset                     = 0;
1981
1982         deUint32                hostStructCount         = 0;
1983         deUint32                deviceStructCount       = 0;
1984
1985         for (; iStr < allStructsCount; ++iStr)
1986         {
1987                 BottomLevelAccelerationStructurePoolMember::Info info{};
1988                 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iStr].get());
1989                 std::tie(strSize, updateScratchSize, buildScratchSize, vertexSize, indexSize) = str.computeBuildSize(vkd, device, m_infos[iStr].structureSize);
1990
1991                 ++(str.getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR ? hostStructCount : deviceStructCount);
1992
1993                 {
1994                         const VkDeviceSize      alignedStrSize  = deAlign64(strSize, 256);
1995                         const deUint32          accStrIndex             = (iStr / batchStructCount);
1996                         if (iStr != 0 && (iStr % batchStructCount) == 0)
1997                         {
1998                                 strOffset                               = 0;
1999                         }
2000
2001                         info.accStrIndex                        = accStrIndex;
2002                         info.accStrOffset                       = strOffset;
2003                         accStrSizes[accStrIndex]        += alignedStrSize;
2004                         strOffset                                       += alignedStrSize;
2005                         m_structsBuffSize                       += alignedStrSize;
2006                 }
2007
2008                 if (buildScratchSize != 0)
2009                 {
2010                         maxBuildScratchSize = std::max(maxBuildScratchSize, make_unsigned(deAlign64(buildScratchSize, 256u)));
2011
2012                         info.buildScratchBuffIndex              = 0;
2013                         info.buildScratchBuffOffset             = 0;
2014                 }
2015
2016                 if (vertexSize != 0)
2017                 {
2018                         const VkDeviceSize      alignedVertBuffSize     = deAlign64(vertexSize, 8);
2019                         const deUint32          vertBuffIndex           = (iVertex / batchVertexCount);
2020                         if (iVertex != 0 && (iVertex % batchVertexCount) == 0)
2021                         {
2022                                 vertexOffset                            = 0;
2023                         }
2024
2025                         info.vertBuffIndex                              = vertBuffIndex;
2026                         info.vertBuffOffset                             = vertexOffset;
2027                         vertBuffSizes[vertBuffIndex]    += alignedVertBuffSize;
2028                         vertexOffset                                    += alignedVertBuffSize;
2029                         m_verticesSize                                  += alignedVertBuffSize;
2030                         iVertex                                                 += 1;
2031                 }
2032
2033                 if (indexSize != 0)
2034                 {
2035                         const VkDeviceSize      alignedIndexBuffSize    = deAlign64(indexSize, 8);
2036                         const deUint32          indexBuffIndex                  = (iIndex / batchIndexCount);
2037                         if (iIndex != 0 && (iIndex % batchIndexCount) == 0)
2038                         {
2039                                 indexOffset                                     = 0;
2040                         }
2041
2042                         info.indexBuffIndex                             = indexBuffIndex;
2043                         info.indexBuffOffset                    = indexOffset;
2044                         indexBuffSizes[indexBuffIndex]  += alignedIndexBuffSize;
2045                         indexOffset                                             += alignedIndexBuffSize;
2046                         m_indicesSize                                   += alignedIndexBuffSize;
2047                         iIndex                                                  += 1;
2048                 }
2049
2050                 str.preCreateSetSizesAndOffsets(info, strSize, updateScratchSize, buildScratchSize);
2051         }
2052
2053         for (iterKey = 0; iterKey < static_cast<deUint32>(accStrSizes.size()); ++iterKey)
2054         {
2055                 m_impl->m_accellerationStructureBuffers.emplace_back(createAccellerationStructureBuffer(accStrSizes.at(iterKey)));
2056         }
2057         for (iterKey = 0; iterKey < static_cast<deUint32>(vertBuffSizes.size()); ++iterKey)
2058         {
2059                 m_impl->m_vertexBuffers.emplace_back(createVertexBuffer(vkd, device, allocator, vertBuffSizes.at(iterKey)));
2060         }
2061         for (iterKey = 0; iterKey < static_cast<deUint32>(indexBuffSizes.size()); ++iterKey)
2062         {
2063                 m_impl->m_indexBuffers.emplace_back(createIndexBuffer(vkd, device, allocator, indexBuffSizes.at(iterKey)));
2064         }
2065
2066         if (maxBuildScratchSize)
2067         {
2068                 if (hostStructCount)    m_impl->m_hostScratchBuffer->resize(static_cast<size_t>(maxBuildScratchSize));
2069                 if (deviceStructCount)  m_impl->m_deviceScratchBuffer = createDeviceScratchBuffer(maxBuildScratchSize);
2070
2071                 m_buildsScratchSize = maxBuildScratchSize;
2072         }
2073
2074         for (iterKey = 0; iterKey < allStructsCount; ++iterKey)
2075         {
2076                 auto& str = *dynamic_cast<BottomLevelAccelerationStructurePoolMember*>(m_structs[iterKey].get());
2077                 str.createAccellerationStructure(vkd, device, m_infos[iterKey].deviceAddress);
2078         }
2079 }
2080
2081 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&   vk,
2082                                                                                                            const VkDevice                       device,
2083                                                                                                            VkCommandBuffer                      cmdBuffer)
2084 {
2085         for (const auto& str : m_structs)
2086         {
2087                 str->build(vk, device, cmdBuffer);
2088         }
2089 }
2090
2091 void BottomLevelAccelerationStructurePool::batchBuild (const DeviceInterface&   vk,
2092                                                                                                            const VkDevice                       device,
2093                                                                                                            VkCommandPool                        cmdPool,
2094                                                                                                            VkQueue                                      queue)
2095 {
2096         const deUint32                  limit   = 10000u;
2097         const deUint32                  count   = structCount();
2098         std::vector<BlasPtr>    buildingOnDevice;
2099
2100         auto buildOnDevice = [&]() -> void
2101         {
2102                 Move<VkCommandBuffer>   cmd = allocateCommandBuffer(vk, device, cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2103
2104                 beginCommandBuffer(vk, *cmd, 0u);
2105                         for (const auto& str : buildingOnDevice)
2106                                 str->build(vk, device, *cmd);
2107                 endCommandBuffer(vk, *cmd);
2108
2109                 submitCommandsAndWait(vk, device, queue, *cmd);
2110                 vk.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2111         };
2112
2113         buildingOnDevice.reserve(limit);
2114         for (deUint32 i = 0; i < count; ++i)
2115         {
2116                 auto str = m_structs[i];
2117
2118                 if (str->getBuildType() == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
2119                         str->build(vk, device, DE_NULL);
2120                 else
2121                         buildingOnDevice.emplace_back(str);
2122
2123                 if ( buildingOnDevice.size() == limit || (count - 1) == i)
2124                 {
2125                         buildOnDevice();
2126                         buildingOnDevice.clear();
2127                 }
2128         }
2129 }
2130
2131 auto BottomLevelAccelerationStructurePoolMember::computeBuildSize (const DeviceInterface&       vk,
2132                                                                                                                                    const VkDevice                       device,
2133                                                                                                                                    const VkDeviceSize           strSize) const
2134                                                                                                                                    //              accStrSize,updateScratch,buildScratch, vertexSize, indexSize
2135                                                                                                                                    -> std::tuple<VkDeviceSize, VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize>
2136 {
2137         DE_ASSERT(!m_geometriesData.empty() !=  !(strSize == 0)); // logical xor
2138
2139         std::tuple<VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize,VkDeviceSize> result(deAlign64(strSize, 256), 0, 0, 0, 0);
2140
2141         if (!m_geometriesData.empty())
2142         {
2143                 std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
2144                 std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
2145                 std::vector<VkAccelerationStructureBuildRangeInfoKHR>   accelerationStructureBuildRangeInfoKHR;
2146                 std::vector<VkAccelerationStructureTrianglesOpacityMicromapEXT> accelerationStructureGeometryMicromapsEXT;
2147                 std::vector<deUint32>                                                                   maxPrimitiveCounts;
2148                 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, accelerationStructureGeometryMicromapsEXT, maxPrimitiveCounts);
2149
2150                 const VkAccelerationStructureGeometryKHR*                               accelerationStructureGeometriesKHRPointer       = accelerationStructureGeometriesKHR.data();
2151                 const VkAccelerationStructureGeometryKHR* const*                accelerationStructureGeometry                           = accelerationStructureGeometriesKHRPointers.data();
2152
2153                 VkAccelerationStructureBuildGeometryInfoKHR     accelerationStructureBuildGeometryInfoKHR       =
2154                 {
2155                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                       //  VkStructureType                                                                             sType;
2156                         DE_NULL,                                                                                                                                        //  const void*                                                                                 pNext;
2157                         VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                                                        //  VkAccelerationStructureTypeKHR                                              type;
2158                         m_buildFlags,                                                                                                                           //  VkBuildAccelerationStructureFlagsKHR                                flags;
2159                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                         //  VkBuildAccelerationStructureModeKHR                                 mode;
2160                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
2161                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
2162                         static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),                       //  deUint32                                                                                    geometryCount;
2163                         m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,     //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
2164                         m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,                         //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
2165                         makeDeviceOrHostAddressKHR(DE_NULL)                                                                                     //  VkDeviceOrHostAddressKHR                                                    scratchData;
2166                 };
2167
2168                 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
2169                 {
2170                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
2171                         DE_NULL,                                                                                                                //  const void*         pNext;
2172                         0,                                                                                                                              //  VkDeviceSize        accelerationStructureSize;
2173                         0,                                                                                                                              //  VkDeviceSize        updateScratchSize;
2174                         0                                                                                                                               //  VkDeviceSize        buildScratchSize;
2175                 };
2176
2177                 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2178
2179                 std::get<0>(result) = sizeInfo.accelerationStructureSize;
2180                 std::get<1>(result) = sizeInfo.updateScratchSize;
2181                 std::get<2>(result) = sizeInfo.buildScratchSize;
2182                 std::get<3>(result) = getVertexBufferSize(m_geometriesData);
2183                 std::get<4>(result) = getIndexBufferSize(m_geometriesData);
2184         }
2185
2186         return result;
2187 }
2188
2189 void BottomLevelAccelerationStructurePoolMember::preCreateSetSizesAndOffsets (const Info&                       info,
2190                                                                                                                                                           const VkDeviceSize    accStrSize,
2191                                                                                                                                                           const VkDeviceSize    updateScratchSize,
2192                                                                                                                                                           const VkDeviceSize    buildScratchSize)
2193 {
2194         m_info                          = info;
2195         m_structureSize         = accStrSize;
2196         m_updateScratchSize     = updateScratchSize;
2197         m_buildScratchSize      = buildScratchSize;
2198 }
2199
2200 void BottomLevelAccelerationStructurePoolMember::createAccellerationStructure (const DeviceInterface&   vk,
2201                                                                                                                                                            const VkDevice                       device,
2202                                                                                                                                                            VkDeviceAddress                      deviceAddress)
2203 {
2204         const VkAccelerationStructureTypeKHR            structureType                                           = (m_createGeneric
2205                                                                                                                                                                            ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
2206                                                                                                                                                                            : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
2207         const VkAccelerationStructureCreateInfoKHR      accelerationStructureCreateInfoKHR
2208         {
2209                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,                                               //  VkStructureType                                                                                     sType;
2210                 DE_NULL,                                                                                                                                                //  const void*                                                                                         pNext;
2211                 m_createFlags,                                                                                                                                  //  VkAccelerationStructureCreateFlagsKHR                                       createFlags;
2212                 getAccelerationStructureBuffer()->get(),                                                                                //  VkBuffer                                                                                            buffer;
2213                 getAccelerationStructureBufferOffset(),                                                                                 //  VkDeviceSize                                                                                        offset;
2214                 m_structureSize,                                                                                                                                //  VkDeviceSize                                                                                        size;
2215                 structureType,                                                                                                                                  //  VkAccelerationStructureTypeKHR                                                      type;
2216                 deviceAddress                                                                                                                                   //  VkDeviceAddress                                                                                     deviceAddress;
2217         };
2218
2219         m_accelerationStructureKHR      = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
2220 }
2221
2222 TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
2223 {
2224 }
2225
2226 TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
2227         : m_structureSize               (0u)
2228         , m_updateScratchSize   (0u)
2229         , m_buildScratchSize    (0u)
2230 {
2231 }
2232
2233 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
2234 {
2235         m_bottomLevelInstances.reserve(instanceCount);
2236         m_instanceData.reserve(instanceCount);
2237 }
2238
2239 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure>        bottomLevelStructure,
2240                                                                                                  const VkTransformMatrixKHR&                                            matrix,
2241                                                                                                  deUint32                                                                                       instanceCustomIndex,
2242                                                                                                  deUint32                                                                                       mask,
2243                                                                                                  deUint32                                                                                       instanceShaderBindingTableRecordOffset,
2244                                                                                                  VkGeometryInstanceFlagsKHR                                                     flags)
2245 {
2246         m_bottomLevelInstances.push_back(bottomLevelStructure);
2247         m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
2248 }
2249
2250 VkAccelerationStructureBuildSizesInfoKHR TopLevelAccelerationStructure::getStructureBuildSizes () const
2251 {
2252         return
2253         {
2254                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
2255                 DE_NULL,                                                                                                                //  const void*         pNext;
2256                 m_structureSize,                                                                                                //  VkDeviceSize        accelerationStructureSize;
2257                 m_updateScratchSize,                                                                                    //  VkDeviceSize        updateScratchSize;
2258                 m_buildScratchSize                                                                                              //  VkDeviceSize        buildScratchSize;
2259         };
2260 }
2261
2262 void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface&      vk,
2263                                                                                                         const VkDevice                  device,
2264                                                                                                         const VkCommandBuffer   cmdBuffer,
2265                                                                                                         Allocator&                              allocator,
2266                                                                                                         VkDeviceAddress                 deviceAddress)
2267 {
2268         create(vk, device, allocator, 0u, deviceAddress);
2269         build(vk, device, cmdBuffer);
2270 }
2271
2272 void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&                           vk,
2273                                                                                                            const VkDevice                                               device,
2274                                                                                                            const VkCommandBuffer                                cmdBuffer,
2275                                                                                                            Allocator&                                                   allocator,
2276                                                                                                            TopLevelAccelerationStructure*               accelerationStructure,
2277                                                                                                            VkDeviceSize                                                 compactCopySize,
2278                                                                                                            VkDeviceAddress                                              deviceAddress)
2279 {
2280         DE_ASSERT(accelerationStructure != NULL);
2281         VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureBuildSizes().accelerationStructureSize;
2282         DE_ASSERT(copiedSize != 0u);
2283
2284         create(vk, device, allocator, copiedSize, deviceAddress);
2285         copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
2286 }
2287
2288 void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface&                                    vk,
2289                                                                                                                           const VkDevice                                                        device,
2290                                                                                                                           const VkCommandBuffer                                         cmdBuffer,
2291                                                                                                                           Allocator&                                                            allocator,
2292                                                                                                                           SerialStorage*                                                        storage,
2293                                                                                                                           VkDeviceAddress                                                       deviceAddress)
2294 {
2295         DE_ASSERT(storage != NULL);
2296         DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
2297         create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
2298         if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
2299         deserialize(vk, device, cmdBuffer, storage);
2300 }
2301
2302 BufferWithMemory* createInstanceBuffer (const DeviceInterface&                                                                                  vk,
2303                                                                                 const VkDevice                                                                                                  device,
2304                                                                                 Allocator&                                                                                                              allocator,
2305                                                                                 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >   bottomLevelInstances,
2306                                                                                 std::vector<InstanceData>                                                                               instanceData,
2307                                                                                 const bool                                                                                                              tryCachedMemory)
2308 {
2309         DE_ASSERT(bottomLevelInstances.size() != 0);
2310         DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2311         DE_UNREF(instanceData);
2312
2313         BufferWithMemory*                       result                          = nullptr;
2314         const VkDeviceSize                      bufferSizeBytes         = bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2315         const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2316         if (tryCachedMemory) try
2317         {
2318                 result = new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2319         }
2320         catch (const tcu::NotSupportedError&)
2321         {
2322                 result = nullptr;
2323         }
2324         return result
2325                         ? result
2326                         : new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
2327 }
2328
2329 void updateSingleInstance (const DeviceInterface&                                       vk,
2330                                                    const VkDevice                                                       device,
2331                                                    const BottomLevelAccelerationStructure&      bottomLevelAccelerationStructure,
2332                                                    const InstanceData&                                          instanceData,
2333                                                    deUint8*                                                                     bufferLocation,
2334                                                    VkAccelerationStructureBuildTypeKHR          buildType,
2335                                                    bool                                                                         inactiveInstances)
2336 {
2337         const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
2338
2339         // This part needs to be fixed once a new version of the VkAccelerationStructureInstanceKHR will be added to vkStructTypes.inl
2340         VkDeviceAddress accelerationStructureAddress;
2341         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2342         {
2343                 VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
2344                 {
2345                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,       // VkStructureType                              sType;
2346                         DE_NULL,                                                                                                                        // const void*                                  pNext;
2347                         accelerationStructureKHR                                                                                        // VkAccelerationStructureKHR   accelerationStructure;
2348                 };
2349                 accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2350         }
2351
2352         deUint64 structureReference;
2353         if (inactiveInstances)
2354         {
2355                 // Instances will be marked inactive by making their references VK_NULL_HANDLE or having address zero.
2356                 structureReference = 0ull;
2357         }
2358         else
2359         {
2360                 structureReference      = (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2361                                                         ? deUint64(accelerationStructureAddress)
2362                                                         : deUint64(accelerationStructureKHR.getInternal());
2363         }
2364
2365         VkAccelerationStructureInstanceKHR      accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
2366         (
2367                 instanceData.matrix,                                                                    //  VkTransformMatrixKHR                transform;
2368                 instanceData.instanceCustomIndex,                                               //  deUint32                                    instanceCustomIndex:24;
2369                 instanceData.mask,                                                                              //  deUint32                                    mask:8;
2370                 instanceData.instanceShaderBindingTableRecordOffset,    //  deUint32                                    instanceShaderBindingTableRecordOffset:24;
2371                 instanceData.flags,                                                                             //  VkGeometryInstanceFlagsKHR  flags:8;
2372                 structureReference                                                                              //  deUint64                                    accelerationStructureReference;
2373         );
2374
2375         deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
2376 }
2377
2378 void updateInstanceBuffer (const DeviceInterface&                                                                                               vk,
2379                                                    const VkDevice                                                                                                               device,
2380                                                    const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>&  bottomLevelInstances,
2381                                                    const std::vector<InstanceData>&                                                                             instanceData,
2382                                                    const BufferWithMemory*                                                                                              instanceBuffer,
2383                                                    VkAccelerationStructureBuildTypeKHR                                                                  buildType,
2384                                                    bool                                                                                                                                 inactiveInstances)
2385 {
2386         DE_ASSERT(bottomLevelInstances.size() != 0);
2387         DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
2388
2389         auto&                   instancesAlloc          = instanceBuffer->getAllocation();
2390         auto                    bufferStart                     = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2391         VkDeviceSize    bufferOffset            = 0ull;
2392
2393         for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
2394         {
2395                 const auto& blas = *bottomLevelInstances[instanceNdx];
2396                 updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
2397                 bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
2398         }
2399
2400         flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2401 }
2402
2403 class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
2404 {
2405 public:
2406         static deUint32                                                                                 getRequiredAllocationCount                                                      (void);
2407
2408                                                                                                                         TopLevelAccelerationStructureKHR                                        ();
2409                                                                                                                         TopLevelAccelerationStructureKHR                                        (const TopLevelAccelerationStructureKHR&                other) = delete;
2410         virtual                                                                                                 ~TopLevelAccelerationStructureKHR                                       ();
2411
2412         void                                                                                                    setBuildType                                                                            (const VkAccelerationStructureBuildTypeKHR              buildType) override;
2413         void                                                                                                    setCreateFlags                                                                          (const VkAccelerationStructureCreateFlagsKHR    createFlags) override;
2414         void                                                                                                    setCreateGeneric                                                                        (bool                                                                                   createGeneric) override;
2415         void                                                                                                    setBuildFlags                                                                           (const VkBuildAccelerationStructureFlagsKHR             buildFlags) override;
2416         void                                                                                                    setBuildWithoutPrimitives                                                       (bool                                                                                   buildWithoutPrimitives) override;
2417         void                                                                                                    setInactiveInstances                                                            (bool                                                                                   inactiveInstances) override;
2418         void                                                                                                    setDeferredOperation                                                            (const bool                                                                             deferredOperation,
2419                                                                                                                                                                                                                                  const deUint32                                                                 workerThreadCount) override;
2420         void                                                                                                    setUseArrayOfPointers                                                           (const bool                                                                             useArrayOfPointers) override;
2421         void                                                                                                    setIndirectBuildParameters                                                      (const VkBuffer                                                                 indirectBuffer,
2422                                                                                                                                                                                                                                  const VkDeviceSize                                                             indirectBufferOffset,
2423                                                                                                                                                                                                                                  const deUint32                                                                 indirectBufferStride) override;
2424         void                                                                                                    setUsePPGeometries                                                                      (const bool                                                                             usePPGeometries) override;
2425         void                                                                                                    setTryCachedMemory                                                                      (const bool                                                                             tryCachedMemory) override;
2426         VkBuildAccelerationStructureFlagsKHR                                    getBuildFlags                                                                           () const override;
2427
2428         void                                                                                                    getCreationSizes                                                                        (const DeviceInterface&                                                 vk,
2429                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2430                                                                                                                                                                                                                                  const VkDeviceSize                                                             structureSize,
2431                                                                                                                                                                                                                                  CreationSizes&                                                                 sizes) override;
2432         void                                                                                                    create                                                                                          (const DeviceInterface&                                                 vk,
2433                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2434                                                                                                                                                                                                                                  Allocator&                                                                             allocator,
2435                                                                                                                                                                                                                                  VkDeviceSize                                                                   structureSize,
2436                                                                                                                                                                                                                                  VkDeviceAddress                                                                deviceAddress                   = 0u,
2437                                                                                                                                                                                                                                  const void*                                                                    pNext                                   = DE_NULL,
2438                                                                                                                                                                                                                                  const MemoryRequirement&                                               addMemoryRequirement    = MemoryRequirement::Any) override;
2439         void                                                                                                    build                                                                                           (const DeviceInterface&                                                 vk,
2440                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2441                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer) override;
2442         void                                                                                                    copyFrom                                                                                        (const DeviceInterface&                                                 vk,
2443                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2444                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
2445                                                                                                                                                                                                                                  TopLevelAccelerationStructure*                                 accelerationStructure,
2446                                                                                                                                                                                                                                  bool                                                                                   compactCopy) override;
2447         void                                                                                                    serialize                                                                                       (const DeviceInterface&                                                 vk,
2448                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2449                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
2450                                                                                                                                                                                                                                  SerialStorage*                                                                 storage) override;
2451         void                                                                                                    deserialize                                                                                     (const DeviceInterface&                                                 vk,
2452                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2453                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
2454                                                                                                                                                                                                                                  SerialStorage*                                                                 storage) override;
2455
2456         std::vector<VkDeviceSize>                                                               getSerializingSizes                                                                     (const DeviceInterface&                                                 vk,
2457                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2458                                                                                                                                                                                                                                  const VkQueue                                                                  queue,
2459                                                                                                                                                                                                                                  const deUint32                                                                 queueFamilyIndex) override;
2460
2461         std::vector<deUint64>                                                                   getSerializingAddresses                                                         (const DeviceInterface&                                                 vk,
2462                                                                                                                                                                                                                                  const VkDevice                                                                 device) const override;
2463
2464
2465         const VkAccelerationStructureKHR*                                               getPtr                                                                                          (void) const override;
2466
2467         void                                                                                                    updateInstanceMatrix                                                            (const DeviceInterface&                                                 vk,
2468                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2469                                                                                                                                                                                                                                  size_t                                                                                 instanceIndex,
2470                                                                                                                                                                                                                                  const VkTransformMatrixKHR&                                    matrix) override;
2471
2472 protected:
2473         VkAccelerationStructureBuildTypeKHR                                             m_buildType;
2474         VkAccelerationStructureCreateFlagsKHR                                   m_createFlags;
2475         bool                                                                                                    m_createGeneric;
2476         VkBuildAccelerationStructureFlagsKHR                                    m_buildFlags;
2477         bool                                                                                                    m_buildWithoutPrimitives;
2478         bool                                                                                                    m_inactiveInstances;
2479         bool                                                                                                    m_deferredOperation;
2480         deUint32                                                                                                m_workerThreadCount;
2481         bool                                                                                                    m_useArrayOfPointers;
2482         de::MovePtr<BufferWithMemory>                                                   m_accelerationStructureBuffer;
2483         de::MovePtr<BufferWithMemory>                                                   m_instanceBuffer;
2484         de::MovePtr<BufferWithMemory>                                                   m_instanceAddressBuffer;
2485         de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
2486         std::vector<deUint8>                                                                    m_hostScratchBuffer;
2487         Move<VkAccelerationStructureKHR>                                                m_accelerationStructureKHR;
2488         VkBuffer                                                                                                m_indirectBuffer;
2489         VkDeviceSize                                                                                    m_indirectBufferOffset;
2490         deUint32                                                                                                m_indirectBufferStride;
2491         bool                                                                                                    m_usePPGeometries;
2492         bool                                                                                                    m_tryCachedMemory;
2493
2494
2495         void                                                                                                    prepareInstances                                                                        (const DeviceInterface&                                                 vk,
2496                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2497                                                                                                                                                                                                                                  VkAccelerationStructureGeometryKHR&                    accelerationStructureGeometryKHR,
2498                                                                                                                                                                                                                                  std::vector<deUint32>&                                                 maxPrimitiveCounts);
2499
2500         void                                                                                                    serializeBottoms                                                                        (const DeviceInterface&                                                 vk,
2501                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2502                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
2503                                                                                                                                                                                                                                  SerialStorage*                                                                 storage,
2504                                                                                                                                                                                                                                  VkDeferredOperationKHR                                                 deferredOperation);
2505
2506         void                                                                                                    createAndDeserializeBottoms                                                     (const DeviceInterface&                                                 vk,
2507                                                                                                                                                                                                                                  const VkDevice                                                                 device,
2508                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
2509                                                                                                                                                                                                                                  Allocator&                                                                             allocator,
2510                                                                                                                                                                                                                                  SerialStorage*                                                                 storage) override;
2511 };
2512
2513 deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
2514 {
2515         /*
2516                 de::MovePtr<BufferWithMemory>                                                   m_instanceBuffer;
2517                 de::MovePtr<Allocation>                                                                 m_accelerationStructureAlloc;
2518                 de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
2519         */
2520         return 3u;
2521 }
2522
2523 TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
2524         : TopLevelAccelerationStructure ()
2525         , m_buildType                                   (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2526         , m_createFlags                                 (0u)
2527         , m_createGeneric                               (false)
2528         , m_buildFlags                                  (0u)
2529         , m_buildWithoutPrimitives              (false)
2530         , m_inactiveInstances                   (false)
2531         , m_deferredOperation                   (false)
2532         , m_workerThreadCount                   (0)
2533         , m_useArrayOfPointers                  (false)
2534         , m_accelerationStructureBuffer (DE_NULL)
2535         , m_instanceBuffer                              (DE_NULL)
2536         , m_instanceAddressBuffer               (DE_NULL)
2537         , m_deviceScratchBuffer                 (DE_NULL)
2538         , m_accelerationStructureKHR    ()
2539         , m_indirectBuffer                              (DE_NULL)
2540         , m_indirectBufferOffset                (0)
2541         , m_indirectBufferStride                (0)
2542         , m_usePPGeometries                             (false)
2543         , m_tryCachedMemory                             (true)
2544 {
2545 }
2546
2547 TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
2548 {
2549 }
2550
2551 void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR  buildType)
2552 {
2553         m_buildType = buildType;
2554 }
2555
2556 void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR      createFlags)
2557 {
2558         m_createFlags = createFlags;
2559 }
2560
2561 void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
2562 {
2563         m_createGeneric = createGeneric;
2564 }
2565
2566 void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
2567 {
2568         m_inactiveInstances = inactiveInstances;
2569 }
2570
2571 void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR        buildFlags)
2572 {
2573         m_buildFlags = buildFlags;
2574 }
2575
2576 void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
2577 {
2578         m_buildWithoutPrimitives = buildWithoutPrimitives;
2579 }
2580
2581 void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool         deferredOperation,
2582                                                                                                                          const deUint32 workerThreadCount)
2583 {
2584         m_deferredOperation = deferredOperation;
2585         m_workerThreadCount = workerThreadCount;
2586 }
2587
2588 void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool        useArrayOfPointers)
2589 {
2590         m_useArrayOfPointers = useArrayOfPointers;
2591 }
2592
2593 void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
2594 {
2595         m_usePPGeometries = usePPGeometries;
2596 }
2597
2598 void TopLevelAccelerationStructureKHR::setTryCachedMemory (const bool tryCachedMemory)
2599 {
2600         m_tryCachedMemory = tryCachedMemory;
2601 }
2602
2603 void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer               indirectBuffer,
2604                                                                                                                                    const VkDeviceSize   indirectBufferOffset,
2605                                                                                                                                    const deUint32               indirectBufferStride)
2606 {
2607         m_indirectBuffer                = indirectBuffer;
2608         m_indirectBufferOffset  = indirectBufferOffset;
2609         m_indirectBufferStride  = indirectBufferStride;
2610 }
2611
2612 VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
2613 {
2614         return m_buildFlags;
2615 }
2616
2617 VkDeviceSize TopLevelAccelerationStructure::CreationSizes::sum () const
2618 {
2619         return structure + updateScratch + buildScratch + instancePointers + instancesBuffer;
2620 }
2621
2622 void TopLevelAccelerationStructureKHR::getCreationSizes (const DeviceInterface& vk,
2623                                                                                                                  const VkDevice                 device,
2624                                                                                                                  const VkDeviceSize             structureSize,
2625                                                                                                                  CreationSizes&                 sizes)
2626 {
2627         // AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
2628         // or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
2629         DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor
2630
2631         if (structureSize == 0)
2632         {
2633                 VkAccelerationStructureGeometryKHR              accelerationStructureGeometryKHR;
2634                 const auto                                                              accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2635                 std::vector<deUint32>                                   maxPrimitiveCounts;
2636                 prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2637
2638                 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR           =
2639                 {
2640                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                                               //  VkStructureType                                                                             sType;
2641                         DE_NULL,                                                                                                                                                                //  const void*                                                                                 pNext;
2642                         VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                                                                                   //  VkAccelerationStructureTypeKHR                                              type;
2643                         m_buildFlags,                                                                                                                                                   //  VkBuildAccelerationStructureFlagsKHR                                flags;
2644                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                                                 //  VkBuildAccelerationStructureModeKHR                                 mode;
2645                         DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
2646                         DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
2647                         1u,                                                                                                                                                                             //  deUint32                                                                                    geometryCount;
2648                         (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),                                              //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
2649                         (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),                                   //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
2650                         makeDeviceOrHostAddressKHR(DE_NULL)                                                                                                             //  VkDeviceOrHostAddressKHR                                                    scratchData;
2651                 };
2652
2653                 VkAccelerationStructureBuildSizesInfoKHR        sizeInfo =
2654                 {
2655                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
2656                         DE_NULL,                                                                                                                //  const void*         pNext;
2657                         0,                                                                                                                              //  VkDeviceSize        accelerationStructureSize;
2658                         0,                                                                                                                              //  VkDeviceSize        updateScratchSize;
2659                         0                                                                                                                               //  VkDeviceSize        buildScratchSize;
2660                 };
2661
2662                 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2663
2664                 sizes.structure         = sizeInfo.accelerationStructureSize;
2665                 sizes.updateScratch     = sizeInfo.updateScratchSize;
2666                 sizes.buildScratch      = sizeInfo.buildScratchSize;
2667         }
2668         else
2669         {
2670                 sizes.structure         = structureSize;
2671                 sizes.updateScratch     = 0u;
2672                 sizes.buildScratch      = 0u;
2673         }
2674
2675         sizes.instancePointers  = 0u;
2676         if (m_useArrayOfPointers)
2677         {
2678                 const size_t    pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
2679                 sizes.instancePointers          = static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize);
2680         }
2681
2682         sizes.instancesBuffer = m_bottomLevelInstances.empty() ? 0u : m_bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
2683 }
2684
2685 void TopLevelAccelerationStructureKHR::create (const DeviceInterface&                           vk,
2686                                                                                            const VkDevice                                               device,
2687                                                                                            Allocator&                                                   allocator,
2688                                                                                            VkDeviceSize                                                 structureSize,
2689                                                                                            VkDeviceAddress                                              deviceAddress,
2690                                                                                            const void*                                                  pNext,
2691                                                                                            const MemoryRequirement&                             addMemoryRequirement)
2692 {
2693         // AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
2694         // or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
2695         DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor
2696
2697         if (structureSize == 0)
2698         {
2699                 VkAccelerationStructureGeometryKHR              accelerationStructureGeometryKHR;
2700                 const auto                                                              accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2701                 std::vector<deUint32>                                   maxPrimitiveCounts;
2702                 prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2703
2704                 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR           =
2705                 {
2706                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                                               //  VkStructureType                                                                             sType;
2707                         DE_NULL,                                                                                                                                                                //  const void*                                                                                 pNext;
2708                         VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                                                                                   //  VkAccelerationStructureTypeKHR                                              type;
2709                         m_buildFlags,                                                                                                                                                   //  VkBuildAccelerationStructureFlagsKHR                                flags;
2710                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                                                 //  VkBuildAccelerationStructureModeKHR                                 mode;
2711                         DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
2712                         DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
2713                         1u,                                                                                                                                                                             //  deUint32                                                                                    geometryCount;
2714                         (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),                                              //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
2715                         (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),                                   //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
2716                         makeDeviceOrHostAddressKHR(DE_NULL)                                                                                                             //  VkDeviceOrHostAddressKHR                                                    scratchData;
2717                 };
2718
2719                 VkAccelerationStructureBuildSizesInfoKHR        sizeInfo =
2720                 {
2721                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
2722                         DE_NULL,                                                                                                                //  const void*         pNext;
2723                         0,                                                                                                                              //  VkDeviceSize        accelerationStructureSize;
2724                         0,                                                                                                                              //  VkDeviceSize        updateScratchSize;
2725                         0                                                                                                                               //  VkDeviceSize        buildScratchSize;
2726                 };
2727
2728                 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
2729
2730                 m_structureSize         = sizeInfo.accelerationStructureSize;
2731                 m_updateScratchSize     = sizeInfo.updateScratchSize;
2732                 m_buildScratchSize      = sizeInfo.buildScratchSize;
2733         }
2734         else
2735         {
2736                 m_structureSize         = structureSize;
2737                 m_updateScratchSize     = 0u;
2738                 m_buildScratchSize      = 0u;
2739         }
2740
2741         {
2742                 const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2743                 const MemoryRequirement         memoryRequirement       = addMemoryRequirement | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
2744
2745                 try
2746                 {
2747                         m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | memoryRequirement));
2748                 }
2749                 catch (const tcu::NotSupportedError&)
2750                 {
2751                         // retry without Cached flag
2752                         m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, memoryRequirement));
2753                 }
2754         }
2755
2756         {
2757                 const VkAccelerationStructureTypeKHR            structureType                                           = (m_createGeneric
2758                                                                                                                                                                                    ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
2759                                                                                                                                                                                    : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
2760                 const VkAccelerationStructureCreateInfoKHR      accelerationStructureCreateInfoKHR      =
2761                 {
2762                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,       //  VkStructureType                                                                                     sType;
2763                         pNext,                                                                                                          //  const void*                                                                                         pNext;
2764                         m_createFlags,                                                                                          //  VkAccelerationStructureCreateFlagsKHR                                       createFlags;
2765                         m_accelerationStructureBuffer->get(),                                           //  VkBuffer                                                                                            buffer;
2766                         0u,                                                                                                                     //  VkDeviceSize                                                                                        offset;
2767                         m_structureSize,                                                                                        //  VkDeviceSize                                                                                        size;
2768                         structureType,                                                                                          //  VkAccelerationStructureTypeKHR                                                      type;
2769                         deviceAddress                                                                                           //  VkDeviceAddress                                                                                     deviceAddress;
2770                 };
2771
2772                 m_accelerationStructureKHR      = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
2773         }
2774
2775         if (m_buildScratchSize > 0u)
2776         {
2777                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2778                 {
2779                         const VkBufferCreateInfo                bufferCreateInfo        = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2780                         m_deviceScratchBuffer                                                           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
2781                 }
2782                 else
2783                 {
2784                         m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
2785                 }
2786         }
2787
2788         if (m_useArrayOfPointers)
2789         {
2790                 const size_t                            pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
2791                 const VkBufferCreateInfo        bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
2792                 m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
2793         }
2794
2795         if(!m_bottomLevelInstances.empty())
2796                 m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData, m_tryCachedMemory));
2797 }
2798
2799 void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
2800 {
2801         DE_ASSERT(m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
2802         DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
2803         DE_ASSERT(instanceIndex < m_instanceData.size());
2804
2805         const auto&             blas                    = *m_bottomLevelInstances[instanceIndex];
2806         auto&                   instanceData    = m_instanceData[instanceIndex];
2807         auto&                   instancesAlloc  = m_instanceBuffer->getAllocation();
2808         auto                    bufferStart             = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
2809         VkDeviceSize    bufferOffset    = sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;
2810
2811         instanceData.matrix = matrix;
2812         updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
2813         flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2814 }
2815
2816 void TopLevelAccelerationStructureKHR::build (const DeviceInterface&    vk,
2817                                                                                           const VkDevice                        device,
2818                                                                                           const VkCommandBuffer         cmdBuffer)
2819 {
2820         DE_ASSERT(!m_bottomLevelInstances.empty());
2821         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2822         DE_ASSERT(m_buildScratchSize != 0);
2823
2824         updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);
2825
2826         VkAccelerationStructureGeometryKHR              accelerationStructureGeometryKHR;
2827         const auto                                                              accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2828         std::vector<deUint32>                                   maxPrimitiveCounts;
2829         prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2830
2831         VkDeviceOrHostAddressKHR                                scratchData                                                                             = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2832                                                                                                                                                                                         ? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
2833                                                                                                                                                                                         : makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());
2834
2835         VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR           =
2836         {
2837                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                                               //  VkStructureType                                                                             sType;
2838                 DE_NULL,                                                                                                                                                                //  const void*                                                                                 pNext;
2839                 VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                                                                                   //  VkAccelerationStructureTypeKHR                                              type;
2840                 m_buildFlags,                                                                                                                                                   //  VkBuildAccelerationStructureFlagsKHR                                flags;
2841                 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                                                 //  VkBuildAccelerationStructureModeKHR                                 mode;
2842                 DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
2843                 m_accelerationStructureKHR.get(),                                                                                                               //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
2844                 1u,                                                                                                                                                                             //  deUint32                                                                                    geometryCount;
2845                 (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),                                              //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
2846                 (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),                                   //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
2847                 scratchData                                                                                                                                                             //  VkDeviceOrHostAddressKHR                                                    scratchData;
2848         };
2849
2850         const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));
2851
2852         VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
2853         {
2854                 primitiveCount, //  deUint32    primitiveCount;
2855                 0,                              //  deUint32    primitiveOffset;
2856                 0,                              //  deUint32    firstVertex;
2857                 0                               //  deUint32    transformOffset;
2858         };
2859         VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr     = &accelerationStructureBuildRangeInfoKHR;
2860
2861         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2862         {
2863                 if (m_indirectBuffer == DE_NULL)
2864                         vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2865                 else
2866                 {
2867                         VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
2868                         deUint32*               pMaxPrimitiveCounts = maxPrimitiveCounts.data();
2869                         vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
2870                 }
2871         }
2872         else if (!m_deferredOperation)
2873         {
2874                 VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
2875         }
2876         else
2877         {
2878                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2879                 const auto deferredOperation    = deferredOperationPtr.get();
2880
2881                 VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2882
2883                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2884
2885                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2886
2887                 accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
2888         }
2889
2890         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2891         {
2892                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2893                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
2894
2895                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2896         }
2897 }
2898
2899 void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&                         vk,
2900                                                                                                  const VkDevice                                         device,
2901                                                                                                  const VkCommandBuffer                          cmdBuffer,
2902                                                                                                  TopLevelAccelerationStructure*         accelerationStructure,
2903                                                                                                  bool                                                           compactCopy)
2904 {
2905         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2906         DE_ASSERT(accelerationStructure != DE_NULL);
2907
2908         VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
2909         {
2910                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,                                                                                                                 // VkStructureType                                              sType;
2911                 DE_NULL,                                                                                                                                                                                                                // const void*                                                  pNext;
2912                 *(accelerationStructure->getPtr()),                                                                                                                                                             // VkAccelerationStructureKHR                   src;
2913                 *(getPtr()),                                                                                                                                                                                                    // VkAccelerationStructureKHR                   dst;
2914                 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR   // VkCopyAccelerationStructureModeKHR   mode;
2915         };
2916
2917         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2918         {
2919                 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
2920         }
2921         else if (!m_deferredOperation)
2922         {
2923                 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2924         }
2925         else
2926         {
2927                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2928                 const auto deferredOperation    = deferredOperationPtr.get();
2929
2930                 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2931
2932                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2933
2934                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2935         }
2936
2937         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2938         {
2939                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2940                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
2941
2942                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2943         }
2944
2945 }
2946
2947 void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface&        vk,
2948                                                                                                   const VkDevice                        device,
2949                                                                                                   const VkCommandBuffer         cmdBuffer,
2950                                                                                                   SerialStorage*                        storage)
2951 {
2952         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2953         DE_ASSERT(storage != DE_NULL);
2954
2955         const VkCopyAccelerationStructureToMemoryInfoKHR        copyAccelerationStructureInfo   =
2956         {
2957                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,       // VkStructureType                                              sType;
2958                 DE_NULL,                                                                                                                        // const void*                                                  pNext;
2959                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                   src;
2960                 storage->getAddress(vk, device, m_buildType),                                           // VkDeviceOrHostAddressKHR                             dst;
2961                 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR                                       // VkCopyAccelerationStructureModeKHR   mode;
2962         };
2963
2964         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2965         {
2966                 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
2967                 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
2968         }
2969         else if (!m_deferredOperation)
2970         {
2971                 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2972                 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
2973         }
2974         else
2975         {
2976                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2977                 const auto deferredOperation    = deferredOperationPtr.get();
2978
2979                 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2980
2981                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2982                 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);
2983
2984                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2985         }
2986 }
2987
2988 void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface&      vk,
2989                                                                                                         const VkDevice                  device,
2990                                                                                                         const VkCommandBuffer   cmdBuffer,
2991                                                                                                         SerialStorage*                  storage)
2992 {
2993         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2994         DE_ASSERT(storage != DE_NULL);
2995
2996         const VkCopyMemoryToAccelerationStructureInfoKHR        copyAccelerationStructureInfo   =
2997         {
2998                 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,       // VkStructureType                                                      sType;
2999                 DE_NULL,                                                                                                                        // const void*                                                          pNext;
3000                 storage->getAddressConst(vk, device, m_buildType),                                      // VkDeviceOrHostAddressConstKHR                        src;
3001                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                           dst;
3002                 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR                                     // VkCopyAccelerationStructureModeKHR           mode;
3003         };
3004
3005         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3006         {
3007                 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
3008         }
3009         else if (!m_deferredOperation)
3010         {
3011                 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
3012         }
3013         else
3014         {
3015                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
3016                 const auto deferredOperation    = deferredOperationPtr.get();
3017
3018                 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
3019
3020                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
3021
3022                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3023         }
3024
3025         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3026         {
3027                 const VkAccessFlags             accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
3028                 const VkMemoryBarrier   memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
3029
3030                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
3031         }
3032 }
3033
3034 void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface& vk,
3035                                                                                                                  const VkDevice                 device,
3036                                                                                                                  const VkCommandBuffer  cmdBuffer,
3037                                                                                                                  SerialStorage*                 storage,
3038                                                                                                                  VkDeferredOperationKHR deferredOperation)
3039 {
3040         DE_UNREF(deferredOperation);
3041         DE_ASSERT(storage->hasDeepFormat());
3042
3043         const std::vector<deUint64>&    addresses               = storage->getSerialInfo().addresses();
3044         const std::size_t                               cbottoms                = m_bottomLevelInstances.size();
3045
3046         deUint32                                                storageIndex    = 0;
3047         std::vector<deUint64>                   matches;
3048
3049         for (std::size_t i = 0; i < cbottoms; ++i)
3050         {
3051                 const deUint64& lookAddr        = addresses[i+1];
3052                 auto                    end                     = matches.end();
3053                 auto                    match           = std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
3054                 if (match == end)
3055                 {
3056                         matches.emplace_back(lookAddr);
3057                         m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
3058                         storageIndex += 1;
3059                 }
3060         }
3061 }
3062
3063 void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface&      vk,
3064                                                                                                                                         const VkDevice                  device,
3065                                                                                                                                         const VkCommandBuffer   cmdBuffer,
3066                                                                                                                                         Allocator&                              allocator,
3067                                                                                                                                         SerialStorage*                  storage)
3068 {
3069         DE_ASSERT(storage->hasDeepFormat());
3070         DE_ASSERT(m_bottomLevelInstances.size() == 0);
3071
3072         const std::vector<deUint64>&                                    addresses               = storage->getSerialInfo().addresses();
3073         const std::size_t                                                               cbottoms                = addresses.size() - 1;
3074         deUint32                                                                                storageIndex    = 0;
3075         std::vector<std::pair<deUint64, std::size_t>>   matches;
3076
3077         for (std::size_t i = 0; i < cbottoms; ++i)
3078         {
3079                 const deUint64& lookAddr        = addresses[i+1];
3080                 auto                    end                     = matches.end();
3081                 auto                    match           = std::find_if(matches.begin(), end, [&](const std::pair<deUint64, std::size_t>& item){ return item.first == lookAddr; });
3082                 if (match != end)
3083                 {
3084                         m_bottomLevelInstances .emplace_back(m_bottomLevelInstances[match->second]);
3085                 }
3086                 else
3087                 {
3088                         de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
3089                         blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
3090                         m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
3091                         matches.emplace_back(lookAddr, i);
3092                         storageIndex += 1;
3093                 }
3094         }
3095
3096         std::vector<deUint64>                                           newAddresses    = getSerializingAddresses(vk, device);
3097         DE_ASSERT(addresses.size() == newAddresses.size());
3098
3099         SerialStorage::AccelerationStructureHeader* header                      = storage->getASHeader();
3100         DE_ASSERT(cbottoms ==header->handleCount);
3101
3102         // finally update bottom-level AS addresses before top-level AS deserialization
3103         for (std::size_t i = 0; i < cbottoms; ++i)
3104         {
3105                 header->handleArray[i] = newAddresses[i+1];
3106         }
3107 }
3108
3109 std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface& vk,
3110                                                                                                                                                                  const VkDevice                 device,
3111                                                                                                                                                                  const VkQueue                  queue,
3112                                                                                                                                                                  const deUint32                 queueFamilyIndex)
3113 {
3114         const deUint32                                                  queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
3115         std::vector<VkAccelerationStructureKHR> handles(queryCount);
3116         std::vector<VkDeviceSize>                               sizes(queryCount);
3117
3118         handles[0] = m_accelerationStructureKHR.get();
3119
3120         for (deUint32 h = 1; h < queryCount; ++h)
3121                 handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();
3122
3123         if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
3124                 queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3125         else
3126         {
3127                 const Move<VkCommandPool>       cmdPool         = createCommandPool(vk, device, 0, queueFamilyIndex);
3128                 const Move<VkCommandBuffer>     cmdBuffer       = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3129                 const Move<VkQueryPool>         queryPool       = makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3130
3131                 beginCommandBuffer(vk, *cmdBuffer);
3132                 queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
3133                 endCommandBuffer(vk, *cmdBuffer);
3134                 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
3135
3136                 VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3137         }
3138
3139         return sizes;
3140 }
3141
3142 std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
3143 {
3144         std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);
3145
3146         VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
3147         {
3148                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,       // VkStructureType                              sType;
3149                 DE_NULL,                                                                                                                        // const void*                                  pNext;
3150                 DE_NULL                                                                                                                         // VkAccelerationStructureKHR   accelerationStructure;
3151         };
3152
3153         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3154         {
3155                 asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
3156                 result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3157         }
3158         else
3159         {
3160                 result[0] = deUint64(getPtr()->getInternal());
3161         }
3162
3163         for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3164         {
3165                 const BottomLevelAccelerationStructure&         bottomLevelAccelerationStructure        = *m_bottomLevelInstances[instanceNdx];
3166                 const VkAccelerationStructureKHR                        accelerationStructureKHR                        = *bottomLevelAccelerationStructure.getPtr();
3167
3168                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3169                 {
3170                         asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
3171                         result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
3172                 }
3173                 else
3174                 {
3175                         result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
3176                 }
3177         }
3178
3179         return result;
3180 }
3181
3182 const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
3183 {
3184         return &m_accelerationStructureKHR.get();
3185 }
3186
3187 void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface&                                                 vk,
3188                                                                                                                  const VkDevice                                                                 device,
3189                                                                                                                  VkAccelerationStructureGeometryKHR&                    accelerationStructureGeometryKHR,
3190                                                                                                                  std::vector<deUint32>&                                                 maxPrimitiveCounts)
3191 {
3192         maxPrimitiveCounts.resize(1);
3193         maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());
3194
3195         VkDeviceOrHostAddressConstKHR                                                   instancesData;
3196         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3197         {
3198                 if(m_instanceBuffer.get() != DE_NULL)
3199                 {
3200                         if (m_useArrayOfPointers)
3201                         {
3202                                 deUint8*                                                bufferStart                     = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
3203                                 VkDeviceSize                                    bufferOffset            = 0;
3204                                 VkDeviceOrHostAddressConstKHR   firstInstance           = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
3205                                 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3206                                 {
3207                                         VkDeviceOrHostAddressConstKHR   currentInstance;
3208                                         currentInstance.deviceAddress   = firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);
3209
3210                                         deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
3211                                         bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
3212                                 }
3213                                 flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
3214
3215                                 instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
3216                         }
3217                         else
3218                                 instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
3219                 }
3220                 else
3221                         instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
3222         }
3223         else
3224         {
3225                 if (m_instanceBuffer.get() != DE_NULL)
3226                 {
3227                         if (m_useArrayOfPointers)
3228                         {
3229                                 deUint8*                                                bufferStart                     = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
3230                                 VkDeviceSize                                    bufferOffset            = 0;
3231                                 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
3232                                 {
3233                                         VkDeviceOrHostAddressConstKHR   currentInstance;
3234                                         currentInstance.hostAddress     = (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);
3235
3236                                         deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
3237                                         bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
3238                                 }
3239                                 instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
3240                         }
3241                         else
3242                                 instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
3243                 }
3244                 else
3245                         instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
3246         }
3247
3248         VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR   =
3249         {
3250                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,   //  VkStructureType                                     sType;
3251                 DE_NULL,                                                                                                                                //  const void*                                         pNext;
3252                 (VkBool32)( m_useArrayOfPointers ? DE_TRUE : DE_FALSE ),                                //  VkBool32                                            arrayOfPointers;
3253                 instancesData                                                                                                                   //  VkDeviceOrHostAddressConstKHR       data;
3254         };
3255
3256         accelerationStructureGeometryKHR                                        =
3257         {
3258                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,                                                                          //  VkStructureType                                                     sType;
3259                 DE_NULL,                                                                                                                                                                        //  const void*                                                         pNext;
3260                 VK_GEOMETRY_TYPE_INSTANCES_KHR,                                                                                                                         //  VkGeometryTypeKHR                                           geometryType;
3261                 makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR),     //  VkAccelerationStructureGeometryDataKHR      geometry;
3262                 (VkGeometryFlagsKHR)0u                                                                                                                                          //  VkGeometryFlagsKHR                                          flags;
3263         };
3264 }
3265
3266 deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
3267 {
3268         return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
3269 }
3270
3271 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
3272 {
3273         return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
3274 }
3275
3276 bool queryAccelerationStructureSizeKHR (const DeviceInterface&                                                  vk,
3277                                                                                 const VkDevice                                                                  device,
3278                                                                                 const VkCommandBuffer                                                   cmdBuffer,
3279                                                                                 const std::vector<VkAccelerationStructureKHR>&  accelerationStructureHandles,
3280                                                                                 VkAccelerationStructureBuildTypeKHR                             buildType,
3281                                                                                 const VkQueryPool                                                               queryPool,
3282                                                                                 VkQueryType                                                                             queryType,
3283                                                                                 deUint32                                                                                firstQuery,
3284                                                                                 std::vector<VkDeviceSize>&                                              results)
3285 {
3286         DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);
3287
3288         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3289         {
3290                 // queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
3291                 vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
3292                 vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
3293                 // results cannot be retrieved to CPU at the moment - you need to do it using getQueryPoolResults after cmdBuffer is executed. Meanwhile function returns a vector of 0s.
3294                 results.resize(accelerationStructureHandles.size(), 0u);
3295                 return false;
3296         }
3297         // buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
3298         results.resize(accelerationStructureHandles.size(), 0u);
3299         vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
3300                                                                                                 sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
3301         // results will contain proper values
3302         return true;
3303 }
3304
3305 bool queryAccelerationStructureSize (const DeviceInterface&                                                     vk,
3306                                                                          const VkDevice                                                                 device,
3307                                                                          const VkCommandBuffer                                                  cmdBuffer,
3308                                                                          const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
3309                                                                          VkAccelerationStructureBuildTypeKHR                    buildType,
3310                                                                          const VkQueryPool                                                              queryPool,
3311                                                                          VkQueryType                                                                    queryType,
3312                                                                          deUint32                                                                               firstQuery,
3313                                                                          std::vector<VkDeviceSize>&                                             results)
3314 {
3315         return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
3316 }
3317
3318 RayTracingPipeline::RayTracingPipeline ()
3319         : m_shadersModules                      ()
3320         , m_pipelineLibraries           ()
3321         , m_shaderCreateInfos           ()
3322         , m_shadersGroupCreateInfos     ()
3323         , m_pipelineCreateFlags         (0U)
3324         , m_maxRecursionDepth           (1U)
3325         , m_maxPayloadSize                      (0U)
3326         , m_maxAttributeSize            (0U)
3327         , m_deferredOperation           (false)
3328         , m_workerThreadCount           (0)
3329 {
3330 }
3331
3332 RayTracingPipeline::~RayTracingPipeline ()
3333 {
3334 }
3335
// Stores STAGE into the shader slot SHADER, but only while the slot still holds
// VK_SHADER_UNUSED_KHR; a slot that was already assigned raises InternalError.
// Expands to a single if/else statement, so the invocation must be followed by ';'.
#define CHECKED_ASSIGN_SHADER(SHADER, STAGE)							\
	if (SHADER != VK_SHADER_UNUSED_KHR)									\
		TCU_THROW(InternalError, "Attempt to reassign shader");			\
	else																\
		SHADER = STAGE
3341
3342 void RayTracingPipeline::addShader (VkShaderStageFlagBits                                       shaderStage,
3343                                                                         Move<VkShaderModule>                                    shaderModule,
3344                                                                         deUint32                                                                group,
3345                                                                         const VkSpecializationInfo*                             specializationInfo,
3346                                                                         const VkPipelineShaderStageCreateFlags  pipelineShaderStageCreateFlags,
3347                                                                         const void*                                                             pipelineShaderStageCreateInfopNext)
3348 {
3349         addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3350 }
3351
3352 void RayTracingPipeline::addShader (VkShaderStageFlagBits                                       shaderStage,
3353                                                                         de::SharedPtr<Move<VkShaderModule>>             shaderModule,
3354                                                                         deUint32                                                                group,
3355                                                                         const VkSpecializationInfo*                             specializationInfoPtr,
3356                                                                         const VkPipelineShaderStageCreateFlags  pipelineShaderStageCreateFlags,
3357                                                                         const void*                                                             pipelineShaderStageCreateInfopNext)
3358 {
3359         addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
3360         m_shadersModules.push_back(shaderModule);
3361 }
3362
3363 void RayTracingPipeline::addShader (VkShaderStageFlagBits                                       shaderStage,
3364                                                                         VkShaderModule                                                  shaderModule,
3365                                                                         deUint32                                                                group,
3366                                                                         const VkSpecializationInfo*                             specializationInfoPtr,
3367                                                                         const VkPipelineShaderStageCreateFlags  pipelineShaderStageCreateFlags,
3368                                                                         const void*                                                             pipelineShaderStageCreateInfopNext)
3369 {
3370         if (group >= m_shadersGroupCreateInfos.size())
3371         {
3372                 for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
3373                 {
3374                         VkRayTracingShaderGroupCreateInfoKHR    shaderGroupCreateInfo   =
3375                         {
3376                                 VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,     //  VkStructureType                                     sType;
3377                                 DE_NULL,                                                                                                        //  const void*                                         pNext;
3378                                 VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,                          //  VkRayTracingShaderGroupTypeKHR      type;
3379                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            generalShader;
3380                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            closestHitShader;
3381                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            anyHitShader;
3382                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            intersectionShader;
3383                                 DE_NULL,                                                                                                        //  const void*                                         pShaderGroupCaptureReplayHandle;
3384                         };
3385
3386                         m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
3387                 }
3388         }
3389
3390         const deUint32                                                  shaderStageNdx                  = (deUint32)m_shaderCreateInfos.size();
3391         VkRayTracingShaderGroupCreateInfoKHR&   shaderGroupCreateInfo   = m_shadersGroupCreateInfos[group];
3392
3393         switch (shaderStage)
3394         {
3395                 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:            CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,              shaderStageNdx);        break;
3396                 case VK_SHADER_STAGE_MISS_BIT_KHR:                      CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,              shaderStageNdx);        break;
3397                 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:          CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,              shaderStageNdx);        break;
3398                 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:           CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,               shaderStageNdx);        break;
3399                 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:       CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,   shaderStageNdx);        break;
3400                 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:      CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader, shaderStageNdx);        break;
3401                 default:                                                                        TCU_THROW(InternalError, "Unacceptable stage");
3402         }
3403
3404         switch (shaderStage)
3405         {
3406                 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
3407                 case VK_SHADER_STAGE_MISS_BIT_KHR:
3408                 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
3409                 {
3410                         DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
3411                         shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;
3412
3413                         break;
3414                 }
3415
3416                 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
3417                 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
3418                 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
3419                 {
3420                         DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
3421                         shaderGroupCreateInfo.type      = (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
3422                                                                                 ? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
3423                                                                                 : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;
3424
3425                         break;
3426                 }
3427
3428                 default: TCU_THROW(InternalError, "Unacceptable stage");
3429         }
3430
3431         {
3432                 const VkPipelineShaderStageCreateInfo   shaderCreateInfo        =
3433                 {
3434                         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    //  VkStructureType                                             sType;
3435                         pipelineShaderStageCreateInfopNext,                                             //  const void*                                                 pNext;
3436                         pipelineShaderStageCreateFlags,                                                 //  VkPipelineShaderStageCreateFlags    flags;
3437                         shaderStage,                                                                                    //  VkShaderStageFlagBits                               stage;
3438                         shaderModule,                                                                                   //  VkShaderModule                                              module;
3439                         "main",                                                                                                 //  const char*                                                 pName;
3440                         specializationInfoPtr,                                                                  //  const VkSpecializationInfo*                 pSpecializationInfo;
3441                 };
3442
3443                 m_shaderCreateInfos.push_back(shaderCreateInfo);
3444         }
3445 }
3446
3447 void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
3448 {
3449         m_pipelineLibraries.push_back(pipelineLibrary);
3450 }
3451
3452 Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface&                  vk,
3453                                                                                                                 const VkDevice                                  device,
3454                                                                                                                 const VkPipelineLayout                  pipelineLayout,
3455                                                                                                                 const std::vector<VkPipeline>&  pipelineLibraries,
3456                                                                                                                 const VkPipelineCache                   pipelineCache)
3457 {
3458         for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3459                 DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3460
3461         VkPipelineLibraryCreateInfoKHR                          librariesCreateInfo     =
3462         {
3463                 VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,     //  VkStructureType     sType;
3464                 DE_NULL,                                                                                        //  const void*         pNext;
3465                 de::sizeU32(pipelineLibraries),                                         //  deUint32            libraryCount;
3466                 de::dataOrNull(pipelineLibraries)                                       //  VkPipeline*         pLibraries;
3467         };
3468         const VkRayTracingPipelineInterfaceCreateInfoKHR        pipelineInterfaceCreateInfo             =
3469         {
3470                 VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,       //  VkStructureType     sType;
3471                 DE_NULL,                                                                                                                        //  const void*         pNext;
3472                 m_maxPayloadSize,                                                                                                       //  deUint32            maxPayloadSize;
3473                 m_maxAttributeSize                                                                                                      //  deUint32            maxAttributeSize;
3474         };
3475         const bool                                                                                      addPipelineInterfaceCreateInfo  = m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
3476         const VkRayTracingPipelineInterfaceCreateInfoKHR*       pipelineInterfaceCreateInfoPtr  = addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
3477         const VkPipelineLibraryCreateInfoKHR*                           librariesCreateInfoPtr                  = (pipelineLibraries.empty() ? nullptr : &librariesCreateInfo);
3478
3479         Move<VkDeferredOperationKHR>                                            deferredOperation;
3480         if (m_deferredOperation)
3481                 deferredOperation = createDeferredOperationKHR(vk, device);
3482
3483         VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo =
3484         {
3485                 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,   // VkStructureType                                              sType;
3486                 DE_NULL,                                                                                                // const void*                                                  pNext;
3487                 0,                                                                                                              // VkPipelineDynamicStateCreateFlags    flags;
3488                 static_cast<deUint32>(m_dynamicStates.size() ),                 // deUint32                                                             dynamicStateCount;
3489                 m_dynamicStates.data(),                                                                 // const VkDynamicState*                                pDynamicStates;
3490         };
3491
3492         const VkRayTracingPipelineCreateInfoKHR                         pipelineCreateInfo                              =
3493         {
3494                 VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, //  VkStructureType                                                             sType;
3495                 DE_NULL,                                                                                                //  const void*                                                                 pNext;
3496                 m_pipelineCreateFlags,                                                                  //  VkPipelineCreateFlags                                               flags;
3497                 de::sizeU32(m_shaderCreateInfos),                                               //  deUint32                                                                    stageCount;
3498                 de::dataOrNull(m_shaderCreateInfos),                                    //  const VkPipelineShaderStageCreateInfo*              pStages;
3499                 de::sizeU32(m_shadersGroupCreateInfos),                                 //  deUint32                                                                    groupCount;
3500                 de::dataOrNull(m_shadersGroupCreateInfos),                              //  const VkRayTracingShaderGroupCreateInfoKHR* pGroups;
3501                 m_maxRecursionDepth,                                                                    //  deUint32                                                                    maxRecursionDepth;
3502                 librariesCreateInfoPtr,                                                                 //  VkPipelineLibraryCreateInfoKHR*                             pLibraryInfo;
3503                 pipelineInterfaceCreateInfoPtr,                                                 //  VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface;
3504                 &dynamicStateCreateInfo,                                                                //  const VkPipelineDynamicStateCreateInfo*             pDynamicState;
3505                 pipelineLayout,                                                                                 //  VkPipelineLayout                                                    layout;
3506                 (VkPipeline)DE_NULL,                                                                    //  VkPipeline                                                                  basePipelineHandle;
3507                 0,                                                                                                              //  deInt32                                                                             basePipelineIndex;
3508         };
3509         VkPipeline                                                                                      object                                                  = DE_NULL;
3510         VkResult                                                                                        result                                                  = vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), pipelineCache, 1u, &pipelineCreateInfo, DE_NULL, &object);
3511         const bool                                                                                      allowCompileRequired                    = ((m_pipelineCreateFlags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) != 0);
3512
3513         if (m_deferredOperation)
3514         {
3515                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS || (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED));
3516                 finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
3517         }
3518
3519         if (allowCompileRequired && result == VK_PIPELINE_COMPILE_REQUIRED)
3520                 throw CompileRequiredError("createRayTracingPipelinesKHR returned VK_PIPELINE_COMPILE_REQUIRED");
3521
3522         Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
3523         return pipeline;
3524 }
3525
3526
3527 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&                                                                     vk,
3528                                                                                                          const VkDevice                                                                                 device,
3529                                                                                                          const VkPipelineLayout                                                                 pipelineLayout,
3530                                                                                                          const std::vector<de::SharedPtr<Move<VkPipeline>>>&    pipelineLibraries)
3531 {
3532         std::vector<VkPipeline> rawPipelines;
3533         rawPipelines.reserve(pipelineLibraries.size());
3534         for (const auto& lib : pipelineLibraries)
3535                 rawPipelines.push_back(lib.get()->get());
3536
3537         return createPipelineKHR(vk, device, pipelineLayout, rawPipelines);
3538 }
3539
3540 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&                     vk,
3541                                                                                                          const VkDevice                                 device,
3542                                                                                                          const VkPipelineLayout                 pipelineLayout,
3543                                                                                                          const std::vector<VkPipeline>& pipelineLibraries,
3544                                                                                                          const VkPipelineCache                  pipelineCache)
3545 {
3546         return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries, pipelineCache);
3547 }
3548
3549 std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface&                    vk,
3550                                                                                                                                                                                                 const VkDevice                                  device,
3551                                                                                                                                                                                                 const VkPipelineLayout                  pipelineLayout)
3552 {
3553         for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
3554                 DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
3555
3556         DE_ASSERT(m_shaderCreateInfos.size() > 0);
3557         DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);
3558
3559         std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
3560         for(auto it=begin(m_pipelineLibraries), eit=end(m_pipelineLibraries); it!=eit; ++it)
3561         {
3562                 auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
3563                 DE_ASSERT(childLibraries.size() > 0);
3564                 firstLibraries.push_back(childLibraries[0]);
3565                 std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
3566         }
3567         result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
3568         std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
3569         return result;
3570 }
3571
3572 de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&              vk,
3573                                                                                                                                                         const VkDevice                          device,
3574                                                                                                                                                         const VkPipeline                        pipeline,
3575                                                                                                                                                         Allocator&                                      allocator,
3576                                                                                                                                                         const deUint32&                         shaderGroupHandleSize,
3577                                                                                                                                                         const deUint32                          shaderGroupBaseAlignment,
3578                                                                                                                                                         const deUint32&                         firstGroup,
3579                                                                                                                                                         const deUint32&                         groupCount,
3580                                                                                                                                                         const VkBufferCreateFlags&      additionalBufferCreateFlags,
3581                                                                                                                                                         const VkBufferUsageFlags&       additionalBufferUsageFlags,
3582                                                                                                                                                         const MemoryRequirement&        additionalMemoryRequirement,
3583                                                                                                                                                         const VkDeviceAddress&          opaqueCaptureAddress,
3584                                                                                                                                                         const deUint32                          shaderBindingTableOffset,
3585                                                                                                                                                         const deUint32                          shaderRecordSize,
3586                                                                                                                                                         const void**                            shaderGroupDataPtrPerGroup,
3587                                                                                                                                                         const bool                                      autoAlignRecords)
3588 {
3589         DE_ASSERT(shaderGroupBaseAlignment != 0u);
3590         DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
3591         DE_UNREF(shaderGroupBaseAlignment);
3592
3593         const auto                                                              totalEntrySize                                  = (autoAlignRecords ? (deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize)) : (shaderGroupHandleSize + shaderRecordSize));
3594         const deUint32                                                  sbtSize                                                 = shaderBindingTableOffset + groupCount * totalEntrySize;
3595         const VkBufferUsageFlags                                sbtFlags                                                = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
3596         VkBufferCreateInfo                                              sbtCreateInfo                                   = makeBufferCreateInfo(sbtSize, sbtFlags);
3597         sbtCreateInfo.flags                                                                                                             |= additionalBufferCreateFlags;
3598         VkBufferOpaqueCaptureAddressCreateInfo  sbtCaptureAddressInfo                   =
3599         {
3600                 VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,    // VkStructureType      sType;
3601                 DE_NULL,                                                                                                                // const void*          pNext;
3602                 deUint64(opaqueCaptureAddress)                                                                  // deUint64                     opaqueCaptureAddress;
3603         };
3604
3605         if (opaqueCaptureAddress != 0u)
3606         {
3607                 sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
3608                 sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
3609         }
3610         const MemoryRequirement                 sbtMemRequirements                                              = MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
3611         de::MovePtr<BufferWithMemory>   sbtBuffer                                                               = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
3612         vk::Allocation&                                 sbtAlloc                                                                = sbtBuffer->getAllocation();
3613
3614         // collect shader group handles
3615         std::vector<deUint8>                    shaderHandles                                                   (groupCount * shaderGroupHandleSize);
3616         VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline, firstGroup, groupCount, groupCount * shaderGroupHandleSize, shaderHandles.data()));
3617
3618         // reserve place for ShaderRecordKHR after each shader handle ( ShaderRecordKHR size might be 0 ). Also take alignment into consideration
3619         deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
3620         for (deUint32 idx = 0; idx < groupCount; ++idx)
3621         {
3622                 deUint8* shaderSrcPos   = shaderHandles.data() + idx * shaderGroupHandleSize;
3623                 deUint8* shaderDstPos   = shaderBegin + idx * totalEntrySize;
3624                 deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);
3625
3626                 if (shaderGroupDataPtrPerGroup          != nullptr &&
3627                         shaderGroupDataPtrPerGroup[idx] != nullptr)
3628                 {
3629                         DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);
3630
3631                         deMemcpy(       shaderDstPos + shaderGroupHandleSize,
3632                                                 shaderGroupDataPtrPerGroup[idx],
3633                                                 shaderRecordSize);
3634                 }
3635         }
3636
3637         flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);
3638
3639         return sbtBuffer;
3640 }
3641
3642 void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
3643 {
3644         m_pipelineCreateFlags = pipelineCreateFlags;
3645 }
3646
3647 void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
3648 {
3649         m_maxRecursionDepth = maxRecursionDepth;
3650 }
3651
3652 void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
3653 {
3654         m_maxPayloadSize = maxPayloadSize;
3655 }
3656
3657 void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
3658 {
3659         m_maxAttributeSize = maxAttributeSize;
3660 }
3661
3662 void RayTracingPipeline::setDeferredOperation (const bool               deferredOperation,
3663                                                                                            const deUint32       workerThreadCount)
3664 {
3665         m_deferredOperation = deferredOperation;
3666         m_workerThreadCount = workerThreadCount;
3667 }
3668
3669 void RayTracingPipeline::addDynamicState(const VkDynamicState& dynamicState)
3670 {
3671         m_dynamicStates.push_back(dynamicState);
3672 }
3673
3674 class RayTracingPropertiesKHR : public RayTracingProperties
3675 {
3676 public:
3677                                                         RayTracingPropertiesKHR                                         () = delete;
3678                                                         RayTracingPropertiesKHR                                         (const InstanceInterface&       vki,
3679                                                                                                                                                  const VkPhysicalDevice         physicalDevice);
3680         virtual                                 ~RayTracingPropertiesKHR                                        ();
3681
3682         uint32_t                getShaderGroupHandleSize                                        (void)  override { return m_rayTracingPipelineProperties.shaderGroupHandleSize;                                         }
3683         uint32_t                getShaderGroupHandleAlignment                           (void)  override { return m_rayTracingPipelineProperties.shaderGroupHandleAlignment;                            }
3684         uint32_t                getMaxRecursionDepth                                            (void)  override { return m_rayTracingPipelineProperties.maxRayRecursionDepth;                                          }
3685         uint32_t                getMaxShaderGroupStride                                         (void)  override { return m_rayTracingPipelineProperties.maxShaderGroupStride;                                          }
3686         uint32_t                getShaderGroupBaseAlignment                                     (void)  override { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment;                                      }
3687         uint64_t                getMaxGeometryCount                                                     (void)  override { return m_accelerationStructureProperties.maxGeometryCount;                                           }
3688         uint64_t                getMaxInstanceCount                                                     (void)  override { return m_accelerationStructureProperties.maxInstanceCount;                                           }
3689         uint64_t                getMaxPrimitiveCount                                            (void)  override { return m_accelerationStructureProperties.maxPrimitiveCount;                                          }
3690         uint32_t                getMaxDescriptorSetAccelerationStructures       (void)  override { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures;     }
3691         uint32_t                getMaxRayDispatchInvocationCount                        (void)  override { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount;                         }
3692         uint32_t                getMaxRayHitAttributeSize                                       (void)  override { return m_rayTracingPipelineProperties.maxRayHitAttributeSize;                                        }
3693         uint32_t                getMaxMemoryAllocationCount                                     (void)  override { return m_maxMemoryAllocationCount;                                                                                           }
3694
3695 protected:
3696         VkPhysicalDeviceAccelerationStructurePropertiesKHR      m_accelerationStructureProperties;
3697         VkPhysicalDeviceRayTracingPipelinePropertiesKHR         m_rayTracingPipelineProperties;
3698         deUint32                                                                                        m_maxMemoryAllocationCount;
3699 };
3700
3701 RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
3702 {
3703 }
3704
3705 RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface&      vki,
3706                                                                                                   const VkPhysicalDevice        physicalDevice)
3707         : RayTracingProperties  (vki, physicalDevice)
3708 {
3709         m_accelerationStructureProperties       = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
3710         m_rayTracingPipelineProperties          = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
3711         m_maxMemoryAllocationCount                      = getPhysicalDeviceProperties(vki, physicalDevice).limits.maxMemoryAllocationCount;
3712 }
3713
3714 de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface&    vki,
3715                                                                                                                         const VkPhysicalDevice          physicalDevice)
3716 {
3717         return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
3718 }
3719
3720 static inline void cmdTraceRaysKHR (const DeviceInterface&                                      vk,
3721                                                                         VkCommandBuffer                                                 commandBuffer,
3722                                                                         const VkStridedDeviceAddressRegionKHR*  raygenShaderBindingTableRegion,
3723                                                                         const VkStridedDeviceAddressRegionKHR*  missShaderBindingTableRegion,
3724                                                                         const VkStridedDeviceAddressRegionKHR*  hitShaderBindingTableRegion,
3725                                                                         const VkStridedDeviceAddressRegionKHR*  callableShaderBindingTableRegion,
3726                                                                         deUint32                                                                width,
3727                                                                         deUint32                                                                height,
3728                                                                         deUint32                                                                depth)
3729 {
3730         return vk.cmdTraceRaysKHR(commandBuffer,
3731                                                           raygenShaderBindingTableRegion,
3732                                                           missShaderBindingTableRegion,
3733                                                           hitShaderBindingTableRegion,
3734                                                           callableShaderBindingTableRegion,
3735                                                           width,
3736                                                           height,
3737                                                           depth);
3738 }
3739
3740
3741 void cmdTraceRays (const DeviceInterface&                                       vk,
3742                                    VkCommandBuffer                                                      commandBuffer,
3743                                    const VkStridedDeviceAddressRegionKHR*       raygenShaderBindingTableRegion,
3744                                    const VkStridedDeviceAddressRegionKHR*       missShaderBindingTableRegion,
3745                                    const VkStridedDeviceAddressRegionKHR*       hitShaderBindingTableRegion,
3746                                    const VkStridedDeviceAddressRegionKHR*       callableShaderBindingTableRegion,
3747                                    deUint32                                                                     width,
3748                                    deUint32                                                                     height,
3749                                    deUint32                                                                     depth)
3750 {
3751         DE_ASSERT(raygenShaderBindingTableRegion        != DE_NULL);
3752         DE_ASSERT(missShaderBindingTableRegion          != DE_NULL);
3753         DE_ASSERT(hitShaderBindingTableRegion           != DE_NULL);
3754         DE_ASSERT(callableShaderBindingTableRegion      != DE_NULL);
3755
3756         return cmdTraceRaysKHR(vk,
3757                                                    commandBuffer,
3758                                                    raygenShaderBindingTableRegion,
3759                                                    missShaderBindingTableRegion,
3760                                                    hitShaderBindingTableRegion,
3761                                                    callableShaderBindingTableRegion,
3762                                                    width,
3763                                                    height,
3764                                                    depth);
3765 }
3766
3767 static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&                                      vk,
3768                                                                                         VkCommandBuffer                                                 commandBuffer,
3769                                                                                         const VkStridedDeviceAddressRegionKHR*  raygenShaderBindingTableRegion,
3770                                                                                         const VkStridedDeviceAddressRegionKHR*  missShaderBindingTableRegion,
3771                                                                                         const VkStridedDeviceAddressRegionKHR*  hitShaderBindingTableRegion,
3772                                                                                         const VkStridedDeviceAddressRegionKHR*  callableShaderBindingTableRegion,
3773                                                                                         VkDeviceAddress                                                 indirectDeviceAddress )
3774 {
3775         DE_ASSERT(raygenShaderBindingTableRegion        != DE_NULL);
3776         DE_ASSERT(missShaderBindingTableRegion          != DE_NULL);
3777         DE_ASSERT(hitShaderBindingTableRegion           != DE_NULL);
3778         DE_ASSERT(callableShaderBindingTableRegion      != DE_NULL);
3779         DE_ASSERT(indirectDeviceAddress                         != 0);
3780
3781         return vk.cmdTraceRaysIndirectKHR(commandBuffer,
3782                                                                           raygenShaderBindingTableRegion,
3783                                                                           missShaderBindingTableRegion,
3784                                                                           hitShaderBindingTableRegion,
3785                                                                           callableShaderBindingTableRegion,
3786                                                                           indirectDeviceAddress);
3787 }
3788
3789 void cmdTraceRaysIndirect (const DeviceInterface&                                       vk,
3790                                                    VkCommandBuffer                                                      commandBuffer,
3791                                                    const VkStridedDeviceAddressRegionKHR*       raygenShaderBindingTableRegion,
3792                                                    const VkStridedDeviceAddressRegionKHR*       missShaderBindingTableRegion,
3793                                                    const VkStridedDeviceAddressRegionKHR*       hitShaderBindingTableRegion,
3794                                                    const VkStridedDeviceAddressRegionKHR*       callableShaderBindingTableRegion,
3795                                                    VkDeviceAddress                                                      indirectDeviceAddress)
3796 {
3797         return cmdTraceRaysIndirectKHR(vk,
3798                                                                    commandBuffer,
3799                                                                    raygenShaderBindingTableRegion,
3800                                                                    missShaderBindingTableRegion,
3801                                                                    hitShaderBindingTableRegion,
3802                                                                    callableShaderBindingTableRegion,
3803                                                                    indirectDeviceAddress);
3804 }
3805
3806 static inline void cmdTraceRaysIndirect2KHR (const DeviceInterface&     vk,
3807                                                                                         VkCommandBuffer                 commandBuffer,
3808                                                                                         VkDeviceAddress                 indirectDeviceAddress )
3809 {
3810         DE_ASSERT(indirectDeviceAddress != 0);
3811
3812         return vk.cmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
3813 }
3814
3815 void cmdTraceRaysIndirect2      (const DeviceInterface& vk,
3816                                                          VkCommandBuffer                commandBuffer,
3817                                                          VkDeviceAddress                indirectDeviceAddress)
3818 {
3819         return cmdTraceRaysIndirect2KHR(vk, commandBuffer, indirectDeviceAddress);
3820 }
3821
3822 #else
3823
3824 deUint32 rayTracingDefineAnything()
3825 {
3826         return 0;
3827 }
3828
3829 #endif // CTS_USES_VULKANSC
3830
3831 } // vk