Merge vk-gl-cts/vulkan-cts-1.2.7 into vk-gl-cts/vulkan-cts-1.2.8
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / framework / vulkan / vkRayTracingUtil.cpp
1 /*-------------------------------------------------------------------------
2  * Vulkan CTS Framework
3  * --------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Utilities for creating commonly used Vulkan objects
22  *//*--------------------------------------------------------------------*/
23
24 #include "vkRayTracingUtil.hpp"
25
26 #include "vkRefUtil.hpp"
27 #include "vkQueryUtil.hpp"
28 #include "vkObjUtil.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkCmdUtil.hpp"
31
32 #include "deStringUtil.hpp"
33
34 #include <vector>
35 #include <string>
36 #include <thread>
37 #include <limits>
38
39 namespace vk
40 {
41
// Argument bundle consumed by finishDeferredOperationThreaded(): the first three
// members are forwarded to finishDeferredOperation() and its return value is
// written back into 'result'.
struct DeferredThreadParams
{
        // Device-level function table used to issue the deferred-operation calls.
        const DeviceInterface&  vk;
        // Device passed along with every deferred-operation call.
        VkDevice                                device;
        // Handle of the deferred operation to finish.
        VkDeferredOperationKHR  deferredOperation;
        // Output: value returned by finishDeferredOperation().
        VkResult                                result;
};
49
50 std::string getFormatSimpleName (vk::VkFormat format)
51 {
52         constexpr size_t kPrefixLen = 10; // strlen("VK_FORMAT_")
53         return de::toLower(de::toString(format).substr(kPrefixLen));
54 }
55
56 // Returns true if VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR needs to be supported for the given format.
57 static bool isMandatoryAccelerationStructureVertexBufferFormat (vk::VkFormat format)
58 {
59         bool mandatory = false;
60
61         switch (format)
62         {
63     case VK_FORMAT_R32G32_SFLOAT:
64     case VK_FORMAT_R32G32B32_SFLOAT:
65     case VK_FORMAT_R16G16_SFLOAT:
66     case VK_FORMAT_R16G16B16A16_SFLOAT:
67     case VK_FORMAT_R16G16_SNORM:
68     case VK_FORMAT_R16G16B16A16_SNORM:
69                 mandatory = true;
70                 break;
71         default:
72                 break;
73         }
74
75         return mandatory;
76 }
77
78 void checkAccelerationStructureVertexBufferFormat (const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice, vk::VkFormat format)
79 {
80         const vk::VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
81
82         if ((formatProperties.bufferFeatures & vk::VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR) == 0u)
83         {
84                 const std::string errorMsg = "Format not supported for acceleration structure vertex buffers";
85                 if (isMandatoryAccelerationStructureVertexBufferFormat(format))
86                         TCU_FAIL(errorMsg);
87                 TCU_THROW(NotSupportedError, errorMsg);
88         }
89 }
90
// Returns the GLSL source of a ray generation shader: for each launch ID it builds an
// origin from the normalised (x, y) launch position at z = 0 and calls traceRayEXT
// along (0, 0, -1) against the top-level acceleration structure at set 0, binding 1.
std::string getCommonRayGenerationShader (void)
{
	std::string source;

	// Preamble and resource declarations.
	source += "#version 460 core\n";
	source += "#extension GL_EXT_ray_tracing : require\n";
	source += "layout(location = 0) rayPayloadEXT vec3 hitValue;\n";
	source += "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n";
	source += "\n";

	// Entry point.
	source += "void main()\n";
	source += "{\n";
	source += "  uint  rayFlags = 0;\n";
	source += "  uint  cullMask = 0xFF;\n";
	source += "  float tmin     = 0.0;\n";
	source += "  float tmax     = 9.0;\n";
	source += "  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n";
	source += "  vec3  direct   = vec3(0.0, 0.0, -1.0);\n";
	source += "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n";
	source += "}\n";

	return source;
}
110
111 RaytracedGeometryBase::RaytracedGeometryBase (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType)
112         : m_geometryType        (geometryType)
113         , m_vertexFormat        (vertexFormat)
114         , m_indexType           (indexType)
115         , m_geometryFlags       ((VkGeometryFlagsKHR)0u)
116 {
117         if (m_geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
118                 DE_ASSERT(m_vertexFormat == VK_FORMAT_R32G32B32_SFLOAT);
119 }
120
121 RaytracedGeometryBase::~RaytracedGeometryBase ()
122 {
123 }
124
// Parameters handed to buildRaytracedGeometry() when instantiating a RaytracedGeometry.
struct GeometryBuilderParams
{
        // Passed through as the first constructor argument of RaytracedGeometry.
        VkGeometryTypeKHR       geometryType;
        // When true, 1u is passed as the second constructor argument, otherwise 0u.
        bool                            usePadding;
};
130
131 template <typename V, typename I>
132 RaytracedGeometryBase* buildRaytracedGeometry (const GeometryBuilderParams& params)
133 {
134         return new RaytracedGeometry<V, I>(params.geometryType, (params.usePadding ? 1u : 0u));
135 }
136
137 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry (VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType, bool padVertices)
138 {
139         const GeometryBuilderParams builderParams { geometryType, padVertices };
140
141         switch (vertexFormat)
142         {
143                 case VK_FORMAT_R32G32_SFLOAT:
144                         switch (indexType)
145                         {
146                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint16>(builderParams));
147                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, deUint32>(builderParams));
148                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec2, EmptyIndex>(builderParams));
149                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
150                         }
151                 case VK_FORMAT_R32G32B32_SFLOAT:
152                         switch (indexType)
153                         {
154                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint16>(builderParams));
155                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, deUint32>(builderParams));
156                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec3, EmptyIndex>(builderParams));
157                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
158                         }
159                 case VK_FORMAT_R32G32B32A32_SFLOAT:
160                         switch (indexType)
161                         {
162                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint16>(builderParams));
163                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, deUint32>(builderParams));
164                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::Vec4, EmptyIndex>(builderParams));
165                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
166                         }
167                 case VK_FORMAT_R16G16_SFLOAT:
168                         switch (indexType)
169                         {
170                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint16>(builderParams));
171                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, deUint32>(builderParams));
172                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16, EmptyIndex>(builderParams));
173                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
174                         }
175                 case VK_FORMAT_R16G16B16_SFLOAT:
176                         switch (indexType)
177                         {
178                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint16>(builderParams));
179                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, deUint32>(builderParams));
180                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16, EmptyIndex>(builderParams));
181                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
182                         }
183                 case VK_FORMAT_R16G16B16A16_SFLOAT:
184                         switch (indexType)
185                         {
186                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint16>(builderParams));
187                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, deUint32>(builderParams));
188                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16, EmptyIndex>(builderParams));
189                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
190                         }
191                 case VK_FORMAT_R16G16_SNORM:
192                         switch (indexType)
193                         {
194                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint16>(builderParams));
195                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, deUint32>(builderParams));
196                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_16SNorm, EmptyIndex>(builderParams));
197                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
198                         }
199                 case VK_FORMAT_R16G16B16_SNORM:
200                         switch (indexType)
201                         {
202                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint16>(builderParams));
203                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, deUint32>(builderParams));
204                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_16SNorm, EmptyIndex>(builderParams));
205                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
206                         }
207                 case VK_FORMAT_R16G16B16A16_SNORM:
208                         switch (indexType)
209                         {
210                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint16>(builderParams));
211                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, deUint32>(builderParams));
212                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_16SNorm, EmptyIndex>(builderParams));
213                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
214                         }
215                 case VK_FORMAT_R64G64_SFLOAT:
216                         switch (indexType)
217                         {
218                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint16>(builderParams));
219                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, deUint32>(builderParams));
220                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec2, EmptyIndex>(builderParams));
221                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
222                         }
223                 case VK_FORMAT_R64G64B64_SFLOAT:
224                         switch (indexType)
225                         {
226                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint16>(builderParams));
227                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, deUint32>(builderParams));
228                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec3, EmptyIndex>(builderParams));
229                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
230                         }
231                 case VK_FORMAT_R64G64B64A64_SFLOAT:
232                         switch (indexType)
233                         {
234                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint16>(builderParams));
235                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, deUint32>(builderParams));
236                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<tcu::DVec4, EmptyIndex>(builderParams));
237                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
238                         }
239                 case VK_FORMAT_R8G8_SNORM:
240                         switch (indexType)
241                         {
242                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint16>(builderParams));
243                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, deUint32>(builderParams));
244                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec2_8SNorm, EmptyIndex>(builderParams));
245                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
246                         }
247                 case VK_FORMAT_R8G8B8_SNORM:
248                         switch (indexType)
249                         {
250                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint16>(builderParams));
251                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, deUint32>(builderParams));
252                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec3_8SNorm, EmptyIndex>(builderParams));
253                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
254                         }
255                 case VK_FORMAT_R8G8B8A8_SNORM:
256                         switch (indexType)
257                         {
258                                 case VK_INDEX_TYPE_UINT16:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint16>(builderParams));
259                                 case VK_INDEX_TYPE_UINT32:              return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, deUint32>(builderParams));
260                                 case VK_INDEX_TYPE_NONE_KHR:    return de::SharedPtr<RaytracedGeometryBase>(buildRaytracedGeometry<Vec4_8SNorm, EmptyIndex>(builderParams));
261                                 default:                                                TCU_THROW(InternalError, "Wrong index type");
262                         }
263                 default:
264                         TCU_THROW(InternalError, "Wrong vertex format");
265         }
266
267 }
268
269 VkDeviceAddress getBufferDeviceAddress ( const DeviceInterface& vk,
270                                                                                  const VkDevice                 device,
271                                                                                  const VkBuffer                 buffer,
272                                                                                  VkDeviceSize                   offset )
273 {
274
275         if (buffer == DE_NULL)
276                 return 0;
277
278         VkBufferDeviceAddressInfo deviceAddressInfo
279         {
280                 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,           // VkStructureType    sType
281                 DE_NULL,                                                                                        // const void*        pNext
282                 buffer                                                                                          // VkBuffer           buffer;
283         };
284         return vk.getBufferDeviceAddress(device, &deviceAddressInfo) + offset;
285 }
286
287
288 static inline VkDeviceOrHostAddressConstKHR makeDeviceOrHostAddressConstKHR (const void* hostAddress)
289 {
290         // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
291         VkDeviceOrHostAddressConstKHR result;
292
293         deMemset(&result, 0, sizeof(result));
294
295         result.hostAddress = hostAddress;
296
297         return result;
298 }
299
300 static inline VkDeviceOrHostAddressKHR makeDeviceOrHostAddressKHR (void* hostAddress)
301 {
302         // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
303         VkDeviceOrHostAddressKHR result;
304
305         deMemset(&result, 0, sizeof(result));
306
307         result.hostAddress = hostAddress;
308
309         return result;
310 }
311
312 static inline VkDeviceOrHostAddressConstKHR makeDeviceOrHostAddressConstKHR (const DeviceInterface&     vk,
313                                                                                                                                           const VkDevice                        device,
314                                                                                                                                           VkBuffer                                      buffer,
315                                                                                                                                           VkDeviceSize                          offset)
316 {
317         // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
318         VkDeviceOrHostAddressConstKHR result;
319
320         deMemset(&result, 0, sizeof(result));
321
322         VkBufferDeviceAddressInfo bufferDeviceAddressInfo =
323         {
324                 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,       // VkStructureType       sType;
325                 DE_NULL,                                                                                        // const void*           pNext;
326                 buffer,                                                                                         // VkBuffer                     buffer
327         };
328         result.deviceAddress = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo) + offset;
329
330         return result;
331 }
332
333 static inline VkDeviceOrHostAddressKHR makeDeviceOrHostAddressKHR (const DeviceInterface&       vk,
334                                                                                                                                    const VkDevice                       device,
335                                                                                                                                    VkBuffer                                     buffer,
336                                                                                                                                    VkDeviceSize                         offset)
337 {
338         // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
339         VkDeviceOrHostAddressKHR result;
340
341         deMemset(&result, 0, sizeof(result));
342
343         VkBufferDeviceAddressInfo bufferDeviceAddressInfo =
344         {
345                 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,       // VkStructureType       sType;
346                 DE_NULL,                                                                                        // const void*           pNext;
347                 buffer,                                                                                         // VkBuffer                     buffer
348         };
349         result.deviceAddress = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo) + offset;
350
351         return result;
352 }
353
354 static inline Move<VkQueryPool> makeQueryPool (const DeviceInterface&           vk,
355                                                                                            const VkDevice                               device,
356                                                                                            const VkQueryType                    queryType,
357                                                                                            deUint32                                     queryCount)
358 {
359         const VkQueryPoolCreateInfo                             queryPoolCreateInfo =
360         {
361                 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,               // sType
362                 DE_NULL,                                                                                // pNext
363                 (VkQueryPoolCreateFlags)0,                                              // flags
364                 queryType,                                                                              // queryType
365                 queryCount,                                                                             // queryCount
366                 0u,                                                                                             // pipelineStatistics
367         };
368         return createQueryPool(vk, device, &queryPoolCreateInfo);
369 }
370
371 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryTrianglesDataKHR& triangles)
372 {
373         VkAccelerationStructureGeometryDataKHR result;
374
375         deMemset(&result, 0, sizeof(result));
376
377         result.triangles = triangles;
378
379         return result;
380 }
381
382 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureGeometryDataKHR (const VkAccelerationStructureGeometryAabbsDataKHR& aabbs)
383 {
384         VkAccelerationStructureGeometryDataKHR result;
385
386         deMemset(&result, 0, sizeof(result));
387
388         result.aabbs = aabbs;
389
390         return result;
391 }
392
393 static inline VkAccelerationStructureGeometryDataKHR makeVkAccelerationStructureInstancesDataKHR (const VkAccelerationStructureGeometryInstancesDataKHR& instances)
394 {
395         VkAccelerationStructureGeometryDataKHR result;
396
397         deMemset(&result, 0, sizeof(result));
398
399         result.instances = instances;
400
401         return result;
402 }
403
404 static inline VkAccelerationStructureInstanceKHR makeVkAccelerationStructureInstanceKHR (const VkTransformMatrixKHR&                    transform,
405                                                                                                                                                                                  deUint32                                                               instanceCustomIndex,
406                                                                                                                                                                                  deUint32                                                               mask,
407                                                                                                                                                                                  deUint32                                                               instanceShaderBindingTableRecordOffset,
408                                                                                                                                                                                  VkGeometryInstanceFlagsKHR                             flags,
409                                                                                                                                                                                  deUint64                                                               accelerationStructureReference)
410 {
411         VkAccelerationStructureInstanceKHR instance             = { transform, 0, 0, 0, 0, accelerationStructureReference };
412         instance.instanceCustomIndex                                    = instanceCustomIndex & 0xFFFFFF;
413         instance.mask                                                                   = mask & 0xFF;
414         instance.instanceShaderBindingTableRecordOffset = instanceShaderBindingTableRecordOffset & 0xFFFFFF;
415         instance.flags                                                                  = flags & 0xFF;
416         return instance;
417 }
418
419 VkResult getRayTracingShaderGroupHandlesKHR (const DeviceInterface&             vk,
420                                                                                          const VkDevice                         device,
421                                                                                          const VkPipeline                       pipeline,
422                                                                                          const deUint32                         firstGroup,
423                                                                                          const deUint32                         groupCount,
424                                                                                          const deUintptr                        dataSize,
425                                                                                          void*                                          pData)
426 {
427         return vk.getRayTracingShaderGroupHandlesKHR(device, pipeline, firstGroup, groupCount, dataSize, pData);
428 }
429
430 VkResult getRayTracingShaderGroupHandles (const DeviceInterface&                vk,
431                                                                                   const VkDevice                                device,
432                                                                                   const VkPipeline                              pipeline,
433                                                                                   const deUint32                                firstGroup,
434                                                                                   const deUint32                                groupCount,
435                                                                                   const deUintptr                               dataSize,
436                                                                                   void*                                                 pData)
437 {
438         return getRayTracingShaderGroupHandlesKHR(vk, device, pipeline, firstGroup, groupCount, dataSize, pData);
439 }
440
441 VkResult finishDeferredOperation (const DeviceInterface&        vk,
442                                                                   VkDevice                                      device,
443                                                                   VkDeferredOperationKHR        deferredOperation)
444 {
445         VkResult result = vk.deferredOperationJoinKHR(device, deferredOperation);
446
447         while (result == VK_THREAD_IDLE_KHR)
448         {
449                 std::this_thread::yield();
450                 result = vk.deferredOperationJoinKHR(device, deferredOperation);
451         }
452
453         switch( result )
454         {
455                 case VK_SUCCESS:
456                 {
457                         // Deferred operation has finished. Query its result
458                         result = vk.getDeferredOperationResultKHR(device, deferredOperation);
459
460                         break;
461                 }
462
463                 case VK_THREAD_DONE_KHR:
464                 {
465                         // Deferred operation is being wrapped up by another thread
466                         // wait for that thread to finish
467                         do
468                         {
469                                 std::this_thread::yield();
470                                 result = vk.getDeferredOperationResultKHR(device, deferredOperation);
471                         } while (result == VK_NOT_READY);
472
473                         break;
474                 }
475
476                 default:
477                 {
478                         DE_ASSERT(false);
479
480                         break;
481                 }
482         }
483
484         return result;
485 }
486
487 void finishDeferredOperationThreaded (DeferredThreadParams* deferredThreadParams)
488 {
489         deferredThreadParams->result = finishDeferredOperation(deferredThreadParams->vk, deferredThreadParams->device, deferredThreadParams->deferredOperation);
490 }
491
492 void finishDeferredOperation (const DeviceInterface&    vk,
493                                                           VkDevice                                      device,
494                                                           VkDeferredOperationKHR        deferredOperation,
495                                                           const deUint32                        workerThreadCount,
496                                                           const bool                            operationNotDeferred)
497 {
498
499         if (operationNotDeferred)
500         {
501                 // when the operation deferral returns VK_OPERATION_NOT_DEFERRED_KHR,
502                 // the deferred operation should act as if no command was deferred
503                 VK_CHECK(vk.getDeferredOperationResultKHR(device, deferredOperation));
504
505
506                 // there is not need to join any threads to the deferred operation,
507                 // so below can be skipped.
508                 return;
509         }
510
511         if (workerThreadCount == 0)
512         {
513                 VK_CHECK(finishDeferredOperation(vk, device, deferredOperation));
514         }
515         else
516         {
517                 const deUint32                                                  maxThreadCountSupported = deMinu32(256u, vk.getDeferredOperationMaxConcurrencyKHR(device, deferredOperation));
518                 const deUint32                                                  requestedThreadCount    = workerThreadCount;
519                 const deUint32                                                  testThreadCount                 = requestedThreadCount == std::numeric_limits<deUint32>::max() ? maxThreadCountSupported : requestedThreadCount;
520
521                 if (maxThreadCountSupported == 0)
522                         TCU_FAIL("vkGetDeferredOperationMaxConcurrencyKHR must not return 0");
523
524                 const DeferredThreadParams                              deferredThreadParams    =
525                 {
526                         vk,                                     //  const DeviceInterface&      vk;
527                         device,                         //  VkDevice                            device;
528                         deferredOperation,      //  VkDeferredOperationKHR      deferredOperation;
529                         VK_RESULT_MAX_ENUM,     //  VResult                                     result;
530                 };
531                 std::vector<DeferredThreadParams>               threadParams    (testThreadCount, deferredThreadParams);
532                 std::vector<de::MovePtr<std::thread> >  threads                 (testThreadCount);
533                 bool                                                                    executionResult = false;
534
535                 DE_ASSERT(threads.size() > 0 && threads.size() == testThreadCount);
536
537                 for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
538                         threads[threadNdx] = de::MovePtr<std::thread>(new std::thread(finishDeferredOperationThreaded, &threadParams[threadNdx]));
539
540                 for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
541                         threads[threadNdx]->join();
542
543                 for (deUint32 threadNdx = 0; threadNdx < testThreadCount; ++threadNdx)
544                         if (threadParams[threadNdx].result == VK_SUCCESS)
545                                 executionResult = true;
546
547                 if (!executionResult)
548                         TCU_FAIL("Neither reported VK_SUCCESS");
549         }
550 }
551
552 SerialStorage::SerialStorage (const DeviceInterface&                                                                    vk,
553                                                           const VkDevice                                                                                        device,
554                                                           Allocator&                                                                                            allocator,
555                                                           const VkAccelerationStructureBuildTypeKHR                                     buildType,
556                                                           const VkDeviceSize                                                                            storageSize)
557         : m_buildType           (buildType)
558         , m_storageSize         (storageSize)
559 {
560         const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
561         try
562         {
563                 m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
564         }
565         catch (const tcu::NotSupportedError&)
566         {
567                 // retry without Cached flag
568                 m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
569         }
570 }
571
572 SerialStorage::SerialStorage (const DeviceInterface&                                            vk,
573                                                           const VkDevice                                                                device,
574                                                           Allocator&                                                                    allocator,
575                                                           const VkAccelerationStructureBuildTypeKHR             buildType,
576                                                           const SerialInfo&                                                             serialInfo)
577         : m_buildType           (buildType)
578         , m_storageSize         (serialInfo.sizes()[0]) // raise assertion if serialInfo is empty
579         , m_serialInfo          (serialInfo)
580 {
581         DE_ASSERT(serialInfo.sizes().size() >= 2u);
582
583         // create buffer for top-level acceleration structure
584         {
585                 const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(m_storageSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
586                 m_buffer                                                                                = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
587         }
588
589         // create buffers for bottom-level acceleration structures
590         {
591                 std::vector<deUint64>   addrs;
592
593                 for (std::size_t i = 1; i < serialInfo.addresses().size(); ++i)
594                 {
595                         const deUint64& lookAddr = serialInfo.addresses()[i];
596                         auto end = addrs.end();
597                         auto match = std::find_if(addrs.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
598                         if (match == end)
599                         {
600                                 addrs.emplace_back(lookAddr);
601                                 m_bottoms.emplace_back(de::SharedPtr<SerialStorage>(new SerialStorage(vk, device, allocator, buildType, serialInfo.sizes()[i])));
602                         }
603                 }
604         }
605 }
606
607 VkDeviceOrHostAddressKHR SerialStorage::getAddress (const DeviceInterface&                                              vk,
608                                                                                                         const VkDevice                                                          device,
609                                                                                                         const VkAccelerationStructureBuildTypeKHR       buildType)
610 {
611         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
612                 return makeDeviceOrHostAddressKHR(vk, device, m_buffer->get(), 0);
613         else
614                 return makeDeviceOrHostAddressKHR(m_buffer->getAllocation().getHostPtr());
615 }
616
617 SerialStorage::AccelerationStructureHeader* SerialStorage::getASHeader ()
618 {
619         return reinterpret_cast<AccelerationStructureHeader*>(getHostAddress().hostAddress);
620 }
621
622 bool SerialStorage::hasDeepFormat () const
623 {
624         return (m_serialInfo.sizes().size() >= 2u);
625 }
626
627 de::SharedPtr<SerialStorage> SerialStorage::getBottomStorage (deUint32 index) const
628 {
629         return m_bottoms[index];
630 }
631
632 VkDeviceOrHostAddressKHR SerialStorage::getHostAddress (VkDeviceSize offset)
633 {
634         DE_ASSERT(offset < m_storageSize);
635         return makeDeviceOrHostAddressKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
636 }
637
638 VkDeviceOrHostAddressConstKHR SerialStorage::getHostAddressConst (VkDeviceSize offset)
639 {
640         return makeDeviceOrHostAddressConstKHR(static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr()) + offset);
641 }
642
643 VkDeviceOrHostAddressConstKHR SerialStorage::getAddressConst (const DeviceInterface&                                    vk,
644                                                                                                                           const VkDevice                                                        device,
645                                                                                                                           const VkAccelerationStructureBuildTypeKHR     buildType)
646 {
647         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
648                 return makeDeviceOrHostAddressConstKHR(vk, device, m_buffer->get(), 0);
649         else
650                 return getHostAddressConst();
651 }
652
653 inline VkDeviceSize SerialStorage::getStorageSize () const
654 {
655         return m_storageSize;
656 }
657
658 inline const SerialInfo& SerialStorage::getSerialInfo () const
659 {
660         return m_serialInfo;
661 }
662
663 deUint64 SerialStorage::getDeserializedSize ()
664 {
665         deUint64                result          = 0;
666         const deUint8*  startPtr        = static_cast<deUint8*>(m_buffer->getAllocation().getHostPtr());
667
668         DE_ASSERT(sizeof(result) == DESERIALIZED_SIZE_SIZE);
669
670         deMemcpy(&result, startPtr + DESERIALIZED_SIZE_OFFSET, sizeof(result));
671
672         return result;
673 }
674
675 BottomLevelAccelerationStructure::~BottomLevelAccelerationStructure ()
676 {
677 }
678
679 BottomLevelAccelerationStructure::BottomLevelAccelerationStructure ()
680         : m_structureSize               (0u)
681         , m_updateScratchSize   (0u)
682         , m_buildScratchSize    (0u)
683 {
684 }
685
686 void BottomLevelAccelerationStructure::setGeometryData (const std::vector<tcu::Vec3>&   geometryData,
687                                                                                                                 const bool                                              triangles,
688                                                                                                                 const VkGeometryFlagsKHR                geometryFlags)
689 {
690         if (triangles)
691                 DE_ASSERT((geometryData.size() % 3) == 0);
692         else
693                 DE_ASSERT((geometryData.size() % 2) == 0);
694
695         setGeometryCount(1u);
696
697         addGeometry(geometryData, triangles, geometryFlags);
698 }
699
700 void BottomLevelAccelerationStructure::setDefaultGeometryData (const VkShaderStageFlagBits      testStage,
701                                                                                                                            const VkGeometryFlagsKHR             geometryFlags)
702 {
703         bool                                    trianglesData   = false;
704         float                                   z                               = 0.0f;
705         std::vector<tcu::Vec3>  geometryData;
706
707         switch (testStage)
708         {
709                 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:            z = -1.0f; trianglesData = true;        break;
710                 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:           z = -1.0f; trianglesData = true;        break;
711                 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:       z = -1.0f; trianglesData = true;        break;
712                 case VK_SHADER_STAGE_MISS_BIT_KHR:                      z = -9.9f; trianglesData = true;        break;
713                 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:      z = -1.0f; trianglesData = false;       break;
714                 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:          z = -1.0f; trianglesData = true;        break;
715                 default:                                                                        TCU_THROW(InternalError, "Unacceptable stage");
716         }
717
718         if (trianglesData)
719         {
720                 geometryData.reserve(6);
721
722                 geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
723                 geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
724                 geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
725                 geometryData.push_back(tcu::Vec3(-1.0f, +1.0f, z));
726                 geometryData.push_back(tcu::Vec3(+1.0f, -1.0f, z));
727                 geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
728         }
729         else
730         {
731                 geometryData.reserve(2);
732
733                 geometryData.push_back(tcu::Vec3(-1.0f, -1.0f, z));
734                 geometryData.push_back(tcu::Vec3(+1.0f, +1.0f, z));
735         }
736
737         setGeometryCount(1u);
738
739         addGeometry(geometryData, trianglesData, geometryFlags);
740 }
741
742 void BottomLevelAccelerationStructure::setGeometryCount (const size_t geometryCount)
743 {
744         m_geometriesData.clear();
745
746         m_geometriesData.reserve(geometryCount);
747 }
748
749 void BottomLevelAccelerationStructure::addGeometry (de::SharedPtr<RaytracedGeometryBase>&               raytracedGeometry)
750 {
751         m_geometriesData.push_back(raytracedGeometry);
752 }
753
754 void BottomLevelAccelerationStructure::addGeometry (const std::vector<tcu::Vec3>&       geometryData,
755                                                                                                         const bool                                              triangles,
756                                                                                                         const VkGeometryFlagsKHR                geometryFlags)
757 {
758         DE_ASSERT(geometryData.size() > 0);
759         DE_ASSERT((triangles && geometryData.size() % 3 == 0) || (!triangles && geometryData.size() % 2 == 0));
760
761         if (!triangles)
762                 for (size_t posNdx = 0; posNdx < geometryData.size() / 2; ++posNdx)
763                 {
764                         DE_ASSERT(geometryData[2 * posNdx].x() <= geometryData[2 * posNdx + 1].x());
765                         DE_ASSERT(geometryData[2 * posNdx].y() <= geometryData[2 * posNdx + 1].y());
766                         DE_ASSERT(geometryData[2 * posNdx].z() <= geometryData[2 * posNdx + 1].z());
767                 }
768
769         de::SharedPtr<RaytracedGeometryBase> geometry = makeRaytracedGeometry(triangles ? VK_GEOMETRY_TYPE_TRIANGLES_KHR : VK_GEOMETRY_TYPE_AABBS_KHR, VK_FORMAT_R32G32B32_SFLOAT, VK_INDEX_TYPE_NONE_KHR);
770         for (auto it = begin(geometryData), eit = end(geometryData); it != eit; ++it)
771                 geometry->addVertex(*it);
772
773         geometry->setGeometryFlags(geometryFlags);
774         addGeometry(geometry);
775 }
776
777 VkDeviceSize BottomLevelAccelerationStructure::getStructureSize() const
778 {
779         return m_structureSize;
780 }
781
782 BufferWithMemory* createVertexBuffer (const DeviceInterface&                                                                    vk,
783                                                                           const VkDevice                                                                                        device,
784                                                                           Allocator&                                                                                            allocator,
785                                                                           const std::vector<de::SharedPtr<RaytracedGeometryBase>>&      geometriesData)
786 {
787         DE_ASSERT(geometriesData.size() != 0);
788
789         VkDeviceSize                                    bufferSizeBytes = 0;
790         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
791                 bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getVertexByteSize(),8);
792
793         const VkBufferCreateInfo                bufferCreateInfo        = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
794         return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
795 }
796
797 void updateVertexBuffer (const DeviceInterface&                                                                         vk,
798                                                  const VkDevice                                                                                         device,
799                                                  const std::vector<de::SharedPtr<RaytracedGeometryBase>>&       geometriesData,
800                                                  BufferWithMemory*                                                                                      vertexBuffer)
801 {
802         const Allocation&                               geometryAlloc           = vertexBuffer->getAllocation();
803         deUint8*                                                bufferStart                     = static_cast<deUint8*>(geometryAlloc.getHostPtr());
804         VkDeviceSize                                    bufferOffset            = 0;
805
806         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
807         {
808                 const void*                                     geometryPtr                     = geometriesData[geometryNdx]->getVertexPointer();
809                 const size_t                            geometryPtrSize         = geometriesData[geometryNdx]->getVertexByteSize();
810
811                 deMemcpy(&bufferStart[bufferOffset], geometryPtr, geometryPtrSize);
812
813                 bufferOffset += deAlignSize(geometryPtrSize,8);
814         }
815
816         flushMappedMemoryRange(vk, device, geometryAlloc.getMemory(), geometryAlloc.getOffset(), VK_WHOLE_SIZE);
817 }
818
819 BufferWithMemory* createIndexBuffer (const DeviceInterface&                                                                             vk,
820                                                                          const VkDevice                                                                                         device,
821                                                                          Allocator&                                                                                                     allocator,
822                                                                          const std::vector<de::SharedPtr<RaytracedGeometryBase>>&       geometriesData)
823 {
824         DE_ASSERT(!geometriesData.empty());
825
826         VkDeviceSize                                    bufferSizeBytes = 0;
827         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
828                 if(geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
829                         bufferSizeBytes += deAlignSize(geometriesData[geometryNdx]->getIndexByteSize(),8);
830
831         if (bufferSizeBytes == 0)
832                 return DE_NULL;
833
834         const VkBufferCreateInfo                bufferCreateInfo        = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
835         return  new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
836 }
837
838 void updateIndexBuffer (const DeviceInterface&                                                                          vk,
839                                                 const VkDevice                                                                                          device,
840                                                 const std::vector<de::SharedPtr<RaytracedGeometryBase>>&        geometriesData,
841                                                 BufferWithMemory*                                                                                       indexBuffer)
842 {
843         const Allocation&                               indexAlloc                      = indexBuffer->getAllocation();
844         deUint8*                                                bufferStart                     = static_cast<deUint8*>(indexAlloc.getHostPtr());
845         VkDeviceSize                                    bufferOffset            = 0;
846
847         for (size_t geometryNdx = 0; geometryNdx < geometriesData.size(); ++geometryNdx)
848         {
849                 if (geometriesData[geometryNdx]->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
850                 {
851                         const void*                                     indexPtr                = geometriesData[geometryNdx]->getIndexPointer();
852                         const size_t                            indexPtrSize    = geometriesData[geometryNdx]->getIndexByteSize();
853
854                         deMemcpy(&bufferStart[bufferOffset], indexPtr, indexPtrSize);
855
856                         bufferOffset += deAlignSize(indexPtrSize, 8);
857                 }
858         }
859
860         flushMappedMemoryRange(vk, device, indexAlloc.getMemory(), indexAlloc.getOffset(), VK_WHOLE_SIZE);
861 }
862
863 class BottomLevelAccelerationStructureKHR : public BottomLevelAccelerationStructure
864 {
865 public:
866         static deUint32                                                                                 getRequiredAllocationCount                                              (void);
867
868                                                                                                                         BottomLevelAccelerationStructureKHR                             ();
869                                                                                                                         BottomLevelAccelerationStructureKHR                             (const BottomLevelAccelerationStructureKHR&             other) = delete;
870         virtual                                                                                                 ~BottomLevelAccelerationStructureKHR                    ();
871
872         void                                                                                                    setBuildType                                                                    (const VkAccelerationStructureBuildTypeKHR              buildType) override;
873         void                                                                                                    setCreateFlags                                                                  (const VkAccelerationStructureCreateFlagsKHR    createFlags) override;
874         void                                                                                                    setCreateGeneric                                                                (bool                                                                                   createGeneric) override;
875         void                                                                                                    setBuildFlags                                                                   (const VkBuildAccelerationStructureFlagsKHR             buildFlags) override;
876         void                                                                                                    setBuildWithoutGeometries                                               (bool                                                                                   buildWithoutGeometries) override;
877         void                                                                                                    setBuildWithoutPrimitives                                               (bool                                                                                   buildWithoutPrimitives) override;
878         void                                                                                                    setDeferredOperation                                                    (const bool                                                                             deferredOperation,
879                                                                                                                                                                                                                          const deUint32                                                                 workerThreadCount) override;
880         void                                                                                                    setUseArrayOfPointers                                                   (const bool                                                                             useArrayOfPointers) override;
881         void                                                                                                    setIndirectBuildParameters                                              (const VkBuffer                                                                 indirectBuffer,
882                                                                                                                                                                                                                          const VkDeviceSize                                                             indirectBufferOffset,
883                                                                                                                                                                                                                          const deUint32                                                                 indirectBufferStride) override;
884         VkBuildAccelerationStructureFlagsKHR                                    getBuildFlags                                                                   () const override;
885
886         void                                                                                                    create                                                                                  (const DeviceInterface&                                                 vk,
887                                                                                                                                                                                                                          const VkDevice                                                                 device,
888                                                                                                                                                                                                                          Allocator&                                                                             allocator,
889                                                                                                                                                                                                                          VkDeviceSize                                                                   structureSize,
890                                                                                                                                                                                                                          VkDeviceAddress                                                                deviceAddress   = 0u ) override;
891         void                                                                                                    build                                                                                   (const DeviceInterface&                                                 vk,
892                                                                                                                                                                                                                          const VkDevice                                                                 device,
893                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer) override;
894         void                                                                                                    copyFrom                                                                                (const DeviceInterface&                                                 vk,
895                                                                                                                                                                                                                          const VkDevice                                                                 device,
896                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer,
897                                                                                                                                                                                                                          BottomLevelAccelerationStructure*                              accelerationStructure,
898                                                                                                                                                                                                                          bool                                                                                   compactCopy) override;
899
900         void                                                                                                    serialize                                                                               (const DeviceInterface&                                                 vk,
901                                                                                                                                                                                                                          const VkDevice                                                                 device,
902                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer,
903                                                                                                                                                                                                                          SerialStorage*                                                                 storage) override;
904         void                                                                                                    deserialize                                                                             (const DeviceInterface&                                                 vk,
905                                                                                                                                                                                                                          const VkDevice                                                                 device,
906                                                                                                                                                                                                                          const VkCommandBuffer                                                  cmdBuffer,
907                                                                                                                                                                                                                          SerialStorage*                                                                 storage) override;
908
909         const VkAccelerationStructureKHR*                                               getPtr                                                                                  (void) const override;
910
911 protected:
912         VkAccelerationStructureBuildTypeKHR                                             m_buildType;
913         VkAccelerationStructureCreateFlagsKHR                                   m_createFlags;
914         bool                                                                                                    m_createGeneric;
915         VkBuildAccelerationStructureFlagsKHR                                    m_buildFlags;
916         bool                                                                                                    m_buildWithoutGeometries;
917         bool                                                                                                    m_buildWithoutPrimitives;
918         bool                                                                                                    m_deferredOperation;
919         deUint32                                                                                                m_workerThreadCount;
920         bool                                                                                                    m_useArrayOfPointers;
921         de::MovePtr<BufferWithMemory>                                                   m_accelerationStructureBuffer;
922         de::MovePtr<BufferWithMemory>                                                   m_vertexBuffer;
923         de::MovePtr<BufferWithMemory>                                                   m_indexBuffer;
924         de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
925         std::vector<deUint8>                                                                    m_hostScratchBuffer;
926         Move<VkAccelerationStructureKHR>                                                m_accelerationStructureKHR;
927         VkBuffer                                                                                                m_indirectBuffer;
928         VkDeviceSize                                                                                    m_indirectBufferOffset;
929         deUint32                                                                                                m_indirectBufferStride;
930
931         void                                                                                                    prepareGeometries                                                               (const DeviceInterface&                                                                 vk,
932                                                                                                                                                                                                                          const VkDevice                                                                                 device,
933                                                                                                                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR>&               accelerationStructureGeometriesKHR,
934                                                                                                                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR*>&              accelerationStructureGeometriesKHRPointers,
935                                                                                                                                                                                                                          std::vector<VkAccelerationStructureBuildRangeInfoKHR>& accelerationStructureBuildRangeInfoKHR,
936                                                                                                                                                                                                                          std::vector<deUint32>&                                                                 maxPrimitiveCounts);
937 };
938
939 deUint32 BottomLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
940 {
941         /*
942                 de::MovePtr<BufferWithMemory>                                                   m_geometryBuffer; // but only when m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
943                 de::MovePtr<Allocation>                                                                 m_accelerationStructureAlloc;
944                 de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
945         */
946         return 3u;
947 }
948
949 BottomLevelAccelerationStructureKHR::~BottomLevelAccelerationStructureKHR ()
950 {
951 }
952
953 BottomLevelAccelerationStructureKHR::BottomLevelAccelerationStructureKHR ()
954         : BottomLevelAccelerationStructure      ()
955         , m_buildType                                           (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
956         , m_createFlags                                         (0u)
957         , m_createGeneric                                       (false)
958         , m_buildFlags                                          (0u)
959         , m_buildWithoutGeometries                      (false)
960         , m_buildWithoutPrimitives                      (false)
961         , m_deferredOperation                           (false)
962         , m_workerThreadCount                           (0)
963         , m_useArrayOfPointers                          (false)
964         , m_accelerationStructureBuffer         (DE_NULL)
965         , m_vertexBuffer                                        (DE_NULL)
966         , m_indexBuffer                                         (DE_NULL)
967         , m_deviceScratchBuffer                         (DE_NULL)
968         , m_accelerationStructureKHR            ()
969         , m_indirectBuffer                                      (DE_NULL)
970         , m_indirectBufferOffset                        (0)
971         , m_indirectBufferStride                        (0)
972 {
973 }
974
975 void BottomLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR       buildType)
976 {
977         m_buildType = buildType;
978 }
979
980 void BottomLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR   createFlags)
981 {
982         m_createFlags = createFlags;
983 }
984
985 void BottomLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
986 {
987         m_createGeneric = createGeneric;
988 }
989
990 void BottomLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR     buildFlags)
991 {
992         m_buildFlags = buildFlags;
993 }
994
995 void BottomLevelAccelerationStructureKHR::setBuildWithoutGeometries (bool buildWithoutGeometries)
996 {
997         m_buildWithoutGeometries = buildWithoutGeometries;
998 }
999
1000 void BottomLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
1001 {
1002         m_buildWithoutPrimitives = buildWithoutPrimitives;
1003 }
1004
1005 void BottomLevelAccelerationStructureKHR::setDeferredOperation (const bool              deferredOperation,
1006                                                                                                                                 const deUint32  workerThreadCount)
1007 {
1008         m_deferredOperation = deferredOperation;
1009         m_workerThreadCount = workerThreadCount;
1010 }
1011
1012 void BottomLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool     useArrayOfPointers)
1013 {
1014         m_useArrayOfPointers = useArrayOfPointers;
1015 }
1016
1017 void BottomLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer            indirectBuffer,
1018                                                                                                                                           const VkDeviceSize    indirectBufferOffset,
1019                                                                                                                                           const deUint32                indirectBufferStride)
1020 {
1021         m_indirectBuffer                = indirectBuffer;
1022         m_indirectBufferOffset  = indirectBufferOffset;
1023         m_indirectBufferStride  = indirectBufferStride;
1024 }
1025
1026 VkBuildAccelerationStructureFlagsKHR BottomLevelAccelerationStructureKHR::getBuildFlags () const
1027 {
1028         return m_buildFlags;
1029 }
1030
1031 void BottomLevelAccelerationStructureKHR::create (const DeviceInterface&                                vk,
1032                                                                                                   const VkDevice                                                device,
1033                                                                                                   Allocator&                                                    allocator,
1034                                                                                                   VkDeviceSize                                                  structureSize,
1035                                                                                                   VkDeviceAddress                                               deviceAddress)
1036 {
1037         // AS may be built from geometries using vkCmdBuildAccelerationStructuresKHR / vkBuildAccelerationStructuresKHR
1038         // or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
1039         DE_ASSERT(!m_geometriesData.empty() !=  !(structureSize == 0)); // logical xor
1040
1041         if (structureSize == 0)
1042         {
1043                 std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
1044                 std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
1045                 std::vector<VkAccelerationStructureBuildRangeInfoKHR>   accelerationStructureBuildRangeInfoKHR;
1046                 std::vector<deUint32>                                                                   maxPrimitiveCounts;
1047                 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, maxPrimitiveCounts);
1048
1049                 const VkAccelerationStructureGeometryKHR*                               accelerationStructureGeometriesKHRPointer       = accelerationStructureGeometriesKHR.data();
1050                 const VkAccelerationStructureGeometryKHR* const*                accelerationStructureGeometry                           = accelerationStructureGeometriesKHRPointers.data();
1051
1052                 VkAccelerationStructureBuildGeometryInfoKHR     accelerationStructureBuildGeometryInfoKHR       =
1053                 {
1054                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                       //  VkStructureType                                                                             sType;
1055                         DE_NULL,                                                                                                                                        //  const void*                                                                                 pNext;
1056                         VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                                                        //  VkAccelerationStructureTypeKHR                                              type;
1057                         m_buildFlags,                                                                                                                           //  VkBuildAccelerationStructureFlagsKHR                                flags;
1058                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                         //  VkBuildAccelerationStructureModeKHR                                 mode;
1059                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
1060                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
1061                         static_cast<deUint32>(accelerationStructureGeometriesKHR.size()),                       //  deUint32                                                                                    geometryCount;
1062                         m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,     //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
1063                         m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,                         //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
1064                         makeDeviceOrHostAddressKHR(DE_NULL)                                                                                     //  VkDeviceOrHostAddressKHR                                                    scratchData;
1065                 };
1066                 VkAccelerationStructureBuildSizesInfoKHR sizeInfo =
1067                 {
1068                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
1069                         DE_NULL,                                                                                                                //  const void*         pNext;
1070                         0,                                                                                                                              //  VkDeviceSize        accelerationStructureSize;
1071                         0,                                                                                                                              //  VkDeviceSize        updateScratchSize;
1072                         0                                                                                                                               //  VkDeviceSize        buildScratchSize;
1073                 };
1074
1075                 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
1076
1077                 m_structureSize         = sizeInfo.accelerationStructureSize;
1078                 m_updateScratchSize     = sizeInfo.updateScratchSize;
1079                 m_buildScratchSize      = sizeInfo.buildScratchSize;
1080         }
1081         else
1082         {
1083                 m_structureSize         = structureSize;
1084                 m_updateScratchSize     = 0u;
1085                 m_buildScratchSize      = 0u;
1086         }
1087
1088         {
1089                 const VkBufferCreateInfo                bufferCreateInfo        = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1090                 try
1091                 {
1092                         m_accelerationStructureBuffer                                           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1093                 }
1094                 catch (const tcu::NotSupportedError&)
1095                 {
1096                         // retry without Cached flag
1097                         m_accelerationStructureBuffer                                           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1098                 }
1099         }
1100
1101         {
1102                 const VkAccelerationStructureTypeKHR            structureType                                           = (m_createGeneric
1103                                                                                                                                                                                    ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
1104                                                                                                                                                                                    : VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
1105                 const VkAccelerationStructureCreateInfoKHR      accelerationStructureCreateInfoKHR
1106                 {
1107                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,                                               //  VkStructureType                                                                                     sType;
1108                         DE_NULL,                                                                                                                                                //  const void*                                                                                         pNext;
1109                         m_createFlags,                                                                                                                                  //  VkAccelerationStructureCreateFlagsKHR                                       createFlags;
1110                         m_accelerationStructureBuffer->get(),                                                                                   //  VkBuffer                                                                                            buffer;
1111                         0u,                                                                                                                                                             //  VkDeviceSize                                                                                        offset;
1112                         m_structureSize,                                                                                                                                //  VkDeviceSize                                                                                        size;
1113                         structureType,                                                                                                                                  //  VkAccelerationStructureTypeKHR                                                      type;
1114                         deviceAddress                                                                                                                                   //  VkDeviceAddress                                                                                     deviceAddress;
1115                 };
1116
1117                 m_accelerationStructureKHR      = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
1118         }
1119
1120         if (m_buildScratchSize > 0u)
1121         {
1122                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1123                 {
1124                         const VkBufferCreateInfo                bufferCreateInfo = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1125                         m_deviceScratchBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1126                 }
1127                 else
1128                 {
1129                         m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
1130                 }
1131         }
1132
1133         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR && !m_geometriesData.empty())
1134         {
1135                 m_vertexBuffer  = de::MovePtr<BufferWithMemory>(createVertexBuffer(vk, device, allocator, m_geometriesData));
1136                 m_indexBuffer   = de::MovePtr<BufferWithMemory>(createIndexBuffer(vk, device, allocator, m_geometriesData));
1137         }
1138 }
1139
1140 void BottomLevelAccelerationStructureKHR::build (const DeviceInterface&                                         vk,
1141                                                                                                  const VkDevice                                                         device,
1142                                                                                                  const VkCommandBuffer                                          cmdBuffer)
1143 {
1144         DE_ASSERT(!m_geometriesData.empty());
1145         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1146         DE_ASSERT(m_buildScratchSize != 0);
1147
1148         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1149         {
1150                 updateVertexBuffer(vk, device, m_geometriesData, m_vertexBuffer.get());
1151                 if(m_indexBuffer.get() != DE_NULL)
1152                         updateIndexBuffer(vk, device, m_geometriesData, m_indexBuffer.get());
1153         }
1154
1155         {
1156                 std::vector<VkAccelerationStructureGeometryKHR>                 accelerationStructureGeometriesKHR;
1157                 std::vector<VkAccelerationStructureGeometryKHR*>                accelerationStructureGeometriesKHRPointers;
1158                 std::vector<VkAccelerationStructureBuildRangeInfoKHR>   accelerationStructureBuildRangeInfoKHR;
1159                 std::vector<deUint32>                                                                   maxPrimitiveCounts;
1160
1161                 prepareGeometries(vk, device, accelerationStructureGeometriesKHR, accelerationStructureGeometriesKHRPointers, accelerationStructureBuildRangeInfoKHR, maxPrimitiveCounts);
1162
1163                 const VkAccelerationStructureGeometryKHR*                       accelerationStructureGeometriesKHRPointer       = accelerationStructureGeometriesKHR.data();
1164                 const VkAccelerationStructureGeometryKHR* const*        accelerationStructureGeometry                           = accelerationStructureGeometriesKHRPointers.data();
1165                 VkDeviceOrHostAddressKHR                                                        scratchData                                                                     = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1166                                                                                                                                                                                                                 ? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
1167                                                                                                                                                                                                                 : makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());
1168                 const deUint32                                                                          geometryCount                                                           = (m_buildWithoutGeometries
1169                                                                                                                                                                                                                 ? 0u
1170                                                                                                                                                                                                                 : static_cast<deUint32>(accelerationStructureGeometriesKHR.size()));
1171
1172                 VkAccelerationStructureBuildGeometryInfoKHR     accelerationStructureBuildGeometryInfoKHR       =
1173                 {
1174                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                       //  VkStructureType                                                                             sType;
1175                         DE_NULL,                                                                                                                                        //  const void*                                                                                 pNext;
1176                         VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,                                                        //  VkAccelerationStructureTypeKHR                                              type;
1177                         m_buildFlags,                                                                                                                           //  VkBuildAccelerationStructureFlagsKHR                                flags;
1178                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                         //  VkBuildAccelerationStructureModeKHR                                 mode;
1179                         DE_NULL,                                                                                                                                        //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
1180                         m_accelerationStructureKHR.get(),                                                                                       //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
1181                         geometryCount,                                                                                                                          //  deUint32                                                                                    geometryCount;
1182                         m_useArrayOfPointers ? DE_NULL : accelerationStructureGeometriesKHRPointer,     //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
1183                         m_useArrayOfPointers ? accelerationStructureGeometry : DE_NULL,                         //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
1184                         scratchData                                                                                                                                     //  VkDeviceOrHostAddressKHR                                                    scratchData;
1185                 };
1186
1187                 VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr     = accelerationStructureBuildRangeInfoKHR.data();
1188
1189                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1190                 {
1191                         if (m_indirectBuffer == DE_NULL)
1192                                 vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1193                         else
1194                         {
1195                                 VkDeviceAddress indirectDeviceAddress   = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
1196                                 deUint32*               pMaxPrimitiveCounts             = maxPrimitiveCounts.data();
1197                                 vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
1198                         }
1199                 }
1200                 else if (!m_deferredOperation)
1201                 {
1202                         VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
1203                 }
1204                 else
1205                 {
1206                         const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1207                         const auto deferredOperation    = deferredOperationPtr.get();
1208
1209                         VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
1210
1211                         DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1212
1213                         finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1214                 }
1215         }
1216
1217         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1218         {
1219                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1220                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
1221
1222                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1223         }
1224 }
1225
1226 void BottomLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&                                              vk,
1227                                                                                                         const VkDevice                                                          device,
1228                                                                                                         const VkCommandBuffer                                           cmdBuffer,
1229                                                                                                         BottomLevelAccelerationStructure*                       accelerationStructure,
1230                                                                                                         bool                                                                            compactCopy)
1231 {
1232         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1233         DE_ASSERT(accelerationStructure != DE_NULL);
1234
1235         VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
1236         {
1237                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,                                                                                                                 // VkStructureType                                              sType;
1238                 DE_NULL,                                                                                                                                                                                                                // const void*                                                  pNext;
1239                 *(accelerationStructure->getPtr()),                                                                                                                                                             // VkAccelerationStructureKHR                   src;
1240                 *(getPtr()),                                                                                                                                                                                                    // VkAccelerationStructureKHR                   dst;
1241                 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR   // VkCopyAccelerationStructureModeKHR   mode;
1242         };
1243
1244         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1245         {
1246                 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1247         }
1248         else if (!m_deferredOperation)
1249         {
1250                 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1251         }
1252         else
1253         {
1254                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1255                 const auto deferredOperation    = deferredOperationPtr.get();
1256
1257                 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1258
1259                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1260
1261                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1262         }
1263
1264         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1265         {
1266                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1267                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
1268
1269                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1270         }
1271 }
1272
1273 void BottomLevelAccelerationStructureKHR::serialize (const DeviceInterface&             vk,
1274                                                                                                          const VkDevice                         device,
1275                                                                                                          const VkCommandBuffer          cmdBuffer,
1276                                                                                                          SerialStorage*                         storage)
1277 {
1278         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1279         DE_ASSERT(storage != DE_NULL);
1280
1281         const VkCopyAccelerationStructureToMemoryInfoKHR        copyAccelerationStructureInfo   =
1282         {
1283                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,       // VkStructureType                                              sType;
1284                 DE_NULL,                                                                                                                        // const void*                                                  pNext;
1285                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                   src;
1286                 storage->getAddress(vk, device, m_buildType),                                           // VkDeviceOrHostAddressKHR                             dst;
1287                 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR                                       // VkCopyAccelerationStructureModeKHR   mode;
1288         };
1289
1290         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1291         {
1292                 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
1293         }
1294         else if (!m_deferredOperation)
1295         {
1296                 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1297         }
1298         else
1299         {
1300                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1301                 const auto deferredOperation    = deferredOperationPtr.get();
1302
1303                 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1304
1305                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1306
1307                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1308         }
1309 }
1310
1311 void BottomLevelAccelerationStructureKHR::deserialize (const DeviceInterface&   vk,
1312                                                                                                            const VkDevice                       device,
1313                                                                                                            const VkCommandBuffer        cmdBuffer,
1314                                                                                                            SerialStorage*                       storage)
1315 {
1316         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
1317         DE_ASSERT(storage != DE_NULL);
1318
1319         const VkCopyMemoryToAccelerationStructureInfoKHR        copyAccelerationStructureInfo   =
1320         {
1321                 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,       // VkStructureType                                                      sType;
1322                 DE_NULL,                                                                                                                        // const void*                                                          pNext;
1323                 storage->getAddressConst(vk, device, m_buildType),                                      // VkDeviceOrHostAddressConstKHR                        src;
1324                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                           dst;
1325                 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR                                     // VkCopyAccelerationStructureModeKHR           mode;
1326         };
1327
1328         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1329         {
1330                 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
1331         }
1332         else if (!m_deferredOperation)
1333         {
1334                 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
1335         }
1336         else
1337         {
1338                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
1339                 const auto deferredOperation    = deferredOperationPtr.get();
1340
1341                 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
1342
1343                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
1344
1345                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
1346         }
1347
1348         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1349         {
1350                 const VkAccessFlags             accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
1351                 const VkMemoryBarrier   memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
1352
1353                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
1354         }
1355 }
1356
1357 const VkAccelerationStructureKHR* BottomLevelAccelerationStructureKHR::getPtr (void) const
1358 {
1359         return &m_accelerationStructureKHR.get();
1360 }
1361
1362 void BottomLevelAccelerationStructureKHR::prepareGeometries (const DeviceInterface&                                                                             vk,
1363                                                                                                                          const VkDevice                                                                                         device,
1364                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR>&                       accelerationStructureGeometriesKHR,
1365                                                                                                                          std::vector<VkAccelerationStructureGeometryKHR*>&                      accelerationStructureGeometriesKHRPointers,
1366                                                                                                                          std::vector<VkAccelerationStructureBuildRangeInfoKHR>&         accelerationStructureBuildRangeInfoKHR,
1367                                                                                                                          std::vector<deUint32>&                                                                         maxPrimitiveCounts)
1368 {
1369         accelerationStructureGeometriesKHR.resize(m_geometriesData.size());
1370         accelerationStructureGeometriesKHRPointers.resize(m_geometriesData.size());
1371         accelerationStructureBuildRangeInfoKHR.resize(m_geometriesData.size());
1372         maxPrimitiveCounts.resize(m_geometriesData.size());
1373
1374         VkDeviceSize vertexBufferOffset = 0, indexBufferOffset = 0;
1375
1376         for (size_t geometryNdx = 0; geometryNdx < m_geometriesData.size(); ++geometryNdx)
1377         {
1378                 de::SharedPtr<RaytracedGeometryBase>&                                   geometryData = m_geometriesData[geometryNdx];
1379                 VkDeviceOrHostAddressConstKHR                                                   vertexData, indexData;
1380                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1381                 {
1382                         if (m_vertexBuffer.get() != DE_NULL)
1383                         {
1384                                 vertexData                      = makeDeviceOrHostAddressConstKHR(vk, device, m_vertexBuffer->get(), vertexBufferOffset);
1385                                 if (m_indirectBuffer == DE_NULL )
1386                                 {
1387                                         vertexBufferOffset      += deAlignSize(geometryData->getVertexByteSize(), 8);
1388                                 }
1389                         }
1390                         else
1391                                 vertexData                      = makeDeviceOrHostAddressConstKHR(DE_NULL);
1392
1393                         if (m_indexBuffer.get() != DE_NULL &&  geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1394                         {
1395                                 indexData                       = makeDeviceOrHostAddressConstKHR(vk, device, m_indexBuffer->get(), indexBufferOffset);
1396                                 indexBufferOffset       += deAlignSize(geometryData->getIndexByteSize(), 8);
1397                         }
1398                         else
1399                                 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1400                 }
1401                 else
1402                 {
1403                         vertexData = makeDeviceOrHostAddressConstKHR(geometryData->getVertexPointer());
1404                         if (geometryData->getIndexType() != VK_INDEX_TYPE_NONE_KHR)
1405                                 indexData = makeDeviceOrHostAddressConstKHR(geometryData->getIndexPointer());
1406                         else
1407                                 indexData = makeDeviceOrHostAddressConstKHR(DE_NULL);
1408                 }
1409
1410                 const VkAccelerationStructureGeometryTrianglesDataKHR   accelerationStructureGeometryTrianglesDataKHR =
1411                 {
1412                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,   //  VkStructureType                                     sType;
1413                         DE_NULL,                                                                                                                                //  const void*                                         pNext;
1414                         geometryData->getVertexFormat(),                                                                                //  VkFormat                                            vertexFormat;
1415                         vertexData,                                                                                                                             //  VkDeviceOrHostAddressConstKHR       vertexData;
1416                         geometryData->getVertexStride(),                                                                                //  VkDeviceSize                                        vertexStride;
1417                         static_cast<deUint32>(geometryData->getVertexCount()),                                  //  uint32_t                                            maxVertex;
1418                         geometryData->getIndexType(),                                                                                   //  VkIndexType                                         indexType;
1419                         indexData,                                                                                                                              //  VkDeviceOrHostAddressConstKHR       indexData;
1420                         makeDeviceOrHostAddressConstKHR(DE_NULL),                                                               //  VkDeviceOrHostAddressConstKHR       transformData;
1421                 };
1422
1423                 const VkAccelerationStructureGeometryAabbsDataKHR               accelerationStructureGeometryAabbsDataKHR =
1424                 {
1425                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR,       //  VkStructureType                                     sType;
1426                         DE_NULL,                                                                                                                        //  const void*                                         pNext;
1427                         vertexData,                                                                                                                     //  VkDeviceOrHostAddressConstKHR       data;
1428                         geometryData->getAABBStride()                                                                           //  VkDeviceSize                                        stride;
1429                 };
1430                 const VkAccelerationStructureGeometryDataKHR                    geometry = (geometryData->isTrianglesType())
1431                                                                                                                                                  ? makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryTrianglesDataKHR)
1432                                                                                                                                                  : makeVkAccelerationStructureGeometryDataKHR(accelerationStructureGeometryAabbsDataKHR);
1433                 const VkAccelerationStructureGeometryKHR                                accelerationStructureGeometryKHR =
1434                 {
1435                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,  //  VkStructureType                                                     sType;
1436                         DE_NULL,                                                                                                //  const void*                                                         pNext;
1437                         geometryData->getGeometryType(),                                                //  VkGeometryTypeKHR                                           geometryType;
1438                         geometry,                                                                                               //  VkAccelerationStructureGeometryDataKHR      geometry;
1439                         geometryData->getGeometryFlags()                                                //  VkGeometryFlagsKHR                                          flags;
1440                 };
1441
1442                 const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : geometryData->getPrimitiveCount());
1443
1444                 const VkAccelerationStructureBuildRangeInfoKHR                  accelerationStructureBuildRangeInfosKHR =
1445                 {
1446                         primitiveCount, //  deUint32    primitiveCount;
1447                         0,                              //  deUint32    primitiveOffset;
1448                         0,                              //  deUint32    firstVertex;
1449                         0                               //  deUint32    firstTransform;
1450                 };
1451
1452                 accelerationStructureGeometriesKHR[geometryNdx]                 = accelerationStructureGeometryKHR;
1453                 accelerationStructureGeometriesKHRPointers[geometryNdx] = &accelerationStructureGeometriesKHR[geometryNdx];
1454                 accelerationStructureBuildRangeInfoKHR[geometryNdx]             = accelerationStructureBuildRangeInfosKHR;
1455                 maxPrimitiveCounts[geometryNdx]                                                 = geometryData->getPrimitiveCount();
1456         }
1457 }
1458
1459 deUint32 BottomLevelAccelerationStructure::getRequiredAllocationCount (void)
1460 {
1461         return BottomLevelAccelerationStructureKHR::getRequiredAllocationCount();
1462 }
1463
1464 void BottomLevelAccelerationStructure::createAndBuild (const DeviceInterface&   vk,
1465                                                                                                            const VkDevice                       device,
1466                                                                                                            const VkCommandBuffer        cmdBuffer,
1467                                                                                                            Allocator&                           allocator,
1468                                                                                                            VkDeviceAddress                      deviceAddress)
1469 {
1470         create(vk, device, allocator, 0u, deviceAddress);
1471         build(vk, device, cmdBuffer);
1472 }
1473
1474 void BottomLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&                                vk,
1475                                                                                                                   const VkDevice                                                device,
1476                                                                                                                   const VkCommandBuffer                                 cmdBuffer,
1477                                                                                                                   Allocator&                                                    allocator,
1478                                                                                                                   BottomLevelAccelerationStructure*             accelerationStructure,
1479                                                                                                                   VkDeviceSize                                                  compactCopySize,
1480                                                                                                                   VkDeviceAddress                                               deviceAddress)
1481 {
1482         DE_ASSERT(accelerationStructure != NULL);
1483         VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureSize();
1484         DE_ASSERT(copiedSize != 0u);
1485
1486         create(vk, device, allocator, copiedSize, deviceAddress);
1487         copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1488 }
1489
1490 void BottomLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface& vk,
1491                                                                                                                                  const VkDevice                                                         device,
1492                                                                                                                                  const VkCommandBuffer                                          cmdBuffer,
1493                                                                                                                                  Allocator&                                                                     allocator,
1494                                                                                                                                  SerialStorage*                                                         storage,
1495                                                                                                                                  VkDeviceAddress                                                        deviceAddress )
1496 {
1497         DE_ASSERT(storage != NULL);
1498         DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1499         create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1500         deserialize(vk, device, cmdBuffer, storage);
1501 }
1502
1503 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure ()
1504 {
1505         return de::MovePtr<BottomLevelAccelerationStructure>(new BottomLevelAccelerationStructureKHR);
1506 }
1507
1508 TopLevelAccelerationStructure::~TopLevelAccelerationStructure ()
1509 {
1510 }
1511
1512 TopLevelAccelerationStructure::TopLevelAccelerationStructure ()
1513         : m_structureSize               (0u)
1514         , m_updateScratchSize   (0u)
1515         , m_buildScratchSize    (0u)
1516 {
1517 }
1518
1519 void TopLevelAccelerationStructure::setInstanceCount (const size_t instanceCount)
1520 {
1521         m_bottomLevelInstances.reserve(instanceCount);
1522         m_instanceData.reserve(instanceCount);
1523 }
1524
1525 void TopLevelAccelerationStructure::addInstance (de::SharedPtr<BottomLevelAccelerationStructure>        bottomLevelStructure,
1526                                                                                                  const VkTransformMatrixKHR&                                            matrix,
1527                                                                                                  deUint32                                                                                       instanceCustomIndex,
1528                                                                                                  deUint32                                                                                       mask,
1529                                                                                                  deUint32                                                                                       instanceShaderBindingTableRecordOffset,
1530                                                                                                  VkGeometryInstanceFlagsKHR                                                     flags)
1531 {
1532         m_bottomLevelInstances.push_back(bottomLevelStructure);
1533         m_instanceData.push_back(InstanceData(matrix, instanceCustomIndex, mask, instanceShaderBindingTableRecordOffset, flags));
1534 }
1535
1536 VkDeviceSize TopLevelAccelerationStructure::getStructureSize () const
1537 {
1538         return m_structureSize;
1539 }
1540
1541 void TopLevelAccelerationStructure::createAndBuild (const DeviceInterface&      vk,
1542                                                                                                         const VkDevice                  device,
1543                                                                                                         const VkCommandBuffer   cmdBuffer,
1544                                                                                                         Allocator&                              allocator,
1545                                                                                                         VkDeviceAddress                 deviceAddress)
1546 {
1547         create(vk, device, allocator, 0u, deviceAddress);
1548         build(vk, device, cmdBuffer);
1549 }
1550
1551 void TopLevelAccelerationStructure::createAndCopyFrom (const DeviceInterface&                           vk,
1552                                                                                                            const VkDevice                                               device,
1553                                                                                                            const VkCommandBuffer                                cmdBuffer,
1554                                                                                                            Allocator&                                                   allocator,
1555                                                                                                            TopLevelAccelerationStructure*               accelerationStructure,
1556                                                                                                            VkDeviceSize                                                 compactCopySize,
1557                                                                                                            VkDeviceAddress                                              deviceAddress)
1558 {
1559         DE_ASSERT(accelerationStructure != NULL);
1560         VkDeviceSize copiedSize = compactCopySize > 0u ? compactCopySize : accelerationStructure->getStructureSize();
1561         DE_ASSERT(copiedSize != 0u);
1562
1563         create(vk, device, allocator, copiedSize, deviceAddress);
1564         copyFrom(vk, device, cmdBuffer, accelerationStructure, compactCopySize > 0u);
1565 }
1566
1567 void TopLevelAccelerationStructure::createAndDeserializeFrom (const DeviceInterface&                                    vk,
1568                                                                                                                           const VkDevice                                                        device,
1569                                                                                                                           const VkCommandBuffer                                         cmdBuffer,
1570                                                                                                                           Allocator&                                                            allocator,
1571                                                                                                                           SerialStorage*                                                        storage,
1572                                                                                                                           VkDeviceAddress                                                       deviceAddress)
1573 {
1574         DE_ASSERT(storage != NULL);
1575         DE_ASSERT(storage->getStorageSize() >= SerialStorage::SERIAL_STORAGE_SIZE_MIN);
1576         create(vk, device, allocator, storage->getDeserializedSize(), deviceAddress);
1577         if (storage->hasDeepFormat()) createAndDeserializeBottoms(vk, device, cmdBuffer, allocator, storage);
1578         deserialize(vk, device, cmdBuffer, storage);
1579 }
1580
1581 BufferWithMemory* createInstanceBuffer (const DeviceInterface&                                                                                  vk,
1582                                                                                 const VkDevice                                                                                                  device,
1583                                                                                 Allocator&                                                                                                              allocator,
1584                                                                                 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >   bottomLevelInstances,
1585                                                                                 std::vector<InstanceData>                                                                               instanceData)
1586 {
1587         DE_ASSERT(bottomLevelInstances.size() != 0);
1588         DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
1589         DE_UNREF(instanceData);
1590
1591         const VkDeviceSize                      bufferSizeBytes         = bottomLevelInstances.size() * sizeof(VkAccelerationStructureInstanceKHR);
1592         const VkBufferCreateInfo        bufferCreateInfo        = makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1593         try
1594         {
1595                 return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
1596         }
1597         catch (const tcu::NotSupportedError&)
1598         {
1599                 // retry without Cached flag
1600                 return new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
1601         }
1602 }
1603
1604 void updateSingleInstance (const DeviceInterface&                                       vk,
1605                                                    const VkDevice                                                       device,
1606                                                    const BottomLevelAccelerationStructure&      bottomLevelAccelerationStructure,
1607                                                    const InstanceData&                                          instanceData,
1608                                                    deUint8*                                                                     bufferLocation,
1609                                                    VkAccelerationStructureBuildTypeKHR          buildType,
1610                                                    bool                                                                         inactiveInstances)
1611 {
1612         const VkAccelerationStructureKHR accelerationStructureKHR = *bottomLevelAccelerationStructure.getPtr();
1613
1614         // This part needs to be fixed once a new version of the VkAccelerationStructureInstanceKHR will be added to vkStructTypes.inl
1615         VkDeviceAddress accelerationStructureAddress;
1616         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1617         {
1618                 VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
1619                 {
1620                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,       // VkStructureType                              sType;
1621                         DE_NULL,                                                                                                                        // const void*                                  pNext;
1622                         accelerationStructureKHR                                                                                        // VkAccelerationStructureKHR   accelerationStructure;
1623                 };
1624                 accelerationStructureAddress = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
1625         }
1626
1627         deUint64 structureReference;
1628         if (inactiveInstances)
1629         {
1630                 // Instances will be marked inactive by making their references VK_NULL_HANDLE or having address zero.
1631                 structureReference = 0ull;
1632         }
1633         else
1634         {
1635                 structureReference      = (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1636                                                         ? deUint64(accelerationStructureAddress)
1637                                                         : deUint64(accelerationStructureKHR.getInternal());
1638         }
1639
1640         VkAccelerationStructureInstanceKHR      accelerationStructureInstanceKHR = makeVkAccelerationStructureInstanceKHR
1641         (
1642                 instanceData.matrix,                                                                    //  VkTransformMatrixKHR                transform;
1643                 instanceData.instanceCustomIndex,                                               //  deUint32                                    instanceCustomIndex:24;
1644                 instanceData.mask,                                                                              //  deUint32                                    mask:8;
1645                 instanceData.instanceShaderBindingTableRecordOffset,    //  deUint32                                    instanceShaderBindingTableRecordOffset:24;
1646                 instanceData.flags,                                                                             //  VkGeometryInstanceFlagsKHR  flags:8;
1647                 structureReference                                                                              //  deUint64                                    accelerationStructureReference;
1648         );
1649
1650         deMemcpy(bufferLocation, &accelerationStructureInstanceKHR, sizeof(VkAccelerationStructureInstanceKHR));
1651 }
1652
1653 void updateInstanceBuffer (const DeviceInterface&                                                                                               vk,
1654                                                    const VkDevice                                                                                                               device,
1655                                                    const std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>&  bottomLevelInstances,
1656                                                    const std::vector<InstanceData>&                                                                             instanceData,
1657                                                    const BufferWithMemory*                                                                                              instanceBuffer,
1658                                                    VkAccelerationStructureBuildTypeKHR                                                                  buildType,
1659                                                    bool                                                                                                                                 inactiveInstances)
1660 {
1661         DE_ASSERT(bottomLevelInstances.size() != 0);
1662         DE_ASSERT(bottomLevelInstances.size() == instanceData.size());
1663
1664         auto&                   instancesAlloc          = instanceBuffer->getAllocation();
1665         auto                    bufferStart                     = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
1666         VkDeviceSize    bufferOffset            = 0ull;
1667
1668         for (size_t instanceNdx = 0; instanceNdx < bottomLevelInstances.size(); ++instanceNdx)
1669         {
1670                 const auto& blas = *bottomLevelInstances[instanceNdx];
1671                 updateSingleInstance(vk, device, blas, instanceData[instanceNdx], bufferStart + bufferOffset, buildType, inactiveInstances);
1672                 bufferOffset += sizeof(VkAccelerationStructureInstanceKHR);
1673         }
1674
1675         flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
1676 }
1677
1678 class TopLevelAccelerationStructureKHR : public TopLevelAccelerationStructure
1679 {
1680 public:
1681         static deUint32                                                                                 getRequiredAllocationCount                                                      (void);
1682
1683                                                                                                                         TopLevelAccelerationStructureKHR                                        ();
1684                                                                                                                         TopLevelAccelerationStructureKHR                                        (const TopLevelAccelerationStructureKHR&                other) = delete;
1685         virtual                                                                                                 ~TopLevelAccelerationStructureKHR                                       ();
1686
1687         void                                                                                                    setBuildType                                                                            (const VkAccelerationStructureBuildTypeKHR              buildType) override;
1688         void                                                                                                    setCreateFlags                                                                          (const VkAccelerationStructureCreateFlagsKHR    createFlags) override;
1689         void                                                                                                    setCreateGeneric                                                                        (bool                                                                                   createGeneric) override;
1690         void                                                                                                    setBuildFlags                                                                           (const VkBuildAccelerationStructureFlagsKHR             buildFlags) override;
1691         void                                                                                                    setBuildWithoutPrimitives                                                       (bool                                                                                   buildWithoutPrimitives) override;
1692         void                                                                                                    setInactiveInstances                                                            (bool                                                                                   inactiveInstances) override;
1693         void                                                                                                    setDeferredOperation                                                            (const bool                                                                             deferredOperation,
1694                                                                                                                                                                                                                                  const deUint32                                                                 workerThreadCount) override;
1695         void                                                                                                    setUseArrayOfPointers                                                           (const bool                                                                             useArrayOfPointers) override;
1696         void                                                                                                    setIndirectBuildParameters                                                      (const VkBuffer                                                                 indirectBuffer,
1697                                                                                                                                                                                                                                  const VkDeviceSize                                                             indirectBufferOffset,
1698                                                                                                                                                                                                                                  const deUint32                                                                 indirectBufferStride) override;
1699         void                                                                                                    setUsePPGeometries                                                                      (const bool                                                                             usePPGeometries) override;
1700         VkBuildAccelerationStructureFlagsKHR                                    getBuildFlags                                                                           () const override;
1701
1702         void                                                                                                    create                                                                                          (const DeviceInterface&                                                 vk,
1703                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1704                                                                                                                                                                                                                                  Allocator&                                                                             allocator,
1705                                                                                                                                                                                                                                  VkDeviceSize                                                                   structureSize,
1706                                                                                                                                                                                                                                  VkDeviceAddress                                                                deviceAddress = 0u ) override;
1707         void                                                                                                    build                                                                                           (const DeviceInterface&                                                 vk,
1708                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1709                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer) override;
1710         void                                                                                                    copyFrom                                                                                        (const DeviceInterface&                                                 vk,
1711                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1712                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
1713                                                                                                                                                                                                                                  TopLevelAccelerationStructure*                                 accelerationStructure,
1714                                                                                                                                                                                                                                  bool                                                                                   compactCopy) override;
1715         void                                                                                                    serialize                                                                                       (const DeviceInterface&                                                 vk,
1716                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1717                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
1718                                                                                                                                                                                                                                  SerialStorage*                                                                 storage) override;
1719         void                                                                                                    deserialize                                                                                     (const DeviceInterface&                                                 vk,
1720                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1721                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
1722                                                                                                                                                                                                                                  SerialStorage*                                                                 storage) override;
1723
1724         std::vector<VkDeviceSize>                                                               getSerializingSizes                                                                     (const DeviceInterface&                                                 vk,
1725                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1726                                                                                                                                                                                                                                  const VkQueue                                                                  queue,
1727                                                                                                                                                                                                                                  const deUint32                                                                 queueFamilyIndex) override;
1728
1729         std::vector<deUint64>                                                                   getSerializingAddresses                                                         (const DeviceInterface&                                                 vk,
1730                                                                                                                                                                                                                                  const VkDevice                                                                 device) const override;
1731
1732
1733         const VkAccelerationStructureKHR*                                               getPtr                                                                                          (void) const override;
1734
1735         void                                                                                                    updateInstanceMatrix                                                            (const DeviceInterface&                                                 vk,
1736                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1737                                                                                                                                                                                                                                  size_t                                                                                 instanceIndex,
1738                                                                                                                                                                                                                                  const VkTransformMatrixKHR&                                    matrix) override;
1739
1740 protected:
1741         VkAccelerationStructureBuildTypeKHR                                             m_buildType;
1742         VkAccelerationStructureCreateFlagsKHR                                   m_createFlags;
1743         bool                                                                                                    m_createGeneric;
1744         VkBuildAccelerationStructureFlagsKHR                                    m_buildFlags;
1745         bool                                                                                                    m_buildWithoutPrimitives;
1746         bool                                                                                                    m_inactiveInstances;
1747         bool                                                                                                    m_deferredOperation;
1748         deUint32                                                                                                m_workerThreadCount;
1749         bool                                                                                                    m_useArrayOfPointers;
1750         de::MovePtr<BufferWithMemory>                                                   m_accelerationStructureBuffer;
1751         de::MovePtr<BufferWithMemory>                                                   m_instanceBuffer;
1752         de::MovePtr<BufferWithMemory>                                                   m_instanceAddressBuffer;
1753         de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
1754         std::vector<deUint8>                                                                    m_hostScratchBuffer;
1755         Move<VkAccelerationStructureKHR>                                                m_accelerationStructureKHR;
1756         VkBuffer                                                                                                m_indirectBuffer;
1757         VkDeviceSize                                                                                    m_indirectBufferOffset;
1758         deUint32                                                                                                m_indirectBufferStride;
1759         bool                                                                                                    m_usePPGeometries;
1760
1761
1762         void                                                                                                    prepareInstances                                                                        (const DeviceInterface&                                                 vk,
1763                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1764                                                                                                                                                                                                                                  VkAccelerationStructureGeometryKHR&                    accelerationStructureGeometryKHR,
1765                                                                                                                                                                                                                                  std::vector<deUint32>&                                                 maxPrimitiveCounts);
1766
1767         void                                                                                                    serializeBottoms                                                                        (const DeviceInterface&                                                 vk,
1768                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1769                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
1770                                                                                                                                                                                                                                  SerialStorage*                                                                 storage,
1771                                                                                                                                                                                                                                  VkDeferredOperationKHR                                                 deferredOperation);
1772
1773         void                                                                                                    createAndDeserializeBottoms                                                     (const DeviceInterface&                                                 vk,
1774                                                                                                                                                                                                                                  const VkDevice                                                                 device,
1775                                                                                                                                                                                                                                  const VkCommandBuffer                                                  cmdBuffer,
1776                                                                                                                                                                                                                                  Allocator&                                                                             allocator,
1777                                                                                                                                                                                                                                  SerialStorage*                                                                 storage) override;
1778 };
1779
1780 deUint32 TopLevelAccelerationStructureKHR::getRequiredAllocationCount (void)
1781 {
1782         /*
1783                 de::MovePtr<BufferWithMemory>                                                   m_instanceBuffer;
1784                 de::MovePtr<Allocation>                                                                 m_accelerationStructureAlloc;
1785                 de::MovePtr<BufferWithMemory>                                                   m_deviceScratchBuffer;
1786         */
1787         return 3u;
1788 }
1789
1790 TopLevelAccelerationStructureKHR::TopLevelAccelerationStructureKHR ()
1791         : TopLevelAccelerationStructure ()
1792         , m_buildType                                   (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1793         , m_createFlags                                 (0u)
1794         , m_createGeneric                               (false)
1795         , m_buildFlags                                  (0u)
1796         , m_buildWithoutPrimitives              (false)
1797         , m_inactiveInstances                   (false)
1798         , m_deferredOperation                   (false)
1799         , m_workerThreadCount                   (0)
1800         , m_useArrayOfPointers                  (false)
1801         , m_accelerationStructureBuffer (DE_NULL)
1802         , m_instanceBuffer                              (DE_NULL)
1803         , m_instanceAddressBuffer               (DE_NULL)
1804         , m_deviceScratchBuffer                 (DE_NULL)
1805         , m_accelerationStructureKHR    ()
1806         , m_indirectBuffer                              (DE_NULL)
1807         , m_indirectBufferOffset                (0)
1808         , m_indirectBufferStride                (0)
1809         , m_usePPGeometries                             (false)
1810 {
1811 }
1812
1813 TopLevelAccelerationStructureKHR::~TopLevelAccelerationStructureKHR ()
1814 {
1815 }
1816
1817 void TopLevelAccelerationStructureKHR::setBuildType (const VkAccelerationStructureBuildTypeKHR  buildType)
1818 {
1819         m_buildType = buildType;
1820 }
1821
1822 void TopLevelAccelerationStructureKHR::setCreateFlags (const VkAccelerationStructureCreateFlagsKHR      createFlags)
1823 {
1824         m_createFlags = createFlags;
1825 }
1826
1827 void TopLevelAccelerationStructureKHR::setCreateGeneric (bool createGeneric)
1828 {
1829         m_createGeneric = createGeneric;
1830 }
1831
1832 void TopLevelAccelerationStructureKHR::setInactiveInstances (bool inactiveInstances)
1833 {
1834         m_inactiveInstances = inactiveInstances;
1835 }
1836
1837 void TopLevelAccelerationStructureKHR::setBuildFlags (const VkBuildAccelerationStructureFlagsKHR        buildFlags)
1838 {
1839         m_buildFlags = buildFlags;
1840 }
1841
1842 void TopLevelAccelerationStructureKHR::setBuildWithoutPrimitives (bool buildWithoutPrimitives)
1843 {
1844         m_buildWithoutPrimitives = buildWithoutPrimitives;
1845 }
1846
1847 void TopLevelAccelerationStructureKHR::setDeferredOperation (const bool         deferredOperation,
1848                                                                                                                          const deUint32 workerThreadCount)
1849 {
1850         m_deferredOperation = deferredOperation;
1851         m_workerThreadCount = workerThreadCount;
1852 }
1853
1854 void TopLevelAccelerationStructureKHR::setUseArrayOfPointers (const bool        useArrayOfPointers)
1855 {
1856         m_useArrayOfPointers = useArrayOfPointers;
1857 }
1858
1859 void TopLevelAccelerationStructureKHR::setUsePPGeometries (const bool usePPGeometries)
1860 {
1861         m_usePPGeometries = usePPGeometries;
1862 }
1863
1864 void TopLevelAccelerationStructureKHR::setIndirectBuildParameters (const VkBuffer               indirectBuffer,
1865                                                                                                                                    const VkDeviceSize   indirectBufferOffset,
1866                                                                                                                                    const deUint32               indirectBufferStride)
1867 {
1868         m_indirectBuffer                = indirectBuffer;
1869         m_indirectBufferOffset  = indirectBufferOffset;
1870         m_indirectBufferStride  = indirectBufferStride;
1871 }
1872
1873 VkBuildAccelerationStructureFlagsKHR TopLevelAccelerationStructureKHR::getBuildFlags () const
1874 {
1875         return m_buildFlags;
1876 }
1877
1878 void TopLevelAccelerationStructureKHR::create (const DeviceInterface&                           vk,
1879                                                                                            const VkDevice                                               device,
1880                                                                                            Allocator&                                                   allocator,
1881                                                                                            VkDeviceSize                                                 structureSize,
1882                                                                                            VkDeviceAddress                                              deviceAddress)
1883 {
1884         // AS may be built from geometries using vkCmdBuildAccelerationStructureKHR / vkBuildAccelerationStructureKHR
1885         // or may be copied/compacted/deserialized from other AS ( in this case AS does not need geometries, but it needs to know its size before creation ).
1886         DE_ASSERT(!m_bottomLevelInstances.empty() != !(structureSize == 0)); // logical xor
1887
1888         if (structureSize == 0)
1889         {
1890                 VkAccelerationStructureGeometryKHR              accelerationStructureGeometryKHR;
1891                 const auto                                                              accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
1892                 std::vector<deUint32>                                   maxPrimitiveCounts;
1893                 prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
1894
1895                 VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR           =
1896                 {
1897                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                                               //  VkStructureType                                                                             sType;
1898                         DE_NULL,                                                                                                                                                                //  const void*                                                                                 pNext;
1899                         VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                                                                                   //  VkAccelerationStructureTypeKHR                                              type;
1900                         m_buildFlags,                                                                                                                                                   //  VkBuildAccelerationStructureFlagsKHR                                flags;
1901                         VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                                                 //  VkBuildAccelerationStructureModeKHR                                 mode;
1902                         DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
1903                         DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
1904                         1u,                                                                                                                                                                             //  deUint32                                                                                    geometryCount;
1905                         (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),                                              //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
1906                         (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),                                   //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
1907                         makeDeviceOrHostAddressKHR(DE_NULL)                                                                                                             //  VkDeviceOrHostAddressKHR                                                    scratchData;
1908                 };
1909
1910                 VkAccelerationStructureBuildSizesInfoKHR        sizeInfo =
1911                 {
1912                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,  //  VkStructureType     sType;
1913                         DE_NULL,                                                                                                                //  const void*         pNext;
1914                         0,                                                                                                                              //  VkDeviceSize        accelerationStructureSize;
1915                         0,                                                                                                                              //  VkDeviceSize        updateScratchSize;
1916                         0                                                                                                                               //  VkDeviceSize        buildScratchSize;
1917                 };
1918
1919                 vk.getAccelerationStructureBuildSizesKHR(device, m_buildType, &accelerationStructureBuildGeometryInfoKHR, maxPrimitiveCounts.data(), &sizeInfo);
1920
1921                 m_structureSize         = sizeInfo.accelerationStructureSize;
1922                 m_updateScratchSize     = sizeInfo.updateScratchSize;
1923                 m_buildScratchSize      = sizeInfo.buildScratchSize;
1924         }
1925         else
1926         {
1927                 m_structureSize         = structureSize;
1928                 m_updateScratchSize     = 0u;
1929                 m_buildScratchSize      = 0u;
1930         }
1931
1932         {
1933                 const VkBufferCreateInfo                bufferCreateInfo = makeBufferCreateInfo(m_structureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1934                 try
1935                 {
1936                         m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::Cached | MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1937                 }
1938                 catch (const tcu::NotSupportedError&)
1939                 {
1940                         // retry without Cached flag
1941                         m_accelerationStructureBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1942                 }
1943         }
1944
1945         {
1946                 const VkAccelerationStructureTypeKHR            structureType                                           = (m_createGeneric
1947                                                                                                                                                                                    ? VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR
1948                                                                                                                                                                                    : VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
1949                 const VkAccelerationStructureCreateInfoKHR      accelerationStructureCreateInfoKHR      =
1950                 {
1951                         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,       //  VkStructureType                                                                                     sType;
1952                         DE_NULL,                                                                                                        //  const void*                                                                                         pNext;
1953                         m_createFlags,                                                                                          //  VkAccelerationStructureCreateFlagsKHR                                       createFlags;
1954                         m_accelerationStructureBuffer->get(),                                           //  VkBuffer                                                                                            buffer;
1955                         0u,                                                                                                                     //  VkDeviceSize                                                                                        offset;
1956                         m_structureSize,                                                                                        //  VkDeviceSize                                                                                        size;
1957                         structureType,                                                                                          //  VkAccelerationStructureTypeKHR                                                      type;
1958                         deviceAddress                                                                                           //  VkDeviceAddress                                                                                     deviceAddress;
1959                 };
1960
1961                 m_accelerationStructureKHR      = createAccelerationStructureKHR(vk, device, &accelerationStructureCreateInfoKHR, DE_NULL);
1962         }
1963
1964         if (m_buildScratchSize > 0u)
1965         {
1966                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1967                 {
1968                         const VkBufferCreateInfo                bufferCreateInfo        = makeBufferCreateInfo(m_buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1969                         m_deviceScratchBuffer                                                           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1970                 }
1971                 else
1972                 {
1973                         m_hostScratchBuffer.resize(static_cast<size_t>(m_buildScratchSize));
1974                 }
1975         }
1976
1977         if (m_useArrayOfPointers)
1978         {
1979                 const size_t                            pointerSize = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) ? sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress) : sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
1980                 const VkBufferCreateInfo        bufferCreateInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(m_bottomLevelInstances.size() * pointerSize), VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
1981                 m_instanceAddressBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress));
1982         }
1983
1984         if(!m_bottomLevelInstances.empty())
1985                 m_instanceBuffer = de::MovePtr<BufferWithMemory>(createInstanceBuffer(vk, device, allocator, m_bottomLevelInstances, m_instanceData));
1986 }
1987
1988 void TopLevelAccelerationStructureKHR::updateInstanceMatrix (const DeviceInterface& vk, const VkDevice device, size_t instanceIndex, const VkTransformMatrixKHR& matrix)
1989 {
1990         DE_ASSERT(m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR);
1991         DE_ASSERT(instanceIndex < m_bottomLevelInstances.size());
1992         DE_ASSERT(instanceIndex < m_instanceData.size());
1993
1994         const auto&             blas                    = *m_bottomLevelInstances[instanceIndex];
1995         auto&                   instanceData    = m_instanceData[instanceIndex];
1996         auto&                   instancesAlloc  = m_instanceBuffer->getAllocation();
1997         auto                    bufferStart             = reinterpret_cast<deUint8*>(instancesAlloc.getHostPtr());
1998         VkDeviceSize    bufferOffset    = sizeof(VkAccelerationStructureInstanceKHR) * instanceIndex;
1999
2000         instanceData.matrix = matrix;
2001         updateSingleInstance(vk, device, blas, instanceData, bufferStart + bufferOffset, m_buildType, m_inactiveInstances);
2002         flushMappedMemoryRange(vk, device, instancesAlloc.getMemory(), instancesAlloc.getOffset(), VK_WHOLE_SIZE);
2003 }
2004
2005 void TopLevelAccelerationStructureKHR::build (const DeviceInterface&    vk,
2006                                                                                           const VkDevice                        device,
2007                                                                                           const VkCommandBuffer         cmdBuffer)
2008 {
2009         DE_ASSERT(!m_bottomLevelInstances.empty());
2010         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2011         DE_ASSERT(m_buildScratchSize != 0);
2012
2013         updateInstanceBuffer(vk, device, m_bottomLevelInstances, m_instanceData, m_instanceBuffer.get(), m_buildType, m_inactiveInstances);
2014
2015         VkAccelerationStructureGeometryKHR              accelerationStructureGeometryKHR;
2016         const auto                                                              accelerationStructureGeometryKHRPtr = &accelerationStructureGeometryKHR;
2017         std::vector<deUint32>                                   maxPrimitiveCounts;
2018         prepareInstances(vk, device, accelerationStructureGeometryKHR, maxPrimitiveCounts);
2019
2020         VkDeviceOrHostAddressKHR                                scratchData                                                                             = (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2021                                                                                                                                                                                         ? makeDeviceOrHostAddressKHR(vk, device, m_deviceScratchBuffer->get(), 0)
2022                                                                                                                                                                                         : makeDeviceOrHostAddressKHR(m_hostScratchBuffer.data());
2023
2024         VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfoKHR           =
2025         {
2026                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,                                               //  VkStructureType                                                                             sType;
2027                 DE_NULL,                                                                                                                                                                //  const void*                                                                                 pNext;
2028                 VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,                                                                                   //  VkAccelerationStructureTypeKHR                                              type;
2029                 m_buildFlags,                                                                                                                                                   //  VkBuildAccelerationStructureFlagsKHR                                flags;
2030                 VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,                                                                                 //  VkBuildAccelerationStructureModeKHR                                 mode;
2031                 DE_NULL,                                                                                                                                                                //  VkAccelerationStructureKHR                                                  srcAccelerationStructure;
2032                 m_accelerationStructureKHR.get(),                                                                                                               //  VkAccelerationStructureKHR                                                  dstAccelerationStructure;
2033                 1u,                                                                                                                                                                             //  deUint32                                                                                    geometryCount;
2034                 (m_usePPGeometries ? nullptr : &accelerationStructureGeometryKHR),                                              //  const VkAccelerationStructureGeometryKHR*                   pGeometries;
2035                 (m_usePPGeometries ? &accelerationStructureGeometryKHRPtr : nullptr),                                   //  const VkAccelerationStructureGeometryKHR* const*    ppGeometries;
2036                 scratchData                                                                                                                                                             //  VkDeviceOrHostAddressKHR                                                    scratchData;
2037         };
2038
2039         const deUint32 primitiveCount = (m_buildWithoutPrimitives ? 0u : static_cast<deUint32>(m_bottomLevelInstances.size()));
2040
2041         VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfoKHR =
2042         {
2043                 primitiveCount, //  deUint32    primitiveCount;
2044                 0,                              //  deUint32    primitiveOffset;
2045                 0,                              //  deUint32    firstVertex;
2046                 0                               //  deUint32    transformOffset;
2047         };
2048         VkAccelerationStructureBuildRangeInfoKHR* accelerationStructureBuildRangeInfoKHRPtr     = &accelerationStructureBuildRangeInfoKHR;
2049
2050         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2051         {
2052                 if (m_indirectBuffer == DE_NULL)
2053                         vk.cmdBuildAccelerationStructuresKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2054                 else
2055                 {
2056                         VkDeviceAddress indirectDeviceAddress = getBufferDeviceAddress(vk, device, m_indirectBuffer, m_indirectBufferOffset);
2057                         deUint32*               pMaxPrimitiveCounts = maxPrimitiveCounts.data();
2058                         vk.cmdBuildAccelerationStructuresIndirectKHR(cmdBuffer, 1u, &accelerationStructureBuildGeometryInfoKHR, &indirectDeviceAddress, &m_indirectBufferStride, &pMaxPrimitiveCounts);
2059                 }
2060         }
2061         else if (!m_deferredOperation)
2062         {
2063                 VK_CHECK(vk.buildAccelerationStructuresKHR(device, DE_NULL, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr));
2064         }
2065         else
2066         {
2067                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2068                 const auto deferredOperation    = deferredOperationPtr.get();
2069
2070                 VkResult result = vk.buildAccelerationStructuresKHR(device, deferredOperation, 1u, &accelerationStructureBuildGeometryInfoKHR, (const VkAccelerationStructureBuildRangeInfoKHR**)&accelerationStructureBuildRangeInfoKHRPtr);
2071
2072                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2073
2074                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2075
2076                 accelerationStructureBuildGeometryInfoKHR.pNext = DE_NULL;
2077         }
2078
2079         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2080         {
2081                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2082                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
2083
2084                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2085         }
2086 }
2087
2088 void TopLevelAccelerationStructureKHR::copyFrom (const DeviceInterface&                         vk,
2089                                                                                                  const VkDevice                                         device,
2090                                                                                                  const VkCommandBuffer                          cmdBuffer,
2091                                                                                                  TopLevelAccelerationStructure*         accelerationStructure,
2092                                                                                                  bool                                                           compactCopy)
2093 {
2094         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2095         DE_ASSERT(accelerationStructure != DE_NULL);
2096
2097         VkCopyAccelerationStructureInfoKHR copyAccelerationStructureInfo =
2098         {
2099                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR,                                                                                                                 // VkStructureType                                              sType;
2100                 DE_NULL,                                                                                                                                                                                                                // const void*                                                  pNext;
2101                 *(accelerationStructure->getPtr()),                                                                                                                                                             // VkAccelerationStructureKHR                   src;
2102                 *(getPtr()),                                                                                                                                                                                                    // VkAccelerationStructureKHR                   dst;
2103                 compactCopy ? VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR : VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR   // VkCopyAccelerationStructureModeKHR   mode;
2104         };
2105
2106         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2107         {
2108                 vk.cmdCopyAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
2109         }
2110         else if (!m_deferredOperation)
2111         {
2112                 VK_CHECK(vk.copyAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2113         }
2114         else
2115         {
2116                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2117                 const auto deferredOperation    = deferredOperationPtr.get();
2118
2119                 VkResult result = vk.copyAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2120
2121                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2122
2123                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2124         }
2125
2126         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2127         {
2128                 const VkAccessFlags             accessMasks     = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2129                 const VkMemoryBarrier   memBarrier      = makeMemoryBarrier(accessMasks, accessMasks);
2130
2131                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2132         }
2133
2134 }
2135
2136 void TopLevelAccelerationStructureKHR::serialize (const DeviceInterface&        vk,
2137                                                                                                   const VkDevice                        device,
2138                                                                                                   const VkCommandBuffer         cmdBuffer,
2139                                                                                                   SerialStorage*                        storage)
2140 {
2141         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2142         DE_ASSERT(storage != DE_NULL);
2143
2144         const VkCopyAccelerationStructureToMemoryInfoKHR        copyAccelerationStructureInfo   =
2145         {
2146                 VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR,       // VkStructureType                                              sType;
2147                 DE_NULL,                                                                                                                        // const void*                                                  pNext;
2148                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                   src;
2149                 storage->getAddress(vk, device, m_buildType),                                           // VkDeviceOrHostAddressKHR                             dst;
2150                 VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR                                       // VkCopyAccelerationStructureModeKHR   mode;
2151         };
2152
2153         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2154         {
2155                 vk.cmdCopyAccelerationStructureToMemoryKHR(cmdBuffer, &copyAccelerationStructureInfo);
2156                 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
2157         }
2158         else if (!m_deferredOperation)
2159         {
2160                 VK_CHECK(vk.copyAccelerationStructureToMemoryKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2161                 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, DE_NULL);
2162         }
2163         else
2164         {
2165                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2166                 const auto deferredOperation    = deferredOperationPtr.get();
2167
2168                 const VkResult result = vk.copyAccelerationStructureToMemoryKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2169
2170                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2171                 if (storage->hasDeepFormat()) serializeBottoms(vk, device, cmdBuffer, storage, deferredOperation);
2172
2173                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2174         }
2175 }
2176
2177 void TopLevelAccelerationStructureKHR::deserialize (const DeviceInterface&      vk,
2178                                                                                                         const VkDevice                  device,
2179                                                                                                         const VkCommandBuffer   cmdBuffer,
2180                                                                                                         SerialStorage*                  storage)
2181 {
2182         DE_ASSERT(m_accelerationStructureKHR.get() != DE_NULL);
2183         DE_ASSERT(storage != DE_NULL);
2184
2185         const VkCopyMemoryToAccelerationStructureInfoKHR        copyAccelerationStructureInfo   =
2186         {
2187                 VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR,       // VkStructureType                                                      sType;
2188                 DE_NULL,                                                                                                                        // const void*                                                          pNext;
2189                 storage->getAddressConst(vk, device, m_buildType),                                      // VkDeviceOrHostAddressConstKHR                        src;
2190                 *(getPtr()),                                                                                                            // VkAccelerationStructureKHR                           dst;
2191                 VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR                                     // VkCopyAccelerationStructureModeKHR           mode;
2192         };
2193
2194         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2195         {
2196                 vk.cmdCopyMemoryToAccelerationStructureKHR(cmdBuffer, &copyAccelerationStructureInfo);
2197         }
2198         else if (!m_deferredOperation)
2199         {
2200                 VK_CHECK(vk.copyMemoryToAccelerationStructureKHR(device, DE_NULL, &copyAccelerationStructureInfo));
2201         }
2202         else
2203         {
2204                 const auto deferredOperationPtr = createDeferredOperationKHR(vk, device);
2205                 const auto deferredOperation    = deferredOperationPtr.get();
2206
2207                 const VkResult result = vk.copyMemoryToAccelerationStructureKHR(device, deferredOperation, &copyAccelerationStructureInfo);
2208
2209                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2210
2211                 finishDeferredOperation(vk, device, deferredOperation, m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2212         }
2213
2214         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2215         {
2216                 const VkAccessFlags             accessMasks = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
2217                 const VkMemoryBarrier   memBarrier = makeMemoryBarrier(accessMasks, accessMasks);
2218
2219                 cmdPipelineMemoryBarrier(vk, cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, &memBarrier);
2220         }
2221 }
2222
2223 void TopLevelAccelerationStructureKHR::serializeBottoms (const DeviceInterface& vk,
2224                                                                                                                  const VkDevice                 device,
2225                                                                                                                  const VkCommandBuffer  cmdBuffer,
2226                                                                                                                  SerialStorage*                 storage,
2227                                                                                                                  VkDeferredOperationKHR deferredOperation)
2228 {
2229         DE_UNREF(deferredOperation);
2230         DE_ASSERT(storage->hasDeepFormat());
2231
2232         const std::vector<deUint64>&    addresses               = storage->getSerialInfo().addresses();
2233         const std::size_t                               cbottoms                = m_bottomLevelInstances.size();
2234
2235         deUint32                                                storageIndex    = 0;
2236         std::vector<deUint64>                   matches;
2237
2238         for (std::size_t i = 0; i < cbottoms; ++i)
2239         {
2240                 const deUint64& lookAddr        = addresses[i+1];
2241                 auto                    end                     = matches.end();
2242                 auto                    match           = std::find_if(matches.begin(), end, [&](const deUint64& item){ return item == lookAddr; });
2243                 if (match == end)
2244                 {
2245                         matches.emplace_back(lookAddr);
2246                         m_bottomLevelInstances[i].get()->serialize(vk, device, cmdBuffer, storage->getBottomStorage(storageIndex).get());
2247                         storageIndex += 1;
2248                 }
2249         }
2250 }
2251
2252 void TopLevelAccelerationStructureKHR::createAndDeserializeBottoms (const DeviceInterface&      vk,
2253                                                                                                                                         const VkDevice                  device,
2254                                                                                                                                         const VkCommandBuffer   cmdBuffer,
2255                                                                                                                                         Allocator&                              allocator,
2256                                                                                                                                         SerialStorage*                  storage)
2257 {
2258         DE_ASSERT(storage->hasDeepFormat());
2259         DE_ASSERT(m_bottomLevelInstances.size() == 0);
2260
2261         const std::vector<deUint64>&                                    addresses               = storage->getSerialInfo().addresses();
2262         const std::size_t                                                               cbottoms                = addresses.size() - 1;
2263         deUint32                                                                                storageIndex    = 0;
2264         std::vector<std::pair<deUint64, std::size_t>>   matches;
2265
2266         for (std::size_t i = 0; i < cbottoms; ++i)
2267         {
2268                 const deUint64& lookAddr        = addresses[i+1];
2269                 auto                    end                     = matches.end();
2270                 auto                    match           = std::find_if(matches.begin(), end, [&](const std::pair<deUint64, deUint32>& item){ return item.first == lookAddr; });
2271                 if (match != end)
2272                 {
2273                         m_bottomLevelInstances .emplace_back(m_bottomLevelInstances[match->second]);
2274                 }
2275                 else
2276                 {
2277                         de::MovePtr<BottomLevelAccelerationStructure> blas = makeBottomLevelAccelerationStructure();
2278                         blas->createAndDeserializeFrom(vk, device, cmdBuffer, allocator, storage->getBottomStorage(storageIndex).get());
2279                         m_bottomLevelInstances.emplace_back(de::SharedPtr<BottomLevelAccelerationStructure>(blas.release()));
2280                         matches.emplace_back(lookAddr, i);
2281                         storageIndex += 1;
2282                 }
2283         }
2284
2285         std::vector<deUint64>                                           newAddresses    = getSerializingAddresses(vk, device);
2286         DE_ASSERT(addresses.size() == newAddresses.size());
2287
2288         SerialStorage::AccelerationStructureHeader* header                      = storage->getASHeader();
2289         DE_ASSERT(cbottoms ==header->handleCount);
2290
2291         // finally update bottom-level AS addresses before top-level AS deserialization
2292         for (std::size_t i = 0; i < cbottoms; ++i)
2293         {
2294                 header->handleArray[i] = newAddresses[i+1];
2295         }
2296 }
2297
2298 std::vector<VkDeviceSize> TopLevelAccelerationStructureKHR::getSerializingSizes (const DeviceInterface& vk,
2299                                                                                                                                                                  const VkDevice                 device,
2300                                                                                                                                                                  const VkQueue                  queue,
2301                                                                                                                                                                  const deUint32                 queueFamilyIndex)
2302 {
2303         const deUint32                                                  queryCount(deUint32(m_bottomLevelInstances.size()) + 1);
2304         std::vector<VkAccelerationStructureKHR> handles(queryCount);
2305         std::vector<VkDeviceSize>                               sizes(queryCount);
2306
2307         handles[0] = m_accelerationStructureKHR.get();
2308
2309         for (deUint32 h = 1; h < queryCount; ++h)
2310                 handles[h] = *m_bottomLevelInstances[h-1].get()->getPtr();
2311
2312         if (VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR == m_buildType)
2313                 queryAccelerationStructureSize(vk, device, DE_NULL, handles, m_buildType, DE_NULL, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
2314         else
2315         {
2316                 const Move<VkCommandPool>       cmdPool         = createCommandPool(vk, device, 0, queueFamilyIndex);
2317                 const Move<VkCommandBuffer>     cmdBuffer       = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2318                 const Move<VkQueryPool>         queryPool       = makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
2319
2320                 beginCommandBuffer(vk, *cmdBuffer);
2321                 queryAccelerationStructureSize(vk, device, *cmdBuffer, handles, m_buildType, *queryPool, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, sizes);
2322                 endCommandBuffer(vk, *cmdBuffer);
2323                 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
2324
2325                 VK_CHECK(vk.getQueryPoolResults(device, *queryPool, 0u, queryCount, queryCount * sizeof(VkDeviceSize), sizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2326         }
2327
2328         return sizes;
2329 }
2330
2331 std::vector<deUint64> TopLevelAccelerationStructureKHR::getSerializingAddresses (const DeviceInterface& vk, const VkDevice device) const
2332 {
2333         std::vector<deUint64> result(m_bottomLevelInstances.size() + 1);
2334
2335         VkAccelerationStructureDeviceAddressInfoKHR asDeviceAddressInfo =
2336         {
2337                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,       // VkStructureType                              sType;
2338                 DE_NULL,                                                                                                                        // const void*                                  pNext;
2339                 DE_NULL                                                                                                                         // VkAccelerationStructureKHR   accelerationStructure;
2340         };
2341
2342         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2343         {
2344                 asDeviceAddressInfo.accelerationStructure = m_accelerationStructureKHR.get();
2345                 result[0] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2346         }
2347         else
2348         {
2349                 result[0] = deUint64(getPtr()->getInternal());
2350         }
2351
2352         for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
2353         {
2354                 const BottomLevelAccelerationStructure&         bottomLevelAccelerationStructure        = *m_bottomLevelInstances[instanceNdx];
2355                 const VkAccelerationStructureKHR                        accelerationStructureKHR                        = *bottomLevelAccelerationStructure.getPtr();
2356
2357                 if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2358                 {
2359                         asDeviceAddressInfo.accelerationStructure = accelerationStructureKHR;
2360                         result[instanceNdx+1] = vk.getAccelerationStructureDeviceAddressKHR(device, &asDeviceAddressInfo);
2361                 }
2362                 else
2363                 {
2364                         result[instanceNdx+1] = deUint64(accelerationStructureKHR.getInternal());
2365                 }
2366         }
2367
2368         return result;
2369 }
2370
2371 const VkAccelerationStructureKHR* TopLevelAccelerationStructureKHR::getPtr (void) const
2372 {
2373         return &m_accelerationStructureKHR.get();
2374 }
2375
2376 void TopLevelAccelerationStructureKHR::prepareInstances (const DeviceInterface&                                                 vk,
2377                                                                                                                  const VkDevice                                                                 device,
2378                                                                                                                  VkAccelerationStructureGeometryKHR&                    accelerationStructureGeometryKHR,
2379                                                                                                                  std::vector<deUint32>&                                                 maxPrimitiveCounts)
2380 {
2381         maxPrimitiveCounts.resize(1);
2382         maxPrimitiveCounts[0] = static_cast<deUint32>(m_bottomLevelInstances.size());
2383
2384         VkDeviceOrHostAddressConstKHR                                                   instancesData;
2385         if (m_buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2386         {
2387                 if(m_instanceBuffer.get() != DE_NULL)
2388                 {
2389                         if (m_useArrayOfPointers)
2390                         {
2391                                 deUint8*                                                bufferStart                     = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
2392                                 VkDeviceSize                                    bufferOffset            = 0;
2393                                 VkDeviceOrHostAddressConstKHR   firstInstance           = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
2394                                 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
2395                                 {
2396                                         VkDeviceOrHostAddressConstKHR   currentInstance;
2397                                         currentInstance.deviceAddress   = firstInstance.deviceAddress + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);
2398
2399                                         deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress));
2400                                         bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::deviceAddress);
2401                                 }
2402                                 flushMappedMemoryRange(vk, device, m_instanceAddressBuffer->getAllocation().getMemory(), m_instanceAddressBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
2403
2404                                 instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceAddressBuffer->get(), 0);
2405                         }
2406                         else
2407                                 instancesData = makeDeviceOrHostAddressConstKHR(vk, device, m_instanceBuffer->get(), 0);
2408                 }
2409                 else
2410                         instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
2411         }
2412         else
2413         {
2414                 if (m_instanceBuffer.get() != DE_NULL)
2415                 {
2416                         if (m_useArrayOfPointers)
2417                         {
2418                                 deUint8*                                                bufferStart                     = static_cast<deUint8*>(m_instanceAddressBuffer->getAllocation().getHostPtr());
2419                                 VkDeviceSize                                    bufferOffset            = 0;
2420                                 for (size_t instanceNdx = 0; instanceNdx < m_bottomLevelInstances.size(); ++instanceNdx)
2421                                 {
2422                                         VkDeviceOrHostAddressConstKHR   currentInstance;
2423                                         currentInstance.hostAddress     = (deUint8*)m_instanceBuffer->getAllocation().getHostPtr() + instanceNdx * sizeof(VkAccelerationStructureInstanceKHR);
2424
2425                                         deMemcpy(&bufferStart[bufferOffset], &currentInstance, sizeof(VkDeviceOrHostAddressConstKHR::hostAddress));
2426                                         bufferOffset += sizeof(VkDeviceOrHostAddressConstKHR::hostAddress);
2427                                 }
2428                                 instancesData = makeDeviceOrHostAddressConstKHR(m_instanceAddressBuffer->getAllocation().getHostPtr());
2429                         }
2430                         else
2431                                 instancesData = makeDeviceOrHostAddressConstKHR(m_instanceBuffer->getAllocation().getHostPtr());
2432                 }
2433                 else
2434                         instancesData = makeDeviceOrHostAddressConstKHR(DE_NULL);
2435         }
2436
2437         VkAccelerationStructureGeometryInstancesDataKHR accelerationStructureGeometryInstancesDataKHR   =
2438         {
2439                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,   //  VkStructureType                                     sType;
2440                 DE_NULL,                                                                                                                                //  const void*                                         pNext;
2441                 (VkBool32)( m_useArrayOfPointers ? DE_TRUE : DE_FALSE ),                                //  VkBool32                                            arrayOfPointers;
2442                 instancesData                                                                                                                   //  VkDeviceOrHostAddressConstKHR       data;
2443         };
2444
2445         accelerationStructureGeometryKHR                                        =
2446         {
2447                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,                                                                          //  VkStructureType                                                     sType;
2448                 DE_NULL,                                                                                                                                                                        //  const void*                                                         pNext;
2449                 VK_GEOMETRY_TYPE_INSTANCES_KHR,                                                                                                                         //  VkGeometryTypeKHR                                           geometryType;
2450                 makeVkAccelerationStructureInstancesDataKHR(accelerationStructureGeometryInstancesDataKHR),     //  VkAccelerationStructureGeometryDataKHR      geometry;
2451                 (VkGeometryFlagsKHR)0u                                                                                                                                          //  VkGeometryFlagsKHR                                          flags;
2452         };
2453 }
2454
2455 deUint32 TopLevelAccelerationStructure::getRequiredAllocationCount (void)
2456 {
2457         return TopLevelAccelerationStructureKHR::getRequiredAllocationCount();
2458 }
2459
2460 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure ()
2461 {
2462         return de::MovePtr<TopLevelAccelerationStructure>(new TopLevelAccelerationStructureKHR);
2463 }
2464
2465 bool queryAccelerationStructureSizeKHR (const DeviceInterface&                                                  vk,
2466                                                                                 const VkDevice                                                                  device,
2467                                                                                 const VkCommandBuffer                                                   cmdBuffer,
2468                                                                                 const std::vector<VkAccelerationStructureKHR>&  accelerationStructureHandles,
2469                                                                                 VkAccelerationStructureBuildTypeKHR                             buildType,
2470                                                                                 const VkQueryPool                                                               queryPool,
2471                                                                                 VkQueryType                                                                             queryType,
2472                                                                                 deUint32                                                                                firstQuery,
2473                                                                                 std::vector<VkDeviceSize>&                                              results)
2474 {
2475         DE_ASSERT(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR || queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR);
2476
2477         if (buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2478         {
2479                 // queryPool must be large enough to contain at least (firstQuery + accelerationStructureHandles.size()) queries
2480                 vk.cmdResetQueryPool(cmdBuffer, queryPool, firstQuery, deUint32(accelerationStructureHandles.size()));
2481                 vk.cmdWriteAccelerationStructuresPropertiesKHR(cmdBuffer, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType, queryPool, firstQuery);
2482                 // results cannot be retrieved to CPU at the moment - you need to do it using getQueryPoolResults after cmdBuffer is executed. Meanwhile function returns a vector of 0s.
2483                 results.resize(accelerationStructureHandles.size(), 0u);
2484                 return false;
2485         }
2486         // buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR
2487         results.resize(accelerationStructureHandles.size(), 0u);
2488         vk.writeAccelerationStructuresPropertiesKHR(device, deUint32(accelerationStructureHandles.size()), accelerationStructureHandles.data(), queryType,
2489                                                                                                 sizeof(VkDeviceSize) * accelerationStructureHandles.size(), results.data(), sizeof(VkDeviceSize));
2490         // results will contain proper values
2491         return true;
2492 }
2493
2494 bool queryAccelerationStructureSize (const DeviceInterface&                                                     vk,
2495                                                                          const VkDevice                                                                 device,
2496                                                                          const VkCommandBuffer                                                  cmdBuffer,
2497                                                                          const std::vector<VkAccelerationStructureKHR>& accelerationStructureHandles,
2498                                                                          VkAccelerationStructureBuildTypeKHR                    buildType,
2499                                                                          const VkQueryPool                                                              queryPool,
2500                                                                          VkQueryType                                                                    queryType,
2501                                                                          deUint32                                                                               firstQuery,
2502                                                                          std::vector<VkDeviceSize>&                                             results)
2503 {
2504         return queryAccelerationStructureSizeKHR(vk, device, cmdBuffer, accelerationStructureHandles, buildType, queryPool, queryType, firstQuery, results);
2505 }
2506
2507 RayTracingPipeline::RayTracingPipeline ()
2508         : m_shadersModules                      ()
2509         , m_pipelineLibraries           ()
2510         , m_shaderCreateInfos           ()
2511         , m_shadersGroupCreateInfos     ()
2512         , m_pipelineCreateFlags         (0U)
2513         , m_maxRecursionDepth           (1U)
2514         , m_maxPayloadSize                      (0U)
2515         , m_maxAttributeSize            (0U)
2516         , m_deferredOperation           (false)
2517         , m_workerThreadCount           (0)
2518 {
2519 }
2520
2521 RayTracingPipeline::~RayTracingPipeline ()
2522 {
2523 }
2524
// Assigns STAGE to the shader slot SHADER, but only while the slot still holds
// VK_SHADER_UNUSED_KHR; a slot that was already assigned raises an InternalError.
// Must be used as a statement terminated by ';'.
#define CHECKED_ASSIGN_SHADER(SHADER, STAGE)                                            \
        if (SHADER != VK_SHADER_UNUSED_KHR)                                             \
                TCU_THROW(InternalError, "Attempt to reassign shader");                 \
        else                                                                            \
                SHADER = STAGE
2530
2531 void RayTracingPipeline::addShader (VkShaderStageFlagBits                                       shaderStage,
2532                                                                         Move<VkShaderModule>                                    shaderModule,
2533                                                                         deUint32                                                                group,
2534                                                                         const VkSpecializationInfo*                             specializationInfo,
2535                                                                         const VkPipelineShaderStageCreateFlags  pipelineShaderStageCreateFlags,
2536                                                                         const void*                                                             pipelineShaderStageCreateInfopNext)
2537 {
2538         addShader(shaderStage, makeVkSharedPtr(shaderModule), group, specializationInfo, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
2539 }
2540
2541 void RayTracingPipeline::addShader (VkShaderStageFlagBits                                       shaderStage,
2542                                                                         de::SharedPtr<Move<VkShaderModule>>             shaderModule,
2543                                                                         deUint32                                                                group,
2544                                                                         const VkSpecializationInfo*                             specializationInfoPtr,
2545                                                                         const VkPipelineShaderStageCreateFlags  pipelineShaderStageCreateFlags,
2546                                                                         const void*                                                             pipelineShaderStageCreateInfopNext)
2547 {
2548         addShader(shaderStage, **shaderModule, group, specializationInfoPtr, pipelineShaderStageCreateFlags, pipelineShaderStageCreateInfopNext);
2549         m_shadersModules.push_back(shaderModule);
2550 }
2551
2552 void RayTracingPipeline::addShader (VkShaderStageFlagBits                                       shaderStage,
2553                                                                         VkShaderModule                              shaderModule,
2554                                                                         deUint32                                                                group,
2555                                                                         const VkSpecializationInfo*                             specializationInfoPtr,
2556                                                                         const VkPipelineShaderStageCreateFlags  pipelineShaderStageCreateFlags,
2557                                                                         const void*                                                             pipelineShaderStageCreateInfopNext)
2558 {
2559         if (group >= m_shadersGroupCreateInfos.size())
2560         {
2561                 for (size_t groupNdx = m_shadersGroupCreateInfos.size(); groupNdx <= group; ++groupNdx)
2562                 {
2563                         VkRayTracingShaderGroupCreateInfoKHR    shaderGroupCreateInfo   =
2564                         {
2565                                 VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,     //  VkStructureType                                     sType;
2566                                 DE_NULL,                                                                                                        //  const void*                                         pNext;
2567                                 VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,                          //  VkRayTracingShaderGroupTypeKHR      type;
2568                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            generalShader;
2569                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            closestHitShader;
2570                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            anyHitShader;
2571                                 VK_SHADER_UNUSED_KHR,                                                                           //  deUint32                                            intersectionShader;
2572                                 DE_NULL,                                                                                                        //  const void*                                         pShaderGroupCaptureReplayHandle;
2573                         };
2574
2575                         m_shadersGroupCreateInfos.push_back(shaderGroupCreateInfo);
2576                 }
2577         }
2578
2579         const deUint32                                                  shaderStageNdx                  = (deUint32)m_shaderCreateInfos.size();
2580         VkRayTracingShaderGroupCreateInfoKHR&   shaderGroupCreateInfo   = m_shadersGroupCreateInfos[group];
2581
2582         switch (shaderStage)
2583         {
2584                 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:            CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,              shaderStageNdx);        break;
2585                 case VK_SHADER_STAGE_MISS_BIT_KHR:                      CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,              shaderStageNdx);        break;
2586                 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:          CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.generalShader,              shaderStageNdx);        break;
2587                 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:           CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.anyHitShader,               shaderStageNdx);        break;
2588                 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:       CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.closestHitShader,   shaderStageNdx);        break;
2589                 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:      CHECKED_ASSIGN_SHADER(shaderGroupCreateInfo.intersectionShader, shaderStageNdx);        break;
2590                 default:                                                                        TCU_THROW(InternalError, "Unacceptable stage");
2591         }
2592
2593         switch (shaderStage)
2594         {
2595                 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
2596                 case VK_SHADER_STAGE_MISS_BIT_KHR:
2597                 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
2598                 {
2599                         DE_ASSERT(shaderGroupCreateInfo.type == VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR);
2600                         shaderGroupCreateInfo.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;
2601
2602                         break;
2603                 }
2604
2605                 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
2606                 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
2607                 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
2608                 {
2609                         DE_ASSERT(shaderGroupCreateInfo.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR);
2610                         shaderGroupCreateInfo.type      = (shaderGroupCreateInfo.intersectionShader == VK_SHADER_UNUSED_KHR)
2611                                                                                 ? VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR
2612                                                                                 : VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;
2613
2614                         break;
2615                 }
2616
2617                 default: TCU_THROW(InternalError, "Unacceptable stage");
2618         }
2619
2620         {
2621                 const VkPipelineShaderStageCreateInfo   shaderCreateInfo        =
2622                 {
2623                         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    //  VkStructureType                                             sType;
2624                         pipelineShaderStageCreateInfopNext,                                             //  const void*                                                 pNext;
2625                         pipelineShaderStageCreateFlags,                                                 //  VkPipelineShaderStageCreateFlags    flags;
2626                         shaderStage,                                                                                    //  VkShaderStageFlagBits                               stage;
2627                         shaderModule,                                                                                   //  VkShaderModule                                              module;
2628                         "main",                                                                                                 //  const char*                                                 pName;
2629                         specializationInfoPtr,                                                                  //  const VkSpecializationInfo*                 pSpecializationInfo;
2630                 };
2631
2632                 m_shaderCreateInfos.push_back(shaderCreateInfo);
2633         }
2634 }
2635
2636 void RayTracingPipeline::addLibrary (de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary)
2637 {
2638         m_pipelineLibraries.push_back(pipelineLibrary);
2639 }
2640
2641 Move<VkPipeline> RayTracingPipeline::createPipelineKHR (const DeviceInterface&                                                          vk,
2642                                                                                                                 const VkDevice                                                                          device,
2643                                                                                                                 const VkPipelineLayout                                                          pipelineLayout,
2644                                                                                                                 const std::vector<de::SharedPtr<Move<VkPipeline>>>&     pipelineLibraries)
2645 {
2646         for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
2647                 DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
2648
2649         DE_ASSERT(m_shaderCreateInfos.size() > 0);
2650         DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);
2651
2652         std::vector<VkPipeline>                                                         vkPipelineLibraries;
2653         for (auto it = begin(pipelineLibraries), eit = end(pipelineLibraries); it != eit; ++it)
2654                 vkPipelineLibraries.push_back( it->get()->get() );
2655         VkPipelineLibraryCreateInfoKHR                          librariesCreateInfo     =
2656         {
2657                 VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,             //  VkStructureType     sType;
2658                 DE_NULL,                                                                                                //  const void*         pNext;
2659                 deUint32(vkPipelineLibraries.size()),                                   //  deUint32            libraryCount;
2660                 dataOrNullPtr(vkPipelineLibraries)                                              //  VkPipeline*         pLibraries;
2661         };
2662         const VkRayTracingPipelineInterfaceCreateInfoKHR        pipelineInterfaceCreateInfo             =
2663         {
2664                 VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR,       //  VkStructureType     sType;
2665                 DE_NULL,                                                                                                                        //  const void*         pNext;
2666                 m_maxPayloadSize,                                                                                                       //  deUint32            maxPayloadSize;
2667                 m_maxAttributeSize                                                                                                      //  deUint32            maxAttributeSize;
2668         };
2669         const bool                                                                                      addPipelineInterfaceCreateInfo  = m_maxPayloadSize != 0 || m_maxAttributeSize != 0;
2670         const VkRayTracingPipelineInterfaceCreateInfoKHR*       pipelineInterfaceCreateInfoPtr  = addPipelineInterfaceCreateInfo ? &pipelineInterfaceCreateInfo : DE_NULL;
2671         const VkPipelineLibraryCreateInfoKHR*                           librariesCreateInfoPtr                  = (vkPipelineLibraries.empty() ? nullptr : &librariesCreateInfo);
2672
2673         Move<VkDeferredOperationKHR>                                            deferredOperation;
2674         if (m_deferredOperation)
2675                 deferredOperation = createDeferredOperationKHR(vk, device);
2676
2677         VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo =
2678         {
2679                 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,   // VkStructureType                                              sType;
2680                 DE_NULL,                                                                                                // const void*                                                  pNext;
2681                 0,                                                                                                              // VkPipelineDynamicStateCreateFlags    flags;
2682                 static_cast<deUint32>(m_dynamicStates.size() ),                 // deUint32                                                             dynamicStateCount;
2683                 m_dynamicStates.data(),                                                                 // const VkDynamicState*                                pDynamicStates;
2684         };
2685
2686         const VkRayTracingPipelineCreateInfoKHR                         pipelineCreateInfo                              =
2687         {
2688                 VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, //  VkStructureType                                                             sType;
2689                 DE_NULL,                                                                                                //  const void*                                                                 pNext;
2690                 m_pipelineCreateFlags,                                                                  //  VkPipelineCreateFlags                                               flags;
2691                 (deUint32)m_shaderCreateInfos.size(),                                   //  deUint32                                                                    stageCount;
2692                 m_shaderCreateInfos.data(),                                                             //  const VkPipelineShaderStageCreateInfo*              pStages;
2693                 (deUint32)m_shadersGroupCreateInfos.size(),                             //  deUint32                                                                    groupCount;
2694                 m_shadersGroupCreateInfos.data(),                                               //  const VkRayTracingShaderGroupCreateInfoKHR* pGroups;
2695                 m_maxRecursionDepth,                                                                    //  deUint32                                                                    maxRecursionDepth;
2696                 librariesCreateInfoPtr,                                                                 //  VkPipelineLibraryCreateInfoKHR*                             pLibraryInfo;
2697                 pipelineInterfaceCreateInfoPtr,                                                 //  VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface;
2698                 &dynamicStateCreateInfo,                                                                //  const VkPipelineDynamicStateCreateInfo*             pDynamicState;
2699                 pipelineLayout,                                                                                 //  VkPipelineLayout                                                    layout;
2700                 (VkPipeline)DE_NULL,                                                                    //  VkPipeline                                                                  basePipelineHandle;
2701                 0,                                                                                                              //  deInt32                                                                             basePipelineIndex;
2702         };
2703         VkPipeline                                                                                      object                                                  = DE_NULL;
2704         VkResult                                                                                        result                                                  = vk.createRayTracingPipelinesKHR(device, deferredOperation.get(), DE_NULL, 1u, &pipelineCreateInfo, DE_NULL, &object);
2705
2706         if (m_deferredOperation)
2707         {
2708                 DE_ASSERT(result == VK_OPERATION_DEFERRED_KHR || result == VK_OPERATION_NOT_DEFERRED_KHR || result == VK_SUCCESS);
2709
2710                 finishDeferredOperation(vk, device, deferredOperation.get(), m_workerThreadCount, result == VK_OPERATION_NOT_DEFERRED_KHR);
2711         }
2712
2713         Move<VkPipeline> pipeline (check<VkPipeline>(object), Deleter<VkPipeline>(vk, device, DE_NULL));
2714         return pipeline;
2715 }
2716
2717
2718 Move<VkPipeline> RayTracingPipeline::createPipeline (const DeviceInterface&                                                                     vk,
2719                                                                                                          const VkDevice                                                                                 device,
2720                                                                                                          const VkPipelineLayout                                                                 pipelineLayout,
2721                                                                                                          const std::vector<de::SharedPtr<Move<VkPipeline>>>&    pipelineLibraries)
2722 {
2723         return createPipelineKHR(vk, device, pipelineLayout, pipelineLibraries);
2724 }
2725
2726 std::vector<de::SharedPtr<Move<VkPipeline>>> RayTracingPipeline::createPipelineWithLibraries (const DeviceInterface&                    vk,
2727                                                                                                                                                                                                 const VkDevice                                  device,
2728                                                                                                                                                                                                 const VkPipelineLayout                  pipelineLayout)
2729 {
2730         for (size_t groupNdx = 0; groupNdx < m_shadersGroupCreateInfos.size(); ++groupNdx)
2731                 DE_ASSERT(m_shadersGroupCreateInfos[groupNdx].sType == VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR);
2732
2733         DE_ASSERT(m_shaderCreateInfos.size() > 0);
2734         DE_ASSERT(m_shadersGroupCreateInfos.size() > 0);
2735
2736         std::vector<de::SharedPtr<Move<VkPipeline>>> result, allLibraries, firstLibraries;
2737         for(auto it=begin(m_pipelineLibraries), eit=end(m_pipelineLibraries); it!=eit; ++it)
2738         {
2739                 auto childLibraries = (*it)->get()->createPipelineWithLibraries(vk, device, pipelineLayout);
2740                 DE_ASSERT(childLibraries.size() > 0);
2741                 firstLibraries.push_back(childLibraries[0]);
2742                 std::copy(begin(childLibraries), end(childLibraries), std::back_inserter(allLibraries));
2743         }
2744         result.push_back(makeVkSharedPtr(createPipeline(vk, device, pipelineLayout, firstLibraries)));
2745         std::copy(begin(allLibraries), end(allLibraries), std::back_inserter(result));
2746         return result;
2747 }
2748
2749 de::MovePtr<BufferWithMemory> RayTracingPipeline::createShaderBindingTable (const DeviceInterface&              vk,
2750                                                                                                                                                         const VkDevice                          device,
2751                                                                                                                                                         const VkPipeline                        pipeline,
2752                                                                                                                                                         Allocator&                                      allocator,
2753                                                                                                                                                         const deUint32&                         shaderGroupHandleSize,
2754                                                                                                                                                         const deUint32                          shaderGroupBaseAlignment,
2755                                                                                                                                                         const deUint32&                         firstGroup,
2756                                                                                                                                                         const deUint32&                         groupCount,
2757                                                                                                                                                         const VkBufferCreateFlags&      additionalBufferCreateFlags,
2758                                                                                                                                                         const VkBufferUsageFlags&       additionalBufferUsageFlags,
2759                                                                                                                                                         const MemoryRequirement&        additionalMemoryRequirement,
2760                                                                                                                                                         const VkDeviceAddress&          opaqueCaptureAddress,
2761                                                                                                                                                         const deUint32                          shaderBindingTableOffset,
2762                                                                                                                                                         const deUint32                          shaderRecordSize,
2763                                                                                                                                                         const void**                            shaderGroupDataPtrPerGroup)
2764 {
2765         DE_ASSERT(shaderGroupBaseAlignment != 0u);
2766         DE_ASSERT((shaderBindingTableOffset % shaderGroupBaseAlignment) == 0);
2767         DE_UNREF(shaderGroupBaseAlignment);
2768
2769         const deUint32                                                  sbtSize                                                 = shaderBindingTableOffset + groupCount * deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize);
2770         const VkBufferUsageFlags                                sbtFlags                                                = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | additionalBufferUsageFlags;
2771         VkBufferCreateInfo                                              sbtCreateInfo                                   = makeBufferCreateInfo(sbtSize, sbtFlags);
2772         sbtCreateInfo.flags                                                                                                             |= additionalBufferCreateFlags;
2773         VkBufferOpaqueCaptureAddressCreateInfo  sbtCaptureAddressInfo                   =
2774         {
2775                 VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,    // VkStructureType      sType;
2776                 DE_NULL,                                                                                                                // const void*          pNext;
2777                 deUint64(opaqueCaptureAddress)                                                                  // deUint64                     opaqueCaptureAddress;
2778         };
2779
2780         if (opaqueCaptureAddress != 0u)
2781         {
2782                 sbtCreateInfo.pNext = &sbtCaptureAddressInfo;
2783                 sbtCreateInfo.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
2784         }
2785         const MemoryRequirement                 sbtMemRequirements                                              = MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress | additionalMemoryRequirement;
2786         de::MovePtr<BufferWithMemory>   sbtBuffer                                                               = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vk, device, allocator, sbtCreateInfo, sbtMemRequirements));
2787         vk::Allocation&                                 sbtAlloc                                                                = sbtBuffer->getAllocation();
2788
2789         // collect shader group handles
2790         std::vector<deUint8>                    shaderHandles                                                   (groupCount * shaderGroupHandleSize);
2791         VK_CHECK(getRayTracingShaderGroupHandles(vk, device, pipeline, firstGroup, groupCount, groupCount * shaderGroupHandleSize, shaderHandles.data()));
2792
2793         // reserve place for ShaderRecordKHR after each shader handle ( ShaderRecordKHR size might be 0 ). Also take alignment into consideration
2794         deUint8* shaderBegin = (deUint8*)sbtAlloc.getHostPtr() + shaderBindingTableOffset;
2795         for (deUint32 idx = 0; idx < groupCount; ++idx)
2796         {
2797                 deUint8* shaderSrcPos   = shaderHandles.data() + idx * shaderGroupHandleSize;
2798                 deUint8* shaderDstPos   = shaderBegin + idx * deAlign32(shaderGroupHandleSize + shaderRecordSize, shaderGroupHandleSize);
2799                 deMemcpy(shaderDstPos, shaderSrcPos, shaderGroupHandleSize);
2800
2801                 if (shaderGroupDataPtrPerGroup          != nullptr &&
2802                         shaderGroupDataPtrPerGroup[idx] != nullptr)
2803                 {
2804                         DE_ASSERT(sbtSize >= static_cast<deUint32>(shaderDstPos - shaderBegin) + shaderGroupHandleSize);
2805
2806                         deMemcpy(       shaderDstPos + shaderGroupHandleSize,
2807                                                 shaderGroupDataPtrPerGroup[idx],
2808                                                 shaderRecordSize);
2809                 }
2810         }
2811
2812         flushMappedMemoryRange(vk, device, sbtAlloc.getMemory(), sbtAlloc.getOffset(), VK_WHOLE_SIZE);
2813
2814         return sbtBuffer;
2815 }
2816
2817 void RayTracingPipeline::setCreateFlags (const VkPipelineCreateFlags& pipelineCreateFlags)
2818 {
2819         m_pipelineCreateFlags = pipelineCreateFlags;
2820 }
2821
2822 void RayTracingPipeline::setMaxRecursionDepth (const deUint32& maxRecursionDepth)
2823 {
2824         m_maxRecursionDepth = maxRecursionDepth;
2825 }
2826
2827 void RayTracingPipeline::setMaxPayloadSize (const deUint32& maxPayloadSize)
2828 {
2829         m_maxPayloadSize = maxPayloadSize;
2830 }
2831
2832 void RayTracingPipeline::setMaxAttributeSize (const deUint32& maxAttributeSize)
2833 {
2834         m_maxAttributeSize = maxAttributeSize;
2835 }
2836
2837 void RayTracingPipeline::setDeferredOperation (const bool               deferredOperation,
2838                                                                                            const deUint32       workerThreadCount)
2839 {
2840         m_deferredOperation = deferredOperation;
2841         m_workerThreadCount = workerThreadCount;
2842 }
2843
2844 void RayTracingPipeline::addDynamicState(const VkDynamicState& dynamicState)
2845 {
2846         m_dynamicStates.push_back(dynamicState);
2847 }
2848
2849 class RayTracingPropertiesKHR : public RayTracingProperties
2850 {
2851 public:
2852                                                         RayTracingPropertiesKHR                                         () = delete;
2853                                                         RayTracingPropertiesKHR                                         (const InstanceInterface&       vki,
2854                                                                                                                                                  const VkPhysicalDevice         physicalDevice);
2855         virtual                                 ~RayTracingPropertiesKHR                                        ();
2856
2857         virtual deUint32                getShaderGroupHandleSize                                        (void)  { return m_rayTracingPipelineProperties.shaderGroupHandleSize;                                          }
2858         virtual deUint32                getMaxRecursionDepth                                            (void)  { return m_rayTracingPipelineProperties.maxRayRecursionDepth;                                           }
2859         virtual deUint32                getMaxShaderGroupStride                                         (void)  { return m_rayTracingPipelineProperties.maxShaderGroupStride;                                           }
2860         virtual deUint32                getShaderGroupBaseAlignment                                     (void)  { return m_rayTracingPipelineProperties.shaderGroupBaseAlignment;                                       }
2861         virtual deUint64                getMaxGeometryCount                                                     (void)  { return m_accelerationStructureProperties.maxGeometryCount;                                            }
2862         virtual deUint64                getMaxInstanceCount                                                     (void)  { return m_accelerationStructureProperties.maxInstanceCount;                                            }
2863         virtual deUint64                getMaxPrimitiveCount                                            (void)  { return m_accelerationStructureProperties.maxPrimitiveCount;                                           }
2864         virtual deUint32                getMaxDescriptorSetAccelerationStructures       (void)  { return m_accelerationStructureProperties.maxDescriptorSetAccelerationStructures;      }
2865         deUint32                                getMaxRayDispatchInvocationCount                        (void)  { return m_rayTracingPipelineProperties.maxRayDispatchInvocationCount;                          }
2866         deUint32                                getMaxRayHitAttributeSize                                       (void)  { return m_rayTracingPipelineProperties.maxRayHitAttributeSize;                                         }
2867
2868 protected:
2869         VkPhysicalDeviceAccelerationStructurePropertiesKHR      m_accelerationStructureProperties;
2870         VkPhysicalDeviceRayTracingPipelinePropertiesKHR         m_rayTracingPipelineProperties;
2871 };
2872
2873 RayTracingPropertiesKHR::~RayTracingPropertiesKHR ()
2874 {
2875 }
2876
2877 RayTracingPropertiesKHR::RayTracingPropertiesKHR (const InstanceInterface&      vki,
2878                                                                                                   const VkPhysicalDevice        physicalDevice)
2879         : RayTracingProperties  (vki, physicalDevice)
2880 {
2881         m_accelerationStructureProperties       = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
2882         m_rayTracingPipelineProperties          = getPhysicalDeviceExtensionProperties(vki, physicalDevice);
2883 }
2884
2885 de::MovePtr<RayTracingProperties> makeRayTracingProperties (const InstanceInterface&    vki,
2886                                                                                                                         const VkPhysicalDevice          physicalDevice)
2887 {
2888         return de::MovePtr<RayTracingProperties>(new RayTracingPropertiesKHR(vki, physicalDevice));
2889 }
2890
2891 static inline void cmdTraceRaysKHR (const DeviceInterface&                                      vk,
2892                                                                         VkCommandBuffer                                                 commandBuffer,
2893                                                                         const VkStridedDeviceAddressRegionKHR*  raygenShaderBindingTableRegion,
2894                                                                         const VkStridedDeviceAddressRegionKHR*  missShaderBindingTableRegion,
2895                                                                         const VkStridedDeviceAddressRegionKHR*  hitShaderBindingTableRegion,
2896                                                                         const VkStridedDeviceAddressRegionKHR*  callableShaderBindingTableRegion,
2897                                                                         deUint32                                                                width,
2898                                                                         deUint32                                                                height,
2899                                                                         deUint32                                                                depth)
2900 {
2901         return vk.cmdTraceRaysKHR(commandBuffer,
2902                                                           raygenShaderBindingTableRegion,
2903                                                           missShaderBindingTableRegion,
2904                                                           hitShaderBindingTableRegion,
2905                                                           callableShaderBindingTableRegion,
2906                                                           width,
2907                                                           height,
2908                                                           depth);
2909 }
2910
2911
2912 void cmdTraceRays (const DeviceInterface&                                       vk,
2913                                    VkCommandBuffer                                                      commandBuffer,
2914                                    const VkStridedDeviceAddressRegionKHR*       raygenShaderBindingTableRegion,
2915                                    const VkStridedDeviceAddressRegionKHR*       missShaderBindingTableRegion,
2916                                    const VkStridedDeviceAddressRegionKHR*       hitShaderBindingTableRegion,
2917                                    const VkStridedDeviceAddressRegionKHR*       callableShaderBindingTableRegion,
2918                                    deUint32                                                                     width,
2919                                    deUint32                                                                     height,
2920                                    deUint32                                                                     depth)
2921 {
2922         DE_ASSERT(raygenShaderBindingTableRegion        != DE_NULL);
2923         DE_ASSERT(missShaderBindingTableRegion          != DE_NULL);
2924         DE_ASSERT(hitShaderBindingTableRegion           != DE_NULL);
2925         DE_ASSERT(callableShaderBindingTableRegion      != DE_NULL);
2926
2927         return cmdTraceRaysKHR(vk,
2928                                                    commandBuffer,
2929                                                    raygenShaderBindingTableRegion,
2930                                                    missShaderBindingTableRegion,
2931                                                    hitShaderBindingTableRegion,
2932                                                    callableShaderBindingTableRegion,
2933                                                    width,
2934                                                    height,
2935                                                    depth);
2936 }
2937
2938 static inline void cmdTraceRaysIndirectKHR (const DeviceInterface&                                      vk,
2939                                                                                         VkCommandBuffer                                                 commandBuffer,
2940                                                                                         const VkStridedDeviceAddressRegionKHR*  raygenShaderBindingTableRegion,
2941                                                                                         const VkStridedDeviceAddressRegionKHR*  missShaderBindingTableRegion,
2942                                                                                         const VkStridedDeviceAddressRegionKHR*  hitShaderBindingTableRegion,
2943                                                                                         const VkStridedDeviceAddressRegionKHR*  callableShaderBindingTableRegion,
2944                                                                                         VkDeviceAddress                                                 indirectDeviceAddress )
2945 {
2946         DE_ASSERT(raygenShaderBindingTableRegion        != DE_NULL);
2947         DE_ASSERT(missShaderBindingTableRegion          != DE_NULL);
2948         DE_ASSERT(hitShaderBindingTableRegion           != DE_NULL);
2949         DE_ASSERT(callableShaderBindingTableRegion      != DE_NULL);
2950         DE_ASSERT(indirectDeviceAddress                         != 0);
2951
2952         return vk.cmdTraceRaysIndirectKHR(commandBuffer,
2953                                                                           raygenShaderBindingTableRegion,
2954                                                                           missShaderBindingTableRegion,
2955                                                                           hitShaderBindingTableRegion,
2956                                                                           callableShaderBindingTableRegion,
2957                                                                           indirectDeviceAddress);
2958 }
2959
2960 void cmdTraceRaysIndirect (const DeviceInterface&                                       vk,
2961                                                    VkCommandBuffer                                                      commandBuffer,
2962                                                    const VkStridedDeviceAddressRegionKHR*       raygenShaderBindingTableRegion,
2963                                                    const VkStridedDeviceAddressRegionKHR*       missShaderBindingTableRegion,
2964                                                    const VkStridedDeviceAddressRegionKHR*       hitShaderBindingTableRegion,
2965                                                    const VkStridedDeviceAddressRegionKHR*       callableShaderBindingTableRegion,
2966                                                    VkDeviceAddress                                                      indirectDeviceAddress)
2967 {
2968         return cmdTraceRaysIndirectKHR(vk,
2969                                                                    commandBuffer,
2970                                                                    raygenShaderBindingTableRegion,
2971                                                                    missShaderBindingTableRegion,
2972                                                                    hitShaderBindingTableRegion,
2973                                                                    callableShaderBindingTableRegion,
2974                                                                    indirectDeviceAddress);
2975 }
2976
2977 } // vk