d3d12: Add video decode implementation of pipe_video_codec
authorSil Vilerino <sivileri@microsoft.com>
Mon, 2 May 2022 17:00:10 +0000 (10:00 -0700)
committerMarge Bot <emma+marge@anholt.net>
Tue, 17 May 2022 21:02:25 +0000 (21:02 +0000)
Acked-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16286>

16 files changed:
src/gallium/drivers/d3d12/d3d12_util.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_buffer.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_buffer.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dec.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dec.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dec_h264.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_video_types.h [new file with mode: 0644]
src/gallium/drivers/d3d12/meson.build

diff --git a/src/gallium/drivers/d3d12/d3d12_util.h b/src/gallium/drivers/d3d12/d3d12_util.h
new file mode 100644 (file)
index 0000000..550d701
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_UTIL_H
+#define D3D12_UTIL_H
+
+//------------------------------------------------------------------------------------------------
+// Splits a flat subresource index into its mip / array-slice / plane components
+// (inverse of D3D12CalcSubresource). Standard d3dx12.h helper.
+template <typename T, typename U, typename V>
+inline void D3D12DecomposeSubresource( UINT Subresource, UINT MipLevels, UINT ArraySize, _Out_ T& MipSlice, _Out_ U& ArraySlice, _Out_ V& PlaneSlice ) noexcept
+{
+    MipSlice = static_cast<T>(Subresource % MipLevels);
+    ArraySlice = static_cast<U>((Subresource / MipLevels) % ArraySize);
+    PlaneSlice = static_cast<V>(Subresource / (MipLevels * ArraySize));
+}
+
+//------------------------------------------------------------------------------------------------
+// Computes the flat subresource index for (mip, array slice, plane):
+// MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize.
+constexpr UINT D3D12CalcSubresource( UINT MipSlice, UINT ArraySlice, UINT PlaneSlice, UINT MipLevels, UINT ArraySize ) noexcept
+{
+    return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize;
+}
+
+//------------------------------------------------------------------------------------------------
+// Convenience wrapper over D3D12_RESOURCE_BARRIER with named constructors for the
+// three barrier types (standard d3dx12.h helper, copied here for the video code).
+struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER
+{
+    CD3DX12_RESOURCE_BARRIER() = default;
+    explicit CD3DX12_RESOURCE_BARRIER(const D3D12_RESOURCE_BARRIER &o) noexcept :
+        D3D12_RESOURCE_BARRIER(o)
+    {}
+    // State-transition barrier for pResource (all subresources by default)
+    static inline CD3DX12_RESOURCE_BARRIER Transition(
+        _In_ ID3D12Resource* pResource,
+        D3D12_RESOURCE_STATES stateBefore,
+        D3D12_RESOURCE_STATES stateAfter,
+        UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+        D3D12_RESOURCE_BARRIER_FLAGS flags = D3D12_RESOURCE_BARRIER_FLAG_NONE) noexcept
+    {
+        CD3DX12_RESOURCE_BARRIER result = {};
+        D3D12_RESOURCE_BARRIER &barrier = result;
+        result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+        result.Flags = flags;
+        barrier.Transition.pResource = pResource;
+        barrier.Transition.StateBefore = stateBefore;
+        barrier.Transition.StateAfter = stateAfter;
+        barrier.Transition.Subresource = subresource;
+        return result;
+    }
+    // Aliasing barrier between two resources backed by the same heap memory
+    static inline CD3DX12_RESOURCE_BARRIER Aliasing(
+        _In_ ID3D12Resource* pResourceBefore,
+        _In_ ID3D12Resource* pResourceAfter) noexcept
+    {
+        CD3DX12_RESOURCE_BARRIER result = {};
+        D3D12_RESOURCE_BARRIER &barrier = result;
+        result.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
+        barrier.Aliasing.pResourceBefore = pResourceBefore;
+        barrier.Aliasing.pResourceAfter = pResourceAfter;
+        return result;
+    }
+    // UAV barrier on pResource
+    static inline CD3DX12_RESOURCE_BARRIER UAV(
+        _In_ ID3D12Resource* pResource) noexcept
+    {
+        CD3DX12_RESOURCE_BARRIER result = {};
+        D3D12_RESOURCE_BARRIER &barrier = result;
+        result.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+        barrier.UAV.pResource = pResource;
+        return result;
+    }
+};
+
+//------------------------------------------------------------------------------------------------
+// Convenience wrapper over D3D12_RESOURCE_DESC with named constructors for the
+// common resource dimensions (standard d3dx12.h helper).
+struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC
+{
+    CD3DX12_RESOURCE_DESC() = default;
+    explicit CD3DX12_RESOURCE_DESC( const D3D12_RESOURCE_DESC& o ) noexcept :
+        D3D12_RESOURCE_DESC( o )
+    {}
+    CD3DX12_RESOURCE_DESC(
+        D3D12_RESOURCE_DIMENSION dimension,
+        UINT64 alignment,
+        UINT64 width,
+        UINT height,
+        UINT16 depthOrArraySize,
+        UINT16 mipLevels,
+        DXGI_FORMAT format,
+        UINT sampleCount,
+        UINT sampleQuality,
+        D3D12_TEXTURE_LAYOUT layout,
+        D3D12_RESOURCE_FLAGS flags ) noexcept
+    {
+        Dimension = dimension;
+        Alignment = alignment;
+        Width = width;
+        Height = height;
+        DepthOrArraySize = depthOrArraySize;
+        MipLevels = mipLevels;
+        Format = format;
+        SampleDesc.Count = sampleCount;
+        SampleDesc.Quality = sampleQuality;
+        Layout = layout;
+        Flags = flags;
+    }
+    // Buffer desc sized from an allocation-info query; buffers are always
+    // DXGI_FORMAT_UNKNOWN with row-major layout
+    static inline CD3DX12_RESOURCE_DESC Buffer(
+        const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo,
+        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) noexcept
+    {
+        return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes,
+            1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags );
+    }
+    static inline CD3DX12_RESOURCE_DESC Buffer(
+        UINT64 width,
+        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE,
+        UINT64 alignment = 0 ) noexcept
+    {
+        return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1,
+            DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags );
+    }
+    static inline CD3DX12_RESOURCE_DESC Tex1D(
+        DXGI_FORMAT format,
+        UINT64 width,
+        UINT16 arraySize = 1,
+        UINT16 mipLevels = 0,
+        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE,
+        D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+        UINT64 alignment = 0 ) noexcept
+    {
+        return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize,
+            mipLevels, format, 1, 0, layout, flags );
+    }
+    static inline CD3DX12_RESOURCE_DESC Tex2D(
+        DXGI_FORMAT format,
+        UINT64 width,
+        UINT height,
+        UINT16 arraySize = 1,
+        UINT16 mipLevels = 0,
+        UINT sampleCount = 1,
+        UINT sampleQuality = 0,
+        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE,
+        D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+        UINT64 alignment = 0 ) noexcept
+    {
+        return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize,
+            mipLevels, format, sampleCount, sampleQuality, layout, flags );
+    }
+    static inline CD3DX12_RESOURCE_DESC Tex3D(
+        DXGI_FORMAT format,
+        UINT64 width,
+        UINT height,
+        UINT16 depth,
+        UINT16 mipLevels = 0,
+        D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE,
+        D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+        UINT64 alignment = 0 ) noexcept
+    {
+        return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth,
+            mipLevels, format, 1, 0, layout, flags );
+    }
+    // DepthOrArraySize is depth only for 3D textures; array size otherwise
+    inline UINT16 Depth() const noexcept
+    { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1u); }
+    inline UINT16 ArraySize() const noexcept
+    { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1u); }
+    inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) noexcept
+    { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); }
+};
+// Field-wise equality on the full resource description
+inline bool operator==( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) noexcept
+{
+    return l.Dimension == r.Dimension &&
+        l.Alignment == r.Alignment &&
+        l.Width == r.Width &&
+        l.Height == r.Height &&
+        l.DepthOrArraySize == r.DepthOrArraySize &&
+        l.MipLevels == r.MipLevels &&
+        l.Format == r.Format &&
+        l.SampleDesc.Count == r.SampleDesc.Count &&
+        l.SampleDesc.Quality == r.SampleDesc.Quality &&
+        l.Layout == r.Layout &&
+        l.Flags == r.Flags;
+}
+inline bool operator!=( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) noexcept
+{ return !( l == r ); }
+
+
+//------------------------------------------------------------------------------------------------
+// Convenience wrapper over D3D12_HEAP_PROPERTIES (standard d3dx12.h helper).
+struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES
+{
+    CD3DX12_HEAP_PROPERTIES() = default;
+    explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) noexcept :
+        D3D12_HEAP_PROPERTIES(o)
+    {}
+    // Custom-heap constructor: forces Type = D3D12_HEAP_TYPE_CUSTOM and takes
+    // explicit page property / memory pool
+    CD3DX12_HEAP_PROPERTIES(
+        D3D12_CPU_PAGE_PROPERTY cpuPageProperty,
+        D3D12_MEMORY_POOL memoryPoolPreference,
+        UINT creationNodeMask = 1,
+        UINT nodeMask = 1 ) noexcept
+    {
+        Type = D3D12_HEAP_TYPE_CUSTOM;
+        CPUPageProperty = cpuPageProperty;
+        MemoryPoolPreference = memoryPoolPreference;
+        CreationNodeMask = creationNodeMask;
+        VisibleNodeMask = nodeMask;
+    }
+    // Standard-heap constructor (DEFAULT/UPLOAD/READBACK): page property and
+    // memory pool are left UNKNOWN as required for non-custom heap types
+    explicit CD3DX12_HEAP_PROPERTIES(
+        D3D12_HEAP_TYPE type,
+        UINT creationNodeMask = 1,
+        UINT nodeMask = 1 ) noexcept
+    {
+        Type = type;
+        CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+        MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+        CreationNodeMask = creationNodeMask;
+        VisibleNodeMask = nodeMask;
+    }
+    bool IsCPUAccessible() const noexcept
+    {
+        return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || (Type == D3D12_HEAP_TYPE_CUSTOM &&
+            (CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK));
+    }
+};
+// Field-wise equality on heap properties
+inline bool operator==( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) noexcept
+{
+    return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty &&
+        l.MemoryPoolPreference == r.MemoryPoolPreference &&
+        l.CreationNodeMask == r.CreationNodeMask &&
+        l.VisibleNodeMask == r.VisibleNodeMask;
+}
+inline bool operator!=( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) noexcept
+{ return !( l == r ); }
+
+#endif
\ No newline at end of file
diff --git a/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp b/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp
new file mode 100644 (file)
index 0000000..f230036
--- /dev/null
@@ -0,0 +1,314 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_video_array_of_textures_dpb_manager.h"
+#include <algorithm>
+///
+/// d3d12_array_of_textures_dpb_manager
+///
+// Differences with d3d12_texture_array_dpb_manager
+// Uses an std::vector with individual D3D resources as backing storage instead of an D3D12 Texture Array
+// Supports dynamic pool capacity extension (by pushing back a new D3D12Resource) of the pool
+
+#ifndef _WIN32
+#include <wsl/winadapter.h>
+#endif
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <directx/d3d12.h>
+#include "d3d12_util.h"
+
+// Allocates one committed texture in the default heap for a reconstructed picture,
+// using the session format/resolution and the configured resource flags.
+// On failure logs the HRESULT and asserts (debug builds); *ppResource is then left unset.
+void
+d3d12_array_of_textures_dpb_manager::create_reconstructed_picture_allocations(ID3D12Resource **ppResource)
+{
+   D3D12_HEAP_PROPERTIES Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, m_nodeMask, m_nodeMask);
+
+   // Single mip, single array slice, no MSAA (count 1 / quality 0)
+   CD3DX12_RESOURCE_DESC reconstructedPictureResourceDesc = CD3DX12_RESOURCE_DESC::Tex2D(m_encodeFormat,
+                                                                                         m_encodeResolution.Width,
+                                                                                         m_encodeResolution.Height,
+                                                                                         1,
+                                                                                         1,
+                                                                                         1,
+                                                                                         0,
+                                                                                         m_resourceAllocFlags);
+   HRESULT hr = m_pDevice->CreateCommittedResource(&Properties,
+                                                       D3D12_HEAP_FLAG_NONE,
+                                                       &reconstructedPictureResourceDesc,
+                                                       D3D12_RESOURCE_STATE_COMMON,
+                                                       nullptr,
+                                                       IID_PPV_ARGS(ppResource));
+   if (FAILED(hr)) {
+      debug_printf("CreateCommittedResource failed with HR %x\n", hr);
+      assert(false);
+   }
+}
+
+// Constructor: stores the session parameters and optionally pre-allocates a pool
+// of dpbInitialSize reusable reconstructed-picture textures.
+d3d12_array_of_textures_dpb_manager::d3d12_array_of_textures_dpb_manager(
+   uint32_t                                    dpbInitialSize,
+   ID3D12Device *                              pDevice,
+   DXGI_FORMAT                                 encodeSessionFormat,
+   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution,
+   D3D12_RESOURCE_FLAGS                        resourceAllocFlags,
+   bool                                        setNullSubresourcesOnAllZero,
+   uint32_t                                    nodeMask,
+   bool                                        allocatePool)
+   : m_dpbInitialSize(dpbInitialSize),
+     m_pDevice(pDevice),
+     m_encodeFormat(encodeSessionFormat),
+     m_encodeResolution(encodeSessionResolution),
+     m_resourceAllocFlags(resourceAllocFlags),
+     m_NullSubresourcesOnAllZero(setNullSubresourcesOnAllZero),
+     m_nodeMask(nodeMask)
+{
+   // Initialize D3D12 DPB exposed in this class implemented CRUD interface for a DPB
+   clear_decode_picture_buffer();
+
+   // Sometimes the client of this class can reuse allocations from an upper layer
+   // and doesn't need to get fresh/tracked allocations
+   if(allocatePool)
+   {
+      // Implement a reusable pool of D3D12 Resources as an array of textures
+      m_ResourcesPool.resize(m_dpbInitialSize);
+
+      // Build resource pool with committed resources with a d3ddevice and the encoding session settings (eg. resolution) and
+      // the reference_only flag
+      for (auto &reusableRes : m_ResourcesPool) {
+         reusableRes.isFree = true;
+         create_reconstructed_picture_allocations(reusableRes.pResource.GetAddressOf());
+      }
+   }
+}
+
+// Empties the DPB, returning any pool-owned resources to the free list.
+// Returns the number of resources that were successfully untracked.
+uint32_t
+d3d12_array_of_textures_dpb_manager::clear_decode_picture_buffer()
+{
+   uint32_t untrackCount = 0;
+   // Mark resources used in DPB as re-usable in the resources pool
+   for (auto &dpbResource : m_D3D12DPB.pResources) {
+      // Don't assert the untracking result here in case the DPB contains resources not acquired using the pool methods
+      // in this interface
+      untrackCount += untrack_reconstructed_picture_allocation({ dpbResource, 0 }) ? 1 : 0;
+   }
+
+   // Clear DPB
+   m_D3D12DPB.pResources.clear();
+   m_D3D12DPB.pSubresources.clear();
+   m_D3D12DPB.pHeaps.clear();
+   m_D3D12DPB.pResources.reserve(m_dpbInitialSize);
+   m_D3D12DPB.pSubresources.reserve(m_dpbInitialSize);
+   m_D3D12DPB.pHeaps.reserve(m_dpbInitialSize);
+
+   return untrackCount;
+}
+
+// Assigns (overwrites) a reference frame at an EXISTING dpbPosition;
+// contrast with insert_reference_frame, which grows the DPB.
+void
+d3d12_array_of_textures_dpb_manager::assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture,
+                                                            uint32_t                          dpbPosition)
+{
+   // The three parallel tables must always stay the same length
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   assert (dpbPosition < m_D3D12DPB.pResources.size());
+
+   m_D3D12DPB.pResources[dpbPosition]    = pReconPicture.pReconstructedPicture;
+   m_D3D12DPB.pSubresources[dpbPosition] = pReconPicture.ReconstructedPictureSubresource;
+   m_D3D12DPB.pHeaps[dpbPosition]        = pReconPicture.pVideoHeap;
+}
+
+// Adds a new reference frame at a given position, shifting later entries back.
+void
+d3d12_array_of_textures_dpb_manager::insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture,
+                                                            uint32_t                          dpbPosition)
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   if (dpbPosition > m_D3D12DPB.pResources.size()) {
+      // extend capacity
+      // NOTE(review): resizing to dpbPosition fills the gap [old size, dpbPosition)
+      // with default (null) entries before the insert below — confirm callers
+      // never pass a position more than one past the end.
+      m_D3D12DPB.pResources.resize(dpbPosition);
+      m_D3D12DPB.pSubresources.resize(dpbPosition);
+      m_D3D12DPB.pHeaps.resize(dpbPosition);
+   }
+
+   m_D3D12DPB.pResources.insert(m_D3D12DPB.pResources.begin() + dpbPosition, pReconPicture.pReconstructedPicture);
+   m_D3D12DPB.pSubresources.insert(m_D3D12DPB.pSubresources.begin() + dpbPosition,
+                                   pReconPicture.ReconstructedPictureSubresource);
+   m_D3D12DPB.pHeaps.insert(m_D3D12DPB.pHeaps.begin() + dpbPosition, pReconPicture.pVideoHeap);
+}
+
+// Gets a reference frame at a given position; dpbPosition must be in range.
+d3d12_video_reconstructed_picture
+d3d12_array_of_textures_dpb_manager::get_reference_frame(uint32_t dpbPosition)
+{
+   assert(dpbPosition < m_D3D12DPB.pResources.size());
+
+   // Rebuild the aggregate from the three parallel DPB tables
+   d3d12_video_reconstructed_picture retVal = { m_D3D12DPB.pResources[dpbPosition],
+                                                m_D3D12DPB.pSubresources[dpbPosition],
+                                                m_D3D12DPB.pHeaps[dpbPosition] };
+
+   return retVal;
+}
+
+// Removes a reference frame at a given position and returns operation success.
+// pResourceUntracked (optional out) reports whether the removed resource was pool-owned.
+bool
+d3d12_array_of_textures_dpb_manager::remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked)
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   assert(dpbPosition < m_D3D12DPB.pResources.size());
+
+   // If removed resource came from resource pool, mark it as free
+   // to free it for a new usage
+   // Don't assert the untracking result here in case the DPB contains resources not acquired using the pool methods in
+   // this interface
+   bool resUntracked = untrack_reconstructed_picture_allocation({ m_D3D12DPB.pResources[dpbPosition], 0 });
+
+   if (pResourceUntracked != nullptr) {
+      *pResourceUntracked = resUntracked;
+   }
+
+   // Remove from DPB tables
+   m_D3D12DPB.pResources.erase(m_D3D12DPB.pResources.begin() + dpbPosition);
+   m_D3D12DPB.pSubresources.erase(m_D3D12DPB.pSubresources.begin() + dpbPosition);
+   m_D3D12DPB.pHeaps.erase(m_D3D12DPB.pHeaps.begin() + dpbPosition);
+
+   return true;
+}
+
+// Returns true if the trackedItem was allocated (and is being tracked) by this class.
+// Only in-use (non-free) pool entries count as tracked.
+bool
+d3d12_array_of_textures_dpb_manager::is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem)
+{
+   // Linear scan; the pool is small (on the order of the DPB size)
+   for (auto &reusableRes : m_ResourcesPool) {
+      if (trackedItem.pReconstructedPicture == reusableRes.pResource.Get() && !reusableRes.isFree) {
+         return true;
+      }
+   }
+   return false;
+}
+
+// Returns whether it found the tracked resource on this instance pool tracking and was able to free it.
+// Matching is by raw ID3D12Resource pointer identity.
+bool
+d3d12_array_of_textures_dpb_manager::untrack_reconstructed_picture_allocation(
+   d3d12_video_reconstructed_picture trackedItem)
+{
+   for (auto &reusableRes : m_ResourcesPool) {
+      if (trackedItem.pReconstructedPicture == reusableRes.pResource.Get()) {
+         // Mark the entry reusable for a future get_new_tracked_picture_allocation()
+         reusableRes.isFree = true;
+         return true;
+      }
+   }
+   return false;
+}
+
+// Returns a fresh resource for a new reconstructed picture to be written to
+// this class implements the dpb allocations as an array of textures
+d3d12_video_reconstructed_picture
+d3d12_array_of_textures_dpb_manager::get_new_tracked_picture_allocation()
+{
+   d3d12_video_reconstructed_picture freshAllocation = { // pResource
+                                                         nullptr,
+                                                         // subresource
+                                                         0
+   };
+
+   // Find first (if any) available resource to (re-)use
+   bool bAvailableResourceInPool = false;
+   for (auto &reusableRes : m_ResourcesPool) {
+      if (reusableRes.isFree) {
+         bAvailableResourceInPool              = true;
+         freshAllocation.pReconstructedPicture = reusableRes.pResource.Get();
+         reusableRes.isFree                    = false;
+         break;
+      }
+   }
+
+   if (!bAvailableResourceInPool) {
+      // Expand resources pool by one
+      debug_printf(
+         "[d3d12_array_of_textures_dpb_manager] ID3D12Resource Pool capacity (%ld) exceeded - extending capacity "
+         "and appending new allocation at the end",
+         m_ResourcesPool.size());
+      d3d12_reusable_resource newPoolEntry = {};
+      newPoolEntry.isFree                  = false;
+      create_reconstructed_picture_allocations(newPoolEntry.pResource.GetAddressOf());
+      m_ResourcesPool.push_back(newPoolEntry);
+
+      // Assign it to current ask
+      freshAllocation.pReconstructedPicture = newPoolEntry.pResource.Get();
+   }
+
+   return freshAllocation;
+}
+
+// Returns the number of pictures currently stored in the DPB.
+uint32_t
+d3d12_array_of_textures_dpb_manager::get_number_of_pics_in_dpb()
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   // Guard the narrowing size_t -> uint32_t conversion below
+   assert(m_D3D12DPB.pResources.size() < UINT32_MAX);
+   return static_cast<uint32_t>(m_D3D12DPB.pResources.size());
+}
+
+// Returns a flat view of the current DPB contents in the layout the D3D12
+// video APIs consume (parallel resource/subresource/heap arrays).
+d3d12_video_reference_frames
+d3d12_array_of_textures_dpb_manager::get_current_reference_frames()
+{
+   // If all subresources are 0, the DPB is loaded with an array of individual textures; the D3D Encode API expects
+   // pSubresources to be null in this case. The D3D Decode API expects it to be non-null even with all zeroes.
+   uint32_t *pSubresources = m_D3D12DPB.pSubresources.data();
+   if ((std::all_of(m_D3D12DPB.pSubresources.cbegin(), m_D3D12DPB.pSubresources.cend(), [](int i) { return i == 0; })) &&
+       m_NullSubresourcesOnAllZero) {
+      pSubresources = nullptr;
+   }
+
+   d3d12_video_reference_frames retVal = { get_number_of_pics_in_dpb(),
+                                           m_D3D12DPB.pResources.data(),
+                                           pSubresources,
+                                           m_D3D12DPB.pHeaps.data() };
+
+   return retVal;
+}
+
+// number of resources in the pool that are marked as in use
+uint32_t
+d3d12_array_of_textures_dpb_manager::get_number_of_in_use_allocations()
+{
+   uint32_t countOfInUseResourcesInPool = 0;
+   for (auto &reusableRes : m_ResourcesPool) {
+      if (!reusableRes.isFree) {
+         countOfInUseResourcesInPool++;
+      }
+   }
+   return countOfInUseResourcesInPool;
+}
+
+// Returns the total number of allocations tracked in the pool (both free and
+// in-use entries) — not the number of pictures stored in the DPB.
+uint32_t
+d3d12_array_of_textures_dpb_manager::get_number_of_tracked_allocations()
+{
+   // Guard the narrowing size_t -> uint32_t conversion below
+   assert(m_ResourcesPool.size() < UINT32_MAX);
+   return static_cast<uint32_t>(m_ResourcesPool.size());
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h b/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h
new file mode 100644 (file)
index 0000000..437d5c0
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#ifndef D3D12_VIDEO_ARRAY_OF_TEXTURES_DPB_MANAGER_H
+#define D3D12_VIDEO_ARRAY_OF_TEXTURES_DPB_MANAGER_H
+
+#include "d3d12_video_dpb_storage_manager.h"
+#include "d3d12_video_types.h"
+
+// DPB storage manager backed by an std::vector of individual committed textures
+// (subresource always 0), with a reusable pool that can grow on demand.
+class d3d12_array_of_textures_dpb_manager : public d3d12_video_dpb_storage_manager_interface
+{
+   // d3d12_video_dpb_storage_manager_interface
+ public:
+   // Adds a new reference frame at a given position
+   void insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition);
+
+   // Assigns a reference frame at a given position
+   void assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition);
+
+   // Gets a reference frame at a given position
+   d3d12_video_reconstructed_picture get_reference_frame(uint32_t dpbPosition);
+
+   // Removes a reference frame at a given position and returns operation success
+   // pResourceUntracked is an optional output indicating if the removed resource was being tracked by the pool
+   bool remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked = nullptr);
+
+   // Returns the resource allocation for a NEW picture
+   d3d12_video_reconstructed_picture get_new_tracked_picture_allocation();
+
+   // Returns true if the trackedItem was allocated (and is being tracked) by this class
+   bool is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem);
+
+   // Returns whether it found the tracked resource on this instance pool tracking and was able to free it
+   bool untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem);
+
+   // Returns the number of pictures currently stored in the DPB
+   uint32_t get_number_of_pics_in_dpb();
+
+   // Returns all the current reference frames stored
+   d3d12_video_reference_frames get_current_reference_frames();
+
+   // Removes all pictures from DPB
+   // returns the number of resources marked as reusable
+   uint32_t clear_decode_picture_buffer();
+
+   // number of resources in the pool that are marked as in use
+   uint32_t get_number_of_in_use_allocations();
+
+   // total number of pool entries (free + in use)
+   uint32_t get_number_of_tracked_allocations();
+
+   // d3d12_array_of_textures_dpb_manager
+ public:
+   d3d12_array_of_textures_dpb_manager(
+      uint32_t dpbInitialSize,   // Maximum in use resources for a DPB of size x should be x+1 for cases when a P frame
+                                 // is using the x references in the L0 list and also using an extra resource to output
+                                 // its own recon pic.
+      ID3D12Device *                              pDevice,
+      DXGI_FORMAT                                 encodeSessionFormat,
+      D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution,
+      D3D12_RESOURCE_FLAGS                        resourceAllocFlags           = D3D12_RESOURCE_FLAG_NONE,
+      bool                                        setNullSubresourcesOnAllZero = false,
+      uint32_t                                    nodeMask                     = 0,
+      bool                                        allocatePool                 = true);
+   ~d3d12_array_of_textures_dpb_manager()
+   { }
+
+   // d3d12_array_of_textures_dpb_manager
+ private:
+   void create_reconstructed_picture_allocations(ID3D12Resource **ppResource);
+
+   size_t                                      m_dpbInitialSize = 0;
+   ID3D12Device *                              m_pDevice;
+   DXGI_FORMAT                                 m_encodeFormat;
+   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC m_encodeResolution;
+
+   // DPB with array of resources backing storage
+
+   // Three parallel tables; kept the same length at all times
+   struct d3d12_video_dpb
+   {
+      std::vector<ID3D12Resource *> pResources;
+      std::vector<uint32_t>         pSubresources;
+      std::vector<IUnknown *>       pHeaps;
+   };
+
+   d3d12_video_dpb m_D3D12DPB;
+
+   // Flags used when creating the resource pool
+   // Usually if reference only is needed for d3d12 video use
+   // D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
+   // D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
+   D3D12_RESOURCE_FLAGS m_resourceAllocFlags;
+
+   // Pool of resources to be aliased by the DPB without giving memory ownership
+   // These resources are allocated and released by this implementation
+   struct d3d12_reusable_resource
+   {
+      ComPtr<ID3D12Resource> pResource;
+      // subresource is always 0 on this AoT implementation of the resources pool
+      bool isFree;
+   };
+
+   std::vector<d3d12_reusable_resource> m_ResourcesPool;
+
+   // If all subresources are 0, the DPB is loaded with an array of individual textures; the D3D Encode API expects
+   // pSubresources to be null in this case. The D3D Decode API expects it to be non-null even with all zeroes.
+   bool m_NullSubresourcesOnAllZero = false;
+
+   uint32_t m_nodeMask = 0;
+};
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_buffer.cpp b/src/gallium/drivers/d3d12/d3d12_video_buffer.cpp
new file mode 100644 (file)
index 0000000..a6e0be3
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_video_buffer.h"
+#include "d3d12_resource.h"
+#include "d3d12_video_dec.h"
+#include "d3d12_residency.h"
+
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_video.h"
+#include "vl/vl_video_buffer.h"
+#include "util/u_sampler.h"
+
+/**
+ * creates a video buffer
+ */
+struct pipe_video_buffer *
+d3d12_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *tmpl)
+{
+   assert(pipe);
+   assert(tmpl);
+
+   ///
+   /// Initialize d3d12_video_buffer
+   ///
+
+
+   if (!(tmpl->buffer_format == PIPE_FORMAT_NV12)) {
+      debug_printf("[d3d12_video_buffer] buffer_format is only supported as PIPE_FORMAT_NV12.\n");
+      return nullptr;
+   }
+
+   if (!(pipe_format_to_chroma_format(tmpl->buffer_format) == PIPE_VIDEO_CHROMA_FORMAT_420)) {
+      debug_printf(
+         "[d3d12_video_buffer] tmpl->buffer_format only supported as a PIPE_VIDEO_CHROMA_FORMAT_420 format.\n");
+      return nullptr;
+   }
+
+   // Not using new doesn't call ctor and the initializations in the class declaration are lost
+   struct d3d12_video_buffer *pD3D12VideoBuffer = new d3d12_video_buffer;
+
+   // Fill base template
+   pD3D12VideoBuffer->base               = *tmpl;
+   pD3D12VideoBuffer->base.buffer_format = tmpl->buffer_format;
+   pD3D12VideoBuffer->base.context       = pipe;
+   pD3D12VideoBuffer->base.width         = tmpl->width;
+   pD3D12VideoBuffer->base.height        = tmpl->height;
+   pD3D12VideoBuffer->base.interlaced    = tmpl->interlaced;
+   pD3D12VideoBuffer->base.associated_data = nullptr;
+   pD3D12VideoBuffer->base.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET;
+
+   // Fill vtable
+   pD3D12VideoBuffer->base.destroy                     = d3d12_video_buffer_destroy;
+   pD3D12VideoBuffer->base.get_sampler_view_planes     = d3d12_video_buffer_get_sampler_view_planes;
+   pD3D12VideoBuffer->base.get_sampler_view_components = d3d12_video_buffer_get_sampler_view_components;
+   pD3D12VideoBuffer->base.get_surfaces                = d3d12_video_buffer_get_surfaces;
+   pD3D12VideoBuffer->base.destroy_associated_data     = d3d12_video_buffer_destroy_associated_data;
+
+   struct pipe_resource templ;
+   memset(&templ, 0, sizeof(templ));
+   templ.target     = PIPE_TEXTURE_2D;
+   templ.bind       = pD3D12VideoBuffer->base.bind;
+   templ.format     = pD3D12VideoBuffer->base.buffer_format;
+   // YUV 4:2:0 formats in D3D12 need to have multiple of 2 dimensions
+   templ.width0     = align(pD3D12VideoBuffer->base.width, 2);
+   templ.height0    = align(pD3D12VideoBuffer->base.height, 2);
+   templ.depth0     = 1;
+   templ.array_size = 1;
+   templ.flags      = 0;
+
+   // This calls d3d12_create_resource as the function ptr is set in d3d12_screen.resource_create
+   pD3D12VideoBuffer->texture = (struct d3d12_resource *) pipe->screen->resource_create(pipe->screen, &templ);
+   d3d12_promote_to_permanent_residency((struct d3d12_screen*) pipe->screen, pD3D12VideoBuffer->texture);
+
+   if (pD3D12VideoBuffer->texture == nullptr) {
+      debug_printf("[d3d12_video_buffer] d3d12_video_buffer_create - Call to resource_create() to create "
+                      "d3d12_resource failed\n");
+      goto failed;
+   }
+
+   pD3D12VideoBuffer->num_planes = util_format_get_num_planes(pD3D12VideoBuffer->texture->overall_format);
+   assert(pD3D12VideoBuffer->num_planes == 2);
+   return &pD3D12VideoBuffer->base;
+
+failed:
+   if (pD3D12VideoBuffer != nullptr) {
+      d3d12_video_buffer_destroy((struct pipe_video_buffer *) pD3D12VideoBuffer);
+   }
+
+   return nullptr;
+}
+
+/**
+ * destroy this video buffer
+ */
+void
+d3d12_video_buffer_destroy(struct pipe_video_buffer *buffer)
+{
+   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer;
+
+   // Destroy pD3D12VideoBuffer->texture (if any)
+   if (pD3D12VideoBuffer->texture) {
+      pipe_resource *pBaseResource = &pD3D12VideoBuffer->texture->base.b;
+      pipe_resource_reference(&pBaseResource, NULL);
+   }
+
+   // Destroy associated data (if any)
+   if (pD3D12VideoBuffer->base.associated_data != nullptr) {
+      d3d12_video_buffer_destroy_associated_data(pD3D12VideoBuffer->base.associated_data);
+      // Set to nullptr after cleanup, no dangling pointers
+      pD3D12VideoBuffer->base.associated_data = nullptr;
+   }
+
+   // Destroy (if any) codec where the associated data came from
+   if (pD3D12VideoBuffer->base.codec != nullptr) {
+      d3d12_video_decoder_destroy(pD3D12VideoBuffer->base.codec);
+      // Set to nullptr after cleanup, no dangling pointers
+      pD3D12VideoBuffer->base.codec = nullptr;
+   }
+
+   for (uint i = 0; i < pD3D12VideoBuffer->surfaces.size(); ++i) {
+      if (pD3D12VideoBuffer->surfaces[i] != NULL) {
+         pipe_surface_reference(&pD3D12VideoBuffer->surfaces[i], NULL);
+      }
+   }
+
+   for (uint i = 0; i < pD3D12VideoBuffer->sampler_view_planes.size(); ++i) {
+      if (pD3D12VideoBuffer->sampler_view_planes[i] != NULL) {
+         pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_planes[i], NULL);
+      }
+   }
+
+   for (uint i = 0; i < pD3D12VideoBuffer->sampler_view_components.size(); ++i) {
+      if (pD3D12VideoBuffer->sampler_view_components[i] != NULL) {
+         pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_components[i], NULL);
+      }
+   }
+
+   delete pD3D12VideoBuffer;
+}
+
/*
 * destroy the associated data
 *
 * Intentionally a no-op: this implementation does not attach per-buffer
 * associated data that requires explicit destruction; the callback only
 * exists to satisfy the pipe_video_buffer vtable.
 */
void
d3d12_video_buffer_destroy_associated_data(void *associated_data)
{ }
+
+/**
+ * get an individual surfaces for each plane
+ */
+struct pipe_surface **
+d3d12_video_buffer_get_surfaces(struct pipe_video_buffer *buffer)
+{
+   assert(buffer);
+   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer;
+   struct pipe_context *      pipe              = pD3D12VideoBuffer->base.context;
+   struct pipe_surface        surface_template  = {};
+
+   // Some video frameworks iterate over [0..VL_MAX_SURFACES) and ignore the nullptr entries
+   // So we have to null initialize the other surfaces not used from [num_planes..VL_MAX_SURFACES)
+   // Like in src/gallium/frontends/va/surface.c
+   pD3D12VideoBuffer->surfaces.resize(VL_MAX_SURFACES, nullptr);
+
+   // pCurPlaneResource refers to the planar resource, not the overall resource.
+   // in d3d12_resource this is handled by having a linked list of planes with
+   // d3dRes->base.next ptr to next plane resource
+   // starting with the plane 0 being the overall resource
+   struct pipe_resource *pCurPlaneResource = &pD3D12VideoBuffer->texture->base.b;
+
+   for (uint PlaneSlice = 0; PlaneSlice < pD3D12VideoBuffer->num_planes; ++PlaneSlice) {
+      if (!pD3D12VideoBuffer->surfaces[PlaneSlice]) {
+         memset(&surface_template, 0, sizeof(surface_template));
+         surface_template.format =
+            util_format_get_plane_format(pD3D12VideoBuffer->texture->overall_format, PlaneSlice);
+
+         pD3D12VideoBuffer->surfaces[PlaneSlice] =
+            pipe->create_surface(pipe, pCurPlaneResource, &surface_template);
+
+         if (!pD3D12VideoBuffer->surfaces[PlaneSlice]) {
+            goto error;
+         }
+      }
+      pCurPlaneResource = pCurPlaneResource->next;
+   }
+
+   return pD3D12VideoBuffer->surfaces.data();
+
+error:
+   for (uint PlaneSlice = 0; PlaneSlice < pD3D12VideoBuffer->num_planes; ++PlaneSlice) {
+      pipe_surface_reference(&pD3D12VideoBuffer->surfaces[PlaneSlice], NULL);
+   }
+
+   return nullptr;
+}
+
+/**
+ * get an individual sampler view for each plane
+ */
+struct pipe_sampler_view **
+d3d12_video_buffer_get_sampler_view_planes(struct pipe_video_buffer *buffer)
+{
+   assert(buffer);
+   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer;
+   struct pipe_context *      pipe              = pD3D12VideoBuffer->base.context;
+   struct pipe_sampler_view   samplerViewTemplate;
+
+   // Some video frameworks iterate over [0..VL_MAX_SURFACES) and ignore the nullptr entries
+   // So we have to null initialize the other surfaces not used from [num_planes..VL_MAX_SURFACES)
+   // Like in src/gallium/frontends/vdpau/surface.c
+   pD3D12VideoBuffer->sampler_view_planes.resize(VL_MAX_SURFACES, nullptr);
+
+   // pCurPlaneResource refers to the planar resource, not the overall resource.
+   // in d3d12_resource this is handled by having a linked list of planes with
+   // d3dRes->base.next ptr to next plane resource
+   // starting with the plane 0 being the overall resource
+   struct pipe_resource *pCurPlaneResource = &pD3D12VideoBuffer->texture->base.b;
+
+   for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) {
+      if (!pD3D12VideoBuffer->sampler_view_planes[i]) {
+         assert(pCurPlaneResource);   // the d3d12_resource has a linked list with the exact name of number of elements
+                                      // as planes
+
+         memset(&samplerViewTemplate, 0, sizeof(samplerViewTemplate));
+         u_sampler_view_default_template(&samplerViewTemplate, pCurPlaneResource, pCurPlaneResource->format);
+
+         pD3D12VideoBuffer->sampler_view_planes[i] =
+            pipe->create_sampler_view(pipe, pCurPlaneResource, &samplerViewTemplate);
+
+         if (!pD3D12VideoBuffer->sampler_view_planes[i]) {
+            goto error;
+         }
+      }
+
+      pCurPlaneResource = pCurPlaneResource->next;
+   }
+
+   return pD3D12VideoBuffer->sampler_view_planes.data();
+
+error:
+   for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) {
+      pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_planes[i], NULL);
+   }
+
+   return nullptr;
+}
+
+/**
+ * get an individual sampler view for each component
+ */
+struct pipe_sampler_view **
+d3d12_video_buffer_get_sampler_view_components(struct pipe_video_buffer *buffer)
+{
+   assert(buffer);
+   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer;
+   struct pipe_context *      pipe              = pD3D12VideoBuffer->base.context;
+   struct pipe_sampler_view   samplerViewTemplate;
+
+   // pCurPlaneResource refers to the planar resource, not the overall resource.
+   // in d3d12_resource this is handled by having a linked list of planes with
+   // d3dRes->base.next ptr to next plane resource
+   // starting with the plane 0 being the overall resource
+   struct pipe_resource *pCurPlaneResource = &pD3D12VideoBuffer->texture->base.b;
+
+   // At the end of the loop, "component" will have the total number of items valid in sampler_view_components
+   // since component can end up being <= VL_NUM_COMPONENTS, we assume VL_NUM_COMPONENTS first and then resize/adjust to
+   // fit the container size pD3D12VideoBuffer->sampler_view_components to the actual components number
+   pD3D12VideoBuffer->sampler_view_components.resize(VL_NUM_COMPONENTS, nullptr);
+   uint component = 0;
+
+   for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) {
+      // For example num_components would be 1 for the Y plane (R8 in NV12), 2 for the UV plane (R8G8 in NV12)
+      unsigned num_components = util_format_get_nr_components(pCurPlaneResource->format);
+
+      for (uint j = 0; j < num_components; ++j, ++component) {
+         assert(component < VL_NUM_COMPONENTS);
+
+         if (!pD3D12VideoBuffer->sampler_view_components[component]) {
+            memset(&samplerViewTemplate, 0, sizeof(samplerViewTemplate));
+            u_sampler_view_default_template(&samplerViewTemplate, pCurPlaneResource, pCurPlaneResource->format);
+            samplerViewTemplate.swizzle_r = samplerViewTemplate.swizzle_g = samplerViewTemplate.swizzle_b =
+               PIPE_SWIZZLE_X + j;
+            samplerViewTemplate.swizzle_a = PIPE_SWIZZLE_1;
+
+            pD3D12VideoBuffer->sampler_view_components[component] =
+               pipe->create_sampler_view(pipe, pCurPlaneResource, &samplerViewTemplate);
+            if (!pD3D12VideoBuffer->sampler_view_components[component]) {
+               goto error;
+            }
+         }
+      }
+
+      pCurPlaneResource = pCurPlaneResource->next;
+   }
+
+   // Adjust size to fit component <= VL_NUM_COMPONENTS
+   pD3D12VideoBuffer->sampler_view_components.resize(component);
+
+   return pD3D12VideoBuffer->sampler_view_components.data();
+
+error:
+   for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) {
+      pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_components[i], NULL);
+   }
+
+   return nullptr;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_buffer.h b/src/gallium/drivers/d3d12/d3d12_video_buffer.h
new file mode 100644 (file)
index 0000000..62f0454
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#ifndef D3D12_VIDEO_BUFFER_H
+#define D3D12_VIDEO_BUFFER_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_video_codec.h"
+#include <vector>
+
+///
+/// Pipe video buffer interface starts
+///
+
/**
 * creates a video buffer
 *
 * Only PIPE_FORMAT_NV12 (4:2:0) buffers are supported; returns nullptr
 * for unsupported formats or on resource-creation failure.
 */
struct pipe_video_buffer *
d3d12_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *tmpl);

/**
 * destroy this video buffer
 *
 * Releases the backing resource, associated data/codec and all cached
 * surfaces and sampler views, then frees the buffer object.
 */
void
d3d12_video_buffer_destroy(struct pipe_video_buffer *buffer);

/**
 * get an individual sampler view for each plane
 */
struct pipe_sampler_view **
d3d12_video_buffer_get_sampler_view_planes(struct pipe_video_buffer *buffer);

/**
 * get an individual sampler view for each component
 */
struct pipe_sampler_view **
d3d12_video_buffer_get_sampler_view_components(struct pipe_video_buffer *buffer);

/**
 * get an individual surfaces for each plane
 */
struct pipe_surface **
d3d12_video_buffer_get_surfaces(struct pipe_video_buffer *buffer);

/*
 * destroy the associated data (no-op in this implementation)
 */
void
d3d12_video_buffer_destroy_associated_data(void *associated_data);

/**
 * output for decoding / input for displaying
 */
struct d3d12_video_buffer
{
   // base must stay first: the implementation casts pipe_video_buffer* to d3d12_video_buffer*
   pipe_video_buffer                       base;
   // Backing planar resource; planes are chained through texture->base.b.next
   struct d3d12_resource *                 texture;
   // Number of planes in texture (2 for NV12)
   uint                                    num_planes;
   // Lazily-created caches; surfaces/sampler_view_planes are sized
   // VL_MAX_SURFACES with unused entries kept nullptr for frontend iteration
   std::vector<pipe_surface *>      surfaces;
   std::vector<pipe_sampler_view *> sampler_view_planes;
   std::vector<pipe_sampler_view *> sampler_view_components;
};
+
+///
+/// Pipe video buffer interface ends
+///
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec.cpp
new file mode 100644 (file)
index 0000000..73c9eb2
--- /dev/null
@@ -0,0 +1,1361 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_context.h"
+#include "d3d12_format.h"
+#include "d3d12_resource.h"
+#include "d3d12_screen.h"
+#include "d3d12_surface.h"
+#include "d3d12_video_dec.h"
+#include "d3d12_video_dec_h264.h"
+#include "d3d12_video_buffer.h"
+#include "d3d12_residency.h"
+
+#include "vl/vl_video_buffer.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_video.h"
+#include "util/vl_vlc.h"
+
/**
 * Creates a d3d12 implementation of pipe_video_codec for decode.
 *
 * Fills only the decode-related vtable entries (encode_* /
 * decode_macroblock / get_feedback are left null), queries the D3D12
 * video device interface, checks caps and creates the decoder, command
 * objects and state buffers. On any failure, partially-constructed state
 * is torn down through d3d12_video_decoder_destroy and nullptr is
 * returned.
 */
struct pipe_video_codec *
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{
   ///
   /// Initialize d3d12_video_decoder
   ///


   // Not using new doesn't call ctor and the initializations in the class declaration are lost
   struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;

   pD3D12Dec->base = *codec;
   pD3D12Dec->m_screen = context->screen;

   pD3D12Dec->base.context = context;
   pD3D12Dec->base.width = codec->width;
   pD3D12Dec->base.height = codec->height;
   // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. encode_* / decode_macroblock
   // / get_feedback for encode)
   pD3D12Dec->base.destroy = d3d12_video_decoder_destroy;
   pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame;
   pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream;
   pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
   pD3D12Dec->base.flush = d3d12_video_decoder_flush;

   // Derive the DXGI format and D3D12 decode profile from the pipe profile
   pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
   pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
   pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile);

   ///
   /// Try initializing D3D12 Video device and check for device caps
   ///

   struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
   pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);

   ///
   /// Create decode objects
   ///
   // hr is declared before any goto so the jumps below do not cross its
   // initialization (required in C++)
   HRESULT hr = S_OK;
   // QueryInterface for ID3D12VideoDevice; failure means the adapter has no video support
   if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
          IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n");
      goto failed;
   }

   if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                      "d3d12_video_decoder_check_caps_and_create_decoder\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf(
         "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                      "d3d12_video_decoder_create_video_state_buffers\n");
      goto failed;
   }

   // Query format support info for the chosen decode format
   pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat };
   hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO,
                                                                        &pD3D12Dec->m_decodeFormatInfo,
                                                                        sizeof(pD3D12Dec->m_decodeFormatInfo));
   if(FAILED(hr)) {
      // NOTE(review): %x with an HRESULT relies on HRESULT being int-sized;
      // consider casting to unsigned for portability
      debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
      goto failed;
   }

   return &pD3D12Dec->base;

failed:
   // destroy handles partially-initialized decoders (smart-pointer members)
   if (pD3D12Dec != nullptr) {
      d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec);
   }

   return nullptr;
}
+
+/**
+ * Destroys a d3d12_video_decoder
+ * Call destroy_XX for applicable XX nested member types before deallocating
+ * Destroy methods should check != nullptr on their input target argument as this method can be called as part of
+ * cleanup from failure on the creation method
+ */
+void
+d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
+{
+   if (codec == nullptr) {
+      return;
+   }
+
+   d3d12_video_decoder_flush(codec);   // Flush pending work before destroying.
+
+   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
+
+   //
+   // Destroys a decoder
+   // Call destroy_XX for applicable XX nested member types before deallocating
+   // Destroy methods should check != nullptr on their input target argument as this method can be called as part of
+   // cleanup from failure on the creation method
+   //
+
+   // No need for d3d12_destroy_video_objects
+   //    All the objects created here are smart pointer members of d3d12_video_decoder
+   // No need for d3d12_destroy_video_decoder_and_heap
+   //    All the objects created here are smart pointer members of d3d12_video_decoder
+   // No need for d3d12_destroy_video_dpbmanagers
+   //    All the objects created here are smart pointer members of d3d12_video_decoder
+
+   // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder
+
+   // Call dtor to make ComPtr work
+   delete pD3D12Dec;
+}
+
+/**
+ * start decoding of a new frame
+ */
+void
+d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
+                                struct pipe_video_buffer *target,
+                                struct pipe_picture_desc *picture)
+{
+   // Do nothing here. Initialize happens on decoder creation, re-config (if any) happens in
+   // d3d12_video_decoder_decode_bitstream
+   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
+   assert(pD3D12Dec);
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
+                 pD3D12Dec->m_fenceValue);
+}
+
+/**
+ * decode a bitstream
+ */
+void
+d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
+                                     struct pipe_video_buffer *target,
+                                     struct pipe_picture_desc *picture,
+                                     unsigned num_buffers,
+                                     const void *const *buffers,
+                                     const unsigned *sizes)
+{
+   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
+   assert(pD3D12Dec);
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n",
+                 pD3D12Dec->m_fenceValue);
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+   assert(pD3D12Dec->m_spDecodeCommandQueue);
+   assert(pD3D12Dec->m_pD3D12Screen);
+   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
+   assert(pD3D12VideoBuffer);
+
+   ///
+   /// Compressed bitstream buffers
+   ///
+
+   /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED
+   /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 ->
+   /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If
+   /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3.
+   /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes
+   /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED
+   // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that:
+   // It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data
+   // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate
+   // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all
+   // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed
+   // to VDPAU, even when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start
+   // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at
+   // least for formats employing start codes to delimit slice data" if we ever get an issue with VDPAU start codes we
+   // should consider adding the code that handles this in the VDPAU layer above the gallium driver like mesa VA does.
+
+   // To handle the multi-slice case end_frame already takes care of this by parsing the start codes from the
+   // combined bitstream of all decode_bitstream calls.
+
+   // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the
+   // buffers have multiple buffer array entry per slice {startCode (optional), slice1, slice2, ..., startCode
+   // (optional) , sliceN}
+
+   if (num_buffers > 2)   // Assume this means multiple slices at once in a decode_bitstream call
+   {
+      // Based on VA frontend codebase, this never happens for video (no JPEG)
+      // Based on VDPAU frontends codebase, this only happens when sending more than one slice at once in decode bitstream
+
+      // To handle the case where VDPAU send all the slices at once in a single decode_bitstream call, let's pretend it
+      // was a series of different calls
+
+      // group by start codes and buffers and perform calls for the number of slices
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected "
+                     "for fenceValue: %d, breaking down the calls into one per slice\n",
+                     pD3D12Dec->m_fenceValue);
+
+      size_t curBufferIdx = 0;
+
+      // Vars to be used for the delegation calls to decode_bitstream
+      unsigned call_num_buffers = 0;
+      const void *const *call_buffers = nullptr;
+      const unsigned *call_sizes = nullptr;
+
+      while (curBufferIdx < num_buffers) {
+         // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a
+         // startcode+slicedata or just slicedata call
+         call_buffers = &buffers[curBufferIdx];
+         call_sizes = &sizes[curBufferIdx];
+
+         // Usually start codes are less or equal than 4 bytes
+         // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the
+         // current buffer.
+         call_num_buffers = (sizes[curBufferIdx] <= 4) ? 2 : 1;
+
+         // Delegate call with one or two buffers only
+         d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes);
+
+         curBufferIdx += call_num_buffers;   // Consume from the loop the buffers sent in the last call
+      }
+   } else {
+      ///
+      /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0].
+      ///
+
+      // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be
+      // handled by flattening all the buffers into a single one and passing that to HW.
+
+      size_t totalReceivedBuffersSize = 0u;   // Combined size of all sizes[]
+      for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
+         totalReceivedBuffersSize += sizes[bufferIdx];
+      }
+
+      // Bytes of data pre-staged before this decode_frame call
+      size_t preStagedDataSize = pD3D12Dec->m_stagingDecodeBitstream.size();
+
+      // Extend the staging buffer size, as decode_frame can be called several times before end_frame
+      pD3D12Dec->m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
+
+      // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
+      // buffers will be appended
+      uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_stagingDecodeBitstream.data() + preStagedDataSize;
+
+      // Append new data at the end.
+      size_t dstOffset = 0u;
+      for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
+         memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]);
+         dstOffset += sizes[bufferIdx];
+      }
+
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n",
+                    pD3D12Dec->m_fenceValue);
+   }
+}
+
+void
+d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec,
+                                                 struct pipe_video_buffer *target,
+                                                 struct pipe_picture_desc *picture)
+{
+   switch (pD3D12Dec->m_d3d12DecProfileType) {
+      case d3d12_video_decode_profile_type_h264:
+      {
+         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
+         pD3D12Dec->m_pCurrentDecodeTarget = target;
+         pD3D12Dec->m_pCurrentReferenceTargets = pPicControlH264->ref;
+      } break;
+
+      default:
+      {
+         unreachable("Unsupported d3d12_video_decode_profile_type");
+      } break;
+   }
+}
+
+/**
+ * end decoding of the current frame
+ */
+void
+d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
+                              struct pipe_video_buffer *target,
+                              struct pipe_picture_desc *picture)
+{
+   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
+   assert(pD3D12Dec);
+   struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
+   assert(pD3D12Screen);
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n",
+                 pD3D12Dec->m_fenceValue);
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+   assert(pD3D12Dec->m_spDecodeCommandQueue);
+   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
+   assert(pD3D12VideoBuffer);
+
+   ///
+   /// Store current decode output target texture and reference textures from upper layer
+   ///
+   d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture);
+
+   ///
+   /// Codec header picture parameters buffers
+   ///
+
+   d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
+   assert(pD3D12Dec->m_picParamsBuffer.size() > 0);
+
+   ///
+   /// Prepare Slice control buffers before clearing staging buffer
+   ///
+   assert(pD3D12Dec->m_stagingDecodeBitstream.size() > 0);   // Make sure the staging wasn't cleared yet in end_frame
+   d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
+   assert(pD3D12Dec->m_SliceControlBuffer.size() > 0);
+
+   ///
+   /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
+   ///
+
+   uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_stagingDecodeBitstream.size();
+   uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_stagingDecodeBitstream.data();
+
+   // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
+   if (pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
+      if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
+         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
+                         "d3d12_video_decoder_create_staging_bitstream_buffer\n");
+         debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
+                pD3D12Dec->m_fenceValue);
+         assert(false);
+         return;
+      }
+   }
+
+   // Upload frame bitstream CPU data to ID3D12Resource buffer
+   pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize =
+      sliceDataStagingBufferSize;   // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
+   assert(pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize <=
+          pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize);
+
+   /* One-shot transfer operation with data supplied in a user
+    * pointer.
+    */
+   pipe_resource *pPipeCompressedBufferObj =
+      d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get());
+   assert(pPipeCompressedBufferObj);
+   pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context,    // context
+                                           pPipeCompressedBufferObj,   // dst buffer
+                                           PIPE_MAP_WRITE,             // usage PIPE_MAP_x
+                                           0,                          // offset
+                                           sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize,   // size
+                                           sliceDataStagingBufferPtr                                          // data
+   );
+
+   // Flush buffer_subdata batch and wait on this CPU thread for GPU work completion
+   // before deleting the source CPU buffer below
+   struct pipe_fence_handle *pUploadGPUCompletionFence = NULL;
+   pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
+                                  &pUploadGPUCompletionFence,
+                                  PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
+   assert(pUploadGPUCompletionFence);
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
+                  "buffer_subdata to upload compressed bitstream.\n");
+   pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, pUploadGPUCompletionFence, PIPE_TIMEOUT_INFINITE);
+   pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pUploadGPUCompletionFence, NULL);
+
+   // [After buffer_subdata GPU work is finished] Clear CPU staging buffer now that end_frame is called and was uploaded
+   // to GPU for DecodeFrame call.
+   pD3D12Dec->m_stagingDecodeBitstream.resize(0);
+
+   ///
+   /// Proceed to record the GPU Decode commands
+   ///
+
+   // Requested conversions by caller upper layer (none for now)
+   d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {};
+
+   ///
+   /// Record DecodeFrame operation and resource state transitions.
+   ///
+
+   // Translate input D3D12 structure
+   D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};
+
+   d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get();
+   d3d12InputArguments.CompressedBitstream.Offset = 0u;
+   constexpr uint64_t d3d12BitstreamOffsetAlignment =
+      128u;   // specified in
+              // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
+   assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
+         ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
+   d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize;
+
+   D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
+      CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
+                                           D3D12_RESOURCE_STATE_COMMON,
+                                           D3D12_RESOURCE_STATE_VIDEO_DECODE_READ),
+   };
+   pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
+
+   // Schedule reverse (back to common) transitions before command list closes for current frame
+   pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
+      CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
+                                           D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
+                                           D3D12_RESOURCE_STATE_COMMON));
+
+   ///
+   /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for
+   /// display/consumption
+   ///
+   ID3D12Resource *pOutputD3D12Texture;
+   uint outputD3D12Subresource = 0;
+
+   ///
+   /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output
+   /// and to store as future reference in DPB
+   ///
+   ID3D12Resource *pRefOnlyOutputD3D12Texture;
+   uint refOnlyOutputD3D12Subresource = 0;
+
+   if(!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec,
+                                                target,
+                                                pD3D12VideoBuffer,
+                                                &pOutputD3D12Texture,             // output
+                                                &outputD3D12Subresource,          // output
+                                                &pRefOnlyOutputD3D12Texture,      // output
+                                                &refOnlyOutputD3D12Subresource,   // output
+                                                requestedConversionArguments)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
+                      "d3d12_video_decoder_prepare_for_decode_frame\n");
+      debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
+                pD3D12Dec->m_fenceValue);
+      assert(false);
+      return;
+   }
+
+   ///
+   /// Set codec picture parameters CPU buffer
+   ///
+
+   d3d12InputArguments.NumFrameArguments =
+      1u;   // Only the codec data received from the above layer with picture params
+   d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
+      D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
+      static_cast<uint32_t>(pD3D12Dec->m_picParamsBuffer.size()),
+      pD3D12Dec->m_picParamsBuffer.data(),
+   };
+
+   if (pD3D12Dec->m_SliceControlBuffer.size() > 0) {
+      d3d12InputArguments.NumFrameArguments++;
+      d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
+         D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
+         static_cast<uint32_t>(pD3D12Dec->m_SliceControlBuffer.size()),
+         pD3D12Dec->m_SliceControlBuffer.data(),
+      };
+   }
+
+   if (pD3D12Dec->m_InverseQuantMatrixBuffer.size() > 0) {
+      d3d12InputArguments.NumFrameArguments++;
+      d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
+         D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
+         static_cast<uint32_t>(pD3D12Dec->m_InverseQuantMatrixBuffer.size()),
+         pD3D12Dec->m_InverseQuantMatrixBuffer.data(),
+      };
+   }
+
+   d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames();
+   if (D3D12_DEBUG_VERBOSE & d3d12_debug) {
+      pD3D12Dec->m_spDPBManager->print_dpb();
+   }
+
+   d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get();
+
+   // translate output D3D12 structure
+   D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {};
+   d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture;
+   d3d12OutputArguments.OutputSubresource = outputD3D12Subresource;
+
+   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
+                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
+   if (fReferenceOnly) {
+      d3d12OutputArguments.ConversionArguments.Enable = TRUE;
+
+      assert(pRefOnlyOutputD3D12Texture);
+      d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture;
+      d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource;
+
+      const D3D12_RESOURCE_DESC &descReference =
+         d3d12OutputArguments.ConversionArguments.pReferenceTexture2D->GetDesc();
+      d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space(
+         !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)),
+         util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/,
+         /* StudioRGB= */ false,
+         /* P709= */ true,
+         /* StudioYUV= */ true);
+
+      const D3D12_RESOURCE_DESC &descOutput = d3d12OutputArguments.pOutputTexture2D->GetDesc();
+      d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space(
+         !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)),
+         util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/,
+         /* StudioRGB= */ false,
+         /* P709= */ true,
+         /* StudioYUV= */ true);
+
+      const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = pD3D12Dec->m_spVideoDecoderHeap->GetDesc();
+      d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth;
+      d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight;
+   } else {
+      d3d12OutputArguments.ConversionArguments.Enable = FALSE;
+   }
+
+   CD3DX12_RESOURCE_DESC outputDesc(d3d12OutputArguments.pOutputTexture2D->GetDesc());
+   uint32_t MipLevel, PlaneSlice, ArraySlice;
+   D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource,
+                             outputDesc.MipLevels,
+                             outputDesc.ArraySize(),
+                             MipLevel,
+                             ArraySlice,
+                             PlaneSlice);
+
+   for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
+      uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
+
+      D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
+         CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
+                                              D3D12_RESOURCE_STATE_COMMON,
+                                              D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+                                              planeOutputSubresource),
+      };
+      pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
+   }
+
+   // Schedule reverse (back to common) transitions before command list closes for current frame
+   for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
+      uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
+      pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
+         CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
+                                              D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+                                              D3D12_RESOURCE_STATE_COMMON,
+                                              planeOutputSubresource));
+   }
+
+   // Record DecodeFrame
+
+   pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(),
+                                                  &d3d12OutputArguments,
+                                                  &d3d12InputArguments);
+
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
+                 pD3D12Dec->m_fenceValue);
+
+   ///
+   /// Flush work to the GPU and blocking wait until decode finishes
+   ///
+   pD3D12Dec->m_needsGPUFlush = true;
+   d3d12_video_decoder_flush(codec);
+
+   if (!pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
+      ///
+      /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
+      /// We cannot use the standalone video buffer allocation directly and we must use instead
+      /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same
+      /// allocation
+      /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes
+      ///
+
+      // Get destination resource
+      struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target);
+
+      // Get source pipe_resource
+      pipe_resource *pPipeSrc =
+         d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
+      assert(pPipeSrc);
+
+      // Copy all format subresources/texture planes
+
+      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
+         assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
+         struct pipe_box box = { 0,
+                                 0,
+                                 // src array slice, taken as Z for TEXTURE_2D_ARRAY
+                                 static_cast<int16_t>(d3d12OutputArguments.OutputSubresource),
+                                 static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0),
+                                 static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0),
+                                 1 };
+
+         pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context,
+                                                       pPipeDstViews[PlaneSlice]->texture,              // dst
+                                                       0,                                               // dst level
+                                                       0,                                               // dstX
+                                                       0,                                               // dstY
+                                                       0,                                               // dstZ
+                                                       (PlaneSlice == 0) ? pPipeSrc : pPipeSrc->next,   // src
+                                                       0,                                               // src level
+                                                       &box);
+      }
+      // Flush resource_copy_region batch and wait on this CPU thread for GPU work completion
+      struct pipe_fence_handle *completion_fence = NULL;
+      pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
+                                     &completion_fence,
+                                     PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
+      assert(completion_fence);
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
+                     "resource_copy_region on decoded frame.\n");
+      pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, completion_fence, PIPE_TIMEOUT_INFINITE);
+      pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &completion_fence, NULL);
+   }
+}
+
+/**
+ * flush any outstanding command buffers to the hardware
+ * should be called before a video_buffer is accessed by the gallium frontend again
+ *
+ * Closes and submits the decode command list (after emitting any deferred
+ * back-to-COMMON transitions), blocks this CPU thread until the GPU signals
+ * the fence, then resets the allocator/list for the next frame's recording.
+ */
+void
+d3d12_video_decoder_flush(struct pipe_video_codec *codec)
+{
+   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
+   assert(pD3D12Dec);
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+   assert(pD3D12Dec->m_spDecodeCommandQueue);
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on "
+                 "fenceValue: %d\n",
+                 pD3D12Dec->m_fenceValue);
+
+   if (!pD3D12Dec->m_needsGPUFlush) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n");
+   } else {
+      // Validate device was not removed before recording/submitting any work
+      HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
+      if (hr != S_OK) {
+         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
+                         " - D3D12Device was removed BEFORE commandlist "
+                         "execution with HR %x.\n",
+                         hr);
+         goto flush_fail;
+      }
+
+      // Close and execute command list and wait for idle on CPU blocking
+      // this method before resetting list and allocator for next submission.
+
+      // Emit the deferred reverse transitions (resources back to COMMON state)
+      // that were queued up while recording the current frame
+      if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
+         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
+                                                           pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
+         pD3D12Dec->m_transitionsBeforeCloseCmdList.clear();
+      }
+
+      hr = pD3D12Dec->m_spDecodeCommandList->Close();
+      if (FAILED(hr)) {
+         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr);
+         goto flush_fail;
+      }
+
+      // Submit, signal the fence, and block this thread until the GPU reaches
+      // the fence value (SetEventOnCompletion with a null event handle waits
+      // synchronously on the calling thread).
+      ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
+      pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
+      pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
+      pD3D12Dec->m_spFence->SetEventOnCompletion(pD3D12Dec->m_fenceValue, nullptr);
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - ExecuteCommandLists finished on signal with "
+                    "fenceValue: %d\n",
+                    pD3D12Dec->m_fenceValue);
+
+      // Safe to reset allocator + list now: the CPU wait above guarantees the
+      // GPU is no longer consuming this submission.
+      hr = pD3D12Dec->m_spCommandAllocator->Reset();
+      if (FAILED(hr)) {
+         debug_printf(
+            "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
+            hr);
+         goto flush_fail;
+      }
+
+      hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_spCommandAllocator.Get());
+      if (FAILED(hr)) {
+         debug_printf(
+            "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
+            hr);
+         goto flush_fail;
+      }
+
+      // Validate device was not removed
+      hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
+      if (hr != S_OK) {
+         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
+                         " - D3D12Device was removed AFTER commandlist "
+                         "execution with HR %x, but wasn't before.\n",
+                         hr);
+         goto flush_fail;
+      }
+
+      debug_printf(
+         "[d3d12_video_decoder] d3d12_video_decoder_flush - GPU signaled execution finalized for fenceValue: %d\n",
+         pD3D12Dec->m_fenceValue);
+
+      pD3D12Dec->m_fenceValue++;
+      pD3D12Dec->m_needsGPUFlush = false;
+   }
+   return;
+
+flush_fail:
+   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue);
+   assert(false);
+}
+
+/**
+ * Creates the D3D12 command submission objects used by the decoder:
+ * a VIDEO_DECODE command queue + allocator + command list, a CPU/GPU sync
+ * fence, and an auxiliary COPY queue. Returns false (after logging) on any
+ * creation failure; on success all objects are stored on pD3D12Dec.
+ */
+bool
+d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen,
+                                           struct d3d12_video_decoder *pD3D12Dec)
+{
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+
+   // Dedicated queue of the video decode engine type
+   D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE };
+   HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc,
+                                                      IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf()));
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
+                      "failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   // Fence used by d3d12_video_decoder_flush for CPU-blocking waits on GPU completion
+   hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pD3D12Dec->m_spFence));
+   if (FAILED(hr)) {
+      debug_printf(
+         "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n",
+         hr);
+      return false;
+   }
+
+   hr = pD3D12Screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
+                                                  IID_PPV_ARGS(pD3D12Dec->m_spCommandAllocator.GetAddressOf()));
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
+                      "CreateCommandAllocator failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   // Command list is created in the recording state, backed by the allocator above
+   hr = pD3D12Screen->dev->CreateCommandList(0,
+                                             D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
+                                             pD3D12Dec->m_spCommandAllocator.Get(),
+                                             nullptr,
+                                             IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
+
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
+                      "failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   // Separate copy engine queue for resource upload/readback work
+   D3D12_COMMAND_QUEUE_DESC copyQueueDesc = { D3D12_COMMAND_LIST_TYPE_COPY };
+   hr = pD3D12Screen->dev->CreateCommandQueue(&copyQueueDesc, IID_PPV_ARGS(pD3D12Dec->m_spCopyQueue.GetAddressOf()));
+
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
+                      "failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   return true;
+}
+
+bool
+d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen,
+                                                  struct d3d12_video_decoder *pD3D12Dec)
+{
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+
+   pD3D12Dec->m_decoderDesc = {};
+
+   D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile,
+                                                            D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE,
+                                                            D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE };
+
+   D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {};
+   decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex;
+   decodeSupport.Configuration = decodeConfiguration;
+   decodeSupport.Width = pD3D12Dec->base.width;
+   decodeSupport.Height = pD3D12Dec->base.height;
+   decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat;
+   // no info from above layer on framerate/bitrate
+   decodeSupport.FrameRate.Numerator = 0;
+   decodeSupport.FrameRate.Denominator = 0;
+   decodeSupport.BitRate = 0;
+
+   HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT,
+                                                                     &decodeSupport,
+                                                                     sizeof(decodeSupport));
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport "
+                      "failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - "
+                      "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n");
+      return false;
+   }
+
+   pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags;
+   pD3D12Dec->m_tier = decodeSupport.DecodeTier;
+
+   if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) {
+      pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures;
+   }
+
+   if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) {
+      pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height;
+   }
+
+   if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
+      pD3D12Dec->m_ConfigDecoderSpecificFlags |=
+         d3d12_video_decode_config_specific_flag_reference_only_textures_required;
+   }
+
+   pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask;
+   pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration;
+
+   hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc,
+                                                            IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder "
+                      "failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   return true;
+}
+
+bool
+d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen,
+                                               struct d3d12_video_decoder *pD3D12Dec)
+{
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+   if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen,
+                                                            pD3D12Dec,
+                                                            pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on "
+                      "d3d12_video_decoder_create_staging_bitstream_buffer\n");
+      return false;
+   }
+
+   return true;
+}
+
+bool
+d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen,
+                                                    struct d3d12_video_decoder *pD3D12Dec,
+                                                    uint64_t bufSize)
+{
+   assert(pD3D12Dec->m_spD3D12VideoDevice);
+
+   if (pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
+      pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Reset();
+   }
+
+   auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
+   auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize);
+   HRESULT hr = pD3D12Screen->dev->CreateCommittedResource(
+      &descHeap,
+      D3D12_HEAP_FLAG_NONE,
+      &descResource,
+      D3D12_RESOURCE_STATE_COMMON,
+      nullptr,
+      IID_PPV_ARGS(pD3D12Dec->m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
+   if (FAILED(hr)) {
+      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
+                      "CreateCommittedResource failed with HR %x\n",
+                      hr);
+      return false;
+   }
+
+   pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
+   return true;
+}
+
+bool
+d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec,
+                                             struct pipe_video_buffer *pCurrentDecodeTarget,
+                                             struct d3d12_video_buffer *pD3D12VideoBuffer,
+                                             ID3D12Resource **ppOutTexture2D,
+                                             uint32_t *pOutSubresourceIndex,
+                                             ID3D12Resource **ppRefOnlyOutTexture2D,
+                                             uint32_t *pRefOnlyOutSubresourceIndex,
+                                             const d3d12_video_decode_output_conversion_arguments &conversionArgs)
+{
+   if(!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) {
+      debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n");
+      return false;
+   }
+
+   // Refresh DPB active references for current frame, release memory for unused references.
+   d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec);
+
+   // Get the output texture for the current frame to be decoded
+   pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget,
+                                                                      ppOutTexture2D,
+                                                                      pOutSubresourceIndex);
+
+   auto vidBuffer = (struct d3d12_video_buffer *)(pCurrentDecodeTarget);
+   // If is_pipe_buffer_underlying_output_decode_allocation is enabled,
+   // we can just use the underlying allocation in pCurrentDecodeTarget
+   // and avoid an extra copy after decoding the frame.
+   // If this is the case, we need to handle the residency of this resource
+   // (if not we're actually creating the resources with CreateCommitedResource with
+   // residency by default)
+   if(pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
+      assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D);
+      // Make it permanently resident for video use
+      d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture);
+   }
+
+   // Get the reference only texture for the current frame to be decoded (if applicable)
+   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
+                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
+   if (fReferenceOnly) {
+      bool needsTransitionToDecodeWrite = false;
+      pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget,
+                                                           ppRefOnlyOutTexture2D,
+                                                           pRefOnlyOutSubresourceIndex,
+                                                           needsTransitionToDecodeWrite);
+      assert(needsTransitionToDecodeWrite);
+
+      CD3DX12_RESOURCE_DESC outputDesc((*ppRefOnlyOutTexture2D)->GetDesc());
+      uint32_t MipLevel, PlaneSlice, ArraySlice;
+      D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex,
+                                outputDesc.MipLevels,
+                                outputDesc.ArraySize(),
+                                MipLevel,
+                                ArraySlice,
+                                PlaneSlice);
+
+      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
+         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
+
+         D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
+            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
+                                                 D3D12_RESOURCE_STATE_COMMON,
+                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+                                                 planeOutputSubresource),
+         };
+         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
+      }
+
+      // Schedule reverse (back to common) transitions before command list closes for current frame
+      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
+         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
+         pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
+            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
+                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+                                                 D3D12_RESOURCE_STATE_COMMON,
+                                                 planeOutputSubresource));
+      }
+   }
+
+   // If decoder needs reference_only entries in the dpb, use the reference_only allocation for current frame
+   // otherwise, use the standard output resource
+   ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D;
+   uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex;
+
+   switch (pD3D12Dec->m_d3d12DecProfileType) {
+      case d3d12_video_decode_profile_type_h264:
+      {
+         d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec,
+                                                                   pCurrentFrameDPBEntry,
+                                                                   currentFrameDPBEntrySubresource);
+      } break;
+
+      default:
+      {
+         unreachable("Unsupported d3d12_video_decode_profile_type");
+      } break;
+   }
+
+   return true;
+}
+
+// (Re)configures the decoder, decoder heap and DPB manager so they match the
+// current frame's resolution, format, interlace type and maximum DPB size.
+// Returns false if any required D3D12 object could not be (re)created.
+bool
+d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec,
+                                    struct d3d12_video_buffer *pD3D12VideoBuffer,
+                                    const d3d12_video_decode_output_conversion_arguments &conversionArguments)
+{
+   // Query the codec-specific frame dimensions and DPB requirements.
+   uint32_t width;
+   uint32_t height;
+   uint16_t maxDPB;
+   bool isInterlaced;
+   d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB, isInterlaced);
+
+   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
+   D3D12_RESOURCE_DESC outputResourceDesc = pPipeD3D12DstResource->GetDesc();
+
+   pD3D12VideoBuffer->base.interlaced = isInterlaced;
+   D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested =
+      isInterlaced ? D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE;
+   // Re-create the video decoder only when the output format or interlace type changed.
+   if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) ||
+       (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) {
+      // Copy current pD3D12Dec->m_decoderDesc, modify the InterlaceType/DecodeProfile and re-create decoder.
+      D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc;
+      decoderDesc.Configuration.InterlaceType = interlaceTypeRequested;
+      decoderDesc.Configuration.DecodeProfile =
+         d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType);
+      pD3D12Dec->m_spVideoDecoder.Reset();
+      HRESULT hr =
+         pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc,
+                                                             IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
+      if (FAILED(hr)) {
+         debug_printf(
+            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n",
+            hr);
+         return false;
+      }
+      // Update state after CreateVideoDecoder succeeds only.
+      pD3D12Dec->m_decoderDesc = decoderDesc;
+   }
+
+   // Re-create the DPB manager / decoder heap when any DPB-relevant parameter
+   // (format, dimensions, max DPB size) changed or on first use.
+   if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap ||
+       pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width ||
+       pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height ||
+       pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) {
+      // Detect the combination of AOT/ReferenceOnly to configure the DPB manager
+      uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount +
+                                                                  1 /*extra slot for current picture*/ :
+                                                               maxDPB;
+      d3d12_video_decode_dpb_descriptor dpbDesc = {};
+      dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width;
+      dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height;
+      dpbDesc.Format =
+         (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format;
+      dpbDesc.fArrayOfTexture =
+         ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0);
+      dpbDesc.dpbSize = referenceCount;
+      dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask;
+      dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags &
+                                 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0);
+
+      // Create DPB manager
+      if (pD3D12Dec->m_spDPBManager == nullptr) {
+         pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen,
+                                                                                    pD3D12Dec->m_NodeMask,
+                                                                                    pD3D12Dec->m_d3d12DecProfileType,
+                                                                                    dpbDesc));
+      }
+
+      //
+      // (Re)-create decoder heap
+      //
+      D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {};
+      decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask;
+      decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration;
+      decoderHeapDesc.DecodeWidth = dpbDesc.Width;
+      decoderHeapDesc.DecodeHeight = dpbDesc.Height;
+      decoderHeapDesc.Format = dpbDesc.Format;
+      // NOTE(review): the heap uses maxDPB while dpbDesc.dpbSize may include an
+      // extra slot when conversionArguments.Enable — confirm the mismatch is intended.
+      decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB;
+      pD3D12Dec->m_spVideoDecoderHeap.Reset();
+      HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap(
+         &decoderHeapDesc,
+         IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf()));
+      if (FAILED(hr)) {
+         debug_printf(
+            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n",
+            hr);
+         return false;
+      }
+      // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only.
+      pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc;
+   }
+
+   // Remember the format used for this frame so future calls can detect changes.
+   pD3D12Dec->m_decodeFormat = outputResourceDesc.Format;
+
+   return true;
+}
+
+// Dispatch the DPB active-reference refresh to the codec-specific implementation.
+void
+d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec)
+{
+   if (pD3D12Dec->m_d3d12DecProfileType == d3d12_video_decode_profile_type_h264) {
+      d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec);
+      return;
+   }
+
+   unreachable("Unsupported d3d12_video_decode_profile_type");
+}
+
+void
+d3d12_video_decoder_get_frame_info(
+   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced)
+{
+   *pWidth = 0;
+   *pHeight = 0;
+   *pMaxDPB = 0;
+   isInterlaced = false;
+
+   switch (pD3D12Dec->m_d3d12DecProfileType) {
+      case d3d12_video_decode_profile_type_h264:
+      {
+         d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB, isInterlaced);
+      } break;
+
+      default:
+      {
+         unreachable("Unsupported d3d12_video_decode_profile_type");
+      } break;
+   }
+
+   if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) {
+      const uint32_t AlignmentMask = 31;
+      *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask;
+   }
+}
+
+///
+/// Returns the offset in bytes, starting from [buf.data() + bufferOffset], at which _targetCode_ is found
+/// Returns -1 if start code not found
+///
+int
+d3d12_video_decoder_get_next_startcode_offset(std::vector<uint8_t> &buf,
+                                              unsigned int bufferOffset,
+                                              unsigned int targetCode,
+                                              unsigned int targetCodeBitSize,
+                                              unsigned int numBitsToSearchIntoBuffer)
+{
+   struct vl_vlc vlc = { 0 };
+
+   // Shorten the buffer to be [bufferOffset, endOfBuf)
+   // NOTE(review): assumes bufferOffset <= buf.size(); otherwise this
+   // subtraction wraps around — confirm callers guarantee it.
+   unsigned int bufSize = buf.size() - bufferOffset;
+   uint8_t *bufPtr = buf.data();
+   bufPtr += bufferOffset;
+
+   /* search the first numBitsToSearchIntoBuffer bytes for a startcode */
+   // NOTE(review): despite the parameter name, each iteration advances one
+   // byte, so numBitsToSearchIntoBuffer is effectively a byte count here.
+   vl_vlc_init(&vlc, 1, (const void *const *) &bufPtr, &bufSize);
+   for (uint i = 0; i < numBitsToSearchIntoBuffer && vl_vlc_bits_left(&vlc) >= targetCodeBitSize; ++i) {
+      if (vl_vlc_peekbits(&vlc, targetCodeBitSize) == targetCode)
+         return i;
+      vl_vlc_eatbits(&vlc, 8);   // Stride is 8 bits = 1 byte
+      vl_vlc_fillbits(&vlc);
+   }
+
+   // Start code not found within the searched window.
+   return -1;
+}
+
+void
+d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
+   struct d3d12_video_decoder *codec,   // input argument, current decoder
+   struct pipe_picture_desc
+      *picture,   // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name
+   struct d3d12_video_buffer *pD3D12VideoBuffer   // input argument, target video buffer
+)
+{
+   assert(picture);
+   assert(codec);
+   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
+
+   d3d12_video_decode_profile_type profileType =
+      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
+   switch (profileType) {
+      case d3d12_video_decode_profile_type_h264:
+      {
+         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264);
+         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
+         ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
+         D3D12_RESOURCE_DESC outputResourceDesc = pPipeD3D12DstResource->GetDesc();
+         DXVA_PicParams_H264 dxvaPicParamsH264 =
+            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue,
+                                                                        codec->base.profile,
+                                                                        outputResourceDesc.Width,
+                                                                        outputResourceDesc.Height,
+                                                                        pPicControlH264);
+
+         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
+                                                                      &dxvaPicParamsH264,
+                                                                      dxvaPicParamsBufferSize);
+
+         size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264);
+         DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
+         d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture,
+                                                                   dxvaQmatrixH264);
+         d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
+      } break;
+      default:
+      {
+         unreachable("Unsupported d3d12_video_decode_profile_type");
+      } break;
+   }
+}
+
+void
+d3d12_video_decoder_prepare_dxva_slices_control(
+   struct d3d12_video_decoder *pD3D12Dec,   // input argument, current decoder
+   struct pipe_picture_desc *picture
+)
+{
+   d3d12_video_decode_profile_type profileType =
+      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile);
+   switch (profileType) {
+      case d3d12_video_decode_profile_type_h264:
+      {
+         
+         std::vector<DXVA_Slice_H264_Short> pOutSliceControlBuffers;
+         struct pipe_h264_picture_desc* picture_h264 = (struct pipe_h264_picture_desc*) picture;
+         d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pOutSliceControlBuffers, picture_h264);
+
+         assert(sizeof(pOutSliceControlBuffers.data()[0]) == sizeof(DXVA_Slice_H264_Short));
+         uint64_t DXVAStructSize = pOutSliceControlBuffers.size() * sizeof((pOutSliceControlBuffers.data()[0]));
+         assert((DXVAStructSize % sizeof(DXVA_Slice_H264_Short)) == 0);
+         d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(pD3D12Dec,
+                                                                            pOutSliceControlBuffers.data(),
+                                                                            DXVAStructSize);
+         assert(pD3D12Dec->m_SliceControlBuffer.size() == DXVAStructSize);
+      } break;
+      default:
+      {
+         unreachable("Unsupported d3d12_video_decode_profile_type");
+      } break;
+   }
+}
+
+void
+d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(struct d3d12_video_decoder *pD3D12Dec,
+                                                                   void *pDXVAStruct,
+                                                                   uint64_t DXVAStructSize)
+{
+   if (pD3D12Dec->m_SliceControlBuffer.capacity() < DXVAStructSize) {
+      pD3D12Dec->m_SliceControlBuffer.reserve(DXVAStructSize);
+   }
+
+   pD3D12Dec->m_SliceControlBuffer.resize(DXVAStructSize);
+   memcpy(pD3D12Dec->m_SliceControlBuffer.data(), pDXVAStruct, DXVAStructSize);
+}
+
+void
+d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec,
+                                                         void *pDXVAStruct,
+                                                         uint64_t DXVAStructSize)
+{
+   if (pD3D12Dec->m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
+      pD3D12Dec->m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
+   }
+
+   pD3D12Dec->m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
+   memcpy(pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
+}
+
+void
+d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec,
+                                                             void *pDXVAStruct,
+                                                             uint64_t DXVAStructSize)
+{
+   if (pD3D12Dec->m_picParamsBuffer.capacity() < DXVAStructSize) {
+      pD3D12Dec->m_picParamsBuffer.reserve(DXVAStructSize);
+   }
+
+   pD3D12Dec->m_picParamsBuffer.resize(DXVAStructSize);
+   memcpy(pD3D12Dec->m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
+}
+
+bool
+d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
+                                     d3d12_video_decode_profile_type profileType)
+{
+   bool supportedProfile = false;
+   switch (profileType) {
+      case d3d12_video_decode_profile_type_h264:
+         supportedProfile = true;
+         break;
+      default:
+         supportedProfile = false;
+         break;
+   }
+
+   return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile;
+}
+
+// Maps every supported gallium AVC/H.264 profile variant onto the single
+// internal H.264 decode profile type; anything else is a caller bug.
+d3d12_video_decode_profile_type
+d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)
+{
+   switch (profile) {
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
+         return d3d12_video_decode_profile_type_h264;
+      default:
+         unreachable("Unsupported pipe video profile");
+   }
+}
+
+// Maps a gallium video profile to the matching D3D12 decode profile GUID.
+// Unsupported profiles yield the zero GUID rather than asserting, so callers
+// can use this for capability queries.
+GUID
+d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)
+{
+   switch (profile) {
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
+         return D3D12_VIDEO_DECODE_PROFILE_H264;
+      default:
+         return {};
+   }
+}
+
+// Resolves the internal decode profile type to its D3D12 profile GUID.
+// Unlike the pipe-profile conversion above, an unsupported type here is a
+// programming error and trips unreachable().
+GUID
+d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType)
+{
+   if (profileType == d3d12_video_decode_profile_type_h264)
+      return D3D12_VIDEO_DECODE_PROFILE_H264;
+
+   unreachable("Unsupported d3d12_video_decode_profile_type");
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec.h b/src/gallium/drivers/d3d12/d3d12_video_dec.h
new file mode 100644 (file)
index 0000000..0b69e6c
--- /dev/null
@@ -0,0 +1,244 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_VIDEO_DEC_H
+#define D3D12_VIDEO_DEC_H
+
+#include "d3d12_video_types.h"
+#include "d3d12_video_dec_references_mgr.h"
+
+///
+/// Pipe video interface starts
+///
+
+/**
+ * creates a video decoder
+ */
+struct pipe_video_codec *
+d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ);
+
+/**
+ * destroy this video decoder
+ */
+void
+d3d12_video_decoder_destroy(struct pipe_video_codec *codec);
+
+/**
+ * start decoding of a new frame
+ */
+void
+d3d12_video_decoder_begin_frame(struct pipe_video_codec * codec,
+                                struct pipe_video_buffer *target,
+                                struct pipe_picture_desc *picture);
+
+/**
+ * decode a bitstream
+ */
+void
+d3d12_video_decoder_decode_bitstream(struct pipe_video_codec * codec,
+                                     struct pipe_video_buffer *target,
+                                     struct pipe_picture_desc *picture,
+                                     unsigned                  num_buffers,
+                                     const void *const *       buffers,
+                                     const unsigned *          sizes);
+
+/**
+ * end decoding of the current frame
+ */
+void
+d3d12_video_decoder_end_frame(struct pipe_video_codec * codec,
+                              struct pipe_video_buffer *target,
+                              struct pipe_picture_desc *picture);
+
+/**
+ * flush any outstanding command buffers to the hardware
+ * should be called before a video_buffer is accessed by the gallium frontend again
+ */
+void
+d3d12_video_decoder_flush(struct pipe_video_codec *codec);
+
+///
+/// Pipe video interface ends
+///
+
+///
+/// d3d12_video_decoder functions starts
+///
+
+// D3D12 implementation state behind the pipe_video_codec decode interface.
+// Owns the D3D12 decoder/heap/command objects plus the per-frame staging
+// buffers (bitstream, DXVA picture params, qmatrix, slice control).
+struct d3d12_video_decoder
+{
+   // Gallium codec interface; kept as the first member so pipe_video_codec*
+   // handed out to the frontend can be cast back to d3d12_video_decoder*.
+   struct pipe_video_codec base;
+   struct pipe_screen *    m_screen;
+   struct d3d12_screen *   m_pD3D12Screen;
+
+   ///
+   /// D3D12 objects and context info
+   ///
+
+   // Multi-adapter node selection; fixed at creation (single node 0 for now).
+   const uint m_NodeMask  = 0u;
+   const uint m_NodeIndex = 0u;
+
+   // Fence used to track decode submissions; m_fenceValue starts at 1 and is
+   // also stamped into the DXVA picture params of each frame.
+   ComPtr<ID3D12Fence> m_spFence;
+   uint                m_fenceValue = 1u;
+
+   ComPtr<ID3D12VideoDevice>             m_spD3D12VideoDevice;
+   ComPtr<ID3D12VideoDecoder>            m_spVideoDecoder;
+   ComPtr<ID3D12VideoDecoderHeap>        m_spVideoDecoderHeap;
+   ComPtr<ID3D12CommandQueue>            m_spDecodeCommandQueue;
+   ComPtr<ID3D12CommandAllocator>        m_spCommandAllocator;
+   ComPtr<ID3D12VideoDecodeCommandList1> m_spDecodeCommandList;
+   ComPtr<ID3D12CommandQueue>            m_spCopyQueue;
+
+   // Resource barriers queued during the frame to be recorded right before the
+   // decode command list is closed (e.g. DECODE_WRITE -> COMMON transitions).
+   std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList;
+
+   D3D12_VIDEO_DECODER_DESC               m_decoderDesc     = {};
+   D3D12_VIDEO_DECODER_HEAP_DESC          m_decoderHeapDesc = {};
+   D3D12_VIDEO_DECODE_TIER                m_tier            = D3D12_VIDEO_DECODE_TIER_NOT_SUPPORTED;
+   DXGI_FORMAT                            m_decodeFormat;
+   D3D12_FEATURE_DATA_FORMAT_INFO         m_decodeFormatInfo           = {};
+   D3D12_VIDEO_DECODE_CONFIGURATION_FLAGS m_configurationFlags         = D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_NONE;
+   GUID                                   m_d3d12DecProfile            = {};
+   d3d12_video_decode_profile_type        m_d3d12DecProfileType        = {};
+   // Bitmask of d3d12_video_decode_config_specific_flag_* values.
+   uint                                   m_ConfigDecoderSpecificFlags = 0u;
+
+   ///
+   /// Current frame tracked state
+   ///
+
+   // Tracks DPB and reference picture textures
+   std::unique_ptr<d3d12_video_decoder_references_manager> m_spDPBManager;
+
+   // Holds pointers to current decode output target texture and reference textures from upper layer
+   struct pipe_video_buffer *m_pCurrentDecodeTarget;
+   struct pipe_video_buffer **m_pCurrentReferenceTargets;
+
+   // Holds the input bitstream buffer while it's being constructed in decode_bitstream calls
+   std::vector<uint8_t> m_stagingDecodeBitstream;
+
+   const uint64_t m_InitialCompBitstreamGPUBufferSize = (1024 /*1K*/ * 1024 /*1MB*/) * 8 /*8 MB*/;   // 8MB
+
+   // Holds the input bitstream buffer in GPU video memory
+   ComPtr<ID3D12Resource> m_curFrameCompressedBitstreamBuffer;
+   uint64_t               m_curFrameCompressedBitstreamBufferAllocatedSize =
+      m_InitialCompBitstreamGPUBufferSize;   // Actual number of allocated bytes available in the buffer (after
+                                             // m_curFrameCompressedBitstreamBufferPayloadSize might be garbage)
+   uint64_t m_curFrameCompressedBitstreamBufferPayloadSize = 0u;   // Actual number of bytes of valid data
+
+   // Holds a buffer for the DXVA struct layout of the picture params of the current frame
+   std::vector<uint8_t> m_picParamsBuffer;   // size() has the byte size of the currently held picparams ; capacity()
+                                             // has the underlying container allocation size
+
+   // Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX of the
+   // current frame m_InverseQuantMatrixBuffer.size() == 0 means no quantization matrix buffer is set for current frame
+   std::vector<uint8_t> m_InverseQuantMatrixBuffer;   // size() has the byte size of the currently held
+                                                      // VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX ;
+                                                      // capacity() has the underlying container allocation size
+
+   // Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL of the current frame
+   // m_SliceControlBuffer.size() == 0 means no slice control buffer is set for current frame
+   std::vector<uint8_t>
+      m_SliceControlBuffer;   // size() has the byte size of the currently held VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL ;
+                              // capacity() has the underlying container allocation size
+
+   // Indicates if GPU commands have not been flushed and are pending.
+   bool m_needsGPUFlush = false;
+};
+
+bool
+d3d12_video_decoder_create_command_objects(const struct d3d12_screen * pD3D12Screen,
+                                           struct d3d12_video_decoder *pD3D12Dec);
+bool
+d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen * pD3D12Screen,
+                                                  struct d3d12_video_decoder *pD3D12Dec);
+bool
+d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen * pD3D12Screen,
+                                               struct d3d12_video_decoder *pD3D12Dec);
+bool
+d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen * pD3D12Screen,
+                                                    struct d3d12_video_decoder *pD3D12Dec,
+                                                    uint64_t                    bufSize);
+void
+d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec,
+                                                struct pipe_video_buffer *target,
+                                                struct pipe_picture_desc *picture);
+bool
+d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec,
+                                             struct pipe_video_buffer *  pCurrentDecodeTarget,
+                                             struct d3d12_video_buffer * pD3D12VideoBuffer,
+                                             ID3D12Resource **           ppOutTexture2D,
+                                             uint32_t *                  pOutSubresourceIndex,
+                                             ID3D12Resource **           ppRefOnlyOutTexture2D,
+                                             uint32_t *                  pRefOnlyOutSubresourceIndex,
+                                             const d3d12_video_decode_output_conversion_arguments &conversionArgs);
+void
+d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec);
+bool
+d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *                          pD3D12Dec,
+                                    struct d3d12_video_buffer *                           pD3D12VideoBuffer,
+                                    const d3d12_video_decode_output_conversion_arguments &conversionArguments);
+void
+d3d12_video_decoder_get_frame_info(
+   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced);
+void
+d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(struct d3d12_video_decoder *codec,
+                                                                   struct pipe_picture_desc *  picture,
+                                                                   struct d3d12_video_buffer * pD3D12VideoBuffer);
+// Returns the current frame's staged DXVA picture parameters reinterpreted as
+// the codec-specific struct T (e.g. DXVA_PicParams_H264). The pointer aliases
+// codec->m_picParamsBuffer and is only valid until that buffer is resized.
+template <typename T>
+T *
+d3d12_video_decoder_get_current_dxva_picparams(struct d3d12_video_decoder *codec)
+{
+   return reinterpret_cast<T *>(codec->m_picParamsBuffer.data());
+}
+bool
+d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
+                                     d3d12_video_decode_profile_type         profileType);
+d3d12_video_decode_profile_type
+d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile);
+GUID
+d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType);
+void
+d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *codec,
+                                                             void *                      pDXVABuffer,
+                                                             uint64_t                    DXVABufferSize);
+void
+d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec,
+                                                         void *                      pDXVAStruct,
+                                                         uint64_t                    DXVAStructSize);
+void
+d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture);
+void
+d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(struct d3d12_video_decoder *pD3D12Dec,
+                                                                   void *                      pDXVAStruct,
+                                                                   uint64_t                    DXVAStructSize);
+int
+d3d12_video_decoder_get_next_startcode_offset(std::vector<uint8_t> &buf,
+                                              unsigned int          bufferOffset,
+                                              unsigned int          targetCode,
+                                              unsigned int          targetCodeBitSize,
+                                              unsigned int          numBitsToSearchIntoBuffer);
+
+///
+/// d3d12_video_decoder functions ends
+///
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp
new file mode 100644 (file)
index 0000000..41bf2e5
--- /dev/null
@@ -0,0 +1,602 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_video_dec.h"
+#include "d3d12_video_dec_h264.h"
+
+// Re-synchronizes the DPB manager with the reference set of the current frame:
+// maps each valid RefFrameList entry to its DPB Index7Bits, marks only those
+// entries as in-use, and frees the texture memory of everything else.
+void
+d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder *pD3D12Dec)
+{
+   // Method overview
+   // 1. Codec specific strategy in switch statement regarding reference frames eviction policy. Should only mark active
+   // DPB references, leaving evicted ones as unused
+   // 2. Call release_unused_references_texture_memory(); at the end of this method. Any references (and texture
+   // allocations associated)
+   //    that were left not marked as used in m_spDPBManager by step (1) are lost.
+
+   // Assign DXVA original Index7Bits indices to current frame and references
+   DXVA_PicParams_H264 *pCurrPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
+   for (uint8_t i = 0; i < 16; i++) {
+      // From H264 DXVA spec:
+      // Index7Bits
+      //     An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture
+      //     parameters structure(section 4.0) or the RefPicList member of the slice control data
+      //     structure(section 6.0) When Index7Bits is used in the CurrPic and RefFrameList members of the picture
+      //     parameters structure, the value directly specifies the DXVA index of an uncompressed surface. When
+      //     Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies
+      //     the surface indirectly, as an index into the RefFrameList array of the associated picture parameters
+      //     structure.For more information, see section 6.2. In all cases, when Index7Bits does not contain a valid
+      //     index, the value is 127.
+      // NOTE(review): assumes m_pCurrentReferenceTargets[i] is kept positionally
+      // in sync with RefFrameList[i] by the caller — confirm against d3d12_video_dec.cpp.
+      if (pCurrPicParams->RefFrameList[i].bPicEntry != DXVA_H264_INVALID_PICTURE_ENTRY_VALUE) {
+         pCurrPicParams->RefFrameList[i].Index7Bits =
+            pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentReferenceTargets[i]);
+      }
+   }
+
+   // Everything not re-marked below is considered evicted.
+   pD3D12Dec->m_spDPBManager->mark_all_references_as_unused();
+   pD3D12Dec->m_spDPBManager->mark_references_in_use(pCurrPicParams->RefFrameList);
+
+   // Releases the underlying reference picture texture objects of all references that were not marked as used in this
+   // method.
+   pD3D12Dec->m_spDPBManager->release_unused_references_texture_memory();
+
+   pCurrPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentDecodeTarget);
+
+   debug_printf("[d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input] DXVA_PicParams_H264 converted "
+                 "from pipe_h264_picture_desc (No reference index remapping)\n");
+   d3d12_video_decoder_log_pic_params_h264(pCurrPicParams);
+}
+
+void
+d3d12_video_decoder_get_frame_info_h264(
+   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced)
+{
+   auto pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
+   // wFrameWidthInMbsMinus1 Width of the frame containing this picture, in units of macroblocks, minus 1. (The width in
+   // macroblocks is wFrameWidthInMbsMinus1 plus 1.) wFrameHeightInMbsMinus1 Height of the frame containing this
+   // picture, in units of macroblocks, minus 1. (The height in macroblocks is wFrameHeightInMbsMinus1 plus 1.) When the
+   // picture is a field, the height of the frame is twice the height of the picture and is an integer multiple of 2 in
+   // units of macroblocks.
+   *pWidth = (pPicParams->wFrameWidthInMbsMinus1 + 1) * 16;
+   *pHeight = (pPicParams->wFrameHeightInMbsMinus1 + 1) / (pPicParams->frame_mbs_only_flag ? 1 : 2);
+   *pHeight = (2 - pPicParams->frame_mbs_only_flag) * *pHeight;
+   *pHeight = *pHeight * 16;
+   *pMaxDPB = pPicParams->num_ref_frames + 1;
+   isInterlaced = !pPicParams->frame_mbs_only_flag;
+}
+
+///
+/// Pushes the current frame as next reference, updates the DXVA H264 structure with the indices of the DPB and
+/// transitions the references
+///
+void
+d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder *pD3D12Dec,
+                                                          ID3D12Resource *pTexture2D,
+                                                          uint32_t subresourceIndex)
+{
+   DXVA_PicParams_H264 *pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
+   pPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->store_future_reference(pPicParams->CurrPic.Index7Bits,
+                                                                                      pD3D12Dec->m_spVideoDecoderHeap,
+                                                                                      pTexture2D,
+                                                                                      subresourceIndex);
+
+   // From H264 DXVA spec:
+   // Index7Bits
+   //     An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture
+   //     parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0)
+   //     When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value
+   //     directly specifies the DXVA index of an uncompressed surface. When Index7Bits is used in the RefPicList member
+   //     of the slice control data structure, the value identifies the surface indirectly, as an index into the
+   //     RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. In
+   //     all cases, when Index7Bits does not contain a valid index, the value is 127.
+
+   std::vector<D3D12_RESOURCE_BARRIER>
+      neededStateTransitions;   // Returned by update_entries to perform by the method caller
+   pD3D12Dec->m_spDPBManager->update_entries(
+      d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec)->RefFrameList,
+      neededStateTransitions);
+
+   pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(neededStateTransitions.size(), neededStateTransitions.data());
+
+   // Schedule reverse (back to common) transitions before command list closes for current frame
+   for (auto BarrierDesc : neededStateTransitions) {
+      std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
+      pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(BarrierDesc);
+   }
+
+   debug_printf(
+      "[d3d12_video_decoder_prepare_current_frame_references_h264] DXVA_PicParams_H264 after index remapping)\n");
+   d3d12_video_decoder_log_pic_params_h264(
+      d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec));
+}
+
+void
+d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder *pD3D12Dec,
+                                                     std::vector<DXVA_Slice_H264_Short> &pOutSliceControlBuffers,
+                                                     struct pipe_h264_picture_desc *picture_h264)
+{
+   debug_printf("[d3d12_video_decoder_h264] Upper layer reported %d slices for this frame, parsing them below...\n",
+                  picture_h264->slice_count);
+   size_t processedBitstreamBytes = 0u;
+   size_t sliceIdx = 0;
+   bool sliceFound = false;
+   do {
+      DXVA_Slice_H264_Short currentSliceEntry = {};
+      // From DXVA spec: All bits for the slice are located within the corresponding bitstream data buffer.
+      currentSliceEntry.wBadSliceChopping = 0u;
+      sliceFound = d3d12_video_decoder_get_next_slice_size_and_offset_h264(pD3D12Dec->m_stagingDecodeBitstream,
+                                                                           processedBitstreamBytes,
+                                                                           currentSliceEntry.SliceBytesInBuffer,
+                                                                           currentSliceEntry.BSNALunitDataLocation);
+
+      if (sliceFound) {
+         d3d12_video_decoder_nal_unit_type_h264 naluType = (d3d12_video_decoder_nal_unit_type_h264)(
+            pD3D12Dec->m_stagingDecodeBitstream[currentSliceEntry.BSNALunitDataLocation +
+                                                (DXVA_H264_START_CODE_LEN_BITS / 8)] &
+            0x1F);
+         debug_printf("[d3d12_video_decoder_h264] Detected slice (NALU Type %d) index %ld with size %d and offset %d "
+                        "for frame with "
+                        "fenceValue: %d\n",
+                        naluType,
+                        sliceIdx,
+                        currentSliceEntry.SliceBytesInBuffer,
+                        currentSliceEntry.BSNALunitDataLocation,
+                        pD3D12Dec->m_fenceValue);
+
+         sliceIdx++;
+         processedBitstreamBytes += currentSliceEntry.SliceBytesInBuffer;
+         pOutSliceControlBuffers.push_back(currentSliceEntry);
+      }
+   } while (sliceFound && (sliceIdx < picture_h264->slice_count));
+   assert(pOutSliceControlBuffers.size() == picture_h264->slice_count);
+}
+
+// Finds the next slice NALU in buf at or after bufferOffset. On success
+// returns true and writes the slice's absolute byte offset (including its
+// start code) into outSliceOffset and its byte size into outSliceSize.
+// Returns false when no further start code is found or the next NALU is not
+// a slice type.
+bool
+d3d12_video_decoder_get_next_slice_size_and_offset_h264(std::vector<uint8_t> &buf,
+                                                        unsigned int bufferOffset,
+                                                        uint32_t &outSliceSize,
+                                                        uint32_t &outSliceOffset)
+{
+   // Search the rest of the full frame buffer after the offset
+   // NOTE(review): despite the "Bits" name this is buf.size() - bufferOffset,
+   // a byte count — confirm the unit expected by
+   // d3d12_video_decoder_get_next_startcode_offset.
+   uint numBitsToSearchIntoBuffer = buf.size() - bufferOffset;
+   // currentSlicePosition is relative to bufferOffset (or negative when not found).
+   int currentSlicePosition = d3d12_video_decoder_get_next_startcode_offset(buf,
+                                                                            bufferOffset,
+                                                                            DXVA_H264_START_CODE,
+                                                                            DXVA_H264_START_CODE_LEN_BITS,
+                                                                            numBitsToSearchIntoBuffer);
+
+   // Return false now if we didn't find a next slice based on the bufferOffset parameter
+   if (currentSlicePosition < 0) {
+      return false;
+   } else {
+      // Save the absolute buffer offset until the next slice in the output param
+      outSliceOffset = currentSlicePosition + bufferOffset;
+
+      // Found a next NALU, make sure it's a slice:
+      // nal_unit_type lives in the low 5 bits of the byte following the start code.
+      d3d12_video_decoder_nal_unit_type_h264 naluType =
+         (d3d12_video_decoder_nal_unit_type_h264)(buf[outSliceOffset + (DXVA_H264_START_CODE_LEN_BITS / 8)] & 0x1F);
+
+      bool isNaluSliceType = (naluType == type_slice) || (naluType == type_slice_part_A) ||
+                             (naluType == type_slice_part_B) || (naluType == type_slice_part_C) ||
+                             (naluType == type_slice_IDR) || (naluType == type_slice_aux) ||
+                             (naluType == type_slice_layer_ext);
+
+      if (!isNaluSliceType) {
+         // We found a NALU, but it's not a slice
+         return false;
+      } else {
+         // We did find a next slice based on the bufferOffset parameter
+
+         // Skip current start code, to get the slice after this, to calculate its size
+         bufferOffset += (DXVA_H264_START_CODE_LEN_BITS / 8 /*convert bits to bytes*/);
+         numBitsToSearchIntoBuffer = buf.size() - bufferOffset;
+
+         int c_signedStartCodeLen = (DXVA_H264_START_CODE_LEN_BITS / 8 /*convert bits to bytes*/);
+         // The start-code length is added back so nextSlicePosition and
+         // currentSlicePosition are relative to the same (original) bufferOffset.
+         int nextSlicePosition = c_signedStartCodeLen   // Takes into account the skipped start code
+                                 + d3d12_video_decoder_get_next_startcode_offset(buf,
+                                                                                 bufferOffset,
+                                                                                 DXVA_H264_START_CODE,
+                                                                                 DXVA_H264_START_CODE_LEN_BITS,
+                                                                                 numBitsToSearchIntoBuffer);
+
+         if (nextSlicePosition <
+             c_signedStartCodeLen)   // if no slice found, d3d12_video_decoder_get_next_startcode_offset returns - 1
+         {
+            // This means currentSlicePosition points to the last slice in the buffer
+            outSliceSize = buf.size() - outSliceOffset;
+         } else {
+            // This means there are more slices after the one pointed by currentSlicePosition
+            outSliceSize = nextSlicePosition - currentSlicePosition;
+         }
+         return true;
+      }
+   }
+}
+
+// Debug helper: prints the fields of one DXVA H264 picture entry
+// (Index7Bits, AssociatedFlag and the combined bPicEntry byte).
+static void
+d3d12_video_decoder_log_pic_entry_h264(DXVA_PicEntry_H264 &picEntry)
+{
+   debug_printf("\t\tIndex7Bits: %d\n", picEntry.Index7Bits);
+   debug_printf("\t\tAssociatedFlag: %d\n", picEntry.AssociatedFlag);
+   debug_printf("\t\tbPicEntry: %d\n", picEntry.bPicEntry);
+}
+
+// Debug helper: dumps every field of a DXVA_PicParams_H264 structure,
+// followed by the current picture entry and each valid RefFrameList entry
+// (entries whose bPicEntry equals DXVA_H264_INVALID_PICTURE_ENTRY_VALUE are
+// skipped). Output goes through debug_printf only; no state is modified.
+void
+d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 *pPicParams)
+{
+   debug_printf("\n=============================================\n");
+   debug_printf("wFrameWidthInMbsMinus1 = %d\n", pPicParams->wFrameWidthInMbsMinus1);
+   debug_printf("wFrameHeightInMbsMinus1 = %d\n", pPicParams->wFrameHeightInMbsMinus1);
+   debug_printf("CurrPic.Index7Bits = %d\n", pPicParams->CurrPic.Index7Bits);
+   debug_printf("CurrPic.AssociatedFlag = %d\n", pPicParams->CurrPic.AssociatedFlag);
+   debug_printf("num_ref_frames = %d\n", pPicParams->num_ref_frames);
+   debug_printf("sp_for_switch_flag = %d\n", pPicParams->sp_for_switch_flag);
+   debug_printf("field_pic_flag = %d\n", pPicParams->field_pic_flag);
+   debug_printf("MbaffFrameFlag = %d\n", pPicParams->MbaffFrameFlag);
+   debug_printf("residual_colour_transform_flag = %d\n", pPicParams->residual_colour_transform_flag);
+   debug_printf("chroma_format_idc = %d\n", pPicParams->chroma_format_idc);
+   debug_printf("RefPicFlag = %d\n", pPicParams->RefPicFlag);
+   debug_printf("IntraPicFlag = %d\n", pPicParams->IntraPicFlag);
+   debug_printf("constrained_intra_pred_flag = %d\n", pPicParams->constrained_intra_pred_flag);
+   debug_printf("MinLumaBipredSize8x8Flag = %d\n", pPicParams->MinLumaBipredSize8x8Flag);
+   debug_printf("weighted_pred_flag = %d\n", pPicParams->weighted_pred_flag);
+   debug_printf("weighted_bipred_idc = %d\n", pPicParams->weighted_bipred_idc);
+   debug_printf("MbsConsecutiveFlag = %d\n", pPicParams->MbsConsecutiveFlag);
+   debug_printf("frame_mbs_only_flag = %d\n", pPicParams->frame_mbs_only_flag);
+   debug_printf("transform_8x8_mode_flag = %d\n", pPicParams->transform_8x8_mode_flag);
+   debug_printf("StatusReportFeedbackNumber = %d\n", pPicParams->StatusReportFeedbackNumber);
+   debug_printf("CurrFieldOrderCnt[0] = %d\n", pPicParams->CurrFieldOrderCnt[0]);
+   debug_printf("CurrFieldOrderCnt[1] = %d\n", pPicParams->CurrFieldOrderCnt[1]);
+   debug_printf("chroma_qp_index_offset = %d\n", pPicParams->chroma_qp_index_offset);
+   debug_printf("second_chroma_qp_index_offset = %d\n", pPicParams->second_chroma_qp_index_offset);
+   debug_printf("ContinuationFlag = %d\n", pPicParams->ContinuationFlag);
+   debug_printf("pic_init_qp_minus26 = %d\n", pPicParams->pic_init_qp_minus26);
+   debug_printf("pic_init_qs_minus26 = %d\n", pPicParams->pic_init_qs_minus26);
+   debug_printf("num_ref_idx_l0_active_minus1 = %d\n", pPicParams->num_ref_idx_l0_active_minus1);
+   debug_printf("num_ref_idx_l1_active_minus1 = %d\n", pPicParams->num_ref_idx_l1_active_minus1);
+   debug_printf("frame_num = %d\n", pPicParams->frame_num);
+   debug_printf("log2_max_frame_num_minus4 = %d\n", pPicParams->log2_max_frame_num_minus4);
+   debug_printf("pic_order_cnt_type = %d\n", pPicParams->pic_order_cnt_type);
+   debug_printf("log2_max_pic_order_cnt_lsb_minus4 = %d\n", pPicParams->log2_max_pic_order_cnt_lsb_minus4);
+   debug_printf("delta_pic_order_always_zero_flag = %d\n", pPicParams->delta_pic_order_always_zero_flag);
+   debug_printf("direct_8x8_inference_flag = %d\n", pPicParams->direct_8x8_inference_flag);
+   debug_printf("entropy_coding_mode_flag = %d\n", pPicParams->entropy_coding_mode_flag);
+   debug_printf("pic_order_present_flag = %d\n", pPicParams->pic_order_present_flag);
+   debug_printf("deblocking_filter_control_present_flag = %d\n", pPicParams->deblocking_filter_control_present_flag);
+   debug_printf("redundant_pic_cnt_present_flag = %d\n", pPicParams->redundant_pic_cnt_present_flag);
+   debug_printf("num_slice_groups_minus1 = %d\n", pPicParams->num_slice_groups_minus1);
+   debug_printf("slice_group_map_type = %d\n", pPicParams->slice_group_map_type);
+   debug_printf("slice_group_change_rate_minus1 = %d\n", pPicParams->slice_group_change_rate_minus1);
+   debug_printf("Reserved8BitsB = %d\n", pPicParams->Reserved8BitsB);
+   debug_printf("UsedForReferenceFlags 0x%08x\n", pPicParams->UsedForReferenceFlags);
+   debug_printf("NonExistingFrameFlags 0x%08x\n", pPicParams->NonExistingFrameFlags);
+
+   const UINT16 RefPicListLength = _countof(DXVA_PicParams_H264::RefFrameList);
+
+   debug_printf("[D3D12 Video Decoder H264 DXVA PicParams info]\n"
+                 "\t[Current Picture Entry]\n");
+   d3d12_video_decoder_log_pic_entry_h264(pPicParams->CurrPic);
+
+   debug_printf("[Decode RefFrameList Pic_Entry list] Entries where bPicEntry == "
+                 "DXVA_H264_INVALID_PICTURE_ENTRY_VALUE are not printed\n");
+   for (uint32_t refIdx = 0; refIdx < RefPicListLength; refIdx++) {
+      if (DXVA_H264_INVALID_PICTURE_ENTRY_VALUE != pPicParams->RefFrameList[refIdx].bPicEntry) {
+         debug_printf("\t[Reference PicEntry %d]\n", refIdx);
+         d3d12_video_decoder_log_pic_entry_h264(pPicParams->RefFrameList[refIdx]);
+         debug_printf("\t\tFrameNumList: %d\n"
+                       "\t\tFieldOrderCntList[0]: %d\n"
+                       "\t\tFieldOrderCntList[1]: %d\n",
+                       pPicParams->FrameNumList[refIdx],
+                       pPicParams->FieldOrderCntList[refIdx][0],
+                       pPicParams->FieldOrderCntList[refIdx][1]);
+      }
+   }
+}
+
+DXVA_PicParams_H264
+d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(
+   uint32_t frameNum,
+   pipe_video_profile profile,
+   uint32_t decodeWidth,    // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other
+                            // codecs.
+   uint32_t decodeHeight,   // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other
+                            // codecs.
+   pipe_h264_picture_desc *pPipeDesc)
+{
+   DXVA_PicParams_H264 dxvaStructure = {};
+
+   // uint16_t  wFrameWidthInMbsMinus1;
+   uint width_in_mb = decodeWidth / D3D12_VIDEO_H264_MB_IN_PIXELS;
+   dxvaStructure.wFrameWidthInMbsMinus1 = width_in_mb - 1;
+   // uint16_t  wFrameHeightInMbsMinus1;
+   uint height_in_mb = static_cast<uint>(std::ceil(decodeHeight / D3D12_VIDEO_H264_MB_IN_PIXELS));
+   dxvaStructure.wFrameHeightInMbsMinus1 = height_in_mb - 1;
+
+   // CurrPic.Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264
+   // CurrPic.AssociatedFlag
+   // If field_pic_flag is 1, the AssociatedFlag field in CurrPic is interpreted as follows:
+   // 0 -> The current picture is the top field of the uncompressed destination frame surface.
+   // 1 -> The current picture is the bottom field of the uncompressed destination frame surface.
+   // If field_pic_flag is 0, AssociatedFlag has no meaning and shall be 0, and the accelerator shall ignore the value.
+   if (pPipeDesc->field_pic_flag) {
+      dxvaStructure.CurrPic.AssociatedFlag = (pPipeDesc->bottom_field_flag == 0) ? 0 : 1;
+   } else {
+      dxvaStructure.CurrPic.AssociatedFlag = 0;
+   }
+
+   // uint8_t   num_ref_frames;
+   dxvaStructure.num_ref_frames = pPipeDesc->num_ref_frames;
+   // union {
+   // struct {
+   // uint16_t  field_pic_flag                 : 1;
+   dxvaStructure.field_pic_flag = pPipeDesc->field_pic_flag;
+   // From H264 codec spec
+   // The variable MbaffFrameFlag is derived as
+   // MbaffFrameFlag = ( mb_adaptive_frame_field_flag && !field_pic_flag )
+   dxvaStructure.MbaffFrameFlag = (pPipeDesc->pps->sps->mb_adaptive_frame_field_flag && !pPipeDesc->field_pic_flag);
+   // uint16_t  residual_colour_transform_flag :1
+   dxvaStructure.residual_colour_transform_flag = pPipeDesc->pps->sps->separate_colour_plane_flag;
+   // uint16_t sp_for_switch_flag // switch slices are not supported by VA
+   dxvaStructure.sp_for_switch_flag = 0;
+   // uint16_t  chroma_format_idc              : 2;
+   assert(pPipeDesc->pps->sps->chroma_format_idc == 1);   // Not supported otherwise
+   dxvaStructure.chroma_format_idc = 1;   // This is always 4:2:0 for D3D12 Video. NV12/P010 DXGI formats only.
+   // uint16_t  RefPicFlag                     : 1;
+   dxvaStructure.RefPicFlag = pPipeDesc->is_reference;
+
+   // uint16_t  constrained_intra_pred_flag    : 1;
+   dxvaStructure.constrained_intra_pred_flag = pPipeDesc->pps->constrained_intra_pred_flag;
+   // uint16_t  weighted_pred_flag             : 1;
+   dxvaStructure.weighted_pred_flag = pPipeDesc->pps->weighted_pred_flag;
+   // uint16_t  weighted_bipred_idc            : 2;
+   dxvaStructure.weighted_bipred_idc = pPipeDesc->pps->weighted_bipred_idc;
+   // From DXVA spec:
+   // The value shall be 1 unless the restricted-mode profile in use explicitly supports the value 0.
+   // FMO is not supported by VAAPI
+   dxvaStructure.MbsConsecutiveFlag = 1;
+   // uint16_t  frame_mbs_only_flag            : 1;
+   dxvaStructure.frame_mbs_only_flag = pPipeDesc->pps->sps->frame_mbs_only_flag;
+   // uint16_t  transform_8x8_mode_flag        : 1;
+   dxvaStructure.transform_8x8_mode_flag = pPipeDesc->pps->transform_8x8_mode_flag;
+   // };
+   // uint16_t  wBitFields;
+   // };
+   // uint8_t  bit_depth_luma_minus8;
+   dxvaStructure.bit_depth_luma_minus8 = pPipeDesc->pps->sps->bit_depth_luma_minus8;
+   assert(dxvaStructure.bit_depth_luma_minus8 == 0);   // Only support for NV12 now
+   // uint8_t  bit_depth_chroma_minus8;
+   dxvaStructure.bit_depth_chroma_minus8 = pPipeDesc->pps->sps->bit_depth_chroma_minus8;
+   assert(dxvaStructure.bit_depth_chroma_minus8 == 0);   // Only support for NV12 now
+   // uint16_t MinLumaBipredSize8x8Flag
+   dxvaStructure.MinLumaBipredSize8x8Flag = pPipeDesc->pps->sps->MinLumaBiPredSize8x8;
+   // char pic_init_qs_minus26
+   dxvaStructure.pic_init_qs_minus26 = pPipeDesc->pps->pic_init_qs_minus26;
+   // uint8_t   chroma_qp_index_offset;   /* also used for QScb */
+   dxvaStructure.chroma_qp_index_offset = pPipeDesc->pps->chroma_qp_index_offset;
+   // uint8_t   second_chroma_qp_index_offset; /* also for QScr */
+   dxvaStructure.second_chroma_qp_index_offset = pPipeDesc->pps->second_chroma_qp_index_offset;
+
+   /* remainder for parsing */
+   // uint8_t   pic_init_qp_minus26;
+   dxvaStructure.pic_init_qp_minus26 = pPipeDesc->pps->pic_init_qp_minus26;
+   // uint8_t  num_ref_idx_l0_active_minus1;
+   dxvaStructure.num_ref_idx_l0_active_minus1 = pPipeDesc->num_ref_idx_l0_active_minus1;
+   // uint8_t  num_ref_idx_l1_active_minus1;
+   dxvaStructure.num_ref_idx_l1_active_minus1 = pPipeDesc->num_ref_idx_l1_active_minus1;
+
+   // uint16_t frame_num;
+   dxvaStructure.frame_num = pPipeDesc->frame_num;
+
+   // uint8_t  log2_max_frame_num_minus4;
+   dxvaStructure.log2_max_frame_num_minus4 = pPipeDesc->pps->sps->log2_max_frame_num_minus4;
+   // uint8_t  pic_order_cnt_type;
+   dxvaStructure.pic_order_cnt_type = pPipeDesc->pps->sps->pic_order_cnt_type;
+   // uint8_t  log2_max_pic_order_cnt_lsb_minus4;
+   dxvaStructure.log2_max_pic_order_cnt_lsb_minus4 = pPipeDesc->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+   // uint8_t  delta_pic_order_always_zero_flag;
+   dxvaStructure.delta_pic_order_always_zero_flag = pPipeDesc->pps->sps->delta_pic_order_always_zero_flag;
+   // uint8_t  direct_8x8_inference_flag;
+   dxvaStructure.direct_8x8_inference_flag = pPipeDesc->pps->sps->direct_8x8_inference_flag;
+   // uint8_t  entropy_coding_mode_flag;
+   dxvaStructure.entropy_coding_mode_flag = pPipeDesc->pps->entropy_coding_mode_flag;
+   // uint8_t  num_slice_groups_minus1;
+   dxvaStructure.num_slice_groups_minus1 = pPipeDesc->pps->num_slice_groups_minus1;
+   assert(dxvaStructure.num_slice_groups_minus1 == 0);   // FMO Not supported by VA
+
+   // uint8_t  slice_group_map_type;
+   dxvaStructure.slice_group_map_type = pPipeDesc->pps->slice_group_map_type;
+   // uint8_t  deblocking_filter_control_present_flag;
+   dxvaStructure.deblocking_filter_control_present_flag = pPipeDesc->pps->deblocking_filter_control_present_flag;
+   // uint8_t  redundant_pic_cnt_present_flag;
+   dxvaStructure.redundant_pic_cnt_present_flag = pPipeDesc->pps->redundant_pic_cnt_present_flag;
+   // uint16_t slice_group_change_rate_minus1;
+   dxvaStructure.slice_group_change_rate_minus1 = pPipeDesc->pps->slice_group_change_rate_minus1;
+
+   // int32_t    CurrFieldOrderCnt[2];
+   dxvaStructure.CurrFieldOrderCnt[0] = pPipeDesc->field_order_cnt[0];
+   dxvaStructure.CurrFieldOrderCnt[1] = pPipeDesc->field_order_cnt[1];
+
+   // DXVA_PicEntry_H264  RefFrameList[16]; /* DXVA_PicEntry_H264.AssociatedFlag 1 means LongTermRef */
+   // From DXVA spec:
+   // RefFrameList
+   // Contains a list of 16 uncompressed frame buffer surfaces.  All uncompressed surfaces that correspond to pictures
+   // currently marked as "used for reference" must appear in the RefFrameList array. Non-reference surfaces (those
+   // which only contain pictures for which the value of RefPicFlag was 0 when the picture was decoded) shall not appear
+   // in RefFrameList for a subsequent picture. In addition, surfaces that contain only pictures marked as "unused for
+   // reference" shall not appear in RefFrameList for a subsequent picture.
+
+   dxvaStructure.UsedForReferenceFlags = 0;   // initialize to zero and set only the appropiate values below
+
+   bool frameUsesAnyRefPicture = false;
+   for (uint i = 0; i < 16; i++) {
+      // Fix ad-hoc behaviour from the VA upper layer which always marks short term references as top_is_reference and
+      // bottom_is_reference as true and then differenciates using INT_MAX in field_order_cnt_list[i][0]/[1] to indicate
+      // not used convert to expected
+      if (pPipeDesc->field_order_cnt_list[i][0] == INT_MAX) {
+         pPipeDesc->top_is_reference[i] = false;
+         pPipeDesc->field_order_cnt_list[i][0] = 0;   // DXVA Spec says this has to be zero if unused
+      }
+
+      if (pPipeDesc->field_order_cnt_list[i][1] == INT_MAX) {
+         pPipeDesc->bottom_is_reference[i] = false;
+         pPipeDesc->field_order_cnt_list[i][1] = 0;   // DXVA Spec says this has to be zero if unused
+      }
+
+      // If both top and bottom reference flags are false, this is an invalid entry
+      bool validEntry = (pPipeDesc->top_is_reference[i] || pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]);
+      if (!validEntry) {
+         // From DXVA spec:
+         // Entries that will not be used for decoding the current picture, or any subsequent pictures, are indicated by
+         // setting bPicEntry to 0xFF. If bPicEntry is not 0xFF, the entry may be used as a reference surface for
+         // decoding the current picture or a subsequent picture (in decoding order).
+         dxvaStructure.RefFrameList[i].bPicEntry = DXVA_H264_INVALID_PICTURE_ENTRY_VALUE;
+         dxvaStructure.FieldOrderCntList[i][0] = 0;
+         dxvaStructure.FieldOrderCntList[i][1] = 0;
+         dxvaStructure.FrameNumList[i] = 0;
+      } else {
+         frameUsesAnyRefPicture = true;
+         // From DXVA spec:
+         // For each entry whose value is not 0xFF, the value of AssociatedFlag is interpreted as follows:
+         // 0 - Not a long-term reference frame.
+         // 1 - Long-term reference frame. The uncompressed frame buffer contains a reference frame or one or more
+         // reference fields marked as "used for long-term reference." If field_pic_flag is 1, the current uncompressed
+         // frame surface may appear in the list for the purpose of decoding the second field of a complementary
+         // reference field pair.
+         dxvaStructure.RefFrameList[i].AssociatedFlag = pPipeDesc->is_long_term[i] ? 1u : 0u;
+
+         // dxvaStructure.RefFrameList[i].Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264
+
+         // uint16_t FrameNumList[16];
+         //     FrameNumList
+         // For each entry in RefFrameList, the corresponding entry in FrameNumList
+         // contains the value of FrameNum or LongTermFrameIdx, depending on the value of
+         // AssociatedFlag in the RefFrameList entry. (FrameNum is assigned to short-term
+         // reference pictures, and LongTermFrameIdx is assigned to long-term reference
+         // pictures.)
+         // If an element in the list of frames is not relevent (for example, if the corresponding
+         // entry in RefFrameList is empty or is marked as "not used for reference"), the value
+         // of the FrameNumList entry shall be 0. Accelerators can rely on this constraint being
+         // fulfilled.
+         dxvaStructure.FrameNumList[i] = pPipeDesc->frame_num_list[i];
+
+         // int32_t    FieldOrderCntList[16][2];
+         // Contains the picture order counts for the reference frames listed in RefFrameList.
+         // For each entry i in the RefFrameList array, FieldOrderCntList[i][0] contains the
+         // value of TopFieldOrderCnt for entry i, and FieldOrderCntList[i][1] contains the
+         // value of BottomFieldOrderCnt for entry i.
+         //
+         // If an element of the list is not relevent (for example, if the corresponding entry in
+         // RefFrameList is empty or is marked as "not used for reference"), the value of
+         // TopFieldOrderCnt or BottomFieldOrderCnt in FieldOrderCntList shall be 0.
+         // Accelerators can rely on this constraint being fulfilled.
+
+         dxvaStructure.FieldOrderCntList[i][0] = pPipeDesc->field_order_cnt_list[i][0];
+         dxvaStructure.FieldOrderCntList[i][1] = pPipeDesc->field_order_cnt_list[i][1];
+
+         // From DXVA spec
+         // UsedForReferenceFlags
+         // Contains two 1-bit flags for each entry in RefFrameList. For the ith entry in RefFrameList, the two flags
+         // are accessed as follows:  Flag1i = (UsedForReferenceFlags >> (2 * i)) & 1  Flag2i = (UsedForReferenceFlags
+         // >> (2 * i + 1)) & 1 If Flag1i is 1, the top field of frame number i is marked as "used for reference," as
+         // defined by the H.264/AVC specification. If Flag2i is 1, the bottom field of frame number i is marked as
+         // "used for reference." (Otherwise, if either flag is 0, that field is not marked as "used for reference.") If
+         // an element in the list of frames is not relevant (for example, if the corresponding entry in RefFrameList is
+         // empty), the value of both flags for that entry shall be 0. Accelerators may rely on this constraint being
+         // fulfilled.
+
+         if (pPipeDesc->top_is_reference[i] || pPipeDesc->is_long_term[i]) {
+            dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i));
+         }
+
+         if (pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]) {
+            dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i + 1));
+         }
+      }
+   }
+
+   // frame type (I, P, B, etc) is not included in pipeDesc data, let's try to derive it
+   // from the reference list...if frame doesn't use any references, it should be an I frame.
+   dxvaStructure.IntraPicFlag = !frameUsesAnyRefPicture;
+
+   // uint8_t  pic_order_present_flag; /* Renamed to bottom_field_pic_order_in_frame_present_flag in newer standard
+   // versions. */
+   dxvaStructure.pic_order_present_flag = pPipeDesc->pps->bottom_field_pic_order_in_frame_present_flag;
+
+   // Software decoders should be implemented, as soon as feasible, to set the value of
+   // Reserved16Bits to 3. The value 0 was previously assigned for uses prior to July 20,
+   // 2007. The value 1 was previously assigned for uses prior to October 12, 2007. The
+   // value 2 was previously assigned for uses prior to January 15, 2009. Software
+   // decoders shall not set Reserved16Bits to any value other than those listed here.
+   // Note Software decoders that set Reserved16Bits to 3 should ensure that any aspects of software decoder operation
+   // that were previously not in conformance with this version of the specification have been corrected in the current
+   // implementation. One particular aspect of conformance that should be checked is the ordering of quantization
+   // scaling list data, as specified in section 5.2. In addition, the ReservedIntraBit flag in the macroblock control
+   // buffer must use the semantics described in section 7.2 (this flag was previously reserved). The semantics of
+   // Index7Bits and RefPicList have also been clarified in updates to this specification.
+   dxvaStructure.Reserved16Bits = 3;
+
+   // DXVA spec: Arbitrary number set by the host decoder to use as a tag in the status report
+   // feedback data. The value should not equal 0, and should be different in each call to
+   // Execute. For more information, see section 12.0, Status Report Data Structure.
+   dxvaStructure.StatusReportFeedbackNumber = frameNum;
+   assert(dxvaStructure.StatusReportFeedbackNumber > 0);
+
+   // from DXVA spec
+   // ContinuationFlag
+   // If this flag is 1, the remainder of this structure is present in the buffer and contains valid values. If this
+   // flag is 0, the structure might be truncated at this point in the buffer, or the remaining fields may be set to 0
+   // and shall be ignored by the accelerator. The remaining members of this structure are needed only for off-host
+   // bitstream parsing. If the host decoder parses the bitstream, the decoder can truncate the picture parameters data
+   // structure buffer after the ContinuationFlag or set the remaining members to zero. uint8_t  ContinuationFlag;
+   dxvaStructure.ContinuationFlag =
+      1;   // DXVA destination struct does contain members from the slice section of pipeDesc...
+
+   return dxvaStructure;
+}
+
+void
+d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc *pPipeDesc,
+                                                          DXVA_Qmatrix_H264 &outMatrixBuffer)
+{
+   // Please note here that the matrices coming from the gallium VA frontend are copied from VAIQMatrixBufferH264
+   // which are specified in VAAPI as being in raster scan order (different than zigzag needed by DXVA)
+   // also please note that VAIQMatrixBufferH264.ScalingList8x8 is copied into the first two rows of
+   // pipe_h264_pps.ScalingList8x8 leaving the upper 4 rows of  pipe_h264_pps.ScalingList8x8[6][64] unmodified
+   // Finally, please note that other gallium frontends might decide to copy the scaling lists in other order
+   // and this section might have to be extended to add support for them.
+
+   // In DXVA each scaling list is ordered in zig-zag scan order, convert them from raster scan order.
+   unsigned i, j;
+   for (i = 0; i < 6; i++) {
+      for (j = 0; j < 16; j++) {
+         outMatrixBuffer.bScalingLists4x4[i][j] = pPipeDesc->pps->ScalingList4x4[i][d3d12_video_zigzag_scan[j]];
+      }
+   }
+   for (i = 0; i < 64; i++) {
+      outMatrixBuffer.bScalingLists8x8[0][i] = pPipeDesc->pps->ScalingList8x8[0][d3d12_video_zigzag_direct[i]];
+      outMatrixBuffer.bScalingLists8x8[1][i] = pPipeDesc->pps->ScalingList8x8[1][d3d12_video_zigzag_direct[i]];
+   }
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_h264.h b/src/gallium/drivers/d3d12/d3d12_video_dec_h264.h
new file mode 100644 (file)
index 0000000..db1ad22
--- /dev/null
@@ -0,0 +1,253 @@
+
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_VIDEO_DEC_H264_H
+#define D3D12_VIDEO_DEC_H264_H
+
+#include "d3d12_video_types.h"
+
+// From DXVA spec regarding DXVA_PicEntry_H264 entries:
+// Entries that will not be used for decoding the current picture, or any subsequent pictures,
+// are indicated by setting bPicEntry to 0xFF.
+// If bPicEntry is not 0xFF, the entry may be used as a reference surface for decoding the current picture or
+// a subsequent picture (in decoding order).
+constexpr uint16_t DXVA_H264_INVALID_PICTURE_INDEX =
+   0x7F;   // This corresponds to DXVA_PicEntry_H264.Index7Bits ; Not to be confused with the invalid value for
+           // DXVA_PicEntry_H264.bPicEntry full char value
+constexpr uint16_t DXVA_H264_INVALID_PICTURE_ENTRY_VALUE = 0xFF;   // This corresponds to DXVA_PicEntry_H264.bPicEntry
+
+// Annex B byte-stream NAL unit prefix (0x000001) used when scanning slices in the bitstream buffer.
+constexpr unsigned int DXVA_H264_START_CODE          = 0x000001;   // 3 byte start code
+constexpr unsigned int DXVA_H264_START_CODE_LEN_BITS = 24;         // 3 byte start code
+
+// NAL unit types; values match nal_unit_type from the H.264/AVC specification (Table 7-1).
+// Used when parsing the Annex B bitstream to locate slice NALs.
+enum d3d12_video_decoder_nal_unit_type_h264
+{
+    type_unspecified = 0,
+    type_slice = 1,
+    type_slice_part_A = 2,
+    type_slice_part_B = 3,
+    type_slice_part_C = 4,
+    type_slice_IDR = 5,
+    type_SEI = 6,
+    type_SPS = 7,
+    type_PPS = 8,
+    type_acces_delimiter = 9,   // NOTE(review): identifier typo ("acces" -> "access") kept as-is to avoid touching any users.
+    type_EOSeq = 10,
+    type_EOStr = 11,
+    type_EOData = 12,
+    type_SPSxt = 13,
+    type_prefix_nal_unit = 14,
+    type_subset_SPS = 15,
+    type_reserved16 = 16,
+    type_reserved17 = 17,
+    type_reserved18 = 18,
+    type_slice_aux = 19,
+    type_slice_layer_ext = 20,
+    type_reserved21 = 21,
+    type_reserved22 = 22,
+    type_reserved23 = 23,
+    type_unspecified24 = 24,
+    type_unspecified25 = 25,
+    type_unspecified26 = 26,
+    type_unspecified27 = 27,
+    type_unspecified28 = 28,
+    type_unspecified29 = 29,
+    type_unspecified30 = 30,
+    type_unspecified31 = 31
+};
+
+/* H.264/AVC picture entry data structure */
+/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */
+/* Index7Bits selects a surface in RefFrameList; AssociatedFlag's meaning depends on context
+   (e.g. bottom-field flag for CurrPic, long-term flag for RefFrameList entries).
+   bPicEntry == 0xFF (DXVA_H264_INVALID_PICTURE_ENTRY_VALUE) marks an unused entry. */
+#pragma pack(push, BeforeDXVApacking, 1)
+typedef struct _DXVA_PicEntry_H264
+{
+   union
+   {
+      struct
+      {
+         uint8_t Index7Bits : 7;
+         uint8_t AssociatedFlag : 1;
+      };
+      uint8_t bPicEntry;
+   };
+} DXVA_PicEntry_H264, *LPDXVA_PicEntry_H264; /* 1 byte */
+#pragma pack(pop, BeforeDXVApacking)
+
+/* H.264/AVC picture parameters structure */
+/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */
+/* Mirrors the DXVA picture parameters buffer layout for H.264; must stay 1-byte packed so
+   the driver-visible memory layout matches the DXVA specification exactly. */
+#pragma pack(push, BeforeDXVApacking, 1)
+typedef struct _DXVA_PicParams_H264
+{
+   uint16_t           wFrameWidthInMbsMinus1;
+   uint16_t           wFrameHeightInMbsMinus1;
+   DXVA_PicEntry_H264 CurrPic; /* flag is bot field flag */
+   uint8_t            num_ref_frames;
+
+   union
+   {
+      struct
+      {
+         uint16_t field_pic_flag : 1;
+         uint16_t MbaffFrameFlag : 1;
+         uint16_t residual_colour_transform_flag : 1;
+         uint16_t sp_for_switch_flag : 1;
+         uint16_t chroma_format_idc : 2;
+         uint16_t RefPicFlag : 1;
+         uint16_t constrained_intra_pred_flag : 1;
+
+         uint16_t weighted_pred_flag : 1;
+         uint16_t weighted_bipred_idc : 2;
+         uint16_t MbsConsecutiveFlag : 1;
+         uint16_t frame_mbs_only_flag : 1;
+         uint16_t transform_8x8_mode_flag : 1;
+         uint16_t MinLumaBipredSize8x8Flag : 1;
+         uint16_t IntraPicFlag : 1;
+      };
+      uint16_t wBitFields;
+   };
+   uint8_t bit_depth_luma_minus8;
+   uint8_t bit_depth_chroma_minus8;
+
+   uint16_t Reserved16Bits;          /* DXVA spec: set to 3 by current decoders */
+   uint32_t StatusReportFeedbackNumber;
+
+   DXVA_PicEntry_H264 RefFrameList[16]; /* flag LT */
+   int32_t            CurrFieldOrderCnt[2];
+   int32_t            FieldOrderCntList[16][2];
+
+   char    pic_init_qs_minus26;
+   char    chroma_qp_index_offset;        /* also used for QScb */
+   char    second_chroma_qp_index_offset; /* also for QScr */
+   uint8_t ContinuationFlag;              /* 1 -> the parsing members below are valid */
+
+   /* remainder for parsing */
+   char    pic_init_qp_minus26;
+   uint8_t num_ref_idx_l0_active_minus1;
+   uint8_t num_ref_idx_l1_active_minus1;
+   uint8_t Reserved8BitsA;
+
+   uint16_t FrameNumList[16];        /* FrameNum or LongTermFrameIdx per RefFrameList entry */
+   uint32_t UsedForReferenceFlags;   /* 2 bits per RefFrameList entry: top/bottom field used for reference */
+   uint16_t NonExistingFrameFlags;
+   uint16_t frame_num;
+
+   uint8_t log2_max_frame_num_minus4;
+   uint8_t pic_order_cnt_type;
+   uint8_t log2_max_pic_order_cnt_lsb_minus4;
+   uint8_t delta_pic_order_always_zero_flag;
+
+   uint8_t direct_8x8_inference_flag;
+   uint8_t entropy_coding_mode_flag;
+   uint8_t pic_order_present_flag;
+   uint8_t num_slice_groups_minus1;
+
+   uint8_t slice_group_map_type;
+   uint8_t deblocking_filter_control_present_flag;
+   uint8_t redundant_pic_cnt_present_flag;
+   uint8_t Reserved8BitsB;
+
+   uint16_t slice_group_change_rate_minus1;
+
+   uint8_t SliceGroupMap[810]; /* 4b/sgmu, Size BT.601 */
+
+} DXVA_PicParams_H264, *LPDXVA_PicParams_H264;
+#pragma pack(pop, BeforeDXVApacking)
+
+/* H.264/AVC quantization weighting matrix data structure */
+/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */
+/* Each list is stored in zig-zag scan order (see
+   d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264 for the raster->zig-zag conversion). */
+#pragma pack(push, BeforeDXVApacking, 1)
+typedef struct _DXVA_Qmatrix_H264
+{
+   uint8_t bScalingLists4x4[6][16];
+   uint8_t bScalingLists8x8[2][64];
+
+} DXVA_Qmatrix_H264, *LPDXVA_Qmatrix_H264;
+#pragma pack(pop, BeforeDXVApacking)
+
+// For translating the QP matrices from VA to DXVA
+// 8x8 zig-zag scan: entry i is the raster index of the i-th zig-zag position.
+// (Header-scope const has internal linkage in C++, so each including TU gets its own copy.)
+const uint8_t d3d12_video_zigzag_direct[64] = {
+   0,   1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63
+};
+
+// For translating the QP matrices from VA to DXVA
+// 4x4 zig-zag scan: entry i is the raster index (col + row * 4) of the i-th zig-zag position.
+// Sized [16+1] with only 16 initializers; the trailing element is zero-initialized and is not
+// part of the scan (padding only).
+const uint8_t d3d12_video_zigzag_scan[16+1] = {
+   0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
+   1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+   1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
+   3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+
+/* H.264/AVC slice control data structure - short form */
+/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */
+/* One entry per slice submitted in the bitstream buffer; short form is used when the host
+   decoder parses the bitstream itself (off-host parsing not required). */
+#pragma pack(push, BeforeDXVApacking, 1)
+typedef struct _DXVA_Slice_H264_Short
+{
+   uint32_t BSNALunitDataLocation; /* type 1..5 */
+   uint32_t SliceBytesInBuffer;    /* for off-host parse */
+   uint16_t wBadSliceChopping;     /* for off-host parse */
+} DXVA_Slice_H264_Short, *LPDXVA_Slice_H264_Short;
+#pragma pack(pop, BeforeDXVApacking)
+
+// Builds the DXVA picture parameters buffer from the gallium pipe picture description.
+DXVA_PicParams_H264
+d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(uint32_t                frameNum,
+                                                            pipe_video_profile      profile,
+                                                            uint32_t                frameWidth,
+                                                            uint32_t                frameHeight,
+                                                            pipe_h264_picture_desc *pipeDesc);
+// Reports the current frame dimensions, max DPB size and interlace state for the decoder.
+void
+d3d12_video_decoder_get_frame_info_h264(
+   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced);
+// Registers the given texture/subresource as the current frame's reference output.
+void
+d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder *pD3D12Dec,
+                                                          ID3D12Resource *            pTexture2D,
+                                                          uint32_t                    subresourceIndex);
+// Converts the VA raster-order IQ matrices into zig-zag-order DXVA_Qmatrix_H264.
+void
+d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc *pPipeDesc,
+                                                          DXVA_Qmatrix_H264 &     outMatrixBuffer);
+void
+d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder *pD3D12Dec);
+// Scans buf from bufferOffset for the next Annex B start code; returns false when no further slice exists.
+bool
+d3d12_video_decoder_get_next_slice_size_and_offset_h264(std::vector<uint8_t> &buf,
+                                                   unsigned int          bufferOffset,
+                                                   uint32_t &            outSliceSize,
+                                                   uint32_t &            outSliceOffset);
+
+// NOTE(review): 'uint' is not a standard C++ type; presumably provided by a util header
+// included via d3d12_video_types.h — consider 'unsigned' for portability. Confirm before changing.
+uint 
+d3d12_video_decoder_get_slice_count_h264(std::vector<uint8_t> &buf);
+
+// Fills one DXVA_Slice_H264_Short entry per slice found in the staged bitstream buffer.
+void
+d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder *        pD3D12Dec,
+                                                     std::vector<DXVA_Slice_H264_Short> &pOutSliceControlBuffers,
+                                                     struct pipe_h264_picture_desc* picture_h264);
+
+// Debug helper: dumps the DXVA picture parameters fields to the debug log.
+void
+d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 * pPicParams);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp
new file mode 100644 (file)
index 0000000..8f170a9
--- /dev/null
@@ -0,0 +1,449 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_video_dec_references_mgr.h"
+#include "d3d12_video_dec_h264.h"
+#include "d3d12_video_texture_array_dpb_manager.h"
+#include "d3d12_video_array_of_textures_dpb_manager.h"
+#include "d3d12_screen.h"
+#include "d3d12_resource.h"
+#include "d3d12_video_buffer.h"
+#include <algorithm>
+#include <string>
+
+//----------------------------------------------------------------------------------------------------------------------------------
+static uint16_t
+GetInvalidReferenceIndex(d3d12_video_decode_profile_type DecodeProfileType)
+{
+   // Returns the codec-specific sentinel used to mark an unused/invalid reference index.
+   assert(DecodeProfileType <= d3d12_video_decode_profile_type_max_valid);
+
+   if (DecodeProfileType == d3d12_video_decode_profile_type_h264)
+      return DXVA_H264_INVALID_PICTURE_INDEX;
+
+   // No other codecs supported yet; fall back to zero.
+   return 0;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+///
+/// This should always be a clear (non ref only) texture, to be presented downstream as the decoded texture
+/// Please see get_reference_only_output for the current frame recon pic ref only allocation
+///
+void
+d3d12_video_decoder_references_manager::get_current_frame_decode_output_texture(struct pipe_video_buffer *  pCurrentDecodeTarget,
+                                                                                ID3D12Resource **ppOutTexture2D,
+                                                                                uint32_t *       pOutSubresourceIndex)
+{
+// First try to find if there's an existing entry for this pCurrentDecodeTarget already in the DPB
+   // For interlaced scenarios, multiple end_frame calls will need to reference the same texture for top/bottom
+   assert(m_DecodeTargetToOriginalIndex7Bits.count(pCurrentDecodeTarget) > 0); // Needs to already have a Index7Bits assigned for current pic params
+   uint16_t remappedIdx = find_remapped_index(m_DecodeTargetToOriginalIndex7Bits[pCurrentDecodeTarget]);
+
+   if(remappedIdx != m_invalidIndex) { // If it already has a remapped index in use, reuse that allocation
+      // return the existing allocation for this decode target
+      d3d12_video_reconstructed_picture reconPicture = m_upD3D12TexturesStorageManager->get_reference_frame(remappedIdx);
+      *ppOutTexture2D       = reconPicture.pReconstructedPicture;
+      *pOutSubresourceIndex = reconPicture.ReconstructedPictureSubresource;
+   } else {
+      if (is_reference_only()) {
+         // When using clear DPB references (not ReferenceOnly) the decode output allocations come from
+         // m_upD3D12TexturesStorageManager as decode output == reconpic decode output Otherwise, when ReferenceOnly is
+         // true, both the reference frames in the DPB and the current frame reconpic output must be REFERENCE_ONLY, all
+         // the allocations are stored in m_upD3D12TexturesStorageManager but we need a +1 allocation without the
+         // REFERENCE_FRAME to use as clear decoded output. In this case d3d12_video_decoder_references_manager allocates
+         // and provides m_pClearDecodedOutputTexture Please note that m_pClearDecodedOutputTexture needs to be copied/read
+         // by the client before calling end_frame again, as the allocation will be reused for the next frame.
+
+         // Lazily create the single clear output texture the first time it is needed; it is
+         // reused for every subsequent frame.
+         if (m_pClearDecodedOutputTexture == nullptr) {
+            D3D12_HEAP_PROPERTIES Properties =
+               CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, m_dpbDescriptor.m_NodeMask, m_dpbDescriptor.m_NodeMask);
+            CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Tex2D(m_dpbDescriptor.Format,
+                                                                        m_dpbDescriptor.Width,
+                                                                        m_dpbDescriptor.Height,
+                                                                        1,
+                                                                        1,
+                                                                        1,
+                                                                        0,
+                                                                        D3D12_RESOURCE_FLAG_NONE);
+
+            HRESULT hr = m_pD3D12Screen->dev->CreateCommittedResource(&Properties,
+                                                            D3D12_HEAP_FLAG_NONE,
+                                                            &resDesc,
+                                                            D3D12_RESOURCE_STATE_COMMON,
+                                                            nullptr,
+                                                            IID_PPV_ARGS(m_pClearDecodedOutputTexture.GetAddressOf()));
+            // NOTE(review): on failure execution continues and *ppOutTexture2D is set to null
+            // below in release builds — confirm callers tolerate a null decode output.
+            if (FAILED(hr)) {
+               debug_printf("CreateCommittedResource failed with HR %x\n", hr);
+               assert(false);
+            }
+         }
+
+         *ppOutTexture2D       = m_pClearDecodedOutputTexture.Get();
+         *pOutSubresourceIndex = 0;
+      } else {
+         if(is_array_of_textures()) {
+            // In non ref picture and non texarray mode, we can just use the underlying allocation in pCurrentDecodeTarget
+            // and avoid an extra copy after decoding the frame.
+            assert(is_pipe_buffer_underlying_output_decode_allocation());
+
+            auto vidBuffer = (struct d3d12_video_buffer *)(pCurrentDecodeTarget);
+            *ppOutTexture2D       = d3d12_resource_resource(vidBuffer->texture);
+            *pOutSubresourceIndex = 0;
+            // NOTE(review): '#if DEBUG' relies on DEBUG expanding to a nonzero value;
+            // '#ifdef DEBUG' may be intended — confirm against the project convention.
+            #if DEBUG
+               D3D12_RESOURCE_DESC desc = (*ppOutTexture2D)->GetDesc();
+               assert(desc.DepthOrArraySize == 1);
+               // if the underlying resource is a texture array at some point (if the impl. changes)
+               // we need to also return the correct underlying subresource in *pOutSubresourceIndex = <subres>
+            #endif
+            
+         } else {
+            // The DPB Storage only has standard (without the ref only flags) allocations, directly use one of those.
+            d3d12_video_reconstructed_picture pFreshAllocation =
+               m_upD3D12TexturesStorageManager->get_new_tracked_picture_allocation();
+            *ppOutTexture2D       = pFreshAllocation.pReconstructedPicture;
+            *pOutSubresourceIndex = pFreshAllocation.ReconstructedPictureSubresource;
+         }
+         
+      }
+   }
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+_Use_decl_annotations_ void
+d3d12_video_decoder_references_manager::get_reference_only_output(
+   struct pipe_video_buffer *  pCurrentDecodeTarget,
+   ID3D12Resource **ppOutputReference,     // out -> new reference slot assigned or nullptr
+   uint32_t *       pOutputSubresource,    // out -> new reference slot assigned or nullptr
+   bool &outNeedsTransitionToDecodeWrite   // out -> indicates if output resource argument has to be transitioned to
+                                           // D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller
+)
+{
+   // Returns the REFERENCE_ONLY reconpic allocation for the current decode target.
+   // Only valid when the DPB was created with the ReferenceOnly resource flags.
+   assert(is_reference_only());
+
+   // First try to find if there's an existing entry for this pCurrentDecodeTarget already in the DPB
+   // For interlaced scenarios, multiple end_frame calls will need to reference the same texture for top/bottom
+   assert(m_DecodeTargetToOriginalIndex7Bits.count(pCurrentDecodeTarget) > 0); // Needs to already have a Index7Bits assigned for current pic params
+   uint16_t remappedIdx = find_remapped_index(m_DecodeTargetToOriginalIndex7Bits[pCurrentDecodeTarget]);
+
+   if(remappedIdx != m_invalidIndex) { // If it already has a remapped index in use, reuse that allocation
+      // return the existing allocation for this decode target
+      d3d12_video_reconstructed_picture reconPicture = m_upD3D12TexturesStorageManager->get_reference_frame(remappedIdx);
+      *ppOutputReference              = reconPicture.pReconstructedPicture;
+      *pOutputSubresource             = reconPicture.ReconstructedPictureSubresource;
+      outNeedsTransitionToDecodeWrite = true;
+   } else {
+      // The DPB Storage only has REFERENCE_ONLY allocations, use one of those.
+      d3d12_video_reconstructed_picture pFreshAllocation =
+         m_upD3D12TexturesStorageManager->get_new_tracked_picture_allocation();
+      *ppOutputReference              = pFreshAllocation.pReconstructedPicture;
+      *pOutputSubresource             = pFreshAllocation.ReconstructedPictureSubresource;
+      outNeedsTransitionToDecodeWrite = true;
+   }   
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+D3D12_VIDEO_DECODE_REFERENCE_FRAMES
+d3d12_video_decoder_references_manager::get_current_reference_frames()
+{
+   // Packages the DPB storage manager's current reference set into the
+   // D3D12_VIDEO_DECODE_REFERENCE_FRAMES layout expected by DecodeFrame.
+   d3d12_video_reference_frames args = m_upD3D12TexturesStorageManager->get_current_reference_frames();
+
+
+   // Convert generic IUnknown into the actual decoder heap object
+   // NOTE(review): QueryInterface AddRefs each heap written into m_ppHeaps; presumably
+   // m_ppHeaps holds ComPtr entries that release on overwrite — verify no leak across calls.
+   m_ppHeaps.resize(args.NumTexture2Ds, nullptr);
+   HRESULT hr = S_OK;
+   for (uint32_t i = 0; i < args.NumTexture2Ds; i++) {
+      if (args.ppHeaps[i]) {
+         hr = args.ppHeaps[i]->QueryInterface(IID_PPV_ARGS(&m_ppHeaps[i]));
+         assert(SUCCEEDED(hr));
+      } else {
+         m_ppHeaps[i] = nullptr;
+      }
+   }
+
+   // The returned pointers stay owned by this manager (m_ppHeaps / storage manager);
+   // they must remain alive until the decode command is submitted.
+   D3D12_VIDEO_DECODE_REFERENCE_FRAMES retVal = {
+      args.NumTexture2Ds,
+      args.ppTexture2Ds,
+      args.pSubresources,
+      m_ppHeaps.data(),
+   };
+
+   return retVal;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+_Use_decl_annotations_
+d3d12_video_decoder_references_manager::d3d12_video_decoder_references_manager(
+   const struct d3d12_screen *       pD3D12Screen,
+   uint32_t                          NodeMask,
+   d3d12_video_decode_profile_type   DecodeProfileType,
+   d3d12_video_decode_dpb_descriptor m_dpbDescriptor)
+   : m_DecodeTargetToOriginalIndex7Bits({ }),
+     m_CurrentIndex7BitsAvailable(0),
+     m_pD3D12Screen(pD3D12Screen),
+     m_invalidIndex(GetInvalidReferenceIndex(DecodeProfileType)),
+     m_dpbDescriptor(m_dpbDescriptor),
+     m_formatInfo({ m_dpbDescriptor.Format })
+{
+   HRESULT hr = m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &m_formatInfo, sizeof(m_formatInfo));
+   assert(SUCCEEDED(hr));
+   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC targetFrameResolution = { static_cast<uint32_t>(m_dpbDescriptor.Width),
+                                                                         m_dpbDescriptor.Height };
+   D3D12_RESOURCE_FLAGS                        resourceAllocFlags =
+      m_dpbDescriptor.fReferenceOnly ?
+         (D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) :
+         D3D12_RESOURCE_FLAG_NONE;
+
+   if (m_dpbDescriptor.fArrayOfTexture) {
+      // If all subresources are 0, the DPB is loaded with an array of individual textures, the D3D Encode API expects
+      // pSubresources to be null in this case The D3D Decode API expects it to be non-null even with all zeroes.
+      bool setNullSubresourcesOnAllZero = false;
+      m_upD3D12TexturesStorageManager =
+         std::make_unique<d3d12_array_of_textures_dpb_manager>(m_dpbDescriptor.dpbSize,
+                                                               m_pD3D12Screen->dev,
+                                                               m_dpbDescriptor.Format,
+                                                               targetFrameResolution,
+                                                               resourceAllocFlags,
+                                                               setNullSubresourcesOnAllZero,
+                                                               m_dpbDescriptor.m_NodeMask,
+                                                               !is_pipe_buffer_underlying_output_decode_allocation());
+   } else {
+      m_upD3D12TexturesStorageManager = std::make_unique<d3d12_texture_array_dpb_manager>(m_dpbDescriptor.dpbSize,
+                                                                                          m_pD3D12Screen->dev,
+                                                                                          m_dpbDescriptor.Format,
+                                                                                          targetFrameResolution,
+                                                                                          resourceAllocFlags,
+                                                                                          m_dpbDescriptor.m_NodeMask);
+   }
+
+   m_referenceDXVAIndices.resize(m_dpbDescriptor.dpbSize);
+
+   d3d12_video_reconstructed_picture reconPicture = { nullptr, 0, nullptr };
+
+   for (uint32_t dpbIdx = 0; dpbIdx < m_dpbDescriptor.dpbSize; dpbIdx++) {
+      m_upD3D12TexturesStorageManager->insert_reference_frame(reconPicture, dpbIdx);
+   }
+
+   mark_all_references_as_unused();
+   release_unused_references_texture_memory();
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+uint16_t
+d3d12_video_decoder_references_manager::find_remapped_index(uint16_t originalIndex)
+{
+   // Linear scan of the remap table: the slot currently holding originalIndex
+   // is the remapped DPB index.
+   uint16_t slot = 0;
+   while (slot < m_dpbDescriptor.dpbSize) {
+      if (m_referenceDXVAIndices[slot].originalIndex == originalIndex)
+         return slot;
+      slot++;
+   }
+
+   // originalIndex is not currently mapped to any DPB slot.
+   return m_invalidIndex;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+uint16_t
+d3d12_video_decoder_references_manager::update_entry(
+   uint16_t         index,                // in
+   ID3D12Resource *&pOutputReference,     // out -> new reference slot assigned or nullptr
+   uint32_t &       OutputSubresource,    // out -> new reference slot assigned or 0
+   bool &outNeedsTransitionToDecodeRead   // out -> indicates if output resource argument has to be transitioned to
+                                          // D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller
+)
+{
+   // Maps a DXVA reference index to its DPB slot and returns the backing resource.
+   // An invalid input index leaves the outputs untouched and returns m_invalidIndex.
+   uint16_t remappedIndex         = m_invalidIndex;
+   outNeedsTransitionToDecodeRead = false;
+
+   if (index != m_invalidIndex) {
+      remappedIndex = find_remapped_index(index);
+
+      outNeedsTransitionToDecodeRead = true;
+      // Error handling: an unmapped index, or one aliasing the current output, falls back
+      // to the current output slot without requesting a state transition.
+      if (remappedIndex == m_invalidIndex || remappedIndex == m_currentOutputIndex) {
+         debug_printf("[d3d12_video_decoder_references_manager] update_entry - Invalid Reference Index\n");
+
+         remappedIndex                  = m_currentOutputIndex;
+         outNeedsTransitionToDecodeRead = false;
+      }
+
+      d3d12_video_reconstructed_picture reconPicture =
+         m_upD3D12TexturesStorageManager->get_reference_frame(remappedIndex);
+      // Only expose the resource when the caller is expected to transition it.
+      pOutputReference  = outNeedsTransitionToDecodeRead ? reconPicture.pReconstructedPicture : nullptr;
+      OutputSubresource = outNeedsTransitionToDecodeRead ? reconPicture.ReconstructedPictureSubresource : 0u;
+   }
+
+   return remappedIndex;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+// Assigns (or reuses) a D3D12 DPB slot for the DXVA index of the picture being decoded,
+// stores the decode heap/texture pair in the storage manager at that slot, and remembers
+// it as the current output so update_entry() can fall back to it on invalid references.
+// Returns the remapped D3D12 DPB slot index.
+_Use_decl_annotations_ uint16_t
+d3d12_video_decoder_references_manager::store_future_reference(uint16_t                        index,
+                                                               ComPtr<ID3D12VideoDecoderHeap> &decoderHeap,
+                                                               ID3D12Resource *                pTexture2D,
+                                                               uint32_t                        subresourceIndex)
+{
+   // Check if the index was in use.
+   uint16_t remappedIndex = find_remapped_index(index);
+
+   if (remappedIndex == m_invalidIndex) {
+      // The current output index was not used last frame.  Get an unused entry.
+      remappedIndex = find_remapped_index(m_invalidIndex);
+   }
+
+   if (remappedIndex == m_invalidIndex) {
+      debug_printf(
+         "[d3d12_video_decoder_references_manager] d3d12_video_decoder_references_manager - Decode - No available "
+         "reference map entry for output.\n");
+      assert(false);
+   }
+
+   // Set the index as the key in this map entry.
+   m_referenceDXVAIndices[remappedIndex].originalIndex = index;
+   // QueryInterface AddRef's the heap; the raw IUnknown reference is handed to the
+   // storage manager below — presumably released when the slot is reassigned (TODO confirm).
+   IUnknown *pUnkHeap                                  = nullptr;
+   HRESULT hr = decoderHeap.Get()->QueryInterface(IID_PPV_ARGS(&pUnkHeap));
+   assert(SUCCEEDED(hr));
+   d3d12_video_reconstructed_picture reconPic = { pTexture2D, subresourceIndex, pUnkHeap };
+
+   m_upD3D12TexturesStorageManager->assign_reference_frame(reconPic, remappedIndex);
+
+   // Store the index to use for error handling when caller specifies an invalid reference index.
+   m_currentOutputIndex = remappedIndex;
+   m_currentSubresourceIndex = subresourceIndex;
+   m_currentResource = pTexture2D;
+
+   return remappedIndex;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+// Flags the DPB map entry remapped from the given DXVA index as still referenced,
+// so release_unused_references_texture_memory() will not reclaim its backing texture.
+void
+d3d12_video_decoder_references_manager::mark_reference_in_use(uint16_t index)
+{
+   if (index == m_invalidIndex)
+      return;
+
+   const uint16_t slot = find_remapped_index(index);
+   if (slot == m_invalidIndex)
+      return;
+
+   m_referenceDXVAIndices[slot].fUsed = true;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+// Walks the whole DPB map and, for every entry not flagged fUsed this frame, releases
+// its texture back to the pool, clears the DPB slot and drops its Index7Bits mapping.
+// Run after mark_all_references_as_unused() + mark_references_in_use() for a frame.
+void
+d3d12_video_decoder_references_manager::release_unused_references_texture_memory()
+{
+   for (uint32_t index = 0; index < m_dpbDescriptor.dpbSize; index++) {
+      if (!m_referenceDXVAIndices[index].fUsed) {
+         d3d12_video_reconstructed_picture reconPicture = m_upD3D12TexturesStorageManager->get_reference_frame(index);
+         if (reconPicture.pReconstructedPicture != nullptr) {
+            bool wasTracked = m_upD3D12TexturesStorageManager->untrack_reconstructed_picture_allocation(reconPicture);
+            // Untrack this resource, will mark it as free in the underlying storage buffer pool
+            // if not tracked, must be due to no-copies allocation
+            assert (wasTracked || is_pipe_buffer_underlying_output_decode_allocation());
+
+            d3d12_video_reconstructed_picture nullReconPic = { nullptr, 0, nullptr };
+
+            // Mark the unused refpic as null/empty in the DPB
+            m_upD3D12TexturesStorageManager->assign_reference_frame(nullReconPic, index);
+
+            // Remove the entry in m_DecodeTargetToOriginalIndex7Bits
+            // (reverse lookup: find the decode target currently mapped to this DXVA index)
+            auto value = m_referenceDXVAIndices[index].originalIndex;
+            auto it = std::find_if(m_DecodeTargetToOriginalIndex7Bits.begin(), m_DecodeTargetToOriginalIndex7Bits.end(),
+               [&value](const std::pair< struct pipe_video_buffer*, uint8_t > &p) {
+                  return p.second == value;
+               });
+
+            assert(it != m_DecodeTargetToOriginalIndex7Bits.end());
+
+            m_DecodeTargetToOriginalIndex7Bits.erase(it);
+         }
+
+
+         m_referenceDXVAIndices[index].originalIndex = m_invalidIndex;
+      }
+   }
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+// Clears the fUsed flag on every DPB map entry; called before re-marking the
+// references actually used by the current frame, so stale ones can be reclaimed.
+void
+d3d12_video_decoder_references_manager::mark_all_references_as_unused()
+{
+   for (uint32_t slot = 0; slot < m_dpbDescriptor.dpbSize; ++slot)
+      m_referenceDXVAIndices[slot].fUsed = false;
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+void
+d3d12_video_decoder_references_manager::print_dpb()
+{
+   // Resource backing storage always has to match dpbsize
+   if(!is_pipe_buffer_underlying_output_decode_allocation()) {
+      assert(m_upD3D12TexturesStorageManager->get_number_of_tracked_allocations() == m_dpbDescriptor.dpbSize);
+   }
+
+   // get_current_reference_frames query-interfaces the pVideoHeap's.
+   D3D12_VIDEO_DECODE_REFERENCE_FRAMES curRefFrames = get_current_reference_frames();
+   std::string dpbContents;
+   for (uint32_t dpbResIdx = 0;dpbResIdx < curRefFrames.NumTexture2Ds;dpbResIdx++) {
+      dpbContents += "\t{ DPBidx: ";
+      dpbContents += std::to_string(dpbResIdx);
+      dpbContents += " - ResourcePtr: ";
+      char strBufTex[256];
+      memset(&strBufTex, '\0', 256);
+      sprintf(strBufTex, "%p", curRefFrames.ppTexture2Ds[dpbResIdx]);
+      dpbContents += std::string(strBufTex);
+      dpbContents += " - SubresourceIdx: ";
+      dpbContents += (curRefFrames.pSubresources ? std::to_string(curRefFrames.pSubresources[dpbResIdx]) : "0");
+      dpbContents += " - DecoderHeapPtr: ";
+      char strBufHeap[256];
+      memset(&strBufHeap, '\0', 256);
+      if(curRefFrames.ppHeaps && curRefFrames.ppHeaps[dpbResIdx]) {
+         sprintf(strBufHeap, "%p", curRefFrames.ppHeaps[dpbResIdx]);
+         dpbContents += std::string(strBufHeap);  
+      } else {
+         dpbContents += "(nil)";
+      }
+      dpbContents += " - Slot type: ";
+      dpbContents +=  ((m_currentResource == curRefFrames.ppTexture2Ds[dpbResIdx]) && (m_currentSubresourceIndex == curRefFrames.pSubresources[dpbResIdx])) ? "Current decoded frame output" : "Reference frame";
+      dpbContents += " - DXVA_PicParams Reference Index: ";
+      dpbContents += (m_referenceDXVAIndices[dpbResIdx].originalIndex != m_invalidIndex) ? std::to_string(m_referenceDXVAIndices[dpbResIdx].originalIndex) : "DXVA_UNUSED_PICENTRY";
+      dpbContents += "}\n";
+   }
+
+   debug_printf("[D3D12 Video Decoder Picture Manager] Decode session information:\n"
+               "\tDPB Maximum Size (max_ref_count + one_slot_curpic): %d\n"
+               "\tDXGI_FORMAT: %d\n"
+               "\tTexture resolution: (%ld, %d)\n"
+               "\tD3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY enforced: %d\n"
+               "\tAllocation Mode: %s\n"
+               "\n ----------------------\n\tCurrent frame information:\n"
+               "\tD3D12_VIDEO_DECODE_REFERENCE_FRAMES.NumTexture2Ds: %d\n"
+               "\tDPB Contents Table:\n%s",
+               m_upD3D12TexturesStorageManager->get_number_of_tracked_allocations(),
+               m_dpbDescriptor.Format,
+               m_dpbDescriptor.Width,
+               m_dpbDescriptor.Height,
+               m_dpbDescriptor.fReferenceOnly,
+               (m_dpbDescriptor.fArrayOfTexture ? "ArrayOfTextures" : "TextureArray"),
+               m_upD3D12TexturesStorageManager->get_number_of_pics_in_dpb(),
+               dpbContents.c_str());
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h b/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h
new file mode 100644 (file)
index 0000000..63dd3d7
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_VIDEO_DEC_REFMGR_H
+#define D3D12_VIDEO_DEC_REFMGR_H
+
+#include "d3d12_video_types.h"
+#include "d3d12_video_dpb_storage_manager.h"
+#include "d3d12_util.h"
+#include <algorithm>
+#include <map>
+
+///
+/// Tracks the mapping between DXVA PicParams reference indices (Index7Bits) and the
+/// D3D12 DPB slots/resources backing them, and manages reuse of the DPB texture
+/// allocations across frames via a used/unused marking pass.
+///
+struct d3d12_video_decoder_references_manager
+{
+   d3d12_video_decoder_references_manager(const struct d3d12_screen *       pD3D12Screen,
+                                          uint32_t                          NodeMask,
+                                          d3d12_video_decode_profile_type   DecodeProfileType,
+                                          d3d12_video_decode_dpb_descriptor dpbDescriptor);
+
+   // True when the DPB descriptor requires D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resources.
+   bool is_reference_only()
+   {
+      return m_dpbDescriptor.fReferenceOnly;
+   }
+   // True when the DPB is backed by individual textures instead of a texture array.
+   bool is_array_of_textures()
+   {
+      return m_dpbDescriptor.fArrayOfTexture;
+   }
+
+   // True when the decode output can be written directly into the caller's
+   // pipe_video_buffer allocation (no reference-only copy, array-of-textures mode).
+   bool is_pipe_buffer_underlying_output_decode_allocation()
+   {
+      return (!is_reference_only() && is_array_of_textures());
+   }
+
+   void mark_all_references_as_unused();
+   void release_unused_references_texture_memory();
+
+   template <typename T, size_t size>
+   void mark_references_in_use(const T (&picEntries)[size]);
+   void mark_reference_in_use(uint16_t index);
+
+   uint16_t store_future_reference(uint16_t index,
+                                   _In_ ComPtr<ID3D12VideoDecoderHeap> &decoderHeap,
+                                   ID3D12Resource *                     pTexture2D,
+                                   uint32_t                             subresourceIndex);
+
+   // Will clear() argument outNeededTransitions and fill it with the necessary transitions to perform by the caller
+   // after the method returns
+   template <typename T, size_t size>
+   void update_entries(T (&picEntries)[size], std::vector<D3D12_RESOURCE_BARRIER> &outNeededTransitions);
+
+   void get_reference_only_output(
+      struct pipe_video_buffer *  pCurrentDecodeTarget,
+      ID3D12Resource **ppOutputReference,     // out -> new reference slot assigned or nullptr
+      uint32_t *       pOutputSubresource,    // out -> new reference slot assigned or nullptr
+      bool &outNeedsTransitionToDecodeWrite   // out -> indicates if output resource argument has to be transitioned to
+                                              // D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE by the caller
+   );
+
+   // Gets the output texture for the current frame to be decoded
+   void get_current_frame_decode_output_texture(struct pipe_video_buffer *pCurrentDecodeTarget, ID3D12Resource **ppOutTexture2D, uint32_t *pOutSubresourceIndex);
+
+   D3D12_VIDEO_DECODE_REFERENCE_FRAMES get_current_reference_frames();
+
+   void print_dpb();
+
+   ///
+   /// Get the Index7Bits associated with this decode target
+   /// If there isn't one assigned yet, gives out a fresh/unused Index7Bits
+   ///
+   uint8_t get_index7bits(struct pipe_video_buffer * pDecodeTarget) {
+      bool bDecodeTargetAlreadyHasIndex = (m_DecodeTargetToOriginalIndex7Bits.count(pDecodeTarget) > 0);
+      if(bDecodeTargetAlreadyHasIndex)
+      {
+         return m_DecodeTargetToOriginalIndex7Bits[pDecodeTarget];
+      } else {
+         uint8_t freshIdx = m_CurrentIndex7BitsAvailable;
+         
+         // Make sure the next "available" index is not already in use. Entries should be
+         // cleaned up regularly, so there should never be 127 in-flight used indices.
+            #if DEBUG
+               auto it = std::find_if(m_DecodeTargetToOriginalIndex7Bits.begin(), m_DecodeTargetToOriginalIndex7Bits.end(),
+                  [&freshIdx](const std::pair< struct pipe_video_buffer*, uint8_t > &p) {
+                     return p.second == freshIdx;
+                  });
+
+               assert(it == m_DecodeTargetToOriginalIndex7Bits.end());
+            #endif
+
+         // Point to next circular index for next call
+         // (modulo 127 keeps indices in [0, 126]; 127 is the DXVA "unused" sentinel)
+         m_CurrentIndex7BitsAvailable = ((m_CurrentIndex7BitsAvailable + 1) % 127);
+
+         // Assign freshIdx to pDecodeTarget
+         m_DecodeTargetToOriginalIndex7Bits[pDecodeTarget] = freshIdx;
+         return freshIdx;
+      }
+   }
+
+ private:
+   uint16_t update_entry(
+      uint16_t         index,                // in
+      ID3D12Resource *&pOutputReference,     // out -> new reference slot assigned or nullptr
+      uint32_t &       OutputSubresource,    // out -> new reference slot assigned or 0
+      bool &outNeedsTransitionToDecodeRead   // out -> indicates if output resource argument has to be transitioned to
+                                             // D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller
+   );
+
+   uint16_t find_remapped_index(uint16_t originalIndex);
+
+   // One map entry per DPB slot: the DXVA index stored there and whether the
+   // current frame still references it.
+   struct ReferenceData
+   {
+      uint16_t originalIndex;
+      bool     fUsed;
+   };
+
+   // Holds the DPB textures
+   std::unique_ptr<d3d12_video_dpb_storage_manager_interface> m_upD3D12TexturesStorageManager;
+   std::vector<ID3D12VideoDecoderHeap *>
+      m_ppHeaps;   // Auxiliary allocation to QueryInterface the IUnknown's
+                   // m_upD3D12TexturesStorageManager->get_current_reference_frames().ppHeaps
+                   // containing the generic video encode/decode heap;
+
+   // Holds the mapping between DXVA PicParams indices and the D3D12 indices
+   std::vector<ReferenceData> m_referenceDXVAIndices;
+   
+   std::map<struct pipe_video_buffer *, uint8_t> m_DecodeTargetToOriginalIndex7Bits = { };
+   uint8_t m_CurrentIndex7BitsAvailable = 0;
+   ComPtr<ID3D12Resource> m_pClearDecodedOutputTexture;
+
+   const struct d3d12_screen *       m_pD3D12Screen;
+   uint16_t                          m_invalidIndex;
+   d3d12_video_decode_dpb_descriptor m_dpbDescriptor      = {};
+   // Slot/resource of the picture currently being decoded; used as the fallback
+   // target when callers pass an invalid reference index.
+   uint16_t                          m_currentOutputIndex = 0;
+   uint16_t                          m_currentSubresourceIndex = 0;
+   ID3D12Resource*                   m_currentResource = nullptr;
+   D3D12_FEATURE_DATA_FORMAT_INFO    m_formatInfo         = { m_dpbDescriptor.Format };
+};
+
+
+//----------------------------------------------------------------------------------------------------------------------------------
+// Remaps every DXVA picEntry.Index7Bits in-place to its D3D12 DPB slot and collects,
+// in outNeededTransitions, the per-plane COMMON -> VIDEO_DECODE_READ barriers the
+// caller must execute for each reference resource that update_entry() handed back.
+template <typename T, size_t size>
+void
+d3d12_video_decoder_references_manager::update_entries(T (&picEntries)[size],
+                                                       std::vector<D3D12_RESOURCE_BARRIER> &outNeededTransitions)
+{
+   outNeededTransitions.clear();
+
+   for (auto &picEntry : picEntries) {
+      ID3D12Resource *pOutputReference               = {};
+      uint32_t        OutputSubresource              = 0u;
+      bool            outNeedsTransitionToDecodeRead = false;
+
+      picEntry.Index7Bits =
+         update_entry(picEntry.Index7Bits, pOutputReference, OutputSubresource, outNeedsTransitionToDecodeRead);
+
+      if (outNeedsTransitionToDecodeRead) {
+         ///
+         /// The subresource indexing in D3D12 Video within the DPB doesn't take into account the Y, UV planes (ie.
+         /// subresource 0, 1, 2, 3..., N are different full NV12 references in the DPB) but when using the subresources
+         /// in other areas of D3D12 we need to convert it to the D3D12CalcSubresource format, explained in
+         /// https://docs.microsoft.com/en-us/windows/win32/direct3d12/subresources
+         ///
+         CD3DX12_RESOURCE_DESC refDesc(pOutputReference->GetDesc());
+         uint32_t              MipLevel, PlaneSlice, ArraySlice;
+         D3D12DecomposeSubresource(OutputSubresource,
+                                   refDesc.MipLevels,
+                                   refDesc.ArraySize(),
+                                   MipLevel,
+                                   ArraySlice,
+                                   PlaneSlice);
+
+         // One barrier per format plane (e.g. Y and UV for NV12) of this reference slice.
+         for (PlaneSlice = 0; PlaneSlice < m_formatInfo.PlaneCount; PlaneSlice++) {
+            uint planeOutputSubresource = refDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
+            outNeededTransitions.push_back(CD3DX12_RESOURCE_BARRIER::Transition(pOutputReference,
+                                                                                D3D12_RESOURCE_STATE_COMMON,
+                                                                                D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
+                                                                                planeOutputSubresource));
+         }
+      }
+   }
+}
+
+//----------------------------------------------------------------------------------------------------------------------------------
+// Marks every DXVA picture entry in the array as an in-use reference so its DPB
+// slot survives the next release_unused_references_texture_memory() pass.
+template <typename T, size_t size>
+void
+d3d12_video_decoder_references_manager::mark_references_in_use(const T (&picEntries)[size])
+{
+   for (size_t i = 0; i < size; i++)
+      mark_reference_in_use(picEntries[i].Index7Bits);
+}
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h b/src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h
new file mode 100644 (file)
index 0000000..08c6a9e
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#ifndef D3D12_VIDEO_DPB_STORAGE_MANAGER_INTERFACE_H
+#define D3D12_VIDEO_DPB_STORAGE_MANAGER_INTERFACE_H
+
+#include "d3d12_video_types.h"
+
+// One reconstructed (decoded) picture as stored in the DPB: the backing texture,
+// the subresource (array slice) within it, and the associated video heap object.
+struct d3d12_video_reconstructed_picture
+{
+   ID3D12Resource *pReconstructedPicture;
+   uint32_t        ReconstructedPictureSubresource;
+   IUnknown *      pVideoHeap;   // generic encode/decode heap; concrete type depends on the session
+};
+
+// Parallel arrays describing all reference frames currently in the DPB
+// (NumTexture2Ds entries each in ppTexture2Ds/pSubresources/ppHeaps).
+struct d3d12_video_reference_frames
+{
+   uint32_t         NumTexture2Ds;
+   ID3D12Resource **ppTexture2Ds;
+   uint32_t *       pSubresources;
+   IUnknown **      ppHeaps;
+};
+
+// Defines interface for storing and retrieving the decoded picture buffer ID3D12Resources with
+// the reconstructed pictures
+// Implementors of this interface can decide how to do this, let Class1 and Class2 be implementors...
+// for example Class1 can use a texture array and Class2 or an array of textures
+// Defines interface for storing and retrieving the decoded picture buffer ID3D12Resources with
+// the reconstructed pictures
+// Implementors of this interface can decide how to do this, let Class1 and Class2 be implementors...
+// for example Class1 can use a texture array and Class2 an array of textures
+class d3d12_video_dpb_storage_manager_interface
+{
+   // d3d12_video_dpb_storage_manager_interface
+ public:
+   // Adds a new reference frame at a given position
+   virtual void insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition) = 0;
+
+   // Gets a reference frame at a given position
+   virtual d3d12_video_reconstructed_picture get_reference_frame(uint32_t dpbPosition) = 0;
+
+   // Assigns (overwrites) the reference frame at a given position
+   virtual void assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition) = 0;
+
+   // Removes the reference frame at a given position and returns operation success
+   // pResourceUntracked is an optional output indicating if the removed resource was being tracked by the pool
+   virtual bool remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked) = 0;
+
+   // Returns the resource allocation for a NEW reconstructed picture
+   virtual d3d12_video_reconstructed_picture get_new_tracked_picture_allocation() = 0;
+
+   // Returns whether it found the tracked resource on this instance pool tracking and was able to free it
+   virtual bool untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem) = 0;
+
+   // Returns true if the trackedItem was allocated (and is being tracked) by this class
+   virtual bool is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem) = 0;
+
+   // resource pool size
+   virtual uint32_t get_number_of_tracked_allocations() = 0;
+
+   // number of resources in the pool that are marked as in use
+   virtual uint32_t get_number_of_in_use_allocations() = 0;
+
+   // Returns the number of pictures currently stored in the DPB
+   virtual uint32_t get_number_of_pics_in_dpb() = 0;
+
+   // Returns all the current reference frames stored in the storage manager
+   virtual d3d12_video_reference_frames get_current_reference_frames() = 0;
+
+   // Remove all pictures from DPB
+   // returns the number of resources marked as reusable
+   virtual uint32_t clear_decode_picture_buffer() = 0;
+
+   // Virtual destructor so implementations are destroyed through the interface pointer.
+   virtual ~d3d12_video_dpb_storage_manager_interface()
+   { }
+};
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp b/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp
new file mode 100644 (file)
index 0000000..199c678
--- /dev/null
@@ -0,0 +1,308 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_video_texture_array_dpb_manager.h"
+
+#ifndef _WIN32
+#include <wsl/winadapter.h>
+#endif
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <directx/d3d12.h>
+
+#include "d3d12_util.h"
+
+///
+/// d3d12_texture_array_dpb_manager
+///
+
+// Differences with ArrayOfTextures
+// Uses a D3D12 Texture Array instead of an std::vector with individual D3D resources as backing storage
+// Doesn't support extension (by reallocation and copy) of the pool
+
+// Allocates the single committed Tex2D resource with texArraySize array slices that
+// backs the whole DPB pool, using the session format/resolution and alloc flags.
+// No-op when texArraySize is 0 — *ppResource is then left untouched (callers pass a
+// default-initialized ComPtr address, so it stays null).
+void
+d3d12_texture_array_dpb_manager::create_reconstructed_picture_allocations(ID3D12Resource **ppResource,
+                                                                          uint16_t         texArraySize)
+{
+   if (texArraySize > 0) {
+      D3D12_HEAP_PROPERTIES Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, m_nodeMask, m_nodeMask);
+      CD3DX12_RESOURCE_DESC reconstructedPictureResourceDesc = CD3DX12_RESOURCE_DESC::Tex2D(m_encodeFormat,
+                                                                                            m_encodeResolution.Width,
+                                                                                            m_encodeResolution.Height,
+                                                                                            texArraySize,
+                                                                                            1,
+                                                                                            1,
+                                                                                            0,
+                                                                                            m_resourceAllocFlags);
+
+      HRESULT hr = m_pDevice->CreateCommittedResource(&Properties,
+                                                          D3D12_HEAP_FLAG_NONE,
+                                                          &reconstructedPictureResourceDesc,
+                                                          D3D12_RESOURCE_STATE_COMMON,
+                                                          nullptr,
+                                                          IID_PPV_ARGS(ppResource));
+      if (FAILED(hr)) {
+         debug_printf("CreateCommittedResource failed with HR %x\n", hr);
+         assert(false);
+      }
+   }
+}
+
+// Nothing to release explicitly: ComPtr/std::vector members clean up the texture array.
+d3d12_texture_array_dpb_manager::~d3d12_texture_array_dpb_manager() = default;
+
+// Builds a DPB backed by a single texture-array resource: allocates one committed
+// Tex2D with dpbTextureArraySize slices, then exposes each array slice as a pool
+// entry. Unlike the array-of-textures manager, the pool cannot grow afterwards.
+d3d12_texture_array_dpb_manager::d3d12_texture_array_dpb_manager(
+   uint16_t                                    dpbTextureArraySize,
+   ID3D12Device *                              pDevice,
+   DXGI_FORMAT                                 encodeSessionFormat,
+   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution,
+   D3D12_RESOURCE_FLAGS                        resourceAllocFlags,
+   uint32_t                                    nodeMask)
+   : m_pDevice(pDevice),
+     m_encodeFormat(encodeSessionFormat),
+     m_encodeResolution(encodeSessionResolution),
+     m_dpbTextureArraySize(dpbTextureArraySize),
+     m_resourceAllocFlags(resourceAllocFlags),
+     m_nodeMask(nodeMask)
+{
+   // Initialize D3D12 DPB exposed in this class implemented CRUD interface for a DPB
+   clear_decode_picture_buffer();
+
+   // Implement a reusable pool of D3D12 Resources as an array of textures
+   uint16_t poolFixedSize = m_dpbTextureArraySize;
+   m_ResourcesPool.resize(poolFixedSize);
+
+   // Build resource pool with commitedresources with a d3ddevice and the encoding session settings (eg. resolution) and
+   // the reference_only flag
+   create_reconstructed_picture_allocations(m_baseTexArrayResource.GetAddressOf(), poolFixedSize);
+
+   // Every pool entry shares the same base resource and differs only by array slice.
+   for (uint32_t idxSubres = 0; idxSubres < poolFixedSize; idxSubres++) {
+      m_ResourcesPool[idxSubres].pResource   = m_baseTexArrayResource;
+      m_ResourcesPool[idxSubres].subresource = idxSubres;
+      m_ResourcesPool[idxSubres].isFree      = true;
+   }
+}
+
+// Empties the DPB tables and returns every pool-owned resource still referenced by
+// them to the free list. Returns how many resources were actually untracked.
+uint32_t
+d3d12_texture_array_dpb_manager::clear_decode_picture_buffer()
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+
+   uint32_t untrackCount = 0;
+   // Mark resources used in DPB as re-usable in the resources pool
+   for (uint32_t idx = 0; idx < m_D3D12DPB.pResources.size(); idx++) {
+      // Don't assert the untracking result here in case the DPB contains resources not acquired using the pool methods
+      // in this interface
+      untrackCount +=
+         untrack_reconstructed_picture_allocation({ m_D3D12DPB.pResources[idx], m_D3D12DPB.pSubresources[idx] }) ? 1 :
+                                                                                                                   0;
+   }
+
+   // Clear DPB
+   m_D3D12DPB.pResources.clear();
+   m_D3D12DPB.pSubresources.clear();
+   m_D3D12DPB.pHeaps.clear();
+   m_D3D12DPB.pResources.reserve(m_dpbTextureArraySize);
+   m_D3D12DPB.pSubresources.reserve(m_dpbTextureArraySize);
+   m_D3D12DPB.pHeaps.reserve(m_dpbTextureArraySize);
+
+   return untrackCount;
+}
+
+// Assigns a reference frame at a given position
+void
+d3d12_texture_array_dpb_manager::assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture,
+                                                        uint32_t                          dpbPosition)
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   assert(dpbPosition < m_D3D12DPB.pResources.size());
+
+   m_D3D12DPB.pResources[dpbPosition]    = pReconPicture.pReconstructedPicture;
+   m_D3D12DPB.pSubresources[dpbPosition] = pReconPicture.ReconstructedPictureSubresource;
+   m_D3D12DPB.pHeaps[dpbPosition]        = pReconPicture.pVideoHeap;
+}
+
+// Adds a new reference frame at a given position, shifting later entries down.
+void
+d3d12_texture_array_dpb_manager::insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture,
+                                                        uint32_t                          dpbPosition)
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   if (dpbPosition > m_D3D12DPB.pResources.size()) {
+      // extend capacity
+      // NOTE(review): resizing to dpbPosition makes the insert below land exactly at the
+      // new end; the intermediate slots created here are value-initialized (null entries).
+      m_D3D12DPB.pResources.resize(dpbPosition);
+      m_D3D12DPB.pSubresources.resize(dpbPosition);
+      m_D3D12DPB.pHeaps.resize(dpbPosition);
+   }
+
+   m_D3D12DPB.pResources.insert(m_D3D12DPB.pResources.begin() + dpbPosition, pReconPicture.pReconstructedPicture);
+   m_D3D12DPB.pSubresources.insert(m_D3D12DPB.pSubresources.begin() + dpbPosition,
+                                   pReconPicture.ReconstructedPictureSubresource);
+   m_D3D12DPB.pHeaps.insert(m_D3D12DPB.pHeaps.begin() + dpbPosition, pReconPicture.pVideoHeap);
+}
+
+// Gets a reference frame at a given position
+d3d12_video_reconstructed_picture
+d3d12_texture_array_dpb_manager::get_reference_frame(uint32_t dpbPosition)
+{
+   assert(dpbPosition < m_D3D12DPB.pResources.size());
+
+   d3d12_video_reconstructed_picture retVal = { m_D3D12DPB.pResources[dpbPosition],
+                                                m_D3D12DPB.pSubresources[dpbPosition],
+                                                m_D3D12DPB.pHeaps[dpbPosition] };
+
+   return retVal;
+}
+
+// Removes a new reference frame at a given position and returns operation success
+bool
+d3d12_texture_array_dpb_manager::remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked)
+{
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
+   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
+
+   assert(dpbPosition < m_D3D12DPB.pResources.size());
+
+   // If removed resource came from resource pool, mark it as free
+   // to free it for a new usage
+   // Don't assert the untracking result here in case the DPB contains resources not adquired using the pool methods in
+   // this interface
+   bool resUntracked = untrack_reconstructed_picture_allocation(
+      { m_D3D12DPB.pResources[dpbPosition], m_D3D12DPB.pSubresources[dpbPosition] });
+
+   if (pResourceUntracked != nullptr) {
+      *pResourceUntracked = resUntracked;
+   }
+
+   // Remove from DPB tables
+   m_D3D12DPB.pResources.erase(m_D3D12DPB.pResources.begin() + dpbPosition);
+   m_D3D12DPB.pSubresources.erase(m_D3D12DPB.pSubresources.begin() + dpbPosition);
+   m_D3D12DPB.pHeaps.erase(m_D3D12DPB.pHeaps.begin() + dpbPosition);
+
+   return true;
+}
+
+// Returns true if trackedItem was allocated by this class and is currently in use.
+bool
+d3d12_texture_array_dpb_manager::is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem)
+{
+   for (auto &poolEntry : m_ResourcesPool) {
+      const bool sameResource    = (trackedItem.pReconstructedPicture == poolEntry.pResource.Get());
+      const bool sameSubresource = (trackedItem.ReconstructedPictureSubresource == poolEntry.subresource);
+      // Only an entry that is both matching and marked in-use counts as tracked.
+      if (sameResource && sameSubresource && !poolEntry.isFree)
+         return true;
+   }
+   return false;
+}
+
+// Returns whether the tracked resource was found in this instance's pool and freed.
+bool
+d3d12_texture_array_dpb_manager::untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem)
+{
+   for (auto &poolEntry : m_ResourcesPool) {
+      const bool sameResource    = (trackedItem.pReconstructedPicture == poolEntry.pResource.Get());
+      const bool sameSubresource = (trackedItem.ReconstructedPictureSubresource == poolEntry.subresource);
+      if (sameResource && sameSubresource) {
+         // Mark the pool slot reusable for a future allocation request.
+         poolEntry.isFree = true;
+         return true;
+      }
+   }
+   return false;
+}
+
+// Returns a fresh resource for a NEW picture to be written to
+// this class implements the dpb allocations as an array of textures
+d3d12_video_reconstructed_picture
+d3d12_texture_array_dpb_manager::get_new_tracked_picture_allocation()
+{
+   d3d12_video_reconstructed_picture freshAllocation = { // pResource
+                                                         nullptr,
+                                                         // subresource
+                                                         0
+   };
+
+   // Find first (if any) available resource to (re-)use
+   bool bAvailableResourceInPool = false;
+   for (auto &reusableRes : m_ResourcesPool) {
+      if (reusableRes.isFree) {
+         bAvailableResourceInPool                        = true;
+         freshAllocation.pReconstructedPicture           = reusableRes.pResource.Get();
+         freshAllocation.ReconstructedPictureSubresource = reusableRes.subresource;
+         reusableRes.isFree                              = false;
+         break;
+      }
+   }
+
+   if (!bAvailableResourceInPool) {
+      debug_printf("[d3d12_texture_array_dpb_manager] ID3D12Resource pool is full - Pool capacity (%ld) - Returning null allocation",
+                      m_ResourcesPool.size());
+   }
+
+   return freshAllocation;
+}
+
+// Returns the number of pictures currently stored in the DPB.
+uint32_t
+d3d12_texture_array_dpb_manager::get_number_of_pics_in_dpb()
+{
+   const size_t dpbCount = m_D3D12DPB.pResources.size();
+   // The three parallel DPB tables must stay in sync, and the count must fit in 32 bits.
+   assert(dpbCount == m_D3D12DPB.pSubresources.size());
+   assert(dpbCount == m_D3D12DPB.pHeaps.size());
+   assert(dpbCount < UINT32_MAX);
+   return static_cast<uint32_t>(dpbCount);
+}
+
+// Returns all the reference frames currently stored, as raw views over the DPB tables.
+d3d12_video_reference_frames
+d3d12_texture_array_dpb_manager::get_current_reference_frames()
+{
+   return d3d12_video_reference_frames {
+      get_number_of_pics_in_dpb(),
+      m_D3D12DPB.pResources.data(),
+      m_D3D12DPB.pSubresources.data(),
+      m_D3D12DPB.pHeaps.data(),
+   };
+}
+
+// Returns the number of pool resources currently marked as in use.
+uint32_t
+d3d12_texture_array_dpb_manager::get_number_of_in_use_allocations()
+{
+   uint32_t inUseCount = 0;
+   for (auto &poolEntry : m_ResourcesPool) {
+      if (!poolEntry.isFree)
+         inUseCount++;
+   }
+   return inUseCount;
+}
+
+// Returns the total number of allocations (free and in use) in the resource
+// pool. (Not the number of pictures in the DPB — see get_number_of_pics_in_dpb.)
+uint32_t
+d3d12_texture_array_dpb_manager::get_number_of_tracked_allocations()
+{
+   assert(m_ResourcesPool.size() < UINT32_MAX);
+   return static_cast<uint32_t>(m_ResourcesPool.size());
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h b/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h
new file mode 100644 (file)
index 0000000..a8212cf
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#ifndef D3D12_VIDEO_TEXTURE_ARRAY_DPB_MANAGER_H
+#define D3D12_VIDEO_TEXTURE_ARRAY_DPB_MANAGER_H
+
+#include "d3d12_video_dpb_storage_manager.h"
+#include "d3d12_video_types.h"
+
+// DPB storage manager implementation that backs reconstructed pictures with a
+// texture array (m_baseTexArrayResource); pool entries reference (resource,
+// subresource) pairs without taking memory ownership.
+class d3d12_texture_array_dpb_manager : public d3d12_video_dpb_storage_manager_interface
+{
+   // d3d12_video_dpb_storage_manager_interface
+ public:
+   // Adds a new reference frame at a given position
+   void insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition);
+
+   // Assigns a reference frame at a given position
+   void assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition);
+
+   // Gets a reference frame at a given position
+   d3d12_video_reconstructed_picture get_reference_frame(uint32_t dpbPosition);
+
+   // Removes the reference frame at a given position and returns operation success
+   // pResourceUntracked is an optional output indicating if the removed resource was being tracked by the pool
+   bool remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked = nullptr);
+
+   // Returns the resource allocation for a NEW picture
+   d3d12_video_reconstructed_picture get_new_tracked_picture_allocation();
+
+   // Returns true if the trackedItem was allocated (and is being tracked) by this class
+   bool is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem);
+
+   // Returns whether it found the tracked resource on this instance pool tracking and was able to free it
+   bool untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem);
+
+   // Returns the number of pictures currently stored in the DPB
+   uint32_t get_number_of_pics_in_dpb();
+
+   // Returns all the current reference frames stored
+   d3d12_video_reference_frames get_current_reference_frames();
+
+   // Removes all pictures from DPB
+   // returns the number of resources marked as reusable
+   uint32_t clear_decode_picture_buffer();
+
+   // number of resources in the pool that are marked as in use
+   uint32_t get_number_of_in_use_allocations();
+
+   // Total number of allocations (free and in use) in the resource pool
+   uint32_t get_number_of_tracked_allocations();
+
+   // d3d12_texture_array_dpb_manager
+ public:
+   d3d12_texture_array_dpb_manager(
+      uint16_t dpbInitialSize,   // Maximum in use resources for a DPB of size x should be x+1 for cases when a P frame
+                                 // is using the x references in the L0 list and also using an extra resource to output
+                                 // its own recon pic.
+      ID3D12Device *                              pDevice,
+      DXGI_FORMAT                                 encodeSessionFormat,
+      D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution,
+      D3D12_RESOURCE_FLAGS                        resourceAllocFlags = D3D12_RESOURCE_FLAG_NONE,
+      uint32_t                                    nodeMask           = 0);
+   ~d3d12_texture_array_dpb_manager();
+
+   // d3d12_texture_array_dpb_manager
+ private:
+   // Creates the backing texture-array allocation with texArraySize slices
+   void create_reconstructed_picture_allocations(ID3D12Resource **ppResource, uint16_t texArraySize);
+
+   ID3D12Device *                              m_pDevice;
+   DXGI_FORMAT                                 m_encodeFormat;
+   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC m_encodeResolution;
+   uint16_t                                    m_dpbTextureArraySize = 0;
+
+   // DPB with array of resources backing storage
+
+   // Three parallel tables indexed by DPB position; they must stay the same size.
+   struct d3d12_video_dpb
+   {
+      std::vector<ID3D12Resource *> pResources;
+      std::vector<uint32_t>         pSubresources;
+      std::vector<IUnknown *>       pHeaps;
+   };
+
+   d3d12_video_dpb m_D3D12DPB;
+
+   // Flags used when creating the resource pool
+   // Usually if reference only is needed for d3d12 video use
+   // D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
+   // D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
+   D3D12_RESOURCE_FLAGS m_resourceAllocFlags;
+
+   // Pool of resources to be aliased by the DPB without giving memory ownership
+   // These resources are allocated and released by this implementation
+   struct d3d12_reusable_resource
+   {
+      ComPtr<ID3D12Resource> pResource;
+      uint32_t               subresource;
+      bool                   isFree;   // true when the slot can back a new allocation
+   };
+
+   ComPtr<ID3D12Resource>               m_baseTexArrayResource;
+   std::vector<d3d12_reusable_resource> m_ResourcesPool;
+
+   uint32_t m_nodeMask = 0u;
+};
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_types.h b/src/gallium/drivers/d3d12/d3d12_video_types.h
new file mode 100644 (file)
index 0000000..4ae2e63
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_VIDEO_TYPES_H
+#define D3D12_VIDEO_TYPES_H
+
+#include <stdarg.h>
+#include <memory>
+#include <vector>
+#include <functional>
+
+#include "pipe/p_context.h"
+#include "pipe/p_video_codec.h"
+#include "d3d12_fence.h"
+#include "d3d12_debug.h"
+
+#include <directx/d3d12video.h>
+#include <dxguids/dxguids.h>
+
+#include <wrl/client.h>
+using Microsoft::WRL::ComPtr;
+
+// Allow encoder to continue the encoding session when an optional
+// rate control mode such as the following is used but not supported
+//
+// D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_VBV_SIZES
+// D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE
+//
+// If setting this OS Env variable to true, the encoding process will continue, disregarding the settings
+// requested for the optional RC mode
+//
+
+const bool D3D12_VIDEO_ENC_FALLBACK_RATE_CONTROL_CONFIG = debug_get_bool_option("D3D12_VIDEO_ENC_FALLBACK_RATE_CONTROL_CONFIG", false);
+
+/* For CBR mode, to guarantee bitrate of generated stream complies with
+* target bitrate (e.g. no over +/-10%), vbv_buffer_size should be same
+* as target bitrate. Controlled by OS env var D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE
+*/
+const bool D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE = debug_get_bool_option("D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE", false);
+
+// Allow encoder to continue the encoding session when a slice mode
+// is requested but not supported.
+//
+// If setting this OS Env variable to true, the encoder will try to adjust to the closest slice
+// setting available and encode using that configuration anyway
+//
+const bool D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG = debug_get_bool_option("D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG", false);
+
+// H.264 macroblock dimension (16x16 pixels)
+constexpr unsigned int D3D12_VIDEO_H264_MB_IN_PIXELS = 16;
+
+// Driver-specific decode configuration flags reported/consumed by the accelerator
+enum d3d12_video_decode_config_specific_flags
+{
+   d3d12_video_decode_config_specific_flag_none              = 0,
+   d3d12_video_decode_config_specific_flag_alignment_height  = 1 << 12,   // set by accelerator
+   d3d12_video_decode_config_specific_flag_array_of_textures = 1 << 14,   // set by accelerator
+   d3d12_video_decode_config_specific_flag_reuse_decoder =
+      1 << 15,   // set by accelerator - This bit means that the decoder can be re-used with resolution change and bit
+                 // depth change (including profile GUID change from 8bit to 10bit and vice versa).
+   d3d12_video_decode_config_specific_flag_reference_only_textures_required = 1 << 30,   // custom created for WSL
+};
+
+// Codec families supported by the d3d12 video decoder
+enum d3d12_video_decode_profile_type
+{
+   d3d12_video_decode_profile_type_none,
+   d3d12_video_decode_profile_type_h264,
+   d3d12_video_decode_profile_type_max_valid
+};
+
+// Describes the layout of the decode picture buffer allocations
+struct d3d12_video_decode_dpb_descriptor
+{
+   DXGI_FORMAT Format          = DXGI_FORMAT_UNKNOWN;
+   uint64_t    Width           = 0;
+   uint32_t    Height          = 0;
+   bool        fArrayOfTexture = false;   // one texture per picture instead of a texture array
+   bool        fReferenceOnly  = false;   // allocations need D3D12 reference-only resource flags
+   uint16_t    dpbSize         = 0;
+   uint32_t    m_NodeMask      = 0;
+};
+
+// Arguments for the decoder's output color-space/format conversion stage
+struct d3d12_video_decode_output_conversion_arguments
+{
+   BOOL                  Enable;
+   DXGI_COLOR_SPACE_TYPE OutputColorSpace;
+   D3D12_VIDEO_SAMPLE    ReferenceInfo;
+   uint32_t              ReferenceFrameCount;
+};
+
+// Codec/profile conversion helpers shared by the encode and decode paths
+void
+d3d12_video_encoder_convert_from_d3d12_level_h264(D3D12_VIDEO_ENCODER_LEVELS_H264 level12,
+                                                  uint32_t &                      specLevel,
+                                                  uint32_t &                      constraint_set3_flag);
+D3D12_VIDEO_ENCODER_PROFILE_H264
+d3d12_video_encoder_convert_profile_to_d3d12_enc_profile_h264(enum pipe_video_profile profile);
+D3D12_VIDEO_ENCODER_CODEC
+d3d12_video_encoder_convert_codec_to_d3d12_enc_codec(enum pipe_video_profile profile);
+GUID
+d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile);
+
+#endif
index 8c76155..7853895 100644 (file)
@@ -47,6 +47,12 @@ files_libd3d12 = files(
   'd3d12_surface.cpp',
   'd3d12_tcs_variant.cpp',
   'D3D12ResourceState.cpp',
+  'd3d12_video_dec.cpp',
+  'd3d12_video_dec_references_mgr.cpp',
+  'd3d12_video_dec_h264.cpp',
+  'd3d12_video_buffer.cpp',
+  'd3d12_video_texture_array_dpb_manager.cpp',
+  'd3d12_video_array_of_textures_dpb_manager.cpp',
 )
 
 if host_machine.system() == 'windows'