d3d12: introduce d3d12 gallium driver
author: Erik Faye-Lund <erik.faye-lund@collabora.com>
Sun, 26 May 2019 08:43:12 +0000 (10:43 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 10 Nov 2020 15:37:07 +0000 (15:37 +0000)
This driver will allow running OpenGL and OpenCL on top of Gallium
for any hardware supporting Microsoft's Direct3D 12 on Windows 10.

This is the combination of a lot of commits from our development branch,
containing code from several authors.

Co-authored-by: Bill Kristiansen <billkris@microsoft.com>
Co-authored-by: Gert Wollny <gert.wollny@collabora.com>
Co-authored-by: Jesse Natalie <jenatali@microsoft.com>
Co-authored-by: Louis-Francis Ratté-Boulianne <lfrb@collabora.com>
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7477>

47 files changed:
meson.build
meson_options.txt
src/gallium/drivers/d3d12/d3d12_batch.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_batch.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_blit.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_blit.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_bufmgr.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_bufmgr.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_compiler.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_compiler.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_context.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_context.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_debug.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_descriptor_pool.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_descriptor_pool.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_draw.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_fence.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_fence.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_format.c [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_format.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_gs_variant.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_lower_int_cubemap_to_array.c [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.c [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_nir_lower_vs_vertex_conversion.c [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_nir_passes.c [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_nir_passes.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_pipeline_state.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_public.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_query.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_query.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_resource.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_resource.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_root_signature.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_root_signature.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_screen.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_screen.h [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_surface.cpp [new file with mode: 0644]
src/gallium/drivers/d3d12/d3d12_surface.h [new file with mode: 0644]
src/gallium/drivers/d3d12/meson.build [new file with mode: 0644]
src/gallium/meson.build
src/gallium/targets/graw-gdi/meson.build
src/gallium/targets/libgl-gdi/libgl_gdi.c
src/gallium/targets/libgl-gdi/meson.build
src/meson.build

index a887f5c..d8ac6e8 100644 (file)
@@ -235,6 +235,7 @@ with_gallium_virgl = gallium_drivers.contains('virgl')
 with_gallium_swr = gallium_drivers.contains('swr')
 with_gallium_lima = gallium_drivers.contains('lima')
 with_gallium_zink = gallium_drivers.contains('zink')
+with_gallium_d3d12 = gallium_drivers.contains('d3d12')
 
 with_gallium = gallium_drivers.length() != 0
 
index 04a0c2b..7db6907 100644 (file)
@@ -68,7 +68,7 @@ option(
   choices : [
     'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno',
     'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl',
-    'swr', 'panfrost', 'iris', 'lima', 'zink'
+    'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12'
   ],
   description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
diff --git a/src/gallium/drivers/d3d12/d3d12_batch.cpp b/src/gallium/drivers/d3d12/d3d12_batch.cpp
new file mode 100644 (file)
index 0000000..a1b5aa0
--- /dev/null
@@ -0,0 +1,256 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_batch.h"
+#include "d3d12_context.h"
+#include "d3d12_fence.h"
+#include "d3d12_query.h"
+#include "d3d12_resource.h"
+#include "d3d12_screen.h"
+#include "d3d12_surface.h"
+
+#include "util/hash_table.h"
+#include "util/set.h"
+#include "util/u_inlines.h"
+
+bool
+d3d12_init_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+
+   batch->bos = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                 _mesa_key_pointer_equal);
+   batch->sampler_views = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                           _mesa_key_pointer_equal);
+   batch->surfaces = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+   batch->objects = _mesa_set_create(NULL,
+                                     _mesa_hash_pointer,
+                                     _mesa_key_pointer_equal);
+
+   if (!batch->bos || !batch->sampler_views || !batch->surfaces || !batch->objects)
+      return false;
+
+   util_dynarray_init(&batch->zombie_samplers, NULL);
+
+   if (FAILED(screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
+                                                  __uuidof(batch->cmdalloc),
+                                                  (void **)&batch->cmdalloc)))
+      return false;
+
+
+   batch->sampler_heap =
+      d3d12_descriptor_heap_new(screen->dev,
+                                D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+                                D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
+                                128);
+
+   batch->view_heap =
+      d3d12_descriptor_heap_new(screen->dev,
+                                D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+                                D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
+                                1024);
+
+   if (!batch->sampler_heap && !batch->view_heap)
+      return false;
+
+   return true;
+}
+
+/* _mesa_set_clear() callback: drop the reference the batch holds on a bo. */
+static void
+delete_bo(set_entry *entry)
+{
+   d3d12_bo_unreference((struct d3d12_bo *)entry->key);
+}
+
+/* _mesa_set_clear() callback: release the sampler view stored as set key. */
+static void
+delete_sampler_view(set_entry *entry)
+{
+   /* pipe_sampler_view_reference() wants an lvalue, hence the local copy. */
+   struct pipe_sampler_view *view = (struct pipe_sampler_view *)entry->key;
+
+   pipe_sampler_view_reference(&view, NULL);
+}
+
+/* _mesa_set_clear() callback: release the surface stored as set key. */
+static void
+delete_surface(set_entry *entry)
+{
+   /* pipe_surface_reference() wants an lvalue, hence the local copy. */
+   struct pipe_surface *surface = (struct pipe_surface *)entry->key;
+
+   pipe_surface_reference(&surface, NULL);
+}
+
+/* _mesa_set_clear() callback: release the D3D12 object stored as set key. */
+static void
+delete_object(set_entry *entry)
+{
+   ((ID3D12Object *)entry->key)->Release();
+}
+
+bool
+d3d12_reset_batch(struct d3d12_context *ctx, struct d3d12_batch *batch, uint64_t timeout_ns)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+
+   // batch hasn't been submitted before
+   if (!batch->fence && !batch->has_errors)
+      return true;
+
+   if (batch->fence) {
+      if (!d3d12_fence_finish(batch->fence, timeout_ns))
+         return false;
+      d3d12_fence_reference(&batch->fence, NULL);
+   }
+
+   _mesa_set_clear(batch->bos, delete_bo);
+   _mesa_set_clear(batch->sampler_views, delete_sampler_view);
+   _mesa_set_clear(batch->surfaces, delete_surface);
+   _mesa_set_clear(batch->objects, delete_object);
+
+   util_dynarray_foreach(&batch->zombie_samplers, d3d12_descriptor_handle, handle)
+      d3d12_descriptor_handle_free(handle);
+   util_dynarray_clear(&batch->zombie_samplers);
+
+   d3d12_descriptor_heap_clear(batch->view_heap);
+   d3d12_descriptor_heap_clear(batch->sampler_heap);
+
+   if (FAILED(batch->cmdalloc->Reset())) {
+      debug_printf("D3D12: resetting ID3D12CommandAllocator failed\n");
+      return false;
+   }
+   batch->has_errors = false;
+   return true;
+}
+
+/**
+ * Tear down a batch: reset it with an infinite timeout, then release the
+ * command allocator, both descriptor heaps, the tracking sets and the
+ * zombie sampler array.
+ */
+void
+d3d12_destroy_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
+{
+   struct set *tracked[] = {
+      batch->bos,
+      batch->sampler_views,
+      batch->surfaces,
+      batch->objects,
+   };
+
+   d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE);
+
+   batch->cmdalloc->Release();
+   d3d12_descriptor_heap_free(batch->sampler_heap);
+   d3d12_descriptor_heap_free(batch->view_heap);
+
+   for (unsigned i = 0; i < sizeof(tracked) / sizeof(tracked[0]); ++i)
+      _mesa_set_destroy(tracked[i], NULL);
+
+   util_dynarray_fini(&batch->zombie_samplers);
+}
+
+void
+d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   ID3D12DescriptorHeap* heaps[2] = { d3d12_descriptor_heap_get(batch->view_heap),
+                                      d3d12_descriptor_heap_get(batch->sampler_heap) };
+
+   d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE);
+
+   /* Create or reset global command list */
+   if (ctx->cmdlist) {
+      if (FAILED(ctx->cmdlist->Reset(batch->cmdalloc, NULL))) {
+         debug_printf("D3D12: resetting ID3D12GraphicsCommandList failed\n");
+         batch->has_errors = true;
+         return;
+      }
+   } else {
+      if (FAILED(screen->dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT,
+                                                batch->cmdalloc, NULL,
+                                                __uuidof(ctx->cmdlist),
+                                                (void **)&ctx->cmdlist))) {
+         debug_printf("D3D12: creating ID3D12GraphicsCommandList failed\n");
+         batch->has_errors = true;
+         return;
+      }
+   }
+
+   ctx->cmdlist->SetDescriptorHeaps(2, heaps);
+   ctx->cmdlist_dirty = ~0;
+   for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
+      ctx->shader_dirty[i] = ~0;
+
+   if (!ctx->queries_disabled)
+      d3d12_resume_queries(ctx);
+}
+
+/**
+ * Finish a batch: suspend queries, close the context command list, submit
+ * it to the screen's command queue and create the fence stored in
+ * batch->fence.
+ *
+ * If closing the command list fails, batch->has_errors is set and nothing
+ * is submitted.
+ */
+void
+d3d12_end_batch(struct d3d12_context *ctx, struct d3d12_batch *batch)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+
+   if (!ctx->queries_disabled)
+      d3d12_suspend_queries(ctx);
+
+   if (FAILED(ctx->cmdlist->Close())) {
+      debug_printf("D3D12: closing ID3D12GraphicsCommandList failed\n");
+      batch->has_errors = true;
+      return;
+   }
+
+   ID3D12CommandList *submit_lists[] = { ctx->cmdlist };
+   screen->cmdqueue->ExecuteCommandLists(1, submit_lists);
+
+   batch->fence = d3d12_create_fence(screen, ctx);
+}
+
+/* Tell whether the given bo is already present in the batch's bo set. */
+bool
+d3d12_batch_has_references(struct d3d12_batch *batch,
+                           struct d3d12_bo *bo)
+{
+   struct set_entry *found = _mesa_set_search(batch->bos, bo);
+
+   return found != NULL;
+}
+
+/**
+ * Track the bo behind a resource in the batch. A reference is taken the
+ * first time the bo is added; later calls for the same bo do nothing.
+ */
+void
+d3d12_batch_reference_resource(struct d3d12_batch *batch,
+                               struct d3d12_resource *res)
+{
+   if (d3d12_batch_has_references(batch, res->bo))
+      return;
+
+   _mesa_set_add(batch->bos, res->bo);
+   d3d12_bo_reference(res->bo);
+}
+
+/**
+ * Track a sampler view in the batch. A reference is taken the first time
+ * the view is added; later calls for the same view do nothing.
+ */
+void
+d3d12_batch_reference_sampler_view(struct d3d12_batch *batch,
+                                   struct d3d12_sampler_view *sv)
+{
+   if (_mesa_set_search(batch->sampler_views, sv))
+      return;
+
+   _mesa_set_add(batch->sampler_views, sv);
+   pipe_reference(NULL, &sv->base.reference);
+}
+
+/* Track the texture resource that backs a surface in the batch. */
+void
+d3d12_batch_reference_surface_texture(struct d3d12_batch *batch,
+                                      struct d3d12_surface *surf)
+{
+   struct d3d12_resource *texture = d3d12_resource(surf->base.texture);
+
+   d3d12_batch_reference_resource(batch, texture);
+}
+
+/**
+ * Track a D3D12 object in the batch. AddRef() is called the first time
+ * the object is added; later calls for the same object do nothing.
+ */
+void
+d3d12_batch_reference_object(struct d3d12_batch *batch,
+                             ID3D12Object *object)
+{
+   if (_mesa_set_search(batch->objects, object))
+      return;
+
+   _mesa_set_add(batch->objects, object);
+   object->AddRef();
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_batch.h b/src/gallium/drivers/d3d12/d3d12_batch.h
new file mode 100644 (file)
index 0000000..0f90a4b
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_BATCH_H
+#define D3D12_BATCH_H
+
+#include "util/u_dynarray.h"
+#include <stdint.h>
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <d3d12.h>
+
+struct d3d12_bo;
+struct d3d12_descriptor_heap;
+struct d3d12_fence;
+
+/* State that belongs to one submission of the context's command list. */
+struct d3d12_batch {
+   /* Created when the batch is submitted, dropped again when it is reset */
+   struct d3d12_fence *fence;
+
+   /* Pointer-keyed sets of what the batch tracks; emptied on reset */
+   struct set *bos;            /* struct d3d12_bo */
+   struct set *sampler_views;  /* sampler views */
+   struct set *surfaces;       /* struct pipe_surface */
+   struct set *objects;        /* ID3D12Object */
+
+   /* d3d12_descriptor_handle entries that are freed when the batch is reset */
+   struct util_dynarray zombie_samplers;
+
+   ID3D12CommandAllocator *cmdalloc;
+   /* Heaps bound to the command list when the batch is started */
+   struct d3d12_descriptor_heap *sampler_heap;
+   struct d3d12_descriptor_heap *view_heap;
+   /* Set when resetting, creating or closing the command list failed */
+   bool has_errors;
+};
+
+/* Create the sets, command allocator and descriptor heaps of a batch.
+ * Returns false on failure.
+ */
+bool
+d3d12_init_batch(struct d3d12_context *ctx, struct d3d12_batch *batch);
+
+/* Reset the batch with an infinite timeout and free everything it owns. */
+void
+d3d12_destroy_batch(struct d3d12_context *ctx, struct d3d12_batch *batch);
+
+/* Reset the batch and prepare the context command list for recording. */
+void
+d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch);
+
+/* Close and submit the context command list and create the batch fence. */
+void
+d3d12_end_batch(struct d3d12_context *ctx, struct d3d12_batch *batch);
+
+/* Wait up to timeout_ns for the batch fence, then drop all tracked items.
+ * Returns false if the wait or the command allocator reset fails.
+ */
+bool
+d3d12_reset_batch(struct d3d12_context *ctx, struct d3d12_batch *batch, uint64_t timeout_ns);
+
+/* Returns true if the bo is tracked by the batch. */
+bool
+d3d12_batch_has_references(struct d3d12_batch *batch,
+                           struct d3d12_bo *bo);
+
+/* Track the bo of a resource; takes a reference on first insertion. */
+void
+d3d12_batch_reference_resource(struct d3d12_batch *batch,
+                               struct d3d12_resource *res);
+
+/* Track a sampler view; takes a reference on first insertion. */
+void
+d3d12_batch_reference_sampler_view(struct d3d12_batch *batch,
+                                   struct d3d12_sampler_view *sv);
+
+/* Track the texture resource behind a surface. */
+void
+d3d12_batch_reference_surface_texture(struct d3d12_batch *batch,
+                              struct d3d12_surface *surf);
+
+/* Track a D3D12 object; calls AddRef() on first insertion. */
+void
+d3d12_batch_reference_object(struct d3d12_batch *batch,
+                             ID3D12Object *object);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_blit.cpp b/src/gallium/drivers/d3d12/d3d12_blit.cpp
new file mode 100644 (file)
index 0000000..3022487
--- /dev/null
@@ -0,0 +1,975 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_context.h"
+#include "d3d12_compiler.h"
+#include "d3d12_debug.h"
+#include "d3d12_format.h"
+#include "d3d12_resource.h"
+#include "d3d12_screen.h"
+
+#include "util/u_blitter.h"
+#include "util/format/u_format.h"
+
+#include "nir_to_dxil.h"
+#include "nir_builder.h"
+
+static void
+copy_buffer_region_no_barriers(struct d3d12_context *ctx,
+                               struct d3d12_resource *dst,
+                               uint64_t dst_offset,
+                               struct d3d12_resource *src,
+                               uint64_t src_offset,
+                               uint64_t size)
+{
+   uint64_t dst_off, src_off;
+   ID3D12Resource *dst_buf = d3d12_resource_underlying(dst, &dst_off);
+   ID3D12Resource *src_buf = d3d12_resource_underlying(src, &src_off);
+
+   ctx->cmdlist->CopyBufferRegion(dst_buf, dst_offset + dst_off,
+                                  src_buf, src_offset + src_off,
+                                  size);
+}
+
+/* A blit is a resolve when a multisampled source targets a destination
+ * with at most one sample.
+ */
+static bool
+is_resolve(const struct pipe_blit_info *info)
+{
+   if (info->src.resource->nr_samples <= 1)
+      return false;
+
+   return info->dst.resource->nr_samples <= 1;
+}
+
+static bool
+resolve_supported(const struct pipe_blit_info *info)
+{
+   assert(is_resolve(info));
+
+   // check for unsupported operations
+   if (util_format_is_depth_or_stencil(info->src.format) &&
+       info->mask != PIPE_MASK_Z) {
+      return false;
+   } else {
+      if (util_format_get_mask(info->dst.format) != info->mask ||
+          util_format_get_mask(info->src.format) != info->mask)
+         return false;
+   }
+
+   if (info->filter != PIPE_TEX_FILTER_NEAREST ||
+       info->scissor_enable ||
+       info->num_window_rectangles > 0 ||
+       info->alpha_blend)
+      return false;
+
+   // formats need to match
+   struct d3d12_resource *src = d3d12_resource(info->src.resource);
+   struct d3d12_resource *dst = d3d12_resource(info->dst.resource);
+   if (src->dxgi_format != dst->dxgi_format)
+      return false;
+
+   if (util_format_is_pure_integer(src->base.format))
+      return false;
+
+   // sizes needs to match
+   if (info->src.box.width != info->dst.box.width ||
+       info->src.box.height != info->dst.box.height)
+      return false;
+
+   // can only resolve full subresource
+   if (info->src.box.width != u_minify(info->src.resource->width0,
+                                       info->src.level) ||
+       info->src.box.height != u_minify(info->src.resource->height0,
+                                        info->src.level) ||
+       info->dst.box.width != u_minify(info->dst.resource->width0,
+                                           info->dst.level) ||
+       info->dst.box.height != u_minify(info->dst.resource->height0,
+                                            info->dst.level))
+      return false;
+
+   return true;
+}
+
+static void
+blit_resolve(struct d3d12_context *ctx, const struct pipe_blit_info *info)
+{
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   struct d3d12_resource *src = d3d12_resource(info->src.resource);
+   struct d3d12_resource *dst = d3d12_resource(info->dst.resource);
+
+   d3d12_transition_resource_state(ctx, src,
+                                   D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
+   d3d12_transition_resource_state(ctx, dst,
+                                   D3D12_RESOURCE_STATE_RESOLVE_DEST);
+
+   d3d12_apply_resource_states(ctx);
+
+   d3d12_batch_reference_resource(batch, src);
+   d3d12_batch_reference_resource(batch, dst);
+
+   DXGI_FORMAT dxgi_format = d3d12_get_resource_srv_format(src->base.format, src->base.target);
+
+   assert(src->dxgi_format == dst->dxgi_format);
+   ctx->cmdlist->ResolveSubresource(
+      d3d12_resource_resource(dst), info->dst.level,
+      d3d12_resource_resource(src), info->src.level,
+      dxgi_format);
+}
+
+/**
+ * Two formats can be copied directly when they are identical, or when one
+ * of them is the depth-only variant of the other (the stencil copy is
+ * skipped in that case).
+ */
+static bool
+formats_are_copy_compatible(enum pipe_format src, enum pipe_format dst)
+{
+   return src == dst ||
+          util_format_get_depth_only(src) == dst ||
+          util_format_get_depth_only(dst) == src;
+}
+
+static bool
+box_fits(const struct pipe_box *box, const struct pipe_resource *res, int level)
+{
+   unsigned lwidth = u_minify(res->width0, level);
+   unsigned lheight= u_minify(res->height0, level);
+   unsigned ldepth = res->target == PIPE_TEXTURE_3D ? u_minify(res->depth0, level) :
+                                                      res->array_size;
+
+   unsigned wb = box->x;
+   unsigned we = box->x + box->width;
+
+   unsigned hb = box->y;
+   unsigned he = box->y + box->height;
+
+   unsigned db = box->z;
+   unsigned de = box->z + box->depth;
+
+   return (wb <= lwidth && we <= lwidth &&
+           hb <= lheight && he <= lheight &&
+           db <= ldepth && de <= ldepth);
+}
+
+static bool
+direct_copy_supported(struct d3d12_screen *screen,
+                      const struct pipe_blit_info *info,
+                      bool have_predication)
+{
+   if (info->scissor_enable || info->alpha_blend ||
+       (have_predication && info->render_condition_enable) ||
+       MAX2(info->src.resource->nr_samples, 1) != MAX2(info->dst.resource->nr_samples, 1)) {
+      return false;
+   }
+
+   if (!formats_are_copy_compatible(info->src.format, info->dst.format))
+      return false;
+
+   if (util_format_is_depth_or_stencil(info->src.format) && !(info->mask & PIPE_MASK_ZS)) {
+      return false;
+   }
+
+   if (!util_format_is_depth_or_stencil(info->src.format)) {
+      if (util_format_get_mask(info->dst.format) != info->mask ||
+          util_format_get_mask(info->src.format) != info->mask)
+         return false;
+   }
+
+   if (abs(info->src.box.height) != info->dst.box.height) {
+      return false;
+   }
+
+   if (info->src.box.height != info->dst.box.height &&
+       (!util_format_is_depth_or_stencil(info->src.format) ||
+        screen->opts2.ProgrammableSamplePositionsTier ==
+        D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED)) {
+      return false;
+   }
+
+   if (!box_fits(&info->dst.box, info->dst.resource, info->dst.level)) {
+      return false;
+   }
+   if (!box_fits(&info->src.box, info->src.resource, info->src.level)) {
+      return false;
+   }
+
+   if (info->src.box.width != info->dst.box.width) {
+      return false;
+   }
+
+   if (info->src.box.depth != info->dst.box.depth) {
+      return false;
+   }
+
+   if ((screen->opts2.ProgrammableSamplePositionsTier ==
+        D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED &&
+        (info->src.resource->bind & PIPE_BIND_DEPTH_STENCIL ||
+         info->dst.resource->bind & PIPE_BIND_DEPTH_STENCIL)) ||
+        info->src.resource->nr_samples > 1) {
+
+      if (info->dst.box.x != 0 ||
+          info->dst.box.y != 0 ||
+          info->dst.box.z != 0)
+         return false;
+
+      if (info->src.box.x != 0 ||
+          info->src.box.y != 0 ||
+          info->src.box.z != 0 ||
+          info->src.box.width != u_minify(info->src.resource->width0,
+                                          info->src.level) ||
+          info->src.box.height != u_minify(info->src.resource->height0,
+                                           info->src.level) ||
+          info->src.box.depth != u_minify(info->src.resource->depth0,
+                                          info->src.level))
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Compute a subresource index. For targets where
+ * d3d12_subresource_id_uses_layer() is true, layer z is folded into the
+ * index (stride subresources per layer) and *updated_z, if given, is set
+ * to 0. For all other targets subres is returned unchanged.
+ */
+inline static unsigned
+get_subresource_id(enum pipe_texture_target target, unsigned subres, unsigned stride,
+                   unsigned z, unsigned *updated_z)
+{
+   if (!d3d12_subresource_id_uses_layer(target))
+      return subres;
+
+   if (updated_z)
+      *updated_z = 0;
+
+   return subres + stride * z;
+}
+
+static void
+copy_subregion_no_barriers(struct d3d12_context *ctx,
+                           struct d3d12_resource *dst,
+                           unsigned dst_level,
+                           unsigned dstx, unsigned dsty, unsigned dstz,
+                           struct d3d12_resource *src,
+                           unsigned src_level,
+                           const struct pipe_box *psrc_box,
+                           unsigned mask)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   D3D12_TEXTURE_COPY_LOCATION src_loc, dst_loc;
+   unsigned src_z = psrc_box->z;
+
+   int src_subres_stride = src->base.last_level + 1;
+   int dst_subres_stride = dst->base.last_level + 1;
+
+   int src_array_size = src->base.array_size;
+   int dst_array_size = dst->base.array_size;
+
+   if (dst->base.target == PIPE_TEXTURE_CUBE)
+      dst_array_size *= 6;
+
+   if (src->base.target == PIPE_TEXTURE_CUBE)
+      src_array_size *= 6;
+
+   int stencil_src_res_offset = 1;
+   int stencil_dst_res_offset = 1;
+
+   int src_nres = 1;
+   int dst_nres = 1;
+
+   if (dst->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+       dst->base.format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
+       dst->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+      stencil_dst_res_offset = dst_subres_stride * dst_array_size;
+      src_nres = 2;
+   }
+
+   if (src->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+       src->base.format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
+       dst->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+      stencil_src_res_offset = src_subres_stride * src_array_size;
+      dst_nres = 2;
+   }
+
+   static_assert(PIPE_MASK_S == 0x20 && PIPE_MASK_Z == 0x10, "unexpected ZS format mask");
+   int nsubres = min(src_nres, dst_nres);
+   unsigned subresource_copy_mask = nsubres > 1 ? mask >> 4 : 1;
+
+   for (int subres = 0; subres < nsubres; ++subres) {
+
+      if (!(subresource_copy_mask & (1 << subres)))
+         continue;
+
+      src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+      src_loc.SubresourceIndex = get_subresource_id(src->base.target, src_level, src_subres_stride, src_z, &src_z) +
+                                 subres * stencil_src_res_offset;
+      src_loc.pResource = d3d12_resource_resource(src);
+
+      dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+      dst_loc.SubresourceIndex = get_subresource_id(dst->base.target, dst_level, dst_subres_stride, dstz, &dstz) +
+                                 subres * stencil_dst_res_offset;
+      dst_loc.pResource = d3d12_resource_resource(dst);
+
+      if (psrc_box->x == 0 && psrc_box->y == 0 && psrc_box->z == 0 &&
+          psrc_box->width == u_minify(src->base.width0, src_level) &&
+          psrc_box->height == u_minify(src->base.height0, src_level) &&
+          psrc_box->depth == u_minify(src->base.depth0, src_level)) {
+
+         assert((dstx == 0 && dsty == 0 && dstz == 0) ||
+                screen->opts2.ProgrammableSamplePositionsTier !=
+                D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED ||
+                (!util_format_is_depth_or_stencil(dst->base.format) &&
+                 !util_format_is_depth_or_stencil(src->base.format) &&
+                  dst->base.nr_samples <= 1 &&
+                  src->base.nr_samples <= 1));
+
+         ctx->cmdlist->CopyTextureRegion(&dst_loc, dstx, dsty, dstz,
+                                         &src_loc, NULL);
+
+      } else {
+         D3D12_BOX src_box;
+         src_box.left = psrc_box->x;
+         src_box.right = MIN2(psrc_box->x + psrc_box->width, u_minify(src->base.width0, src_level));
+         src_box.top = psrc_box->y;
+         src_box.bottom = MIN2(psrc_box->y + psrc_box->height, u_minify(src->base.height0, src_level));
+         src_box.front = src_z;
+         src_box.back = src_z + psrc_box->depth;
+
+         assert((screen->opts2.ProgrammableSamplePositionsTier !=
+                 D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED ||
+                 (!util_format_is_depth_or_stencil(dst->base.format) &&
+                  !util_format_is_depth_or_stencil(src->base.format))) &&
+                dst->base.nr_samples <= 1 &&
+                src->base.nr_samples <= 1);
+
+         ctx->cmdlist->CopyTextureRegion(&dst_loc, dstx, dsty, dstz,
+                                         &src_loc, &src_box);
+      }
+   }
+}
+
+static void
+copy_resource_y_flipped_no_barriers(struct d3d12_context *ctx,
+                                    struct d3d12_resource *dst,
+                                    unsigned dst_level,
+                                    const struct pipe_box *pdst_box,
+                                    struct d3d12_resource *src,
+                                    unsigned src_level,
+                                    const struct pipe_box *psrc_box,
+                                    unsigned mask)
+{
+   if (D3D12_DEBUG_BLIT & d3d12_debug) {
+      debug_printf("D3D12 BLIT as COPY: from %s@%d %dx%dx%d + %dx%dx%d\n",
+                   util_format_name(src->base.format), src_level,
+                   psrc_box->x, psrc_box->y, psrc_box->z,
+                   psrc_box->width, psrc_box->height, psrc_box->depth);
+      debug_printf("      to   %s@%d %dx%dx%d\n",
+                   util_format_name(dst->base.format), dst_level,
+                   pdst_box->x, pdst_box->y, pdst_box->z);
+   }
+
+   struct pipe_box src_box = *psrc_box;
+   int src_inc = psrc_box->height > 0 ? 1 : -1;
+   int dst_inc = pdst_box->height > 0 ? 1 : -1;
+   src_box.height = 1;
+   int rows_to_copy = abs(psrc_box->height);
+
+   if (psrc_box->height < 0)
+      --src_box.y;
+
+   for (int y = 0, dest_y = pdst_box->y; y < rows_to_copy;
+        ++y, src_box.y += src_inc, dest_y += dst_inc) {
+      copy_subregion_no_barriers(ctx, dst, dst_level,
+                                 pdst_box->x, dest_y, pdst_box->z,
+                                 src, src_level, &src_box, mask);
+   }
+}
+
+/**
+ * Copy a region between two resources using D3D12 copy commands.
+ *
+ * Transitions the addressed source subresource to COPY_SOURCE and the
+ * destination subresource to COPY_DEST, applies the pending states,
+ * references both resources in the current batch and then records the copy:
+ *  - buffers are copied with copy_buffer_region_no_barriers(),
+ *  - boxes of equal height are copied with copy_subregion_no_barriers(),
+ *  - otherwise the heights must be of opposite sign and the copy is done
+ *    row by row with copy_resource_y_flipped_no_barriers().
+ *
+ * Only x/y/z and height of pdst_box are read here; the extent comes from
+ * psrc_box.
+ */
+void
+d3d12_direct_copy(struct d3d12_context *ctx,
+                  struct d3d12_resource *dst,
+                  unsigned dst_level,
+                  const struct pipe_box *pdst_box,
+                  struct d3d12_resource *src,
+                  unsigned src_level,
+                  const struct pipe_box *psrc_box,
+                  unsigned mask)
+{
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+
+   /* Subresource ids are derived from the mip level and the box's z. */
+   unsigned src_subres = get_subresource_id(src->base.target, src_level, src->base.last_level + 1,
+                                            psrc_box->z, nullptr);
+   unsigned dst_subres = get_subresource_id(dst->base.target, dst_level, dst->base.last_level + 1,
+                                            pdst_box->z, nullptr);
+
+   if (D3D12_DEBUG_BLIT & d3d12_debug)
+      debug_printf("BLIT: Direct copy from subres %d to subres  %d\n",
+                   src_subres, dst_subres);
+
+
+   /* Queue the state transitions for every plane of the two subresources. */
+   d3d12_transition_subresources_state(ctx, src, src_subres, 1, 0, 1, 0,
+                                       d3d12_get_format_num_planes(src->base.format),
+                                       D3D12_RESOURCE_STATE_COPY_SOURCE);
+
+   d3d12_transition_subresources_state(ctx, dst, dst_subres, 1, 0, 1, 0,
+                                       d3d12_get_format_num_planes(dst->base.format),
+                                       D3D12_RESOURCE_STATE_COPY_DEST);
+
+   d3d12_apply_resource_states(ctx);
+
+   d3d12_batch_reference_resource(batch, src);
+   d3d12_batch_reference_resource(batch, dst);
+
+   if (src->base.target == PIPE_BUFFER) {
+      copy_buffer_region_no_barriers(ctx, dst, pdst_box->x,
+                                     src, psrc_box->x, psrc_box->width);
+   } else if (psrc_box->height == pdst_box->height) {
+      /* No flipping, we can forward this directly to resource_copy_region */
+      copy_subregion_no_barriers(ctx, dst, dst_level,
+                                 pdst_box->x, pdst_box->y, pdst_box->z,
+                                 src, src_level, psrc_box, mask);
+   } else {
+      /* Heights differ only in sign: a vertical flip. */
+      assert(psrc_box->height == -pdst_box->height);
+      copy_resource_y_flipped_no_barriers(ctx, dst, dst_level, pdst_box,
+                                          src, src_level, psrc_box, mask);
+   }
+}
+
+/**
+ * Return true when the blit reads from and writes to the same mip level of
+ * the same underlying ID3D12Resource.
+ */
+static bool
+is_same_resource(const struct pipe_blit_info *info)
+{
+   if (info->src.level != info->dst.level)
+      return false;
+
+   return d3d12_resource_resource(d3d12_resource(info->src.resource)) ==
+          d3d12_resource_resource(d3d12_resource(info->dst.resource));
+}
+
+/**
+ * Create a single-level staging resource holding a copy of a source region.
+ *
+ * src_box may have negative extents; the copied region is the normalized
+ * (positive-extent) box covering the same texels.
+ *
+ * \param dst_box  out: the box inside the staging resource that corresponds
+ *                 to src_box. Where src_box has a negative extent on an
+ *                 axis, dst_box starts at the far edge and carries the same
+ *                 negative extent. Left untouched on failure.
+ * \return the new resource (caller owns the reference), or NULL when the
+ *         screen failed to allocate it.
+ */
+static struct pipe_resource *
+create_staging_resource(struct d3d12_context *ctx,
+                        struct d3d12_resource *src,
+                        unsigned src_level,
+                        const struct pipe_box *src_box,
+                        struct pipe_box *dst_box,
+                        unsigned mask)
+
+{
+   struct pipe_resource templ = {{0}};
+   struct pipe_resource *staging_res;
+   struct pipe_box copy_src;
+
+   /* Normalize the source box so every extent is positive. */
+   u_box_3d(MIN2(src_box->x, src_box->x + src_box->width),
+            MIN2(src_box->y, src_box->y + src_box->height),
+            MIN2(src_box->z, src_box->z + src_box->depth),
+            abs(src_box->width), abs(src_box->height), abs(src_box->depth),
+            &copy_src);
+
+   templ.format = src->base.format;
+   templ.width0 = copy_src.width;
+   templ.height0 = copy_src.height;
+   templ.depth0 = copy_src.depth;
+   templ.array_size = 1;
+   templ.nr_samples = 1;
+   templ.nr_storage_samples = 1;
+   templ.usage = PIPE_USAGE_STAGING;
+   templ.bind = util_format_is_depth_or_stencil(templ.format) ? PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET;
+   templ.target = src->base.target;
+
+   staging_res = ctx->base.screen->resource_create(ctx->base.screen, &templ);
+   if (!staging_res) {
+      /* d3d12_direct_copy() dereferences its destination, so bail out. */
+      return NULL;
+   }
+
+   dst_box->x = 0;
+   dst_box->y = 0;
+   dst_box->z = 0;
+   dst_box->width = copy_src.width;
+   dst_box->height = copy_src.height;
+   dst_box->depth = copy_src.depth;
+
+   d3d12_direct_copy(ctx, d3d12_resource(staging_res), 0, dst_box,
+                     src, src_level, &copy_src, mask);
+
+   /* Re-apply the caller's flips to the box that is handed back. */
+   if (src_box->width < 0) {
+      dst_box->x = dst_box->width;
+      dst_box->width = src_box->width;
+   }
+
+   if (src_box->height < 0) {
+      dst_box->y = dst_box->height;
+      dst_box->height = src_box->height;
+   }
+
+   if (src_box->depth < 0) {
+      dst_box->z = dst_box->depth;
+      dst_box->depth = src_box->depth;
+   }
+   return staging_res;
+}
+
+/**
+ * Blit within a single subresource by first copying the source region into
+ * a staging resource and then blitting from that copy.
+ *
+ * If the staging resource cannot be created the blit is dropped with a
+ * debug message instead of re-entering the blit path with a NULL source.
+ */
+static void
+blit_same_resource(struct d3d12_context *ctx,
+                   const struct pipe_blit_info *info)
+{
+   struct pipe_blit_info dst_info = *info;
+
+   dst_info.src.level = 0;
+   dst_info.src.resource = create_staging_resource(ctx, d3d12_resource(info->src.resource),
+                                                   info->src.level,
+                                                   &info->src.box,
+                                                   &dst_info.src.box, PIPE_MASK_RGBAZS);
+   if (!dst_info.src.resource) {
+      debug_printf("D3D12: failed to create staging resource for blit\n");
+      return;
+   }
+
+   /* Source and destination now differ, so this takes another blit path. */
+   ctx->base.blit(&ctx->base, &dst_info);
+   pipe_resource_reference(&dst_info.src.resource, NULL);
+}
+
+/**
+ * Hand the context's current graphics state to u_blitter so that it can be
+ * restored once a blitter operation has finished.
+ */
+static void
+util_blit_save_state(struct d3d12_context *ctx)
+{
+   struct blitter_context *blitter = ctx->blitter;
+
+   /* Fixed-function pipeline state objects */
+   util_blitter_save_blend(blitter, ctx->gfx_pipeline_state.blend);
+   util_blitter_save_depth_stencil_alpha(blitter, ctx->gfx_pipeline_state.zsa);
+   util_blitter_save_vertex_elements(blitter, ctx->gfx_pipeline_state.ves);
+   util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
+   util_blitter_save_rasterizer(blitter, ctx->gfx_pipeline_state.rast);
+
+   /* Shader stages */
+   util_blitter_save_fragment_shader(blitter, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_vertex_shader(blitter, ctx->gfx_stages[PIPE_SHADER_VERTEX]);
+   util_blitter_save_geometry_shader(blitter, ctx->gfx_stages[PIPE_SHADER_GEOMETRY]);
+
+   /* Render target, viewport and scissor */
+   util_blitter_save_framebuffer(blitter, &ctx->fb);
+   util_blitter_save_viewport(blitter, ctx->viewport_states);
+   util_blitter_save_scissor(blitter, ctx->scissor_states);
+
+   /* Fragment-stage textures and samplers */
+   util_blitter_save_fragment_sampler_states(blitter,
+                                             ctx->num_samplers[PIPE_SHADER_FRAGMENT],
+                                             (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_fragment_sampler_views(blitter,
+                                            ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
+                                            ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
+
+   /* Buffers, sample mask and stream-output targets */
+   util_blitter_save_fragment_constant_buffer_slot(blitter, ctx->cbufs[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_vertex_buffer_slot(blitter, ctx->vbs);
+   util_blitter_save_sample_mask(blitter, ctx->gfx_pipeline_state.sample_mask);
+   util_blitter_save_so_targets(blitter, ctx->gfx_pipeline_state.num_so_targets, ctx->so_targets);
+}
+
+/**
+ * Perform a blit through u_blitter: save the current context state, then
+ * let util_blitter_blit() execute the blit described by info.
+ */
+static void
+util_blit(struct d3d12_context *ctx,
+          const struct pipe_blit_info *info)
+{
+   util_blit_save_state(ctx);
+
+   util_blitter_blit(ctx->blitter, info);
+}
+
+/**
+ * Decide whether a resolve that includes stencil can be handled by
+ * blit_resolve_stencil().
+ *
+ * Requires a depth/stencil source with PIPE_MASK_S requested. If depth is
+ * requested too, the depth-only part must be resolvable or blittable. The
+ * stencil part is checked as a blit into an R8_UINT destination.
+ */
+static bool
+resolve_stencil_supported(struct d3d12_context *ctx,
+                          const struct pipe_blit_info *info)
+{
+   assert(is_resolve(info));
+
+   const bool wants_stencil = (info->mask & PIPE_MASK_S) != 0;
+   if (!wants_stencil || !util_format_is_depth_or_stencil(info->src.format))
+      return false;
+
+   if (info->mask & PIPE_MASK_Z) {
+      struct pipe_blit_info depth_info = *info;
+      depth_info.mask = PIPE_MASK_Z;
+
+      const bool depth_ok =
+         resolve_supported(&depth_info) ||
+         util_blitter_is_blit_supported(ctx->blitter, &depth_info);
+      if (!depth_ok)
+         return false;
+   }
+
+   struct pipe_blit_info stencil_info = *info;
+   stencil_info.dst.format = PIPE_FORMAT_R8_UINT;
+   return util_blitter_is_blit_supported(ctx->blitter, &stencil_info);
+}
+
+/**
+ * Allocate the R8_UINT temporary that receives the resolved stencil data.
+ *
+ * Its extent is the blit's destination box; target and sample counts are
+ * taken from the destination resource.
+ */
+static struct pipe_resource *
+create_tmp_resource(struct pipe_screen *screen,
+                    const struct pipe_blit_info *info)
+{
+   const struct pipe_resource *dst_res = info->dst.resource;
+   struct pipe_resource templ = { 0 };
+
+   /* Properties inherited from the destination resource */
+   templ.target = dst_res->target;
+   templ.nr_samples = dst_res->nr_samples;
+   templ.nr_storage_samples = dst_res->nr_storage_samples;
+
+   /* Extent of the destination region */
+   templ.width0 = info->dst.box.width;
+   templ.height0 = info->dst.box.height;
+   templ.depth0 = info->dst.box.depth;
+   templ.array_size = 1;
+
+   templ.format = PIPE_FORMAT_R8_UINT;
+   templ.usage = PIPE_USAGE_STREAM;
+   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+
+   return screen->resource_create(screen, &templ);
+}
+
+/**
+ * Return the vertex shader used for the stencil resolve, building it on
+ * first use and caching it in ctx->stencil_resolve_vs.
+ *
+ * The shader is a pass-through: it stores its vec4 "pos" input to
+ * gl_Position.
+ */
+static void *
+get_stencil_resolve_vs(struct d3d12_context *ctx)
+{
+   if (ctx->stencil_resolve_vs)
+      return ctx->stencil_resolve_vs;
+
+   nir_builder b;
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX,
+                                  dxil_get_nir_compiler_options());
+   /* NOTE(review): the name says "linear_blit_vs" although this is the
+    * stencil-resolve shader - looks copied; confirm before renaming. */
+   b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");
+
+   const struct glsl_type *vec4 = glsl_vec4_type();
+   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                              vec4, "pos");
+
+   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+                                               vec4, "gl_Position");
+   pos_out->data.location = VARYING_SLOT_POS;
+
+   /* gl_Position = pos (all four components) */
+   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
+
+   struct pipe_shader_state state = { 0 };
+   state.type = PIPE_SHADER_IR_NIR;
+   state.ir.nir = b.shader;
+   ctx->stencil_resolve_vs = ctx->base.create_vs_state(&ctx->base, &state);
+
+   return ctx->stencil_resolve_vs;
+}
+
+/**
+ * Return the fragment shader used for the stencil resolve, building it on
+ * first use and caching it in ctx->stencil_resolve_fs.
+ *
+ * The shader fetches sample 0 of a multisampled uint texture (binding 0) at
+ * the integer fragment position and writes component 1 of the fetched
+ * texel to its single uint colour output.
+ */
+static void *
+get_stencil_resolve_fs(struct d3d12_context *ctx)
+{
+   if (ctx->stencil_resolve_fs)
+      return ctx->stencil_resolve_fs;
+
+   nir_builder b;
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT,
+                                  dxil_get_nir_compiler_options());
+
+   nir_variable *stencil_out = nir_variable_create(b.shader,
+                                                   nir_var_shader_out,
+                                                   glsl_uint_type(),
+                                                   "stencil_out");
+   stencil_out->data.location = FRAG_RESULT_COLOR;
+
+   /* Multisampled, non-array, non-shadow uint sampler at binding 0 */
+   const struct glsl_type *sampler_type =
+      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_UINT);
+   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+                                               sampler_type, "stencil_tex");
+   sampler->data.binding = 0;
+   sampler->data.explicit_binding = true;
+
+   nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                              glsl_vec4_type(), "pos");
+   pos_in->data.location = VARYING_SLOT_POS; // VARYING_SLOT_VAR0?
+   nir_ssa_def *pos = nir_load_var(&b, pos_in);
+
+   /* txf_ms with three sources: coordinate, sample index, texture deref */
+   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+   tex->op = nir_texop_txf_ms;
+   tex->src[0].src_type = nir_tex_src_coord;
+   /* integer xy of the position is used as the texel coordinate */
+   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, nir_f2i32(&b, pos), 0x3));
+   tex->src[1].src_type = nir_tex_src_ms_index;
+   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); /* just use first sample */
+   tex->src[2].src_type = nir_tex_src_texture_deref;
+   tex->src[2].src = nir_src_for_ssa(tex_deref);
+   tex->dest_type = nir_type_uint;
+   tex->is_array = false;
+   tex->coord_components = 2;
+
+   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+   nir_builder_instr_insert(&b, &tex->instr);
+
+   /* Component 1 of the fetched texel is written out - presumably where the
+    * stencil-only view exposes the stencil value; TODO confirm. */
+   nir_store_var(&b, stencil_out, nir_channel(&b, &tex->dest.ssa, 1), 0x1);
+
+   struct pipe_shader_state state = { 0 };
+   state.type = PIPE_SHADER_IR_NIR;
+   state.ir.nir = b.shader;
+   ctx->stencil_resolve_fs = ctx->base.create_fs_state(&ctx->base, &state);
+
+   return ctx->stencil_resolve_fs;
+}
+
+/**
+ * Return the sampler state used by the stencil resolve (clamp-to-edge on
+ * all axes, normalized coordinates, everything else zero), creating and
+ * caching it in ctx->sampler_state on first use.
+ */
+static void *
+get_sampler_state(struct d3d12_context *ctx)
+{
+   if (!ctx->sampler_state) {
+      struct pipe_sampler_state templ;
+      memset(&templ, 0, sizeof(templ));
+      templ.normalized_coords = 1;
+      templ.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      templ.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      templ.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+
+      ctx->sampler_state = ctx->base.create_sampler_state(&ctx->base, &templ);
+   }
+
+   return ctx->sampler_state;
+}
+
+/**
+ * Render the stencil values of the blit source into a freshly created
+ * R8_UINT temporary using the custom stencil-resolve shaders.
+ *
+ * \return the temporary (caller owns the reference), or NULL if the
+ *         temporary, its surface or the source sampler view could not be
+ *         created. Nothing is leaked on the failure paths.
+ */
+static struct pipe_resource *
+resolve_stencil_to_temp(struct d3d12_context *ctx,
+                        const struct pipe_blit_info *info)
+{
+   struct pipe_context *pctx = &ctx->base;
+   struct pipe_resource *tmp = create_tmp_resource(pctx->screen, info);
+   if (!tmp) {
+      debug_printf("D3D12: failed to create stencil-resolve temp-resource\n");
+      return NULL;
+   }
+   assert(tmp->nr_samples < 2);
+
+   /* resolve stencil into tmp */
+   struct pipe_surface dst_tmpl;
+   util_blitter_default_dst_texture(&dst_tmpl, tmp, 0, 0);
+   dst_tmpl.format = tmp->format;
+   struct pipe_surface *dst_surf = pctx->create_surface(pctx, tmp, &dst_tmpl);
+   if (!dst_surf) {
+      debug_printf("D3D12: failed to create stencil-resolve dst-surface\n");
+      /* Drop the temporary; the caller only sees NULL. */
+      pipe_resource_reference(&tmp, NULL);
+      return NULL;
+   }
+
+   /* View the source through its stencil-only format. */
+   struct pipe_sampler_view src_templ, *src_view;
+   util_blitter_default_src_texture(ctx->blitter, &src_templ,
+                                    info->src.resource, info->src.level);
+   src_templ.format = util_format_stencil_only(info->src.format);
+   src_view = pctx->create_sampler_view(pctx, info->src.resource, &src_templ);
+   if (!src_view) {
+      debug_printf("D3D12: failed to create stencil-resolve src-view\n");
+      pipe_surface_reference(&dst_surf, NULL);
+      pipe_resource_reference(&tmp, NULL);
+      return NULL;
+   }
+
+   void *sampler_state = get_sampler_state(ctx);
+
+   /* Save state first: the bindings below overwrite the app's state. */
+   util_blit_save_state(ctx);
+   pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 1, &src_view);
+   pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state);
+   util_blitter_custom_shader(ctx->blitter, dst_surf,
+                              get_stencil_resolve_vs(ctx),
+                              get_stencil_resolve_fs(ctx));
+   util_blitter_restore_textures(ctx->blitter);
+   pipe_surface_reference(&dst_surf, NULL);
+   pipe_sampler_view_reference(&src_view, NULL);
+   return tmp;
+}
+
+/**
+ * Resolve a depth/stencil blit whose mask includes stencil.
+ *
+ * Depth (if requested) is resolved or blitted on its own. Stencil is first
+ * rendered into an R8_UINT temporary and then copied from subresource 0 of
+ * that temporary into subresource 1 of the destination.
+ *
+ * If the temporary cannot be produced the stencil part is skipped; the
+ * failure has already been reported by resolve_stencil_to_temp().
+ */
+static void
+blit_resolve_stencil(struct d3d12_context *ctx,
+                     const struct pipe_blit_info *info)
+{
+   assert(info->mask & PIPE_MASK_S);
+
+   if (D3D12_DEBUG_BLIT & d3d12_debug)
+      debug_printf("D3D12 BLIT: blit_resolve_stencil\n");
+
+   if (info->mask & PIPE_MASK_Z) {
+      /* resolve depth into dst */
+      struct pipe_blit_info new_info = *info;
+      new_info.mask = PIPE_MASK_Z;
+
+      if (resolve_supported(&new_info))
+         blit_resolve(ctx, &new_info);
+      else
+         util_blit(ctx, &new_info);
+   }
+
+   struct pipe_resource *tmp = resolve_stencil_to_temp(ctx, info);
+   if (!tmp)
+      return;
+
+   /* copy resolved stencil into dst */
+   struct d3d12_resource *dst = d3d12_resource(info->dst.resource);
+   d3d12_transition_subresources_state(ctx, d3d12_resource(tmp),
+                                       0, 1, 0, 1, 0, 1,
+                                       D3D12_RESOURCE_STATE_COPY_SOURCE);
+   d3d12_transition_subresources_state(ctx, dst,
+                                       0, 1, 0, 1, 1, 1,
+                                       D3D12_RESOURCE_STATE_COPY_DEST);
+   d3d12_apply_resource_states(ctx);
+
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   d3d12_batch_reference_resource(batch, d3d12_resource(tmp));
+   d3d12_batch_reference_resource(batch, dst);
+
+   /* The whole temporary is copied. */
+   D3D12_BOX src_box;
+   src_box.left = src_box.top = src_box.front = 0;
+   src_box.right = tmp->width0;
+   src_box.bottom = tmp->height0;
+   src_box.back = tmp->depth0;
+
+   D3D12_TEXTURE_COPY_LOCATION src_loc;
+   src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+   src_loc.SubresourceIndex = 0;
+   src_loc.pResource = d3d12_resource_resource(d3d12_resource(tmp));
+
+   /* Subresource 1 matches the plane transitioned to COPY_DEST above -
+    * presumably the stencil plane of the destination; TODO confirm. */
+   D3D12_TEXTURE_COPY_LOCATION dst_loc;
+   dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+   dst_loc.SubresourceIndex = 1;
+   dst_loc.pResource = d3d12_resource_resource(dst);
+
+   ctx->cmdlist->CopyTextureRegion(&dst_loc, info->dst.box.x,
+                                   info->dst.box.y, info->dst.box.z,
+                                   &src_loc, &src_box);
+
+   pipe_resource_reference(&tmp, NULL);
+}
+
+/**
+ * Decide whether blit_replicate_stencil() can handle the blit: the source
+ * must be depth/stencil with PIPE_MASK_S requested, and if depth is also
+ * requested u_blitter must support the depth-only blit.
+ */
+static bool
+replicate_stencil_supported(struct d3d12_context *ctx,
+                            const struct pipe_blit_info *info)
+{
+   const bool stencil_requested =
+      util_format_is_depth_or_stencil(info->src.format) &&
+      (info->mask & PIPE_MASK_S);
+   if (!stencil_requested)
+      return false;
+
+   if (!(info->mask & PIPE_MASK_Z))
+      return true;
+
+   struct pipe_blit_info depth_info = *info;
+   depth_info.mask = PIPE_MASK_Z;
+   return util_blitter_is_blit_supported(ctx->blitter, &depth_info);
+}
+
+/**
+ * Blit a depth/stencil region whose mask includes stencil.
+ *
+ * Depth (if requested) goes through the regular u_blitter blit; stencil is
+ * handled by util_blitter_stencil_fallback(). The scissor is forwarded only
+ * when info->scissor_enable is set.
+ */
+static void
+blit_replicate_stencil(struct d3d12_context *ctx,
+                       const struct pipe_blit_info *info)
+{
+   assert(info->mask & PIPE_MASK_S);
+
+   if (D3D12_DEBUG_BLIT & d3d12_debug)
+      debug_printf("D3D12 BLIT: blit_replicate_stencil\n");
+
+   if (info->mask & PIPE_MASK_Z) {
+      /* blit depth into dst */
+      struct pipe_blit_info new_info = *info;
+      new_info.mask = PIPE_MASK_Z;
+      util_blit(ctx, &new_info);
+   }
+
+   /* State is saved again: the depth blit above restored it. */
+   util_blit_save_state(ctx);
+   util_blitter_stencil_fallback(ctx->blitter, info->dst.resource,
+                                 info->dst.level,
+                                 &info->dst.box,
+                                 info->src.resource,
+                                 info->src.level,
+                                 &info->src.box,
+                                 info->scissor_enable ? &info->scissor : NULL);
+}
+
+/**
+ * pipe_context::blit entry point.
+ *
+ * Picks the first applicable strategy, in this order:
+ *  1. source and destination are the same subresource: go through a
+ *     staging copy (blit_same_resource),
+ *  2. resolve blits: native resolve, then u_blitter, then the stencil
+ *     resolve path,
+ *  3. direct copy via d3d12_direct_copy(),
+ *  4. u_blitter,
+ *  5. stencil replication fallback.
+ * Unsupported combinations only emit a debug message.
+ *
+ * When the blit is not subject to the render condition but predication is
+ * active, predication is switched off for the duration of the blit and
+ * re-enabled afterwards.
+ */
+void
+d3d12_blit(struct pipe_context *pctx,
+           const struct pipe_blit_info *info)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   if (!info->render_condition_enable && ctx->current_predication) {
+      if (D3D12_DEBUG_BLIT & d3d12_debug)
+         debug_printf("D3D12 BLIT: Disable predication\n");
+      ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+   }
+
+   if (D3D12_DEBUG_BLIT & d3d12_debug) {
+      debug_printf("D3D12 BLIT: from %s@%d msaa:%d %dx%dx%d + %dx%dx%d\n",
+                   util_format_name(info->src.format), info->src.level,
+                   info->src.resource->nr_samples,
+                   info->src.box.x, info->src.box.y, info->src.box.z,
+                   info->src.box.width, info->src.box.height, info->src.box.depth);
+      debug_printf("            to   %s@%d msaa:%d %dx%dx%d + %dx%dx%d ",
+                   util_format_name(info->dst.format), info->dst.level,
+                   info->dst.resource->nr_samples,
+                   info->dst.box.x, info->dst.box.y, info->dst.box.z,
+                   info->dst.box.width, info->dst.box.height, info->dst.box.depth);
+      debug_printf("| flags %s%s%s\n",
+                   info->render_condition_enable ? "cond " : "",
+                   info->scissor_enable ? "scissor " : "",
+                   info->alpha_blend ? "blend" : "");
+   }
+
+   /* Strategy selection; the first matching branch wins. */
+   if (is_same_resource(info))
+      blit_same_resource(ctx, info);
+   else if (is_resolve(info)) {
+      if (resolve_supported(info))
+         blit_resolve(ctx, info);
+      else if (util_blitter_is_blit_supported(ctx->blitter, info))
+         util_blit(ctx, info);
+      else if (resolve_stencil_supported(ctx, info))
+         blit_resolve_stencil(ctx, info);
+      else
+         debug_printf("D3D12: resolve unsupported %s -> %s\n",
+                    util_format_short_name(info->src.resource->format),
+                    util_format_short_name(info->dst.resource->format));
+   } else if (direct_copy_supported(d3d12_screen(pctx->screen), info,
+                                    ctx->current_predication != nullptr))
+      d3d12_direct_copy(ctx, d3d12_resource(info->dst.resource),
+                        info->dst.level, &info->dst.box,
+                        d3d12_resource(info->src.resource),
+                        info->src.level, &info->src.box, info->mask);
+   else if (util_blitter_is_blit_supported(ctx->blitter, info))
+      util_blit(ctx, info);
+   else if (replicate_stencil_supported(ctx, info))
+      blit_replicate_stencil(ctx, info);
+   else
+      debug_printf("D3D12: blit unsupported %s -> %s\n",
+                 util_format_short_name(info->src.resource->format),
+                 util_format_short_name(info->dst.resource->format));
+
+   /* Mirror of the check at the top: restore the predication. */
+   if (!info->render_condition_enable && ctx->current_predication) {
+      ctx->cmdlist->SetPredication(
+               d3d12_resource_resource(ctx->current_predication), 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+      if (D3D12_DEBUG_BLIT & d3d12_debug)
+         debug_printf("D3D12 BLIT: Re-enable predication\n");
+   }
+
+}
+
+/**
+ * pipe_context::resource_copy_region entry point.
+ *
+ * Copies psrc_box of (psrc, src_level) to (dstx, dsty, dstz) of
+ * (pdst, dst_level) via d3d12_direct_copy(). When source and destination
+ * are the same mip level of the same underlying resource, the source region
+ * is first copied to a staging resource, which is then used as the source.
+ *
+ * If that staging resource cannot be created the copy is dropped with a
+ * debug message.
+ */
+static void
+d3d12_resource_copy_region(struct pipe_context *pctx,
+                           struct pipe_resource *pdst,
+                           unsigned dst_level,
+                           unsigned dstx, unsigned dsty, unsigned dstz,
+                           struct pipe_resource *psrc,
+                           unsigned src_level,
+                           const struct pipe_box *psrc_box)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_resource *dst = d3d12_resource(pdst);
+   struct d3d12_resource *src = d3d12_resource(psrc);
+   struct pipe_resource *staging_res = NULL;
+   const struct pipe_box *src_box = psrc_box;
+   struct pipe_box staging_box, dst_box;
+
+   if (D3D12_DEBUG_BLIT & d3d12_debug) {
+      debug_printf("D3D12 COPY: from %s@%d msaa:%d mips:%d %dx%dx%d + %dx%dx%d\n",
+                   util_format_name(psrc->format), src_level, psrc->nr_samples,
+                   psrc->last_level,
+                   psrc_box->x, psrc_box->y, psrc_box->z,
+                   psrc_box->width, psrc_box->height, psrc_box->depth);
+      /* Report the destination's own sample count and mip range. */
+      debug_printf("            to   %s@%d msaa:%d mips:%d %dx%dx%d\n",
+                   util_format_name(pdst->format), dst_level, pdst->nr_samples,
+                   pdst->last_level, dstx, dsty, dstz);
+   }
+
+   /* Use an intermediate resource if copying from/to the same subresource */
+   if (d3d12_resource_resource(dst) == d3d12_resource_resource(src) && dst_level == src_level) {
+      staging_res = create_staging_resource(ctx, src, src_level, psrc_box, &staging_box, PIPE_MASK_RGBAZS);
+      if (!staging_res) {
+         debug_printf("D3D12: failed to create staging resource for copy\n");
+         return;
+      }
+      src = d3d12_resource(staging_res);
+      src_level = 0;
+      src_box = &staging_box;
+   }
+
+   /* Fill in every field so no indeterminate value is handed on. */
+   dst_box.x = dstx;
+   dst_box.y = dsty;
+   dst_box.z = dstz;
+   dst_box.width = psrc_box->width;
+   dst_box.height = psrc_box->height;
+   dst_box.depth = psrc_box->depth;
+
+   d3d12_direct_copy(ctx, dst, dst_level, &dst_box,
+                     src, src_level, src_box, PIPE_MASK_RGBAZS);
+
+   if (staging_res)
+      pipe_resource_reference(&staging_res, NULL);
+}
+
+/**
+ * Hook the blit and resource_copy_region implementations of this file into
+ * the given pipe_context.
+ */
+void
+d3d12_context_blit_init(struct pipe_context *ctx)
+{
+   ctx->blit = d3d12_blit;
+   ctx->resource_copy_region = d3d12_resource_copy_region;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_blit.h b/src/gallium/drivers/d3d12/d3d12_blit.h
new file mode 100644 (file)
index 0000000..f1ddc64
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_BLIT_H
+#define D3D12_BLIT_H
+
+struct d3d12_context;
+struct d3d12_resource;
+struct pipe_box;
+
+/* Install the d3d12 blit and resource_copy_region hooks on a pipe_context. */
+void
+d3d12_context_blit_init(struct pipe_context *ctx);
+
+/*
+ * Copy psrc_box of (src, src_level) to the position given by pdst_box in
+ * (dst, dst_level) using D3D12 copy commands. Box heights of opposite sign
+ * request a vertical flip.
+ */
+void
+d3d12_direct_copy(struct d3d12_context *ctx,
+                  struct d3d12_resource *dst,
+                  unsigned dst_level,
+                  const struct pipe_box *pdst_box,
+                  struct d3d12_resource *src,
+                  unsigned src_level,
+                  const struct pipe_box *psrc_box, unsigned mask);
+
+#endif // D3D12_BLIT_H
diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp
new file mode 100644 (file)
index 0000000..758ff94
--- /dev/null
@@ -0,0 +1,333 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_bufmgr.h"
+#include "d3d12_format.h"
+#include "d3d12_screen.h"
+
+#include "D3D12ResourceState.h"
+
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+#include "util/format/u_format.h"
+#include "util/u_memory.h"
+
+#include <d3d12.h>
+
+/* pb_manager implementation that allocates buffers from a D3D12 device. */
+struct d3d12_bufmgr {
+   struct pb_manager base;   /* must stay first: cast to/from pb_manager */
+
+   ID3D12Device *dev;        /* device used to create the resources */
+};
+
+extern const struct pb_vtbl d3d12_buffer_vtbl;
+
+/* Downcast a pb_manager to the d3d12_bufmgr that embeds it; mgr must not be NULL. */
+static inline struct d3d12_bufmgr *
+d3d12_bufmgr(struct pb_manager *mgr)
+{
+   assert(mgr);
+
+   return (struct d3d12_bufmgr *)mgr;
+}
+
+/**
+ * Allocate the state-tracking object for a resource.
+ *
+ * The subresource count is mip levels x array layers (1 for 3D textures)
+ * x non-opaque planes of the DXGI format, doubled when the pipe format has
+ * a stencil component.
+ */
+static struct TransitionableResourceState *
+create_trans_state(ID3D12Resource *res, enum pipe_format format)
+{
+   D3D12_RESOURCE_DESC desc = res->GetDesc();
+
+   /* DepthOrArraySize is a depth, not a layer count, for 3D textures. */
+   unsigned layers = desc.DepthOrArraySize;
+   if (desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D)
+      layers = 1;
+
+   unsigned subresource_count = desc.MipLevels *
+                                layers *
+                                d3d12_non_opaque_plane_count(desc.Format);
+   if (util_format_has_stencil(util_format_description(format)))
+      subresource_count *= 2;
+
+   return new TransitionableResourceState(res,
+                                          subresource_count,
+                                          SupportsSimultaneousAccess(desc));
+}
+
+/**
+ * Wrap an existing ID3D12Resource in a new buffer object with a reference
+ * count of one and its own transition state.
+ *
+ * No AddRef is taken on res here; d3d12_bo_unreference() releases it when
+ * the bo is destroyed. Returns NULL if the bo cannot be allocated.
+ */
+struct d3d12_bo *
+d3d12_bo_wrap_res(ID3D12Resource *res, enum pipe_format format)
+{
+   struct d3d12_bo *bo = CALLOC_STRUCT(d3d12_bo);
+
+   if (bo) {
+      bo->refcount = 1;
+      bo->res = res;
+      bo->trans_state = create_trans_state(res, format);
+   }
+
+   return bo;
+}
+
+/**
+ * Create a buffer object backed by a new committed buffer resource of the
+ * given size and alignment, using the device's custom heap properties for
+ * D3D12_HEAP_TYPE_UPLOAD and D3D12_RESOURCE_STATE_COMMON as initial state.
+ *
+ * \return the new bo, or NULL if either the resource or the bo wrapper
+ *         could not be created. The resource is released in the latter
+ *         case so nothing leaks.
+ */
+struct d3d12_bo *
+d3d12_bo_new(ID3D12Device *dev, uint64_t size, uint64_t alignment)
+{
+   ID3D12Resource *res;
+
+   D3D12_RESOURCE_DESC res_desc;
+   res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+   res_desc.Format = DXGI_FORMAT_UNKNOWN;
+   res_desc.Alignment = alignment;
+   res_desc.Width = size;
+   res_desc.Height = 1;
+   res_desc.DepthOrArraySize = 1;
+   res_desc.MipLevels = 1;
+   res_desc.SampleDesc.Count = 1;
+   res_desc.SampleDesc.Quality = 0;
+   res_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+   res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+
+   D3D12_HEAP_PROPERTIES heap_pris = dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_UPLOAD);
+   HRESULT hres = dev->CreateCommittedResource(&heap_pris,
+                                               D3D12_HEAP_FLAG_NONE,
+                                               &res_desc,
+                                               D3D12_RESOURCE_STATE_COMMON,
+                                               NULL,
+                                               __uuidof(ID3D12Resource),
+                                               (void **)&res);
+
+   if (FAILED(hres))
+      return NULL;
+
+   struct d3d12_bo *bo = d3d12_bo_wrap_res(res, PIPE_FORMAT_NONE);
+   if (!bo) {
+      /* The wrapper never took ownership, so drop our reference here. */
+      res->Release();
+   }
+
+   return bo;
+}
+
+/**
+ * Wrap a pb_buffer in a new buffer object with a reference count of one.
+ *
+ * Such a bo carries no transition state of its own. Returns NULL if the bo
+ * cannot be allocated.
+ */
+struct d3d12_bo *
+d3d12_bo_wrap_buffer(struct pb_buffer *buf)
+{
+   struct d3d12_bo *bo = CALLOC_STRUCT(d3d12_bo);
+
+   if (bo) {
+      bo->refcount = 1;
+      bo->buffer = buf;
+      bo->trans_state = NULL; /* State from base BO will be used */
+   }
+
+   return bo;
+}
+
+/**
+ * Drop one reference to a buffer object; NULL is accepted and ignored.
+ *
+ * When the last reference goes away, a bo wrapping a pb_buffer releases
+ * that buffer, while a bo wrapping a resource deletes its transition state
+ * and releases the resource. The bo itself is then freed.
+ */
+void
+d3d12_bo_unreference(struct d3d12_bo *bo)
+{
+   if (!bo)
+      return;
+
+   assert(p_atomic_read(&bo->refcount) > 0);
+
+   if (!p_atomic_dec_zero(&bo->refcount))
+      return;
+
+   if (bo->buffer) {
+      pb_reference(&bo->buffer, NULL);
+   } else {
+      delete bo->trans_state;
+      bo->res->Release();
+   }
+
+   FREE(bo);
+}
+
+/**
+ * Map a buffer object for CPU access.
+ *
+ * The underlying resource of the base bo is mapped (subresource 0). If the
+ * bo sits at a non-zero offset inside its base, the range passed to Map()
+ * is adjusted first:
+ *  - an empty range (Begin >= End) becomes [offset, offset + size of bo),
+ *  - any other range is shifted by offset.
+ *
+ * \return the mapped pointer advanced by the Begin of the range that was
+ *         passed to Map() (0 when range is NULL), or NULL if Map() fails.
+ *
+ * NOTE(review): with range == NULL and a non-zero offset the returned
+ * pointer is the start of the base resource, not of this bo - confirm that
+ * no caller relies on a NULL range for sub-allocated buffers.
+ */
+void *
+d3d12_bo_map(struct d3d12_bo *bo, D3D12_RANGE *range)
+{
+   struct d3d12_bo *base_bo;
+   D3D12_RANGE offset_range = {0, 0};
+   uint64_t offset;
+   void *ptr;
+
+   base_bo = d3d12_bo_get_base(bo, &offset);
+
+   if (!range || offset == 0) {
+      /* Nothing to do */
+   } else if (range->Begin >= range->End) {
+      offset_range.Begin = offset;
+      offset_range.End = offset + d3d12_bo_get_size(bo);
+      range = &offset_range;
+   } else {
+      offset_range.Begin = range->Begin + offset;
+      offset_range.End = range->End + offset;
+      range = &offset_range;
+   }
+
+   if (FAILED(base_bo->res->Map(0, range, &ptr)))
+      return NULL;
+
+   return (uint8_t *)ptr + (range ? range->Begin : 0);
+}
+
+/**
+ * Unmap a buffer object, the counterpart of d3d12_bo_map().
+ *
+ * For a bo that is a sub-range of a base bo the range is translated into
+ * the base resource before it is passed to Unmap():
+ *  - an empty range (Begin >= End) becomes [offset, offset + size of bo),
+ *  - any other range is shifted by offset.
+ * A NULL range, or a bo that is its own base, is passed through unchanged.
+ */
+void
+d3d12_bo_unmap(struct d3d12_bo *bo, D3D12_RANGE *range)
+{
+   struct d3d12_bo *base_bo;
+   D3D12_RANGE offset_range = {0, 0};
+   uint64_t offset;
+
+   base_bo = d3d12_bo_get_base(bo, &offset);
+
+   if (!range || bo == base_bo) {
+      /* Nothing to do */
+   } else if (range->Begin >= range->End) {
+      /* Same extent d3d12_bo_map() uses, so it stays inside the resource. */
+      offset_range.Begin = offset;
+      offset_range.End = offset + d3d12_bo_get_size(bo);
+      range = &offset_range;
+   } else {
+      offset_range.Begin = range->Begin + offset;
+      offset_range.End = range->End + offset;
+      range = &offset_range;
+   }
+
+   base_bo->res->Unmap(0, range);
+}
+
+/* pb_vtbl destroy hook: unmap the buffer's range, drop the bo reference and
+ * free the buffer wrapper. */
+static void
+d3d12_buffer_destroy(struct pb_buffer *pbuf)
+{
+   struct d3d12_buffer *buf = d3d12_buffer(pbuf);
+
+   d3d12_bo_unmap(buf->bo, &buf->range);
+   d3d12_bo_unreference(buf->bo);
+   FREE(buf);
+}
+
+/* pb_vtbl map hook: return the mapping stored in the buffer; flags and
+ * flush_ctx are ignored. */
+static void *
+d3d12_buffer_map(struct pb_buffer *pbuf,
+                 enum pb_usage_flags flags,
+                 void *flush_ctx)
+{
+   return d3d12_buffer(pbuf)->map;
+}
+
+/* pb_vtbl unmap hook: intentionally empty, the actual unmap happens in
+ * d3d12_buffer_destroy(). */
+static void
+d3d12_buffer_unmap(struct pb_buffer *pbuf)
+{
+}
+
+/* pb_vtbl get_base_buffer hook: a d3d12_buffer is its own base, at offset 0. */
+static void
+d3d12_buffer_get_base_buffer(struct pb_buffer *buf,
+                             struct pb_buffer **base_buf,
+                             pb_size *offset)
+{
+   *base_buf = buf;
+   *offset = 0;
+}
+
+/* pb_vtbl validate hook: nothing to validate, always reports PIPE_OK. */
+static enum pipe_error
+d3d12_buffer_validate(struct pb_buffer *pbuf,
+                      struct pb_validate *vl,
+                      enum pb_usage_flags flags )
+{
+   /* Always pinned */
+   return PIPE_OK;
+}
+
+/* pb_vtbl fence hook: no-op, the fence is not recorded. */
+static void
+d3d12_buffer_fence(struct pb_buffer *pbuf,
+                   struct pipe_fence_handle *fence )
+{
+}
+
+/* Virtual function table installed on every d3d12_buffer.
+ * NOTE(review): the initializers are positional, so their order has to
+ * match the member order of struct pb_vtbl - verify when that struct
+ * changes. */
+const struct pb_vtbl d3d12_buffer_vtbl = {
+   d3d12_buffer_destroy,
+   d3d12_buffer_map,
+   d3d12_buffer_unmap,
+   d3d12_buffer_validate,
+   d3d12_buffer_fence,
+   d3d12_buffer_get_base_buffer
+};
+
+/**
+ * pb_manager create_buffer hook.
+ *
+ * Allocates a d3d12_buffer whose size is rounded up to
+ * D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, backs it with a new bo
+ * and maps the whole range right away; the mapping is stored in the buffer.
+ *
+ * \return the embedded pb_buffer, or NULL if allocation or mapping failed.
+ */
+static struct pb_buffer *
+d3d12_bufmgr_create_buffer(struct pb_manager *pmgr,
+                           pb_size size,
+                           const struct pb_desc *pb_desc)
+{
+   struct d3d12_bufmgr *mgr = d3d12_bufmgr(pmgr);
+   struct d3d12_buffer *buf = CALLOC_STRUCT(d3d12_buffer);
+
+   if (!buf)
+      return NULL;
+
+   // Align the buffer to D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT
+   // in case it is to be used as a CBV.
+   size = align64(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.vtbl = &d3d12_buffer_vtbl;
+   buf->base.size = size;
+   buf->base.alignment = pb_desc->alignment;
+   buf->base.usage = pb_desc->usage;
+   buf->range.Begin = 0;
+   buf->range.End = size;
+
+   buf->bo = d3d12_bo_new(mgr->dev, size, pb_desc->alignment);
+   if (buf->bo) {
+      buf->map = d3d12_bo_map(buf->bo, &buf->range);
+      if (buf->map)
+         return &buf->base;
+
+      /* Mapping failed: give the bo back before freeing the wrapper. */
+      d3d12_bo_unreference(buf->bo);
+   }
+
+   FREE(buf);
+   return NULL;
+}
+
+/* pb_manager flush hook: this manager has nothing to flush. */
+static void
+d3d12_bufmgr_flush(struct pb_manager *mgr)
+{
+   /* No-op */
+}
+
+/* pb_manager destroy hook: free the manager object itself. */
+static void
+d3d12_bufmgr_destroy(struct pb_manager *_mgr)
+{
+   struct d3d12_bufmgr *mgr = d3d12_bufmgr(_mgr);
+   FREE(mgr);
+}
+
+/**
+ * Create a buffer manager that allocates from the screen's D3D12 device.
+ *
+ * \return the embedded pb_manager, or NULL on allocation failure.
+ */
+struct pb_manager *
+d3d12_bufmgr_create(struct d3d12_screen *screen)
+{
+   struct d3d12_bufmgr *mgr = CALLOC_STRUCT(d3d12_bufmgr);
+
+   if (!mgr)
+      return NULL;
+
+   mgr->dev = screen->dev;
+
+   mgr->base.create_buffer = d3d12_bufmgr_create_buffer;
+   mgr->base.flush = d3d12_bufmgr_flush;
+   mgr->base.destroy = d3d12_bufmgr_destroy;
+
+   return &mgr->base;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.h b/src/gallium/drivers/d3d12/d3d12_bufmgr.h
new file mode 100644 (file)
index 0000000..4aee2c2
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_BUFMGR_H
+#define D3D12_BUFMGR_H
+
+#include "pipebuffer/pb_buffer.h"
+#include "util/u_atomic.h"
+
+#include <d3d12.h>
+
+struct d3d12_bufmgr;
+struct d3d12_screen;
+struct pb_manager;
+struct TransitionableResourceState;
+
+/* Reference-counted buffer object. The helpers below treat a bo with a
+ * non-NULL 'buffer' as backed by a pb_buffer, and one with a NULL 'buffer'
+ * as backed directly by 'res'. */
+struct d3d12_bo {
+   int refcount;           /* incremented atomically by d3d12_bo_reference() */
+   ID3D12Resource *res;    /* queried for its size when 'buffer' is NULL */
+   struct pb_buffer *buffer; /* optional pb_buffer; provides size and base lookup */
+   struct TransitionableResourceState *trans_state; /* NOTE(review): presumably resource-state tracking - confirm */
+};
+
+/* pb_buffer implementation used by the d3d12 buffer manager. */
+struct d3d12_buffer {
+   /* Must stay the first member: d3d12_buffer() casts a pb_buffer pointer
+    * straight to a d3d12_buffer pointer. */
+   struct pb_buffer base;
+
+   struct d3d12_bo *bo;   /* backing buffer object */
+   D3D12_RANGE range;     /* range handed to d3d12_bo_map() */
+   void *map;             /* pointer returned by d3d12_bo_map() */
+};
+
+/* Downcast a pb_buffer to the d3d12_buffer that embeds it; buf must not be
+ * NULL (asserted). */
+static inline struct d3d12_buffer *
+d3d12_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+
+   struct d3d12_buffer *wrapper = (struct d3d12_buffer *)buf;
+   return wrapper;
+}
+
+/* Resolve a bo to its base bo.
+ *
+ * For a bo without a pb_buffer the bo is its own base and *offset is set
+ * to 0. Otherwise the base buffer and offset come from pb_get_base_buffer(),
+ * and the bo of that base buffer is returned. */
+static inline struct d3d12_bo *
+d3d12_bo_get_base(struct d3d12_bo *bo, uint64_t *offset)
+{
+   if (!bo->buffer) {
+      *offset = 0;
+      return bo;
+   }
+
+   struct pb_buffer *root = NULL;
+   pb_get_base_buffer(bo->buffer, &root, offset);
+   return d3d12_buffer(root)->bo;
+}
+
+/* Size of a bo: the pb_buffer size when one is attached, otherwise the
+ * Width from the D3D12 resource description. */
+static inline uint64_t
+d3d12_bo_get_size(struct d3d12_bo *bo)
+{
+   return bo->buffer ? bo->buffer->size
+                     : bo->res->GetDesc().Width;
+}
+
+/* A bo counts as sub-allocated when it has a pb_buffer and its size differs
+ * from the size of its base bo. */
+static inline bool
+d3d12_bo_is_suballocated(struct d3d12_bo *bo)
+{
+   if (bo->buffer == NULL)
+      return false;
+
+   uint64_t unused_offset;
+   struct d3d12_bo *root = d3d12_bo_get_base(bo, &unused_offset);
+
+   const uint64_t root_size = d3d12_bo_get_size(root);
+   const uint64_t own_size = d3d12_bo_get_size(bo);
+   return root_size != own_size;
+}
+
+/* Create a new bo of 'size' bytes with the given alignment on 'dev'.
+ * Returns NULL on failure (the buffer manager checks for this). */
+struct d3d12_bo *
+d3d12_bo_new(ID3D12Device *dev, uint64_t size, uint64_t alignment);
+
+/* NOTE(review): presumably wraps an existing resource in a bo; how 'format'
+ * is used is not visible from this header - confirm in the implementation. */
+struct d3d12_bo *
+d3d12_bo_wrap_res(ID3D12Resource *res, enum pipe_format format);
+
+/* NOTE(review): presumably creates a bo whose 'buffer' member is 'buf' -
+ * confirm in the implementation. */
+struct d3d12_bo *
+d3d12_bo_wrap_buffer(struct pb_buffer *buf);
+
+/* Take an additional reference on a bo (atomic increment of its refcount). */
+static inline void
+d3d12_bo_reference(struct d3d12_bo *bo)
+{
+   int *counter = &bo->refcount;
+   p_atomic_inc(counter);
+}
+
+/* Drop a reference on a bo; counterpart of d3d12_bo_reference().
+ * NOTE(review): presumably destroys the bo when the count reaches zero -
+ * confirm in the implementation. */
+void
+d3d12_bo_unreference(struct d3d12_bo *bo);
+
+/* Map a bo for CPU access over 'range'. Returns the mapped pointer, or NULL
+ * on failure (the buffer manager checks for this). */
+void *
+d3d12_bo_map(struct d3d12_bo *bo, D3D12_RANGE *range);
+
+/* NOTE(review): presumably undoes d3d12_bo_map() for 'range' - confirm in
+ * the implementation. */
+void
+d3d12_bo_unmap(struct d3d12_bo *bo, D3D12_RANGE *range);
+
+/* Create the buffer manager for 'screen'; returns NULL on allocation
+ * failure. */
+struct pb_manager *
+d3d12_bufmgr_create(struct d3d12_screen *screen);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp
new file mode 100644 (file)
index 0000000..e0e1138
--- /dev/null
@@ -0,0 +1,1396 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_compiler.h"
+#include "d3d12_context.h"
+#include "d3d12_debug.h"
+#include "d3d12_screen.h"
+#include "d3d12_nir_passes.h"
+#include "nir_to_dxil.h"
+
+#include "pipe/p_state.h"
+
+#include "nir.h"
+#include "nir/nir_draw_helpers.h"
+#include "nir/tgsi_to_nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+#include "util/u_simple_shaders.h"
+
+#include <d3d12.h>
+#include <dxcapi.h>
+#include <wrl.h>
+
+extern "C" {
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_point_sprite.h"
+}
+
+using Microsoft::WRL::ComPtr;
+
+/* Bundle of the DXC/DXIL helper objects used after DXIL generation:
+ * compile_nir() calls validate_and_sign() on every blob and disassemble()
+ * when D3D12_DEBUG_DISASS is set. */
+struct d3d12_validation_tools
+{
+   d3d12_validation_tools();
+
+   /* NOTE(review): presumably validates and signs the DXIL container in
+    * place and returns false on failure - confirm in the definition. */
+   bool validate_and_sign(struct blob *dxil);
+
+   /* NOTE(review): presumably prints a disassembly of the blob - confirm. */
+   void disassemble(struct blob *dxil);
+
+   void load_dxil_dll();
+
+   /* Small wrapper around a Win32 HMODULE with a constructor/destructor
+    * pair; the handle itself is private and exposed through the conversion
+    * operator. */
+   struct HModule {
+      HModule();
+      ~HModule();
+
+      bool load(LPCSTR file_name);
+      operator HMODULE () const;
+   private:
+      HMODULE module;
+   };
+
+   HModule dxil_module;
+   HModule dxc_compiler_module;
+   ComPtr<IDxcCompiler> compiler;
+   ComPtr<IDxcValidator> validator;
+   ComPtr<IDxcLibrary> library;
+};
+
+/* Allocate a d3d12_validation_tools instance; release it with
+ * d3d12_validator_destroy(). */
+struct d3d12_validation_tools *d3d12_validator_create()
+{
+   struct d3d12_validation_tools *tools = new d3d12_validation_tools();
+   return tools;
+}
+
+/* Destroy an instance obtained from d3d12_validator_create(). */
+void d3d12_validator_destroy(struct d3d12_validation_tools *validator)
+{
+   delete validator;
+}
+
+
+/* Return the NIR compiler options of the DXIL backend. Only
+ * PIPE_SHADER_IR_NIR is accepted (asserted); the screen and shader stage
+ * are not consulted. */
+const void *
+d3d12_get_compiler_options(struct pipe_screen *screen,
+                           enum pipe_shader_ir ir,
+                           enum pipe_shader_type shader)
+{
+   assert(ir == PIPE_SHADER_IR_NIR);
+
+   const void *options = dxil_get_nir_compiler_options();
+   return options;
+}
+
+/* Translate a GLSL sampler dimension into the matching RESOURCE_DIMENSION_*
+ * value. Anything other than 1D, 2D, 3D and CUBE maps to
+ * RESOURCE_DIMENSION_UNKNOWN. */
+static uint32_t
+resource_dimension(enum glsl_sampler_dim dim)
+{
+   if (dim == GLSL_SAMPLER_DIM_1D)
+      return RESOURCE_DIMENSION_TEXTURE1D;
+   if (dim == GLSL_SAMPLER_DIM_2D)
+      return RESOURCE_DIMENSION_TEXTURE2D;
+   if (dim == GLSL_SAMPLER_DIM_3D)
+      return RESOURCE_DIMENSION_TEXTURE3D;
+   if (dim == GLSL_SAMPLER_DIM_CUBE)
+      return RESOURCE_DIMENSION_TEXTURECUBE;
+
+   return RESOURCE_DIMENSION_UNKNOWN;
+}
+
+static struct d3d12_shader *
+compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
+            struct d3d12_shader_key *key, struct nir_shader *nir)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
+   shader->key = *key;
+   shader->nir = nir;
+   sel->current = shader;
+
+   NIR_PASS_V(nir, nir_lower_samplers);
+   NIR_PASS_V(nir, d3d12_create_bare_samplers);
+
+   if (key->samples_int_textures)
+      NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
+                 key->tex_wrap_states, key->swizzle_state,
+                 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
+
+   if (key->vs.needs_format_emulation)
+      d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
+
+   uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
+   uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
+   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
+   shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
+                              nir->info.num_ubos > num_ubos_before_lower_to_ubo;
+
+   if (key->last_vertex_processing_stage) {
+      if (key->invert_depth)
+         NIR_PASS_V(nir, d3d12_nir_invert_depth);
+      NIR_PASS_V(nir, nir_lower_clip_halfz);
+      NIR_PASS_V(nir, d3d12_lower_yflip);
+   }
+   NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
+   NIR_PASS_V(nir, d3d12_lower_load_first_vertex);
+   NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
+   NIR_PASS_V(nir, d3d12_lower_bool_input);
+
+   struct nir_to_dxil_options opts = {};
+   opts.interpolate_at_vertex = screen->have_load_at_vertex;
+   opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
+   opts.ubo_binding_offset = shader->has_default_ubo0 ? 0 : 1;
+   opts.provoking_vertex = key->fs.provoking_vertex;
+
+   struct blob tmp;
+   if (!nir_to_dxil(nir, &opts, &tmp)) {
+      debug_printf("D3D12: nir_to_dxil failed\n");
+      return NULL;
+   }
+
+   // Non-ubo variables
+   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
+      auto type = glsl_without_array(var->type);
+      if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
+         unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
+         for (unsigned i = 0; i < count; ++i) {
+            shader->srv_bindings[shader->num_srv_bindings].index = var->data.binding + i;
+            shader->srv_bindings[shader->num_srv_bindings].binding = var->data.binding;
+            shader->srv_bindings[shader->num_srv_bindings].dimension = resource_dimension(glsl_get_sampler_dim(type));
+            shader->num_srv_bindings++;
+         }
+      }
+   }
+
+   // Ubo variables
+   if(nir->info.num_ubos) {
+      // Ignore state_vars ubo as it is bound as root constants
+      unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
+      for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) {
+         shader->cb_bindings[shader->num_cb_bindings++].binding = i;
+      }
+   }
+   ctx->validation_tools->validate_and_sign(&tmp);
+
+   if (d3d12_debug & D3D12_DEBUG_DISASS) {
+      ctx->validation_tools->disassemble(&tmp);
+   }
+
+   blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
+
+   if (d3d12_debug & D3D12_DEBUG_DXIL) {
+      char buf[256];
+      static int i;
+      snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
+      FILE *fp = fopen(buf, "wb");
+      fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
+      fclose(fp);
+      fprintf(stderr, "wrote '%s'...\n", buf);
+   }
+   return shader;
+}
+
+/* State gathered once per variant selection and consumed by the key-filling
+ * and variant-validation helpers below. */
+struct d3d12_selection_context {
+   struct d3d12_context *ctx;
+   const struct pipe_draw_info *dinfo;   /* may be NULL; get_provoking_vertex() then assumes triangles */
+   bool needs_point_sprite_lowering;     /* requests a passthrough GS variant and the GS psize key bits */
+   bool needs_vertex_reordering;         /* requests a GS variant that reorders vertices */
+   unsigned provoking_vertex;            /* copied into GS variant / FS keys; reset to 0 by needs_vertex_reordering() in the xfb case */
+   bool alternate_tri;                   /* copied into the GS variant key */
+   unsigned fill_mode_lowered;           /* PIPE_POLYGON_MODE_* value */
+   unsigned cull_mode_lowered;           /* PIPE_FACE_* value */
+   bool manual_depth_range;              /* copied into the FS key */
+   unsigned missing_dual_src_outputs;    /* copied into the FS key */
+   unsigned frag_result_color_lowering;  /* copied into the FS key */
+};
+
+static unsigned
+missing_dual_src_outputs(struct d3d12_context *ctx)
+{
+   if (!ctx->gfx_pipeline_state.blend->is_dual_src)
+      return 0;
+
+   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+   nir_shader *s = fs->initial;
+
+   unsigned indices_seen = 0;
+   nir_foreach_function(function, s) {
+      if (function->impl) {
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr(instr, block) {
+               if (instr->type != nir_instr_type_intrinsic)
+                  continue;
+
+               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+               if (intr->intrinsic != nir_intrinsic_store_deref)
+                  continue;
+
+               nir_variable *var = nir_intrinsic_get_var(intr, 0);
+               if (var->data.mode != nir_var_shader_out ||
+                   (var->data.location != FRAG_RESULT_COLOR &&
+                    var->data.location != FRAG_RESULT_DATA0))
+                  continue;
+
+               indices_seen |= 1u << var->data.index;
+               if ((indices_seen & 3) == 3)
+                  return 0;
+            }
+         }
+      }
+   }
+
+   return 3 & ~indices_seen;
+}
+
+/* Number of color buffers gl_FragColor has to be replicated to: the bound
+ * color-buffer count when the fragment shader writes FRAG_RESULT_COLOR and
+ * more than one color buffer is bound, otherwise 0. A fragment shader must
+ * be bound (asserted). */
+static unsigned
+frag_result_color_lowering(struct d3d12_context *ctx)
+{
+   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+   assert(fs);
+
+   const bool writes_frag_color =
+      (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) != 0;
+
+   if (!writes_frag_color)
+      return 0;
+   if (ctx->fb.nr_cbufs > 1)
+      return ctx->fb.nr_cbufs;
+   return 0;
+}
+
+/* Decide whether the fragment shader must apply the depth range itself. */
+static bool
+manual_depth_range(struct d3d12_context *ctx)
+{
+   if (!d3d12_need_zero_one_depth_range(ctx))
+      return false;
+
+   /**
+    * When the D3D12 depth-range has to be forced to zero-one, the real
+    * depth-range may need to be applied by the shader instead.
+    *
+    * The zero-one override is only needed when frag-depth is written, so the
+    * only value that needs the manual depth-range is gl_FragCoord.z.
+    *
+    * Conditional writes to gl_FragDepth need no special handling, because
+    * the GLSL 4.60 spec states:
+    *
+    *    If a shader statically assigns a value to gl_FragDepth, and there
+    *    is an execution path through the shader that does not set
+    *    gl_FragDepth, then the value of the fragment’s depth may be
+    *    undefined for executions of the shader that take that path. That
+    *    is, if the set of linked fragment shaders statically contain a
+    *    write to gl_FragDepth, then it is responsible for always writing
+    *    it.
+    */
+
+   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+   if (!fs)
+      return false;
+
+   /* Only shaders that read the fragment position are affected. */
+   return (fs->initial->info.inputs_read & VARYING_BIT_POS) != 0;
+}
+
+/* True for the primitive types (quads, quad strips, polygons) that get the
+ * edge-flag fix in the lowered line fill mode. */
+static bool
+needs_edge_flag_fix(enum pipe_prim_type mode)
+{
+   switch (mode) {
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_POLYGON:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static unsigned
+fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
+{
+   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
+
+   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
+        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
+       ctx->gfx_pipeline_state.rast == NULL ||
+       (dinfo->mode != PIPE_PRIM_TRIANGLES &&
+        dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
+      return PIPE_POLYGON_MODE_FILL;
+
+   /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
+   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
+         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
+        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
+         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
+       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
+        needs_edge_flag_fix(ctx->initial_api_prim)))
+      return PIPE_POLYGON_MODE_LINE;
+
+   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
+      return PIPE_POLYGON_MODE_POINT;
+
+   return PIPE_POLYGON_MODE_FILL;
+}
+
+static bool
+needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
+{
+   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
+   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+
+   if (gs != NULL && !gs->is_gs_variant) {
+      /* There is an user GS; Check if it outputs points with PSIZE */
+      return (gs->initial->info.gs.output_primitive == GL_POINTS &&
+              gs->initial->info.outputs_written & VARYING_BIT_PSIZ);
+   } else {
+      /* No user GS; check if we are drawing wide points */
+      return ((dinfo->mode == PIPE_PRIM_POINTS ||
+               fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
+              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
+               ctx->gfx_pipeline_state.rast->base.offset_point ||
+               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
+                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
+              (vs->initial->info.outputs_written & VARYING_BIT_POS));
+   }
+}
+
+/* Cull mode to emulate in a geometry shader variant: PIPE_FACE_NONE when a
+ * user geometry shader or no rasterizer state is bound, otherwise the
+ * rasterizer's cull_face. The fill_mode argument is currently not used. */
+static unsigned
+cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
+{
+   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+
+   if (gs != NULL && !gs->is_gs_variant)
+      return PIPE_FACE_NONE;
+
+   if (ctx->gfx_pipeline_state.rast == NULL)
+      return PIPE_FACE_NONE;
+
+   const unsigned cull_face = ctx->gfx_pipeline_state.rast->base.cull_face;
+   return cull_face == PIPE_FACE_NONE ? (unsigned)PIPE_FACE_NONE : cull_face;
+}
+
+static unsigned
+get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate)
+{
+   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
+   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_gs_variant ? gs : vs;
+
+   /* Make sure GL prims match Gallium prims */
+   STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
+   STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
+   STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);
+
+   enum pipe_prim_type mode;
+   switch (last_vertex_stage->stage) {
+   case PIPE_SHADER_GEOMETRY:
+      mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
+      break;
+   case PIPE_SHADER_VERTEX:
+      mode = sel_ctx->dinfo ? sel_ctx->dinfo->mode : PIPE_PRIM_TRIANGLES;
+      break;
+   default:
+      unreachable("Tesselation shaders are not supported");
+   }
+
+   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
+                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
+   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
+                (!gs || gs->is_gs_variant ||
+                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
+   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
+}
+
+static bool
+has_flat_varyings(struct d3d12_context *ctx)
+{
+   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+
+   if (!fs || !fs->current)
+      return false;
+
+   nir_foreach_variable_with_modes(input, fs->current->nir,
+                                   nir_var_shader_in) {
+      if (input->data.interpolation == INTERP_MODE_FLAT)
+         return true;
+   }
+
+   return false;
+}
+
+static bool
+needs_vertex_reordering(struct d3d12_selection_context *sel_ctx)
+{
+   struct d3d12_context *ctx = sel_ctx->ctx;
+   bool flat = has_flat_varyings(ctx);
+   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
+
+   if (fill_mode_lowered(ctx, sel_ctx->dinfo) != PIPE_POLYGON_MODE_FILL)
+      return false;
+
+   /* TODO add support for line primitives */
+
+   /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
+      If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
+   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
+                                                  sel_ctx->alternate_tri))
+      return true;
+
+   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
+      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
+      only works when there is no flat shading involved. In that scenario, we don't care about
+      the provoking vertex. */
+   if (xfb && !flat && sel_ctx->alternate_tri) {
+      sel_ctx->provoking_vertex = 0;
+      return true;
+   }
+
+   return false;
+}
+
+static nir_variable *
+create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
+                         unsigned slot, nir_variable_mode mode)
+{
+   nir_variable *var;
+   char tmp[100];
+
+   snprintf(tmp, ARRAY_SIZE(tmp),
+            mode == nir_var_shader_in ? "in_%d" : "out_%d",
+            info->vars[slot].driver_location);
+   var = nir_variable_create(nir, mode, info->vars[slot].type, tmp);
+   var->data.location = slot;
+   var->data.driver_location = info->vars[slot].driver_location;
+   var->data.interpolation = info->vars[slot].interpolation;
+
+   return var;
+}
+
+static void
+fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
+              nir_variable_mode modes, uint64_t mask)
+{
+   nir_foreach_variable_with_modes(var, s, modes) {
+      unsigned slot = var->data.location;
+      uint64_t slot_bit = BITFIELD64_BIT(slot);
+
+      if (!(mask & slot_bit))
+         continue;
+      info->vars[slot].driver_location = var->data.driver_location;
+      info->vars[slot].type = var->type;
+      info->vars[slot].interpolation = var->data.interpolation;
+      info->mask |= slot_bit;
+   }
+}
+
+/* Add the location bit of every flat-interpolated input of the current
+ * fragment shader variant to key->flat_varyings. Does nothing when no
+ * fragment shader (variant) is available. */
+static void
+fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
+{
+   const bool have_fs_variant = fs && fs->current;
+   if (!have_fs_variant)
+      return;
+
+   nir_foreach_variable_with_modes(input, fs->current->nir,
+                                   nir_var_shader_in) {
+      if (input->data.interpolation != INTERP_MODE_FLAT)
+         continue;
+
+      key->flat_varyings |= BITFIELD64_BIT(input->data.location);
+   }
+}
+
+static void
+validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
+{
+   struct d3d12_context *ctx = sel_ctx->ctx;
+   d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
+   d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+   struct d3d12_gs_variant_key key = {0};
+   bool variant_needed = false;
+
+   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+
+   /* Nothing to do if there is a user geometry shader bound */
+   if (gs != NULL && !gs->is_gs_variant)
+      return;
+
+   /* Fill the geometry shader variant key */
+   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
+      key.fill_mode = sel_ctx->fill_mode_lowered;
+      key.cull_mode = sel_ctx->cull_mode_lowered;
+      key.has_front_face = (fs->initial->info.system_values_read & SYSTEM_BIT_FRONT_FACE) ? 1 : 0;
+      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
+         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
+      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
+      fill_flat_varyings(&key, fs);
+      if (key.flat_varyings != 0)
+         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
+      variant_needed = true;
+   } else if (sel_ctx->needs_point_sprite_lowering) {
+      key.passthrough = true;
+      variant_needed = true;
+   } else if (sel_ctx->needs_vertex_reordering) {
+      /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
+      key.provoking_vertex = sel_ctx->provoking_vertex;
+      key.alternate_tri = sel_ctx->alternate_tri;
+      variant_needed = true;
+   }
+
+   if (variant_needed) {
+      fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
+                    vs->initial->info.outputs_written);
+   }
+
+   /* Check if the currently bound geometry shader variant is correct */
+   if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
+      return;
+
+   /* Find/create the proper variant and bind it */
+   gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
+   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
+}
+
+static bool
+d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
+{
+   assert(expect->stage == have->stage);
+   assert(expect);
+   assert(have);
+
+   /* Because we only add varyings we check that a shader has at least the expected in-
+    * and outputs. */
+   if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs,
+              sizeof(struct d3d12_varying_info)) ||
+       memcmp(&expect->required_varying_outputs, &have->required_varying_outputs,
+              sizeof(struct d3d12_varying_info)) ||
+       (expect->next_varying_inputs != have->next_varying_inputs) ||
+       (expect->prev_varying_outputs != have->prev_varying_outputs))
+      return false;
+
+   if (expect->stage == PIPE_SHADER_GEOMETRY) {
+      if (expect->gs.writes_psize) {
+         if (!have->gs.writes_psize ||
+             expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
+             expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
+             expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
+             expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
+            return false;
+      } else if (have->gs.writes_psize) {
+         return false;
+      }
+      if (expect->gs.primitive_id != have->gs.primitive_id ||
+          expect->gs.triangle_strip != have->gs.triangle_strip)
+         return false;
+   } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
+      if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
+          expect->fs.manual_depth_range != have->fs.manual_depth_range ||
+          expect->fs.polygon_stipple != have->fs.polygon_stipple ||
+          expect->fs.cast_to_uint != have->fs.cast_to_uint ||
+          expect->fs.cast_to_int != have->fs.cast_to_int)
+         return false;
+   }
+
+   if (expect->tex_saturate_s != have->tex_saturate_s ||
+       expect->tex_saturate_r != have->tex_saturate_r ||
+       expect->tex_saturate_t != have->tex_saturate_t)
+      return false;
+
+   if (expect->samples_int_textures != have->samples_int_textures)
+      return false;
+
+   if (expect->n_texture_states != have->n_texture_states)
+      return false;
+
+   if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
+              expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
+      return false;
+
+   if (memcmp(expect->swizzle_state, have->swizzle_state,
+              expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
+      return false;
+
+   if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
+              expect->n_texture_states * sizeof(enum compare_func)))
+      return false;
+
+   if (expect->invert_depth != have->invert_depth)
+      return false;
+
+   if (expect->stage == PIPE_SHADER_VERTEX) {
+      if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
+         return false;
+
+      if (expect->vs.needs_format_emulation) {
+         if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
+                    PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
+            return false;
+      }
+   }
+
+   if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
+      return false;
+
+   return true;
+}
+
+static void
+d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
+                      d3d12_shader_key *key, d3d12_shader_selector *sel,
+                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
+{
+   pipe_shader_type stage = sel->stage;
+
+   uint64_t system_generated_in_values =
+         VARYING_BIT_PNTC |
+         VARYING_BIT_PRIMITIVE_ID;
+
+   uint64_t system_out_values =
+         VARYING_BIT_CLIP_DIST0 |
+         VARYING_BIT_CLIP_DIST1;
+
+   memset(key, 0, sizeof(d3d12_shader_key));
+   key->stage = stage;
+
+   if (prev) {
+      /* We require as inputs what the previous stage has written,
+       * except certain system values */
+      if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
+         system_out_values |= VARYING_BIT_POS;
+      if (stage == PIPE_SHADER_FRAGMENT)
+         system_out_values |= VARYING_BIT_PSIZ;
+      uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
+      fill_varyings(&key->required_varying_inputs, prev->current->nir,
+                    nir_var_shader_out, mask);
+      key->prev_varying_outputs = prev->current->nir->info.outputs_written;
+
+      /* Set the provoking vertex based on the previous shader output. Only set the
+       * key value if the driver actually supports changing the provoking vertex though */
+      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
+          !sel_ctx->needs_vertex_reordering &&
+          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
+         key->fs.provoking_vertex = sel_ctx->provoking_vertex;
+   }
+
+   /* We require as outputs what the next stage reads,
+    * except certain system values */
+   if (next) {
+      if (!next->is_gs_variant) {
+         if (stage == PIPE_SHADER_VERTEX)
+            system_generated_in_values |= VARYING_BIT_POS;
+         uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
+         fill_varyings(&key->required_varying_outputs, next->current->nir,
+                       nir_var_shader_in, mask);
+      }
+      key->next_varying_inputs = next->current->nir->info.inputs_read;
+   }
+
+   if (stage == PIPE_SHADER_GEOMETRY ||
+       (stage == PIPE_SHADER_VERTEX && (!next || next->stage != PIPE_SHADER_GEOMETRY))) {
+      key->last_vertex_processing_stage = 1;
+      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
+      if (sel_ctx->ctx->pstipple.enabled)
+         key->next_varying_inputs |= VARYING_BIT_POS;
+   }
+
+   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
+      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
+      if (sel_ctx->needs_point_sprite_lowering) {
+         key->gs.writes_psize = 1;
+         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
+         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
+         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
+         if (sel_ctx->ctx->flip_y < 0)
+            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
+         key->gs.aa_point = rast->point_smooth;
+         key->gs.stream_output_factor = 6;
+      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
+         key->gs.stream_output_factor = 2;
+      } else if (sel_ctx->needs_vertex_reordering && !sel->is_gs_variant) {
+         key->gs.triangle_strip = 1;
+      }
+
+      if (sel->is_gs_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
+         key->gs.primitive_id = 1;
+   } else if (stage == PIPE_SHADER_FRAGMENT) {
+      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
+      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
+      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
+      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled;
+      if (sel_ctx->ctx->gfx_pipeline_state.blend &&
+          sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
+          !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
+         key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
+         key->fs.cast_to_int = !key->fs.cast_to_uint;
+      }
+   }
+
+   if (sel->samples_int_textures) {
+      key->samples_int_textures = sel->samples_int_textures;
+      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
+      /* Copy only states with integer textures */
+      for(int i = 0; i < key->n_texture_states; ++i) {
+         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
+         if (wrap_state.is_int_sampler) {
+            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
+            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
+         }
+      }
+   }
+
+   for (int i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
+      if (sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
+         continue;
+
+      if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
+         key->tex_saturate_r |= 1 << i;
+      if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
+         key->tex_saturate_s |= 1 << i;
+      if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
+         key->tex_saturate_t |= 1 << i;
+   }
+
+   if (sel->compare_with_lod_bias_grad) {
+      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
+      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
+             key->n_texture_states * sizeof(enum compare_func));
+      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
+             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
+   }
+
+   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
+      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
+      if (key->vs.needs_format_emulation) {
+         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
+                sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
+      }
+   }
+
+   if (stage == PIPE_SHADER_FRAGMENT &&
+       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
+       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant &&
+       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
+      key->fs.remap_front_facing = 1;
+   }
+}
+
+static void
+select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
+                     d3d12_shader_selector *prev, d3d12_shader_selector *next)
+{
+   struct d3d12_context *ctx = sel_ctx->ctx;
+   d3d12_shader_key key;
+   nir_shader *new_nir_variant;
+   unsigned pstipple_binding = UINT32_MAX;
+
+   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
+
+   /* Check for an existing variant */
+   for (d3d12_shader *variant = sel->first; variant;
+        variant = variant->next_variant) {
+
+      if (d3d12_compare_shader_keys(&key, &variant->key)) {
+         sel->current = variant;
+         return;
+      }
+   }
+
+   /* Clone the NIR shader */
+   new_nir_variant = nir_shader_clone(sel, sel->initial);
+
+   /* Apply any needed lowering passes */
+   if (key.gs.writes_psize) {
+      NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
+                 !key.gs.sprite_origin_upper_left,
+                 key.gs.point_size_per_vertex,
+                 key.gs.sprite_coord_enable,
+                 key.next_varying_inputs);
+
+      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
+      nir_shader_gather_info(new_nir_variant, impl);
+   }
+
+   if (key.gs.primitive_id) {
+      NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
+
+      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
+      nir_shader_gather_info(new_nir_variant, impl);
+   }
+
+   if (key.gs.triangle_strip)
+      NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
+
+   if (key.fs.polygon_stipple) {
+      NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
+                 &pstipple_binding, 0, false);
+
+      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
+      nir_shader_gather_info(new_nir_variant, impl);
+   }
+
+   if (key.fs.remap_front_facing) {
+      d3d12_forward_front_face(new_nir_variant);
+
+      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
+      nir_shader_gather_info(new_nir_variant, impl);
+   }
+
+   if (key.fs.missing_dual_src_outputs) {
+      NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
+                 key.fs.missing_dual_src_outputs);
+   } else if (key.fs.frag_result_color_lowering) {
+      NIR_PASS_V(new_nir_variant, d3d12_lower_frag_result,
+                 key.fs.frag_result_color_lowering);
+   }
+
+   if (key.fs.manual_depth_range)
+      NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
+
+   if (sel->compare_with_lod_bias_grad)
+      NIR_PASS_V(new_nir_variant, d3d12_lower_sample_tex_compare, key.n_texture_states,
+                 key.sampler_compare_funcs, key.swizzle_state);
+
+   if (key.fs.cast_to_uint)
+      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
+   if (key.fs.cast_to_int)
+      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
+
+   {
+      struct nir_lower_tex_options tex_options = { };
+      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
+      tex_options.lower_rect = true;
+      tex_options.lower_rect_offset = true;
+      tex_options.saturate_s = key.tex_saturate_s;
+      tex_options.saturate_r = key.tex_saturate_r;
+      tex_options.saturate_t = key.tex_saturate_t;
+
+      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
+   }
+
+   /* Add the needed in and outputs, and re-sort */
+   uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;
+
+   if (prev) {
+      while (mask) {
+         int slot = u_bit_scan64(&mask);
+         create_varying_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in);
+      }
+      d3d12_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
+                                      key.prev_varying_outputs);
+   }
+
+   mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;
+
+   if (next) {
+      while (mask) {
+         int slot = u_bit_scan64(&mask);
+         create_varying_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out);
+      }
+      d3d12_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
+                                      key.next_varying_inputs);
+   }
+
+   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
+   assert(new_variant);
+
+   /* keep track of polygon stipple texture binding */
+   new_variant->pstipple_binding = pstipple_binding;
+
+   /* prepend the new shader in the selector chain and pick it */
+   new_variant->next_variant = sel->first;
+   sel->current = sel->first = new_variant;
+}
+
+/* Return the selector bound to the stage that feeds `current`:
+ * NULL for the vertex stage, the vertex stage for the geometry stage, and
+ * for the fragment stage the geometry stage if one is bound, else the
+ * vertex stage. Any other stage is not supported.
+ */
+static d3d12_shader_selector *
+get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
+{
+   /* No TESS_CTRL or TESS_EVAL yet */
+
+   if (current == PIPE_SHADER_VERTEX)
+      return NULL;
+
+   if (current == PIPE_SHADER_FRAGMENT) {
+      auto gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+      if (gs)
+         return gs;
+   }
+
+   if (current == PIPE_SHADER_FRAGMENT || current == PIPE_SHADER_GEOMETRY)
+      return ctx->gfx_stages[PIPE_SHADER_VERTEX];
+
+   unreachable("shader type not supported");
+}
+
+/* Return the selector bound to the stage that consumes the outputs of
+ * `current`: NULL for the fragment stage, the fragment stage for the
+ * geometry stage, and for the vertex stage the geometry stage if one is
+ * bound, else the fragment stage. Any other stage is not supported.
+ */
+static d3d12_shader_selector *
+get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
+{
+   /* No TESS_CTRL or TESS_EVAL yet */
+
+   if (current == PIPE_SHADER_FRAGMENT)
+      return NULL;
+
+   if (current == PIPE_SHADER_VERTEX) {
+      auto gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+      if (gs)
+         return gs;
+   }
+
+   if (current == PIPE_SHADER_VERTEX || current == PIPE_SHADER_GEOMETRY)
+      return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+
+   unreachable("shader type not supported");
+}
+
+/* Bit flags returned by scan_texture_use() */
+enum tex_scan_flags {
+   /* a texture instruction with an integer destination type was found */
+   TEX_SAMPLE_INTEGER_TEXTURE = 0x1,
+   /* a shadow txb/txl/txd instruction was found */
+   TEX_CMP_WITH_LOD_BIAS_GRAD = 0x2,
+   /* every flag above; lets the scan stop early */
+   TEX_SCAN_ALL_FLAGS         = TEX_SAMPLE_INTEGER_TEXTURE | TEX_CMP_WITH_LOD_BIAS_GRAD
+};
+
+static unsigned
+scan_texture_use(nir_shader *nir)
+{
+   unsigned result = 0;
+   nir_foreach_function(func, nir) {
+      nir_foreach_block(block, func->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type == nir_instr_type_tex) {
+               auto tex = nir_instr_as_tex(instr);
+               switch (tex->op) {
+               case nir_texop_txb:
+               case nir_texop_txl:
+               case nir_texop_txd:
+                  if (tex->is_shadow)
+                     result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
+                  /* fallthrough */
+               case nir_texop_tex:
+                  if (tex->dest_type & (nir_type_int | nir_type_uint))
+                     result |= TEX_SAMPLE_INTEGER_TEXTURE;
+               default:
+                  ;
+               }
+            }
+            if (TEX_SCAN_ALL_FLAGS == result)
+               return result;
+         }
+      }
+   }
+   return result;
+}
+
+static uint64_t
+update_so_info(struct pipe_stream_output_info *so_info,
+               uint64_t outputs_written)
+{
+   uint64_t so_outputs = 0;
+   uint8_t reverse_map[64] = {0};
+   unsigned slot = 0;
+
+   while (outputs_written)
+      reverse_map[slot++] = u_bit_scan64(&outputs_written);
+
+   for (unsigned i = 0; i < so_info->num_outputs; i++) {
+      struct pipe_stream_output *output = &so_info->output[i];
+
+      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
+      output->register_index = reverse_map[output->register_index];
+
+      so_outputs |= 1ull << output->register_index;
+   }
+
+   return so_outputs;
+}
+
+struct d3d12_shader_selector *
+d3d12_create_shader(struct d3d12_context *ctx,
+                    pipe_shader_type stage,
+                    const struct pipe_shader_state *shader)
+{
+   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
+   sel->stage = stage;
+
+   struct nir_shader *nir = NULL;
+
+   if (shader->type == PIPE_SHADER_IR_NIR) {
+      nir = (nir_shader *)shader->ir.nir;
+   } else {
+      assert(shader->type == PIPE_SHADER_IR_TGSI);
+      nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
+   }
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   unsigned tex_scan_result = scan_texture_use(nir);
+   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
+   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
+
+   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
+   update_so_info(&sel->so_info, nir->info.outputs_written);
+
+   assert(nir != NULL);
+   d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
+   d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
+
+   uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
+                         0 : VARYING_BIT_PRIMITIVE_ID;
+
+   uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
+                          (1ull << FRAG_RESULT_STENCIL) :
+                          VARYING_BIT_PRIMITIVE_ID;
+
+   d3d12_fix_io_uint_type(nir, in_mask, out_mask);
+
+   if (nir->info.stage != MESA_SHADER_VERTEX)
+      nir->info.inputs_read =
+            d3d12_reassign_driver_locations(nir, nir_var_shader_in,
+                                            prev ? prev->current->nir->info.outputs_written : 0);
+   else
+      nir->info.inputs_read = d3d12_sort_by_driver_location(nir, nir_var_shader_in);
+
+   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
+      nir->info.outputs_written =
+            d3d12_reassign_driver_locations(nir, nir_var_shader_out,
+                                            next ? next->current->nir->info.inputs_read : 0);
+   } else {
+      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
+      d3d12_sort_ps_outputs(nir);
+   }
+
+   /* Integer cube maps are not supported in DirectX because sampling is not supported
+    * on integer textures and TextureLoad is not supported for cube maps, so we have to
+    * lower integer cube maps to be handled like 2D textures arrays*/
+   NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array);
+
+   /* Keep this initial shader as the blue print for possible variants */
+   sel->initial = nir;
+
+   /*
+    * We must compile some shader here, because if the previous or a next shaders exists later
+    * when the shaders are bound, then the key evaluation in the shader selector will access
+    * the current variant of these  prev and next shader, and we can only assign
+    * a current variant when it has been successfully compiled.
+    *
+    * For shaders that require lowering because certain instructions are not available
+    * and their emulation is state depended (like sampling an integer texture that must be
+    * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
+    * we must go through the shader selector here to create a compilable variant.
+    * For shaders that are not depended on the state this is just compiling the original
+    * shader.
+    *
+    * TODO: get rid of having to compiling the shader here if it can be forseen that it will
+    * be thrown away (i.e. it depends on states that are likely to change before the shader is
+    * used for the first time)
+    */
+   struct d3d12_selection_context sel_ctx = {0};
+   sel_ctx.ctx = ctx;
+   select_shader_variant(&sel_ctx, sel, prev, next);
+
+   if (!sel->current) {
+      ralloc_free(sel);
+      return NULL;
+   }
+
+   return sel;
+}
+
+void
+d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
+{
+   static unsigned order[] = {PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT};
+   struct d3d12_selection_context sel_ctx;
+
+   sel_ctx.ctx = ctx;
+   sel_ctx.dinfo = dinfo;
+   sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
+   sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
+   sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
+   sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri);
+   sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx);
+   sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
+   sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
+   sel_ctx.manual_depth_range = manual_depth_range(ctx);
+
+   validate_geometry_shader_variant(&sel_ctx);
+
+   for (int i = 0; i < ARRAY_SIZE(order); ++i) {
+      auto sel = ctx->gfx_stages[order[i]];
+      if (!sel)
+         continue;
+
+      d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
+      d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
+
+      select_shader_variant(&sel_ctx, sel, prev, next);
+   }
+}
+
+/* Destroy a shader selector: free the bytecode of every variant in its
+ * list, then release the blueprint NIR shader and the selector itself.
+ */
+void
+d3d12_shader_free(struct d3d12_shader_selector *sel)
+{
+   for (auto variant = sel->first; variant; variant = variant->next_variant)
+      free(variant->bytecode);
+
+   ralloc_free(sel->initial);
+   ralloc_free(sel);
+}
+
+// Used to get path to self. A declaration directly inside a linkage
+// specification is already treated as extern and must not repeat the
+// storage class, so no second `extern` here.
+extern "C" IMAGE_DOS_HEADER __ImageBase;
+
+void d3d12_validation_tools::load_dxil_dll()
+{
+   if (!dxil_module.load("dxil.dll")) {
+      char selfPath[MAX_PATH] = "";
+      uint32_t pathSize = GetModuleFileNameA((HINSTANCE)&__ImageBase, selfPath, sizeof(selfPath));
+      if (pathSize == 0 || pathSize == sizeof(selfPath)) {
+         debug_printf("D3D12: Unable to get path to self");
+         return;
+      }
+
+      auto lastSlash = strrchr(selfPath, '\\');
+      if (!lastSlash) {
+         debug_printf("D3D12: Unable to get path to self");
+         return;
+      }
+
+      *(lastSlash + 1) = '\0';
+      if (strcat_s(selfPath, "dxil.dll") != 0) {
+         debug_printf("D3D12: Unable to get path to dxil.dll next to self");
+         return;
+      }
+
+      dxil_module.load(selfPath);
+   }
+}
+
+d3d12_validation_tools::d3d12_validation_tools()
+{
+   load_dxil_dll();
+   DxcCreateInstanceProc dxil_create_func = (DxcCreateInstanceProc)GetProcAddress(dxil_module, "DxcCreateInstance");
+   assert(dxil_create_func);
+
+   HRESULT hr = dxil_create_func(CLSID_DxcValidator,  IID_PPV_ARGS(&validator));
+   if (FAILED(hr)) {
+      debug_printf("D3D12: Unable to create validator\n");
+   }
+
+   DxcCreateInstanceProc compiler_create_func  = nullptr;
+   if(dxc_compiler_module.load("dxcompiler.dll"))
+      compiler_create_func = (DxcCreateInstanceProc)GetProcAddress(dxc_compiler_module, "DxcCreateInstance");
+
+   if (compiler_create_func) {
+      hr = compiler_create_func(CLSID_DxcLibrary, IID_PPV_ARGS(&library));
+      if (FAILED(hr)) {
+         debug_printf("D3D12: Unable to create library instance: %x\n", hr);
+      }
+
+      if (d3d12_debug & D3D12_DEBUG_DISASS) {
+         hr = compiler_create_func(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler));
+         if (FAILED(hr)) {
+            debug_printf("D3D12: Unable to create compiler instance\n");
+         }
+      }
+   } else if (d3d12_debug & D3D12_DEBUG_DISASS) {
+      debug_printf("D3D12: Disassembly requested but compiler couldn't be loaded\n");
+   }
+}
+
+/* Start out without a loaded module */
+d3d12_validation_tools::HModule::HModule()
+   : module(nullptr)
+{
+}
+
+/* Unload the module, if one was loaded */
+d3d12_validation_tools::HModule::~HModule()
+{
+   if (module != nullptr) {
+      ::FreeLibrary(module);
+   }
+}
+
+/* Give access to the wrapped handle so that the object can be passed
+ * wherever an HMODULE is expected */
+inline
+d3d12_validation_tools::HModule::operator HMODULE () const
+{
+   return this->module;
+}
+
+/* Load the library `file_name` and keep its handle.
+ * Returns true when the library could be loaded.
+ */
+bool
+d3d12_validation_tools::HModule::load(LPCSTR file_name)
+{
+   /* file_name is a narrow string, so call the ANSI entry point explicitly;
+    * the LoadLibrary macro resolves to the wide variant under UNICODE */
+   module = ::LoadLibraryA(file_name);
+   return module != nullptr;
+}
+
+
+class ShaderBlob : public IDxcBlob {
+public:
+   ShaderBlob(blob* data) : m_data(data) {}
+
+   LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return m_data->data; }
+
+   SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return m_data->size; }
+
+   HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; }
+
+   ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
+
+   ULONG STDMETHODCALLTYPE Release() override { return 0; }
+
+   blob* m_data;
+};
+
+bool d3d12_validation_tools::validate_and_sign(struct blob *dxil)
+{
+   ShaderBlob source(dxil);
+
+   ComPtr<IDxcOperationResult> result;
+   if (!validator)
+      return false;
+
+   validator->Validate(&source, DxcValidatorFlags_InPlaceEdit, &result);
+   HRESULT validationStatus;
+   result->GetStatus(&validationStatus);
+   if (FAILED(validationStatus) && library) {
+      ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
+      result->GetErrorBuffer(&printBlob);
+      library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
+
+      char *errorString;
+      if (printBlobUtf8) {
+         errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());
+      }
+
+      errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
+      debug_printf("== VALIDATION ERROR =============================================\n%s\n"
+                   "== END ==========================================================\n",
+                   errorString);
+
+      return false;
+   }
+   return true;
+
+}
+
+void d3d12_validation_tools::disassemble(struct blob *dxil)
+{
+   if (!compiler) {
+      fprintf(stderr, "D3D12: No Disassembler\n");
+      return;
+   }
+   ShaderBlob source(dxil);
+   IDxcBlobEncoding* pDisassembly = nullptr;
+
+   if (FAILED(compiler->Disassemble(&source, &pDisassembly))) {
+      fprintf(stderr, "D3D12: Disassembler failed\n");
+      return;
+   }
+
+   ComPtr<IDxcBlobEncoding> dissassably(pDisassembly);
+   ComPtr<IDxcBlobEncoding> blobUtf8;
+   library->GetBlobAsUtf8(pDisassembly, blobUtf8.GetAddressOf());
+   if (!blobUtf8) {
+      fprintf(stderr, "D3D12: Unable to get utf8 encoding\n");
+      return;
+   }
+
+   char *disassembly = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
+   disassembly[blobUtf8->GetBufferSize() - 1] = 0;
+
+   fprintf(stderr, "== BEGIN SHADER ============================================\n"
+           "%s\n"
+           "== END SHADER ==============================================\n",
+           disassembly);
+}
+
+/* Sort io values so that first come normal varyings,
+ * then system values, and then system generated values.
+ */
+static void insert_sorted(struct exec_list *var_list, nir_variable *new_var)
+{
+   nir_foreach_variable_in_list(var, var_list) {
+      if (var->data.driver_location > new_var->data.driver_location ||
+          (var->data.driver_location == new_var->data.driver_location &&
+           var->data.location > new_var->data.location)) {
+         exec_node_insert_node_before(&var->node, &new_var->node);
+         return;
+      }
+   }
+   exec_list_push_tail(var_list, &new_var->node);
+}
+
+/* Order the variables of the given modes according to driver location.
+ *
+ * The matching variables are taken out of the shader, sorted with
+ * insert_sorted() and appended to the shader's variable list again.
+ * Returns a mask with bit `location` set for every variable handled.
+ */
+uint64_t
+d3d12_sort_by_driver_location(nir_shader *s, nir_variable_mode modes)
+{
+   struct exec_list sorted;
+   exec_list_make_empty(&sorted);
+
+   uint64_t location_mask = 0;
+   nir_foreach_variable_with_modes_safe(var, s, modes) {
+      location_mask |= 1ull << var->data.location;
+
+      exec_node_remove(&var->node);
+      insert_sorted(&sorted, var);
+   }
+
+   exec_list_append(&s->variables, &sorted);
+   return location_mask;
+}
+
+/* Sort PS outputs so that color outputs come first */
+void
+d3d12_sort_ps_outputs(nir_shader *s)
+{
+   struct exec_list new_list;
+   exec_list_make_empty(&new_list);
+
+   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
+      exec_node_remove(&var->node);
+      /* We use the driver_location here to avoid introducing a new
+       * struct or member variable here. The true, updated driver location
+       * will be written below, after sorting */
+      switch (var->data.location) {
+      case FRAG_RESULT_DEPTH:
+         var->data.driver_location = 1;
+         break;
+      case FRAG_RESULT_STENCIL:
+         var->data.driver_location = 2;
+         break;
+      case FRAG_RESULT_SAMPLE_MASK:
+         var->data.driver_location = 3;
+         break;
+      default:
+         var->data.driver_location = 0;
+      }
+      insert_sorted(&new_list, var);
+   }
+   exec_list_append(&s->variables, &new_list);
+
+   unsigned driver_loc = 0;
+   nir_foreach_variable_with_modes(var, s, nir_var_shader_out) {
+      var->data.driver_location = driver_loc++;
+   }
+}
+
+/* Order between stage values so that normal varyings come first,
+ * then sysvalues and then system generated values.
+ */
+uint64_t
+d3d12_reassign_driver_locations(nir_shader *s, nir_variable_mode modes,
+                                uint64_t other_stage_mask)
+{
+   struct exec_list new_list;
+   exec_list_make_empty(&new_list);
+
+   uint64_t result = 0;
+   nir_foreach_variable_with_modes_safe(var, s, modes) {
+      exec_node_remove(&var->node);
+      /* We use the driver_location here to avoid introducing a new
+       * struct or member variable here. The true, updated driver location
+       * will be written below, after sorting */
+      var->data.driver_location = nir_var_to_dxil_sysvalue_type(var, other_stage_mask);
+      insert_sorted(&new_list, var);
+   }
+   exec_list_append(&s->variables, &new_list);
+
+   unsigned driver_loc = 0;
+   nir_foreach_variable_with_modes(var, s, modes) {
+      result |= 1ull << var->data.location;
+      var->data.driver_location = driver_loc++;
+   }
+   return result;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.h b/src/gallium/drivers/d3d12/d3d12_compiler.h
new file mode 100644 (file)
index 0000000..d838201
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_COMPILER_H
+#define D3D12_COMPILER_H
+
+#include "d3d12_context.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "compiler/shader_info.h"
+#include "program/prog_statevars.h"
+
+#include "nir.h"
+
+struct pipe_screen;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Create the validation tools object; `(void)` so that the declaration is
+ * a real prototype when this header is included from C as well */
+struct d3d12_validation_tools *d3d12_validator_create(void);
+
+void d3d12_validator_destroy(struct d3d12_validation_tools *validator);
+
+const void *
+d3d12_get_compiler_options(struct pipe_screen *screen,
+                           enum pipe_shader_ir ir,
+                           enum pipe_shader_type shader);
+
+/* Description of a set of varyings, indexed by varying slot.
+ * `mask` has one bit per slot; the variant selection scans its bits and
+ * creates a variable from the matching vars[] entry for each slot that the
+ * shader itself does not already provide.
+ */
+struct d3d12_varying_info {
+   struct {
+      const struct glsl_type *type;
+      unsigned interpolation:3;   // INTERP_MODE_COUNT = 5
+      unsigned driver_location:6; // VARYING_SLOT_MAX = 64
+   } vars[VARYING_SLOT_MAX];
+   uint64_t mask;
+};
+
+/* Key that identifies one compiled variant of a shader. The variant
+ * selection fills a key from the current state and compares it against the
+ * key stored in every existing variant; the fields also decide which
+ * lowering passes are applied when a new variant is built.
+ */
+struct d3d12_shader_key {
+   enum pipe_shader_type stage;
+
+   /* Varyings that are added to the variant when it lacks them */
+   struct d3d12_varying_info required_varying_inputs;
+   struct d3d12_varying_info required_varying_outputs;
+   /* Masks of the neighbouring stages, used when driver locations of the
+    * variant's outputs (next) and inputs (prev) are reassigned */
+   uint64_t next_varying_inputs;
+   uint64_t prev_varying_outputs;
+   unsigned last_vertex_processing_stage : 1;
+   unsigned invert_depth : 1;
+   unsigned samples_int_textures : 1;
+   /* One bit per sampler; forwarded to nir_lower_tex as saturate_{s,r,t} */
+   unsigned tex_saturate_s : PIPE_MAX_SAMPLERS;
+   unsigned tex_saturate_r : PIPE_MAX_SAMPLERS;
+   unsigned tex_saturate_t : PIPE_MAX_SAMPLERS;
+
+   /* Vertex stage: copied from the bound vertex element state */
+   struct {
+      unsigned needs_format_emulation:1;
+      enum pipe_format format_conversion[PIPE_MAX_ATTRIBS];
+   } vs;
+
+   /* Geometry stage */
+   struct {
+      unsigned sprite_coord_enable:24;
+      unsigned sprite_origin_upper_left:1;
+      unsigned point_pos_stream_out:1;
+      unsigned writes_psize:1;          /* triggers point sprite lowering */
+      unsigned point_size_per_vertex:1;
+      unsigned aa_point:1;
+      unsigned stream_output_factor:3;
+      unsigned primitive_id:1;          /* triggers primitive id lowering */
+      unsigned triangle_strip:1;        /* triggers triangle strip lowering */
+   } gs;
+
+   /* Fragment stage */
+   struct {
+      unsigned missing_dual_src_outputs : 2;
+      unsigned frag_result_color_lowering : 4;
+      unsigned cast_to_uint : 1;
+      unsigned cast_to_int : 1;
+      unsigned provoking_vertex : 2;
+      unsigned manual_depth_range : 1;
+      unsigned polygon_stipple : 1;
+      unsigned remap_front_facing : 1;
+   } fs;
+
+   /* Number of valid entries in the three per-texture arrays below */
+   int n_texture_states;
+   dxil_wrap_sampler_state tex_wrap_states[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   dxil_texture_swizzle_state swizzle_state[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   enum compare_func sampler_compare_funcs[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+};
+
+/* One compiled variant of a shader. Variants of the same selector form a
+ * singly linked list through next_variant.
+ */
+struct d3d12_shader {
+   /* Compiled code; released with free() when the selector is destroyed */
+   void *bytecode;
+   size_t bytecode_length;
+
+   nir_shader *nir;
+
+   struct {
+      unsigned binding;
+   } cb_bindings[PIPE_MAX_CONSTANT_BUFFERS];
+   size_t num_cb_bindings;
+
+   struct {
+      enum d3d12_state_var var;
+      unsigned offset;
+   } state_vars[D3D12_MAX_STATE_VARS];
+   unsigned num_state_vars;
+   size_t state_vars_size;
+   bool state_vars_used;
+
+   struct {
+      int index;
+      int binding;
+      uint32_t dimension;
+   } srv_bindings[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   size_t num_srv_bindings;
+
+   bool has_default_ubo0;
+   /* Binding chosen by the polygon stipple lowering; UINT32_MAX when the
+    * variant was built without that lowering */
+   unsigned pstipple_binding;
+
+   /* Key this variant was compiled for */
+   struct d3d12_shader_key key;
+   struct d3d12_shader *next_variant;
+};
+
+/* Key describing a geometry shader variant; passed to
+ * d3d12_get_gs_variant() and stored in the resulting selector's gs_key.
+ */
+struct d3d12_gs_variant_key
+{
+   unsigned passthrough:1;
+   unsigned provoking_vertex:3;
+   unsigned alternate_tri:1;
+   unsigned fill_mode:2;
+   unsigned cull_mode:2;
+   /* When set on the bound GS variant, the fragment shader key requests
+    * front-face remapping */
+   unsigned has_front_face:1;
+   unsigned front_ccw:1;
+   unsigned edge_flag_fix:1;
+   unsigned flatshade_first:1;
+   uint64_t flat_varyings;
+   struct d3d12_varying_info varyings;
+};
+
+/* A shader as created through the gallium interface, together with all
+ * variants compiled from it so far.
+ */
+struct d3d12_shader_selector {
+   enum pipe_shader_type stage;
+   /* Blueprint that is cloned for every new variant */
+   nir_shader *initial;
+   /* Head of the variant list; new variants are prepended */
+   struct d3d12_shader *first;
+   /* Variant picked by the most recent selection */
+   struct d3d12_shader *current;
+
+   struct pipe_stream_output_info so_info;
+
+   /* Results of scanning the texture instructions at creation time */
+   unsigned samples_int_textures:1;
+   unsigned compare_with_lod_bias_grad:1;
+
+   bool is_gs_variant;
+   struct d3d12_gs_variant_key gs_key;
+};
+
+
+struct d3d12_shader_selector *
+d3d12_create_shader(struct d3d12_context *ctx,
+                    enum pipe_shader_type stage,
+                    const struct pipe_shader_state *shader);
+
+void
+d3d12_shader_free(struct d3d12_shader_selector *shader);
+
+void
+d3d12_select_shader_variants(struct d3d12_context *ctx,
+                             const struct pipe_draw_info *dinfo);
+
+void
+d3d12_gs_variant_cache_init(struct d3d12_context *ctx);
+
+void
+d3d12_gs_variant_cache_destroy(struct d3d12_context *ctx);
+
+struct d3d12_shader_selector *
+d3d12_get_gs_variant(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key);
+
+uint64_t
+d3d12_reassign_driver_locations(nir_shader *s, nir_variable_mode modes,
+                                uint64_t other_stage_mask);
+
+uint64_t
+d3d12_sort_by_driver_location(nir_shader *s, nir_variable_mode modes);
+
+void
+d3d12_sort_ps_outputs(nir_shader *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp
new file mode 100644 (file)
index 0000000..aa7c8ed
--- /dev/null
@@ -0,0 +1,2052 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_blit.h"
+#include "d3d12_context.h"
+#include "d3d12_compiler.h"
+#include "d3d12_debug.h"
+#include "d3d12_fence.h"
+#include "d3d12_format.h"
+#include "d3d12_query.h"
+#include "d3d12_resource.h"
+#include "d3d12_root_signature.h"
+#include "d3d12_screen.h"
+#include "d3d12_surface.h"
+
+#include "util/u_blitter.h"
+#include "util/u_dual_blend.h"
+#include "util/u_framebuffer.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_pstipple.h"
+#include "nir_to_dxil.h"
+
+#include "D3D12ResourceState.h"
+
+extern "C" {
+#include "indices/u_primconvert.h"
+}
+
+#include <string.h>
+
+/* Tear down a d3d12 context and free it.
+ *
+ * Releases, in this order: the validator, the optional timestamp query, the
+ * blitter, all batches, the command list and queue fence, the descriptor
+ * pools, primconvert, the transfer slab, the GS-variant / pipeline-state /
+ * root-signature caches, the query suballocator, the uploaders and the
+ * resource state manager. The context memory itself is freed last.
+ */
+static void
+d3d12_context_destroy(struct pipe_context *pctx)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   d3d12_validator_destroy(ctx->validation_tools);
+
+   /* the timestamp query is optional, so only destroy it if it exists */
+   if (ctx->timestamp_query)
+      pctx->destroy_query(pctx, ctx->timestamp_query);
+
+   util_blitter_destroy(ctx->blitter);
+   /* end the batch currently being recorded before destroying every batch */
+   d3d12_end_batch(ctx, d3d12_current_batch(ctx));
+   for (int i = 0; i < ARRAY_SIZE(ctx->batches); ++i)
+      d3d12_destroy_batch(ctx, &ctx->batches[i]);
+   /* drop our references on the D3D12 COM objects */
+   ctx->cmdlist->Release();
+   ctx->cmdqueue_fence->Release();
+   d3d12_descriptor_pool_free(ctx->rtv_pool);
+   d3d12_descriptor_pool_free(ctx->dsv_pool);
+   d3d12_descriptor_pool_free(ctx->sampler_pool);
+   d3d12_descriptor_pool_free(ctx->view_pool);
+   util_primconvert_destroy(ctx->primconvert);
+   slab_destroy_child(&ctx->transfer_pool);
+   d3d12_gs_variant_cache_destroy(ctx);
+   d3d12_gfx_pipeline_state_cache_destroy(ctx);
+   d3d12_root_signature_cache_destroy(ctx);
+
+   u_suballocator_destroy(ctx->query_allocator);
+
+   if (pctx->stream_uploader)
+      u_upload_destroy(pctx->stream_uploader);
+   if (pctx->const_uploader)
+      u_upload_destroy(pctx->const_uploader);
+
+   /* C++ object, hence delete rather than FREE() */
+   delete ctx->resource_state_manager;
+
+   FREE(ctx);
+}
+
+static void *
+d3d12_create_vertex_elements_state(struct pipe_context *pctx,
+                                   unsigned num_elements,
+                                   const struct pipe_vertex_element *elements)
+{
+   struct d3d12_vertex_elements_state *cso = CALLOC_STRUCT(d3d12_vertex_elements_state);
+   if (!cso)
+      return NULL;
+
+   for (unsigned i = 0; i < num_elements; ++i) {
+      cso->elements[i].SemanticName = dxil_vs_attr_index_to_name(i);
+      cso->elements[i].SemanticIndex = 0;
+
+      enum pipe_format format_helper = d3d12_emulated_vtx_format(elements[i].src_format);
+      bool needs_emulation = format_helper != elements[i].src_format;
+      cso->needs_format_emulation |= needs_emulation;
+      cso->format_conversion[i] = needs_emulation ? elements[i].src_format : PIPE_FORMAT_NONE;
+
+      cso->elements[i].Format = d3d12_get_format(format_helper);
+      assert(cso->elements[i].Format != DXGI_FORMAT_UNKNOWN);
+      cso->elements[i].InputSlot = elements[i].vertex_buffer_index;
+      cso->elements[i].AlignedByteOffset = elements[i].src_offset;
+
+      if (elements[i].instance_divisor) {
+         cso->elements[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
+         cso->elements[i].InstanceDataStepRate = elements[i].instance_divisor;
+      } else {
+         cso->elements[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
+         cso->elements[i].InstanceDataStepRate = 0;
+      }
+   }
+
+   cso->num_elements = num_elements;
+   return cso;
+}
+
+/* Make `ve` the current vertex-elements CSO and flag it dirty. */
+static void
+d3d12_bind_vertex_elements_state(struct pipe_context *pctx,
+                                 void *ve)
+{
+   struct d3d12_context *d3d12 = d3d12_context(pctx);
+   struct d3d12_vertex_elements_state *ves =
+      static_cast<struct d3d12_vertex_elements_state *>(ve);
+
+   d3d12->state_dirty |= D3D12_DIRTY_VERTEX_ELEMENTS;
+   d3d12->gfx_pipeline_state.ves = ves;
+}
+
+/* Free a vertex-elements CSO. Unlike the blend, depth/stencil and rasterizer
+ * delete functions in this file, it does not call
+ * d3d12_gfx_pipeline_state_cache_invalidate() first.
+ */
+static void
+d3d12_delete_vertex_elements_state(struct pipe_context *pctx,
+                                   void *ve)
+{
+   FREE(ve);
+}
+
+static D3D12_BLEND
+blend_factor_rgb(enum pipe_blendfactor factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO: return D3D12_BLEND_ZERO;
+   case PIPE_BLENDFACTOR_ONE: return D3D12_BLEND_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
+   case PIPE_BLENDFACTOR_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
+   case PIPE_BLENDFACTOR_CONST_COLOR: return D3D12_BLEND_BLEND_FACTOR;
+   case PIPE_BLENDFACTOR_SRC1_COLOR: return D3D12_BLEND_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR: return D3D12_BLEND_INV_BLEND_FACTOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return D3D12_BLEND_INV_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_CONST_ALPHA: return D3D12_BLEND_BLEND_FACTOR; /* Doesn't exist in D3D12 */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return D3D12_BLEND_INV_BLEND_FACTOR; /* Doesn't exist in D3D12 */
+   }
+   unreachable("unexpected blend factor");
+}
+
+static D3D12_BLEND
+blend_factor_alpha(enum pipe_blendfactor factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO: return D3D12_BLEND_ZERO;
+   case PIPE_BLENDFACTOR_ONE: return D3D12_BLEND_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA: return D3D12_BLEND_BLEND_FACTOR;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return D3D12_BLEND_INV_BLEND_FACTOR;
+   }
+   unreachable("unexpected blend factor");
+}
+
+/* Report which blend constant an RGB blend factor reads:
+ * D3D12_BLEND_FACTOR_COLOR, D3D12_BLEND_FACTOR_ALPHA, or
+ * D3D12_BLEND_FACTOR_NONE when it doesn't use the blend constant.
+ */
+static unsigned
+need_blend_factor_rgb(enum pipe_blendfactor factor)
+{
+   if (factor == PIPE_BLENDFACTOR_CONST_COLOR ||
+       factor == PIPE_BLENDFACTOR_INV_CONST_COLOR)
+      return D3D12_BLEND_FACTOR_COLOR;
+
+   if (factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+       factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA)
+      return D3D12_BLEND_FACTOR_ALPHA;
+
+   return D3D12_BLEND_FACTOR_NONE;
+}
+
+/* Report whether an alpha blend factor reads the blend constant: any of the
+ * constant factors yields D3D12_BLEND_FACTOR_ANY, everything else
+ * D3D12_BLEND_FACTOR_NONE.
+ */
+static unsigned
+need_blend_factor_alpha(enum pipe_blendfactor factor)
+{
+   const bool uses_constant = factor == PIPE_BLENDFACTOR_CONST_COLOR ||
+                              factor == PIPE_BLENDFACTOR_INV_CONST_COLOR ||
+                              factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+                              factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA;
+
+   if (uses_constant)
+      return D3D12_BLEND_FACTOR_ANY;
+   return D3D12_BLEND_FACTOR_NONE;
+}
+
+/* Translate a gallium blend function into the D3D12 blend operation. */
+static D3D12_BLEND_OP
+blend_op(enum pipe_blend_func func)
+{
+   switch (func) {
+   case PIPE_BLEND_ADD:
+      return D3D12_BLEND_OP_ADD;
+   case PIPE_BLEND_SUBTRACT:
+      return D3D12_BLEND_OP_SUBTRACT;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      return D3D12_BLEND_OP_REV_SUBTRACT;
+   case PIPE_BLEND_MIN:
+      return D3D12_BLEND_OP_MIN;
+   case PIPE_BLEND_MAX:
+      return D3D12_BLEND_OP_MAX;
+   }
+   unreachable("unexpected blend function");
+}
+
+static D3D12_COMPARISON_FUNC
+compare_op(enum pipe_compare_func op)
+{
+   switch (op) {
+      case PIPE_FUNC_NEVER: return D3D12_COMPARISON_FUNC_NEVER;
+      case PIPE_FUNC_LESS: return D3D12_COMPARISON_FUNC_LESS;
+      case PIPE_FUNC_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL;
+      case PIPE_FUNC_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL;
+      case PIPE_FUNC_GREATER: return D3D12_COMPARISON_FUNC_GREATER;
+      case PIPE_FUNC_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL;
+      case PIPE_FUNC_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
+      case PIPE_FUNC_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS;
+   }
+   unreachable("unexpected compare");
+}
+
+static D3D12_LOGIC_OP
+logic_op(enum pipe_logicop func)
+{
+   switch (func) {
+   case PIPE_LOGICOP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
+   case PIPE_LOGICOP_NOR: return D3D12_LOGIC_OP_NOR;
+   case PIPE_LOGICOP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
+   case PIPE_LOGICOP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
+   case PIPE_LOGICOP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
+   case PIPE_LOGICOP_INVERT: return D3D12_LOGIC_OP_INVERT;
+   case PIPE_LOGICOP_XOR: return D3D12_LOGIC_OP_XOR;
+   case PIPE_LOGICOP_NAND: return D3D12_LOGIC_OP_NAND;
+   case PIPE_LOGICOP_AND: return D3D12_LOGIC_OP_AND;
+   case PIPE_LOGICOP_EQUIV: return D3D12_LOGIC_OP_EQUIV;
+   case PIPE_LOGICOP_NOOP: return D3D12_LOGIC_OP_NOOP;
+   case PIPE_LOGICOP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
+   case PIPE_LOGICOP_COPY: return D3D12_LOGIC_OP_COPY;
+   case PIPE_LOGICOP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
+   case PIPE_LOGICOP_OR: return D3D12_LOGIC_OP_OR;
+   case PIPE_LOGICOP_SET: return D3D12_LOGIC_OP_SET;
+   }
+   unreachable("unexpected logicop function");
+}
+
+/* Convert a gallium PIPE_MASK_* color mask into the equivalent
+ * D3D12_COLOR_WRITE_ENABLE_* bit mask, one channel at a time.
+ */
+static UINT8
+color_write_mask(unsigned colormask)
+{
+   UINT8 enabled = 0;
+
+   enabled |= (colormask & PIPE_MASK_R) ? D3D12_COLOR_WRITE_ENABLE_RED : 0;
+   enabled |= (colormask & PIPE_MASK_G) ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0;
+   enabled |= (colormask & PIPE_MASK_B) ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0;
+   enabled |= (colormask & PIPE_MASK_A) ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0;
+
+   return enabled;
+}
+
+static void *
+d3d12_create_blend_state(struct pipe_context *pctx,
+                         const struct pipe_blend_state *blend_state)
+{
+   struct d3d12_blend_state *state = CALLOC_STRUCT(d3d12_blend_state);
+   if (!state)
+      return NULL;
+
+   if (blend_state->logicop_enable) {
+      state->desc.RenderTarget[0].LogicOpEnable = TRUE;
+      state->desc.RenderTarget[0].LogicOp = logic_op((pipe_logicop) blend_state->logicop_func);
+   }
+
+   /* TODO Dithering */
+
+   state->desc.AlphaToCoverageEnable = blend_state->alpha_to_coverage;
+
+   int num_targets = 1;
+   if (blend_state->independent_blend_enable) {
+      state->desc.IndependentBlendEnable = TRUE;
+      num_targets = PIPE_MAX_COLOR_BUFS;
+   }
+
+   for (int i = 0; i < num_targets; ++i) {
+      const struct pipe_rt_blend_state *rt = blend_state->rt + i;
+
+      if (rt->blend_enable) {
+         state->desc.RenderTarget[i].BlendEnable = TRUE;
+         state->desc.RenderTarget[i].SrcBlend = blend_factor_rgb((pipe_blendfactor) rt->rgb_src_factor);
+         state->desc.RenderTarget[i].DestBlend = blend_factor_rgb((pipe_blendfactor) rt->rgb_dst_factor);
+         state->desc.RenderTarget[i].BlendOp = blend_op((pipe_blend_func) rt->rgb_func);
+         state->desc.RenderTarget[i].SrcBlendAlpha = blend_factor_alpha((pipe_blendfactor) rt->alpha_src_factor);
+         state->desc.RenderTarget[i].DestBlendAlpha = blend_factor_alpha((pipe_blendfactor) rt->alpha_dst_factor);
+         state->desc.RenderTarget[i].BlendOpAlpha = blend_op((pipe_blend_func) rt->alpha_func);
+
+         state->blend_factor_flags |= need_blend_factor_rgb((pipe_blendfactor) rt->rgb_src_factor);
+         state->blend_factor_flags |= need_blend_factor_rgb((pipe_blendfactor) rt->rgb_dst_factor);
+         state->blend_factor_flags |= need_blend_factor_alpha((pipe_blendfactor) rt->alpha_src_factor);
+         state->blend_factor_flags |= need_blend_factor_alpha((pipe_blendfactor) rt->alpha_dst_factor);
+
+         if (state->blend_factor_flags == (D3D12_BLEND_FACTOR_COLOR | D3D12_BLEND_FACTOR_ALPHA) &&
+             (d3d12_debug & D3D12_DEBUG_VERBOSE)) {
+            /* We can't set a blend factor for both constant color and constant alpha */
+            debug_printf("D3D12: unsupported blend factors combination (const color and const alpha)\n");
+         }
+
+         if (util_blend_state_is_dual(blend_state, i))
+            state->is_dual_src = true;
+      }
+
+      state->desc.RenderTarget[i].RenderTargetWriteMask = color_write_mask(rt->colormask);
+   }
+
+   return state;
+}
+
+static void
+d3d12_bind_blend_state(struct pipe_context *pctx, void *blend_state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_blend_state *new_state = (struct d3d12_blend_state *) blend_state;
+   struct d3d12_blend_state *old_state = ctx->gfx_pipeline_state.blend;
+
+   ctx->gfx_pipeline_state.blend = new_state;
+   ctx->state_dirty |= D3D12_DIRTY_BLEND;
+   if (new_state == NULL || old_state == NULL ||
+       new_state->blend_factor_flags != old_state->blend_factor_flags)
+      ctx->state_dirty |= D3D12_DIRTY_BLEND_COLOR;
+}
+
+/* Destroy a blend CSO: invalidate it in the graphics pipeline-state cache,
+ * then free it.
+ */
+static void
+d3d12_delete_blend_state(struct pipe_context *pctx, void *blend_state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   d3d12_gfx_pipeline_state_cache_invalidate(ctx, blend_state);
+   FREE(blend_state);
+}
+
+/* Translate a gallium stencil op into the D3D12 stencil op.
+ *
+ * Mind the naming: gallium INCR/DECR map to the D3D12 *_SAT ops, while
+ * gallium INCR_WRAP/DECR_WRAP map to plain D3D12 INCR/DECR.
+ */
+static D3D12_STENCIL_OP
+stencil_op(enum pipe_stencil_op op)
+{
+   switch (op) {
+   case PIPE_STENCIL_OP_KEEP:
+      return D3D12_STENCIL_OP_KEEP;
+   case PIPE_STENCIL_OP_ZERO:
+      return D3D12_STENCIL_OP_ZERO;
+   case PIPE_STENCIL_OP_REPLACE:
+      return D3D12_STENCIL_OP_REPLACE;
+   case PIPE_STENCIL_OP_INVERT:
+      return D3D12_STENCIL_OP_INVERT;
+   case PIPE_STENCIL_OP_INCR:
+      return D3D12_STENCIL_OP_INCR_SAT;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      return D3D12_STENCIL_OP_INCR;
+   case PIPE_STENCIL_OP_DECR:
+      return D3D12_STENCIL_OP_DECR_SAT;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      return D3D12_STENCIL_OP_DECR;
+   }
+   unreachable("unexpected op");
+}
+
+/* Build the D3D12 per-face stencil description (the three ops plus the
+ * compare function) from one gallium stencil state.
+ */
+static D3D12_DEPTH_STENCILOP_DESC
+stencil_op_state(const struct pipe_stencil_state *src)
+{
+   D3D12_DEPTH_STENCILOP_DESC face;
+
+   face.StencilFunc = compare_op((pipe_compare_func) src->func);
+   face.StencilFailOp = stencil_op((pipe_stencil_op) src->fail_op);
+   face.StencilDepthFailOp = stencil_op((pipe_stencil_op) src->zfail_op);
+   face.StencilPassOp = stencil_op((pipe_stencil_op) src->zpass_op);
+
+   return face;
+}
+
+static void *
+d3d12_create_depth_stencil_alpha_state(struct pipe_context *pctx,
+                                       const struct pipe_depth_stencil_alpha_state *depth_stencil_alpha)
+{
+   struct d3d12_depth_stencil_alpha_state *dsa = CALLOC_STRUCT(d3d12_depth_stencil_alpha_state);
+   if (!dsa)
+      return NULL;
+
+   if (depth_stencil_alpha->depth.enabled) {
+      dsa->desc.DepthEnable = TRUE;
+      dsa->desc.DepthFunc = compare_op((pipe_compare_func) depth_stencil_alpha->depth.func);
+   }
+
+   /* TODO Add support for GL_depth_bound_tests */
+   #if 0
+   if (depth_stencil_alpha->depth.bounds_test) {
+      dsa->desc.DepthBoundsTestEnable = TRUE;
+      dsa->min_depth_bounds = depth_stencil_alpha->depth.bounds_min;
+      dsa->max_depth_bounds = depth_stencil_alpha->depth.bounds_max;
+   }
+   #endif
+
+   if (depth_stencil_alpha->stencil[0].enabled) {
+      dsa->desc.StencilEnable = TRUE;
+      dsa->desc.FrontFace = stencil_op_state(depth_stencil_alpha->stencil);
+   }
+
+   if (depth_stencil_alpha->stencil[1].enabled)
+      dsa->desc.BackFace = stencil_op_state(depth_stencil_alpha->stencil + 1);
+   else
+      dsa->desc.BackFace = dsa->desc.FrontFace;
+
+   dsa->desc.StencilReadMask = depth_stencil_alpha->stencil[0].valuemask; /* FIXME Back face mask */
+   dsa->desc.StencilWriteMask = depth_stencil_alpha->stencil[0].writemask; /* FIXME Back face mask */
+   dsa->desc.DepthWriteMask = (D3D12_DEPTH_WRITE_MASK) depth_stencil_alpha->depth.writemask;
+
+   return dsa;
+}
+
+/* Make `dsa` the current depth/stencil/alpha CSO and flag it dirty. */
+static void
+d3d12_bind_depth_stencil_alpha_state(struct pipe_context *pctx,
+                                     void *dsa)
+{
+   struct d3d12_context *d3d12 = d3d12_context(pctx);
+   struct d3d12_depth_stencil_alpha_state *zsa =
+      static_cast<struct d3d12_depth_stencil_alpha_state *>(dsa);
+
+   d3d12->state_dirty |= D3D12_DIRTY_ZSA;
+   d3d12->gfx_pipeline_state.zsa = zsa;
+}
+
+/* Destroy a depth/stencil/alpha CSO: invalidate it in the graphics
+ * pipeline-state cache, then free it.
+ */
+static void
+d3d12_delete_depth_stencil_alpha_state(struct pipe_context *pctx,
+                                       void *dsa_state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   d3d12_gfx_pipeline_state_cache_invalidate(ctx, dsa_state);
+   FREE(dsa_state);
+}
+
+/* Translate a gallium polygon mode into a D3D12 fill mode. Only line mode
+ * becomes wireframe; both fill and point mode are mapped to solid.
+ */
+static D3D12_FILL_MODE
+fill_mode(unsigned mode)
+{
+   switch (mode) {
+   case PIPE_POLYGON_MODE_LINE:
+      return D3D12_FILL_MODE_WIREFRAME;
+
+   case PIPE_POLYGON_MODE_FILL:
+   case PIPE_POLYGON_MODE_POINT:
+      return D3D12_FILL_MODE_SOLID;
+
+   default:
+      unreachable("unsupported fill-mode");
+   }
+}
+
+static void *
+d3d12_create_rasterizer_state(struct pipe_context *pctx,
+                              const struct pipe_rasterizer_state *rs_state)
+{
+   struct d3d12_rasterizer_state *cso = CALLOC_STRUCT(d3d12_rasterizer_state);
+   if (!cso)
+      return NULL;
+
+   cso->base = *rs_state;
+
+   assert(rs_state->depth_clip_near == rs_state->depth_clip_far);
+
+   switch (rs_state->cull_face) {
+   case PIPE_FACE_NONE:
+      if (rs_state->fill_front != rs_state->fill_back) {
+         cso->base.cull_face = PIPE_FACE_BACK;
+         cso->desc.CullMode = D3D12_CULL_MODE_BACK;
+         cso->desc.FillMode = fill_mode(rs_state->fill_front);
+
+         /* create a modified CSO for the back-state, so we can draw with
+          * either.
+          */
+         struct pipe_rasterizer_state templ = *rs_state;
+         templ.cull_face = PIPE_FACE_FRONT;
+         templ.fill_front = rs_state->fill_back;
+         cso->twoface_back = d3d12_create_rasterizer_state(pctx, &templ);
+
+         if (!cso->twoface_back) {
+            FREE(cso);
+            return NULL;
+         }
+      } else {
+         cso->desc.CullMode = D3D12_CULL_MODE_NONE;
+         cso->desc.FillMode = fill_mode(rs_state->fill_front);
+      }
+      break;
+
+   case PIPE_FACE_FRONT:
+      cso->desc.CullMode = D3D12_CULL_MODE_FRONT;
+      cso->desc.FillMode = fill_mode(rs_state->fill_back);
+      break;
+
+   case PIPE_FACE_BACK:
+      cso->desc.CullMode = D3D12_CULL_MODE_BACK;
+      cso->desc.FillMode = fill_mode(rs_state->fill_front);
+      break;
+
+   case PIPE_FACE_FRONT_AND_BACK:
+      /* this is wrong, and we shouldn't actually have to support this! */
+      cso->desc.CullMode = D3D12_CULL_MODE_NONE;
+      cso->desc.FillMode = D3D12_FILL_MODE_SOLID;
+      break;
+
+   default:
+      unreachable("unsupported cull-mode");
+   }
+
+   cso->desc.FrontCounterClockwise = rs_state->front_ccw;
+   cso->desc.DepthClipEnable = rs_state->depth_clip_near;
+   cso->desc.MultisampleEnable = rs_state->multisample;
+   cso->desc.AntialiasedLineEnable = rs_state->line_smooth;
+   cso->desc.ForcedSampleCount = 0; // TODO
+   cso->desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; /* Not Implemented */
+
+   return cso;
+}
+
+/* Make `rs_state` the current rasterizer CSO. Both the rasterizer and the
+ * scissor state are flagged dirty.
+ */
+static void
+d3d12_bind_rasterizer_state(struct pipe_context *pctx, void *rs_state)
+{
+   struct d3d12_context *d3d12 = d3d12_context(pctx);
+   struct d3d12_rasterizer_state *rast =
+      static_cast<struct d3d12_rasterizer_state *>(rs_state);
+
+   d3d12->state_dirty |= D3D12_DIRTY_RASTERIZER | D3D12_DIRTY_SCISSOR;
+   d3d12->gfx_pipeline_state.rast = rast;
+}
+
+/* Destroy a rasterizer CSO: invalidate it in the graphics pipeline-state
+ * cache, then free it.
+ *
+ * A CSO created for two-sided fill modes owns a second, nested CSO in
+ * twoface_back (see d3d12_create_rasterizer_state()). That nested CSO is
+ * destroyed first, so it is neither leaked nor left behind in the cache.
+ * The nested CSO never has a twoface_back of its own, so the recursion is at
+ * most one level deep.
+ */
+static void
+d3d12_delete_rasterizer_state(struct pipe_context *pctx, void *rs_state)
+{
+   struct d3d12_rasterizer_state *cso = (struct d3d12_rasterizer_state *)rs_state;
+
+   if (cso && cso->twoface_back)
+      d3d12_delete_rasterizer_state(pctx, cso->twoface_back);
+
+   d3d12_gfx_pipeline_state_cache_invalidate(d3d12_context(pctx), rs_state);
+   FREE(rs_state);
+}
+
+static D3D12_TEXTURE_ADDRESS_MODE
+sampler_address_mode(enum pipe_tex_wrap wrap, enum pipe_tex_filter filter)
+{
+   switch (wrap) {
+   case PIPE_TEX_WRAP_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+   case PIPE_TEX_WRAP_CLAMP: return filter == PIPE_TEX_FILTER_NEAREST ?
+                                D3D12_TEXTURE_ADDRESS_MODE_CLAMP :
+                                D3D12_TEXTURE_ADDRESS_MODE_BORDER;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; /* not technically correct, but kinda works */
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; /* FIXME: Doesn't exist in D3D12 */
+   }
+   unreachable("unexpected wrap");
+}
+
+static D3D12_FILTER
+get_filter(const struct pipe_sampler_state *state)
+{
+   static const D3D12_FILTER lut[16] = {
+      D3D12_FILTER_MIN_MAG_MIP_POINT,
+      D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR,
+      D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT,
+      D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR,
+      D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT,
+      D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR,
+      D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT,
+      D3D12_FILTER_MIN_MAG_MIP_LINEAR,
+      D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT,
+      D3D12_FILTER_COMPARISON_MIN_MAG_POINT_MIP_LINEAR,
+      D3D12_FILTER_COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT,
+      D3D12_FILTER_COMPARISON_MIN_POINT_MAG_MIP_LINEAR,
+      D3D12_FILTER_COMPARISON_MIN_LINEAR_MAG_MIP_POINT,
+      D3D12_FILTER_COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR,
+      D3D12_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT,
+      D3D12_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR,
+   };
+
+   static const D3D12_FILTER anisotropic_lut[2] = {
+      D3D12_FILTER_ANISOTROPIC,
+      D3D12_FILTER_COMPARISON_ANISOTROPIC,
+   };
+
+   if (state->max_anisotropy > 1) {
+      return anisotropic_lut[state->compare_mode];
+   } else {
+      int idx = (state->mag_img_filter << 1) |
+                (state->min_img_filter << 2) |
+                (state->compare_mode << 3);
+      if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE)
+         idx |= state->min_mip_filter;
+      return lut[idx];
+   }
+}
+
+static void *
+d3d12_create_sampler_state(struct pipe_context *pctx,
+                           const struct pipe_sampler_state *state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+   struct d3d12_sampler_state *ss = CALLOC_STRUCT(d3d12_sampler_state);
+   D3D12_SAMPLER_DESC desc = {0};
+   if (!state)
+      return NULL;
+
+   ss->filter = (pipe_tex_filter)state->min_img_filter;
+   ss->wrap_r = (pipe_tex_wrap)state->wrap_r;
+   ss->wrap_s = (pipe_tex_wrap)state->wrap_s;
+   ss->wrap_t = (pipe_tex_wrap)state->wrap_t;
+   ss->lod_bias = state->lod_bias;
+   ss->min_lod = state->min_lod;
+   ss->max_lod = state->max_lod;
+   memcpy(ss->border_color, state->border_color.f, sizeof(float) * 4);
+   ss->compare_func = (pipe_compare_func)state->compare_func;
+
+   if (state->min_mip_filter < PIPE_TEX_MIPFILTER_NONE) {
+      desc.MinLOD = state->min_lod;
+      desc.MaxLOD = state->max_lod;
+   } else if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      desc.MinLOD = 0;
+      desc.MaxLOD = 0;
+   } else {
+      unreachable("unexpected mip filter");
+   }
+
+   if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+      desc.ComparisonFunc = compare_op((pipe_compare_func) state->compare_func);
+      desc.Filter = D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT;
+   } else if (state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+      desc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+      desc.Filter = get_filter(state);
+   } else
+      unreachable("unexpected comparison mode");
+
+   desc.MaxAnisotropy = state->max_anisotropy;
+
+   desc.AddressU = sampler_address_mode((pipe_tex_wrap) state->wrap_s,
+                                        (pipe_tex_filter) state->min_img_filter);
+   desc.AddressV = sampler_address_mode((pipe_tex_wrap) state->wrap_t,
+                                        (pipe_tex_filter) state->min_img_filter);
+   desc.AddressW = sampler_address_mode((pipe_tex_wrap) state->wrap_r,
+                                        (pipe_tex_filter) state->min_img_filter);
+   desc.MipLODBias = CLAMP(state->lod_bias, -16.0f, 15.99f);
+   memcpy(desc.BorderColor, state->border_color.f, sizeof(float) * 4);
+
+   // TODO Normalized Coordinates?
+   d3d12_descriptor_pool_alloc_handle(ctx->sampler_pool, &ss->handle);
+   screen->dev->CreateSampler(&desc, ss->handle.cpu_handle);
+
+   if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+      desc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+      struct pipe_sampler_state fake_state = *state;
+      fake_state.compare_mode = PIPE_TEX_COMPARE_NONE;
+      desc.Filter = get_filter(&fake_state);
+
+      d3d12_descriptor_pool_alloc_handle(ctx->sampler_pool,
+                                         &ss->handle_without_shadow);
+      screen->dev->CreateSampler(&desc,
+                                 ss->handle_without_shadow.cpu_handle);
+      ss->is_shadow_sampler = true;
+   }
+
+   return ss;
+}
+
+/* Bind `num_samplers` sampler CSOs for shader stage `shader`, starting at
+ * slot `start_slot`.
+ *
+ * For every slot the CSO pointer is stored in ctx->samplers, and its wrap
+ * modes, LOD bias/range and border color are mirrored into
+ * ctx->tex_wrap_states; the compare function goes into ctx->tex_compare_func.
+ * A NULL entry zeroes the slot's wrap state.
+ *
+ * The stage's samplers are always flagged dirty. D3D12_DIRTY_SHADER is only
+ * raised when a wrap mode or the border color of some slot actually changed.
+ */
+static void
+d3d12_bind_sampler_states(struct pipe_context *pctx,
+                          enum pipe_shader_type shader,
+                          unsigned start_slot,
+                          unsigned num_samplers,
+                          void **samplers)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   bool shader_state_dirty = false;
+
+   /* The compare func is stored below by casting pipe_compare_func to
+    * enum compare_func, so check at compile time that the enumerators agree.
+    * Note that EQUAL is not among the asserted values.
+    */
+#define STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(X) \
+   static_assert((enum compare_func)PIPE_FUNC_##X == COMPARE_FUNC_##X, #X " needs switch case");
+
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(LESS);
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(GREATER);
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(LEQUAL);
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(GEQUAL);
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(NOTEQUAL);
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(NEVER);
+   STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(ALWAYS);
+
+#undef STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC
+
+   for (unsigned i = 0; i < num_samplers; ++i) {
+      d3d12_sampler_state *sampler = (struct d3d12_sampler_state*) samplers[i];
+      ctx->samplers[shader][start_slot + i] = sampler;
+      dxil_wrap_sampler_state &wrap = ctx->tex_wrap_states[shader][start_slot + i];
+      if (sampler) {
+         /* compare against the previously stored values before they are
+          * overwritten below
+          */
+         shader_state_dirty |= wrap.wrap[0] != sampler->wrap_s ||
+                               wrap.wrap[1] != sampler->wrap_t ||
+                               wrap.wrap[2] != sampler->wrap_r;
+         shader_state_dirty |= !!memcmp(wrap.border_color, sampler->border_color, 4 * sizeof(float));
+
+         wrap.wrap[0] = sampler->wrap_s;
+         wrap.wrap[1] = sampler->wrap_t;
+         wrap.wrap[2] = sampler->wrap_r;
+         /* LOD values are copied, but a change in them doesn't raise
+          * shader_state_dirty
+          */
+         wrap.lod_bias = sampler->lod_bias;
+         wrap.min_lod = sampler->min_lod;
+         wrap.max_lod = sampler->max_lod;
+         memcpy(wrap.border_color, sampler->border_color, 4 * sizeof(float));
+         ctx->tex_compare_func[shader][start_slot + i] = (enum compare_func)sampler->compare_func;
+      } else {
+         /* unbound slot: reset the wrap state; tex_compare_func is left as is */
+         memset(&wrap, 0, sizeof (dxil_wrap_sampler_state));
+      }
+   }
+
+   /* the sampler count is overwritten with the end of the range just bound */
+   ctx->num_samplers[shader] = start_slot + num_samplers;
+   ctx->shader_dirty[shader] |= D3D12_SHADER_DIRTY_SAMPLERS;
+   if (shader_state_dirty)
+      ctx->state_dirty |= D3D12_DIRTY_SHADER;
+}
+
+static void
+d3d12_delete_sampler_state(struct pipe_context *pctx,
+                           void *ss)
+{
+   struct d3d12_batch *batch = d3d12_current_batch(d3d12_context(pctx));
+   struct d3d12_sampler_state *state = (struct d3d12_sampler_state*) ss;
+   util_dynarray_append(&batch->zombie_samplers, d3d12_descriptor_handle,
+                        state->handle);
+   if (state->is_shadow_sampler)
+      util_dynarray_append(&batch->zombie_samplers, d3d12_descriptor_handle,
+                           state->handle_without_shadow);
+   FREE(ss);
+}
+
+static D3D12_SRV_DIMENSION
+view_dimension(enum pipe_texture_target target, unsigned samples)
+{
+   switch (target) {
+   case PIPE_BUFFER: return D3D12_SRV_DIMENSION_BUFFER;
+   case PIPE_TEXTURE_1D: return D3D12_SRV_DIMENSION_TEXTURE1D;
+   case PIPE_TEXTURE_1D_ARRAY: return D3D12_SRV_DIMENSION_TEXTURE1DARRAY;
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D:
+      return samples > 1 ? D3D12_SRV_DIMENSION_TEXTURE2DMS :
+                           D3D12_SRV_DIMENSION_TEXTURE2D;
+   case PIPE_TEXTURE_2D_ARRAY:
+      return samples > 1 ? D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY :
+                           D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
+   case PIPE_TEXTURE_CUBE: return D3D12_SRV_DIMENSION_TEXTURECUBE;
+   case PIPE_TEXTURE_CUBE_ARRAY: return D3D12_SRV_DIMENSION_TEXTURECUBEARRAY;
+   case PIPE_TEXTURE_3D: return D3D12_SRV_DIMENSION_TEXTURE3D;
+   default:
+      unreachable("unexpected target");
+   }
+}
+
+/* Translate one gallium swizzle selector into a D3D12 shader component
+ * mapping. PIPE_SWIZZLE_NONE yields `id`, the mapping supplied by the caller.
+ */
+static D3D12_SHADER_COMPONENT_MAPPING
+component_mapping(enum pipe_swizzle swizzle, D3D12_SHADER_COMPONENT_MAPPING id)
+{
+   switch (swizzle) {
+   case PIPE_SWIZZLE_NONE:
+      return id;
+
+   /* constants */
+   case PIPE_SWIZZLE_0:
+      return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0;
+   case PIPE_SWIZZLE_1:
+      return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1;
+
+   /* memory components */
+   case PIPE_SWIZZLE_X:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0;
+   case PIPE_SWIZZLE_Y:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1;
+   case PIPE_SWIZZLE_Z:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2;
+   case PIPE_SWIZZLE_W:
+      return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3;
+
+   default:
+      unreachable("unexpected swizzle");
+   }
+}
+
+static struct pipe_sampler_view *
+d3d12_create_sampler_view(struct pipe_context *pctx,
+                          struct pipe_resource *texture,
+                          const struct pipe_sampler_view *state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+   struct d3d12_resource *res = d3d12_resource(texture);
+   struct d3d12_sampler_view *sampler_view = CALLOC_STRUCT(d3d12_sampler_view);
+
+   sampler_view->base = *state;
+   sampler_view->base.texture = NULL;
+   pipe_resource_reference(&sampler_view->base.texture, texture);
+   sampler_view->base.reference.count = 1;
+   sampler_view->base.context = pctx;
+   sampler_view->mip_levels = state->u.tex.last_level - state->u.tex.first_level + 1;
+   sampler_view->array_size = texture->array_size;
+
+   D3D12_SHADER_RESOURCE_VIEW_DESC desc = {};
+   struct d3d12_format_info format_info = d3d12_get_format_info(state->format, state->target);
+   pipe_swizzle swizzle[4] = {
+      format_info.swizzle[sampler_view->base.swizzle_r],
+      format_info.swizzle[sampler_view->base.swizzle_g],
+      format_info.swizzle[sampler_view->base.swizzle_b],
+      format_info.swizzle[sampler_view->base.swizzle_a]
+   };
+
+   sampler_view->swizzle_override_r = swizzle[0];
+   sampler_view->swizzle_override_g = swizzle[1];
+   sampler_view->swizzle_override_b = swizzle[2];
+   sampler_view->swizzle_override_a = swizzle[3];
+
+   desc.Format = d3d12_get_resource_srv_format(state->format, state->target);
+   desc.ViewDimension = view_dimension(state->target, texture->nr_samples);
+
+   /* Integer cube textures are not really supported, because TextureLoad doesn't exist
+    * for cube maps, and we sampling is not supported for integer textures, so we have to
+    * handle this SRV as if it were a 2D texture array */
+   if ((desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE ||
+        desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) &&
+       util_format_is_pure_integer(state->format)) {
+      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
+   }
+
+   desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
+         component_mapping(swizzle[0], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0),
+         component_mapping(swizzle[1], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1),
+         component_mapping(swizzle[2], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2),
+         component_mapping(swizzle[3], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3)
+         );
+
+   unsigned array_size = state->u.tex.last_layer - state->u.tex.first_layer + 1;
+   switch (desc.ViewDimension) {
+   case D3D12_SRV_DIMENSION_TEXTURE1D:
+      if (state->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 1D SRV from layer %d\n",
+                      state->u.tex.first_layer);
+
+      desc.Texture1D.MostDetailedMip = state->u.tex.first_level;
+      desc.Texture1D.MipLevels = sampler_view->mip_levels;
+      desc.Texture1D.ResourceMinLODClamp = 0.0f;
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURE1DARRAY:
+      desc.Texture1DArray.MostDetailedMip = state->u.tex.first_level;
+      desc.Texture1DArray.MipLevels = sampler_view->mip_levels;
+      desc.Texture1DArray.ResourceMinLODClamp = 0.0f;
+      desc.Texture1DArray.FirstArraySlice = state->u.tex.first_layer;
+      desc.Texture1DArray.ArraySize = array_size;
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURE2D:
+      if (state->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 2D SRV from layer %d\n",
+                      state->u.tex.first_layer);
+
+      desc.Texture2D.MostDetailedMip = state->u.tex.first_level;
+      desc.Texture2D.MipLevels = sampler_view->mip_levels;
+      desc.Texture2D.PlaneSlice = format_info.plane_slice;
+      desc.Texture2D.ResourceMinLODClamp = 0.0f;
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURE2DMS:
+      if (state->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 2DMS SRV from layer %d\n",
+                      state->u.tex.first_layer);
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURE2DARRAY:
+      desc.Texture2DArray.MostDetailedMip = state->u.tex.first_level;
+      desc.Texture2DArray.MipLevels = sampler_view->mip_levels;
+      desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
+      desc.Texture2DArray.FirstArraySlice = state->u.tex.first_layer;
+      desc.Texture2DArray.PlaneSlice = format_info.plane_slice;
+      desc.Texture2DArray.ArraySize = array_size;
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY:
+      desc.Texture2DMSArray.FirstArraySlice = state->u.tex.first_layer;
+      desc.Texture2DMSArray.ArraySize = array_size;
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURE3D:
+      if (state->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 3D SRV from layer %d\n",
+                      state->u.tex.first_layer);
+
+      desc.Texture3D.MostDetailedMip = state->u.tex.first_level;
+      desc.Texture3D.MipLevels = sampler_view->mip_levels;
+      desc.Texture3D.ResourceMinLODClamp = 0.0f;
+      break;
+   case D3D12_SRV_DIMENSION_TEXTURECUBE:
+      if (state->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create CUBE SRV from layer %d\n",
+                      state->u.tex.first_layer);
+
+      desc.TextureCube.MostDetailedMip = state->u.tex.first_level;
+      desc.TextureCube.MipLevels = sampler_view->mip_levels;
+      desc.TextureCube.ResourceMinLODClamp = 0.0f;
+      break;
+   case D3D12_SRV_DIMENSION_BUFFER:
+      desc.Buffer.FirstElement = 0;
+      desc.Buffer.StructureByteStride = 0;
+      desc.Buffer.NumElements = texture->width0 / util_format_get_blocksize(state->format);
+      break;
+   }
+
+   d3d12_descriptor_pool_alloc_handle(ctx->view_pool, &sampler_view->handle);
+   screen->dev->CreateShaderResourceView(d3d12_resource_resource(res), &desc,
+                                         sampler_view->handle.cpu_handle);
+
+   return &sampler_view->base;
+}
+
+static void
+d3d12_set_sampler_views(struct pipe_context *pctx,
+                        enum pipe_shader_type shader_type,
+                        unsigned start_slot,
+                        unsigned num_views,
+                        struct pipe_sampler_view **views)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   assert(views);
+   unsigned shader_bit = (1 << shader_type);
+   ctx->has_int_samplers &= ~shader_bit;
+
+   for (unsigned i = 0; i < num_views; ++i) {
+      pipe_sampler_view_reference(
+         &ctx->sampler_views[shader_type][start_slot + i],
+         views[i]);
+
+      if (views[i]) {
+         dxil_wrap_sampler_state &wss = ctx->tex_wrap_states[shader_type][start_slot + i];
+         dxil_texture_swizzle_state &swizzle_state = ctx->tex_swizzle_state[shader_type][i];
+         if (util_format_is_pure_integer(views[i]->format)) {
+            ctx->has_int_samplers |= shader_bit;
+            wss.is_int_sampler = 1;
+            wss.last_level = views[i]->texture->last_level;
+            /* When we emulate a integer cube texture (array) by using a texture 2d Array
+             * the coordinates are evaluated to always reside withing the acceptable range
+             * because the 3d ray for picking the texel is always pointing at one cube face,
+             * hence we can skip the boundary condition handling when the texture operations are
+             * lowered to texel fetches later. */
+            wss.skip_boundary_conditions = views[i]->target == PIPE_TEXTURE_CUBE ||
+                                           views[i]->target == PIPE_TEXTURE_CUBE_ARRAY;
+         } else {
+            wss.is_int_sampler = 0;
+         }
+         /* We need the swizzle state for compare texture lowering, because it
+          * encode the use of the shadow texture lookup result as either luminosity,
+          * intensity, or alpha. and we need the swizzle state for applying the
+          * boundary color correctly */
+         struct d3d12_sampler_view *ss = d3d12_sampler_view(views[i]);
+         swizzle_state.swizzle_r = ss->swizzle_override_r;
+         swizzle_state.swizzle_g = ss->swizzle_override_g;
+         swizzle_state.swizzle_b = ss->swizzle_override_b;
+         swizzle_state.swizzle_a = ss->swizzle_override_a;
+      }
+   }
+   ctx->num_sampler_views[shader_type] = start_slot + num_views;
+   ctx->shader_dirty[shader_type] |= D3D12_SHADER_DIRTY_SAMPLER_VIEWS;
+}
+
+/* Destroy a sampler view: return its descriptor handle, drop the reference
+ * it holds on the texture, then free the structure itself. */
+static void
+d3d12_destroy_sampler_view(struct pipe_context *pctx,
+                           struct pipe_sampler_view *pview)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_sampler_view *sv = d3d12_sampler_view(pview);
+
+   d3d12_descriptor_handle_free(&sv->handle);
+   pipe_resource_reference(&sv->base.texture, NULL);
+
+   FREE(sv);
+}
+
+static void
+delete_shader(struct d3d12_context *ctx, enum pipe_shader_type stage,
+              struct d3d12_shader_selector *shader)
+{
+   d3d12_gfx_pipeline_state_cache_invalidate_shader(ctx, stage, shader);
+
+   /* Make sure the pipeline state no longer reference the deleted shader */
+   struct d3d12_shader *iter = shader->first;
+   while (iter) {
+      if (ctx->gfx_pipeline_state.stages[stage] == iter) {
+         ctx->gfx_pipeline_state.stages[stage] = NULL;
+         break;
+      }
+      iter = iter->next_variant;
+   }
+
+   d3d12_shader_free(shader);
+}
+
+/* Record `shader` as the selector bound to graphics stage `stage`.
+ * `stage` must be below D3D12_GFX_SHADER_STAGES (asserted). */
+static void
+bind_stage(struct d3d12_context *ctx, enum pipe_shader_type stage,
+           struct d3d12_shader_selector *shader)
+{
+   assert(stage < D3D12_GFX_SHADER_STAGES);
+   ctx->gfx_stages[stage] = shader;
+}
+
+/* pipe_context hook: create a vertex shader object from `shader`. */
+static void *
+d3d12_create_vs_state(struct pipe_context *pctx,
+                      const struct pipe_shader_state *shader)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   return d3d12_create_shader(ctx, PIPE_SHADER_VERTEX, shader);
+}
+
+/* pipe_context hook: bind the vertex shader selector `vss`. */
+static void
+d3d12_bind_vs_state(struct pipe_context *pctx,
+                    void *vss)
+{
+   struct d3d12_shader_selector *sel = (struct d3d12_shader_selector *) vss;
+
+   bind_stage(d3d12_context(pctx), PIPE_SHADER_VERTEX, sel);
+}
+
+/* pipe_context hook: delete the vertex shader selector `vs`. */
+static void
+d3d12_delete_vs_state(struct pipe_context *pctx,
+                      void *vs)
+{
+   struct d3d12_shader_selector *sel = (struct d3d12_shader_selector *) vs;
+
+   delete_shader(d3d12_context(pctx), PIPE_SHADER_VERTEX, sel);
+}
+
+/* pipe_context hook: create a fragment shader object from `shader`. */
+static void *
+d3d12_create_fs_state(struct pipe_context *pctx,
+                      const struct pipe_shader_state *shader)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   return d3d12_create_shader(ctx, PIPE_SHADER_FRAGMENT, shader);
+}
+
+/* pipe_context hook: bind the fragment shader selector `fss`. */
+static void
+d3d12_bind_fs_state(struct pipe_context *pctx,
+                    void *fss)
+{
+   struct d3d12_shader_selector *sel = (struct d3d12_shader_selector *) fss;
+
+   bind_stage(d3d12_context(pctx), PIPE_SHADER_FRAGMENT, sel);
+}
+
+/* pipe_context hook: delete the fragment shader selector `fs`. */
+static void
+d3d12_delete_fs_state(struct pipe_context *pctx,
+                      void *fs)
+{
+   struct d3d12_shader_selector *sel = (struct d3d12_shader_selector *) fs;
+
+   delete_shader(d3d12_context(pctx), PIPE_SHADER_FRAGMENT, sel);
+}
+
+/* pipe_context hook: create a geometry shader object from `shader`. */
+static void *
+d3d12_create_gs_state(struct pipe_context *pctx,
+                      const struct pipe_shader_state *shader)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   return d3d12_create_shader(ctx, PIPE_SHADER_GEOMETRY, shader);
+}
+
+/* pipe_context hook: bind the geometry shader selector `gss`. */
+static void
+d3d12_bind_gs_state(struct pipe_context *pctx, void *gss)
+{
+   struct d3d12_shader_selector *sel = (struct d3d12_shader_selector *) gss;
+
+   bind_stage(d3d12_context(pctx), PIPE_SHADER_GEOMETRY, sel);
+}
+
+/* pipe_context hook: delete the geometry shader selector `gs`. */
+static void
+d3d12_delete_gs_state(struct pipe_context *pctx, void *gs)
+{
+   struct d3d12_shader_selector *sel = (struct d3d12_shader_selector *) gs;
+
+   delete_shader(d3d12_context(pctx), PIPE_SHADER_GEOMETRY, sel);
+}
+
+/* Create the polygon-stipple helper objects (texture, sampler view and
+ * sampler state) and store them in ctx->pstipple.
+ *
+ * Returns true only if all three were created.  Creation stops at the first
+ * failure; objects created before that point stay stored in ctx->pstipple. */
+static bool
+d3d12_init_polygon_stipple(struct pipe_context *pctx)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   ctx->pstipple.texture = util_pstipple_create_stipple_texture(pctx, NULL);
+   if (ctx->pstipple.texture) {
+      ctx->pstipple.sampler_view = util_pstipple_create_sampler_view(pctx, ctx->pstipple.texture);
+      if (ctx->pstipple.sampler_view) {
+         ctx->pstipple.sampler_cso = (struct d3d12_sampler_state *)util_pstipple_create_sampler(pctx);
+         if (ctx->pstipple.sampler_cso)
+            return true;
+      }
+   }
+
+   return false;
+}
+
+static void
+d3d12_set_polygon_stipple(struct pipe_context *pctx,
+                          const struct pipe_poly_stipple *ps)
+{
+   static bool initialized = false;
+   static const uint32_t zero[32] = {0};
+   static uint32_t undef[32] = {0};
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   if (!initialized)
+      memset(undef, UINT32_MAX, sizeof(undef));
+
+   if (!memcmp(ctx->pstipple.pattern, ps->stipple, sizeof(ps->stipple)))
+      return;
+
+   memcpy(ctx->pstipple.pattern, ps->stipple, sizeof(ps->stipple));
+   ctx->pstipple.enabled = !!memcmp(ps->stipple, undef, sizeof(ps->stipple)) &&
+                           !!memcmp(ps->stipple, zero, sizeof(ps->stipple));
+   if (ctx->pstipple.enabled)
+      util_pstipple_update_stipple_texture(pctx, ctx->pstipple.texture, ps->stipple);
+}
+
+static void
+d3d12_set_vertex_buffers(struct pipe_context *pctx,
+                         unsigned start_slot,
+                         unsigned num_buffers,
+                         const struct pipe_vertex_buffer *buffers)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   util_set_vertex_buffers_count(ctx->vbs, &ctx->num_vbs,
+                                 buffers, start_slot, num_buffers);
+
+   for (unsigned i = 0; i < ctx->num_vbs; ++i) {
+      const struct pipe_vertex_buffer* buf = ctx->vbs + i;
+      if (!buf->buffer.resource)
+         continue;
+      struct d3d12_resource *res = d3d12_resource(buf->buffer.resource);
+      ctx->vbvs[i].BufferLocation = d3d12_resource_gpu_virtual_address(res) + buf->buffer_offset;
+      ctx->vbvs[i].StrideInBytes = buf->stride;
+      ctx->vbvs[i].SizeInBytes = res->base.width0 - buf->buffer_offset;
+   }
+   ctx->state_dirty |= D3D12_DIRTY_VERTEX_BUFFERS;
+}
+
+static void
+d3d12_set_viewport_states(struct pipe_context *pctx,
+                          unsigned start_slot,
+                          unsigned num_viewports,
+                          const struct pipe_viewport_state *state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   for (unsigned i = 0; i < num_viewports; ++i) {
+      if (state[i].scale[1] < 0) {
+         ctx->flip_y = 1.0f;
+         ctx->viewports[start_slot + i].TopLeftY = state[i].translate[1] + state[i].scale[1];
+         ctx->viewports[start_slot + i].Height = -state[i].scale[1] * 2;
+      } else {
+         ctx->flip_y = -1.0f;
+         ctx->viewports[start_slot + i].TopLeftY = state[i].translate[1] - state[i].scale[1];
+         ctx->viewports[start_slot + i].Height = state[i].scale[1] * 2;
+      }
+      ctx->viewports[start_slot + i].TopLeftX = state[i].translate[0] - state[i].scale[0];
+      ctx->viewports[start_slot + i].Width = state[i].scale[0] * 2;
+
+      float near_depth = state[i].translate[2] - state[i].scale[2];
+      float far_depth = state[i].translate[2] + state[i].scale[2];
+
+      ctx->reverse_depth_range = near_depth > far_depth;
+      if (ctx->reverse_depth_range) {
+         float tmp = near_depth;
+         near_depth = far_depth;
+         far_depth = tmp;
+      }
+      ctx->viewports[start_slot + i].MinDepth = near_depth;
+      ctx->viewports[start_slot + i].MaxDepth = far_depth;
+      ctx->viewport_states[start_slot + i] = state[i];
+   }
+   ctx->num_viewports = start_slot + num_viewports;
+   ctx->state_dirty |= D3D12_DIRTY_VIEWPORT;
+}
+
+
+static void
+d3d12_set_scissor_states(struct pipe_context *pctx,
+                         unsigned start_slot, unsigned num_scissors,
+                         const struct pipe_scissor_state *states)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   for (unsigned i = 0; i < num_scissors; i++) {
+      ctx->scissors[start_slot + i].left = states[i].minx;
+      ctx->scissors[start_slot + i].top = states[i].miny;
+      ctx->scissors[start_slot + i].right = states[i].maxx;
+      ctx->scissors[start_slot + i].bottom = states[i].maxy;
+      ctx->scissor_states[start_slot + i] = states[i];
+   }
+   ctx->state_dirty |= D3D12_DIRTY_SCISSOR;
+}
+
+static void
+d3d12_set_constant_buffer(struct pipe_context *pctx,
+                          enum pipe_shader_type shader, uint index,
+                          const struct pipe_constant_buffer *buf)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   if (buf) {
+      struct pipe_resource *buffer = buf->buffer;
+      unsigned offset = buf->buffer_offset;
+      if (buf->user_buffer) {
+         u_upload_data(pctx->const_uploader, 0, buf->buffer_size,
+                       D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+                       buf->user_buffer, &offset, &ctx->cbufs[shader][index].buffer);
+
+      } else
+         pipe_resource_reference(&ctx->cbufs[shader][index].buffer, buffer);
+
+
+      ctx->cbufs[shader][index].buffer_offset = offset;
+      ctx->cbufs[shader][index].buffer_size = buf->buffer_size;
+      ctx->cbufs[shader][index].user_buffer = NULL;
+
+   } else {
+      pipe_resource_reference(&ctx->cbufs[shader][index].buffer, NULL);
+      ctx->cbufs[shader][index].buffer_offset = 0;
+      ctx->cbufs[shader][index].buffer_size = 0;
+      ctx->cbufs[shader][index].user_buffer = NULL;
+   }
+   ctx->shader_dirty[shader] |= D3D12_SHADER_DIRTY_CONSTBUF;
+}
+
+static void
+d3d12_set_framebuffer_state(struct pipe_context *pctx,
+                            const struct pipe_framebuffer_state *state)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   int samples = -1;
+
+   util_copy_framebuffer_state(&d3d12_context(pctx)->fb, state);
+
+   ctx->gfx_pipeline_state.num_cbufs = state->nr_cbufs;
+   ctx->gfx_pipeline_state.has_float_rtv = false;
+   for (int i = 0; i < state->nr_cbufs; ++i) {
+      if (state->cbufs[i]) {
+         if (util_format_is_float(state->cbufs[i]->format))
+            ctx->gfx_pipeline_state.has_float_rtv = true;
+         ctx->gfx_pipeline_state.rtv_formats[i] = d3d12_get_format(state->cbufs[i]->format);
+         samples = MAX2(samples, (int)state->cbufs[i]->texture->nr_samples);
+      } else {
+         ctx->gfx_pipeline_state.rtv_formats[i] = DXGI_FORMAT_UNKNOWN;
+      }
+   }
+
+   if (state->zsbuf) {
+      ctx->gfx_pipeline_state.dsv_format = d3d12_get_resource_rt_format(state->zsbuf->format);
+      samples = MAX2(samples, (int)ctx->fb.zsbuf->texture->nr_samples);
+   } else
+      ctx->gfx_pipeline_state.dsv_format = DXGI_FORMAT_UNKNOWN;
+
+   if (samples < 0)
+      samples = state->samples;
+
+   ctx->gfx_pipeline_state.samples = MAX2(samples, 1);
+
+   ctx->state_dirty |= D3D12_DIRTY_FRAMEBUFFER;
+}
+
+/* pipe_context hook: store the four-component blend colour and mark it
+ * dirty. */
+static void
+d3d12_set_blend_color(struct pipe_context *pctx,
+                     const struct pipe_blend_color *color)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   memcpy(ctx->blend_factor, color->color, 4 * sizeof(float));
+
+   ctx->state_dirty |= D3D12_DIRTY_BLEND_COLOR;
+}
+
+/* pipe_context hook: store the sample mask in the pipeline-state key and
+ * mark it dirty. */
+static void
+d3d12_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+   struct d3d12_context *d3d12 = d3d12_context(pctx);
+
+   d3d12->gfx_pipeline_state.sample_mask = sample_mask;
+   d3d12->state_dirty |= D3D12_DIRTY_SAMPLE_MASK;
+}
+
+/* pipe_context hook: store the stencil reference values and mark them dirty.
+ * With verbose debugging enabled, a message is printed when the front and
+ * back reference values differ. */
+static void
+d3d12_set_stencil_ref(struct pipe_context *pctx,
+                      const struct pipe_stencil_ref *ref)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   if (ref->ref_value[0] != ref->ref_value[1]) {
+      if (d3d12_debug & D3D12_DEBUG_VERBOSE)
+         debug_printf("D3D12: Different values for front and back stencil reference are not supported\n");
+   }
+
+   ctx->stencil_ref = *ref;
+   ctx->state_dirty |= D3D12_DIRTY_STENCIL_REF;
+}
+
+/* pipe_context hook for user clip state.  Intentionally a no-op here: the
+ * supplied clip state is neither stored nor used by this function. */
+static void
+d3d12_set_clip_state(struct pipe_context *pctx,
+                     const struct pipe_clip_state *pcs)
+{
+}
+
+static struct pipe_stream_output_target *
+d3d12_create_stream_output_target(struct pipe_context *pctx,
+                                  struct pipe_resource *pres,
+                                  unsigned buffer_offset,
+                                  unsigned buffer_size)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+   struct d3d12_resource *res = d3d12_resource(pres);
+   struct d3d12_stream_output_target *cso = CALLOC_STRUCT(d3d12_stream_output_target);
+
+   if (!cso)
+      return NULL;
+
+   pipe_reference_init(&cso->base.reference, 1);
+   pipe_resource_reference(&cso->base.buffer, pres);
+   cso->base.buffer_offset = buffer_offset;
+   cso->base.buffer_size = buffer_size;
+   cso->base.context = pctx;
+
+   util_range_add(pres, &res->valid_buffer_range, buffer_offset,
+                  buffer_offset + buffer_size);
+
+   return &cso->base;
+}
+
+/* pipe_context hook: destroy a stream-output target.
+ *
+ * Releases the reference on the output buffer and on the fill-size buffer
+ * before freeing the target.  The fill buffer reference is taken when a
+ * fill buffer is sub-allocated for (or shared with) the target; without
+ * dropping it here that reference would leak.  fill_buffer is NULL for
+ * targets that never received one, which the reference helper accepts. */
+static void
+d3d12_stream_output_target_destroy(struct pipe_context *ctx,
+                                   struct pipe_stream_output_target *state)
+{
+   struct d3d12_stream_output_target *target =
+      (struct d3d12_stream_output_target *)state;
+
+   pipe_resource_reference(&target->base.buffer, NULL);
+   pipe_resource_reference(&target->fill_buffer, NULL);
+
+   FREE(target);
+}
+
+/* Fill a D3D12 stream-output buffer view from `target`: the output range
+ * comes from the target's buffer/offset/size, and the filled-size location
+ * from its fill buffer and fill-buffer offset. */
+static void
+fill_stream_output_buffer_view(D3D12_STREAM_OUTPUT_BUFFER_VIEW *view,
+                               struct d3d12_stream_output_target *target)
+{
+   struct d3d12_resource *out_res = d3d12_resource(target->base.buffer);
+   struct d3d12_resource *counter_res = d3d12_resource(target->fill_buffer);
+
+   view->SizeInBytes = target->base.buffer_size;
+   view->BufferLocation =
+      d3d12_resource_gpu_virtual_address(out_res) + target->base.buffer_offset;
+   view->BufferFilledSizeLocation =
+      d3d12_resource_gpu_virtual_address(counter_res) + target->fill_buffer_offset;
+}
+
+static void
+d3d12_set_stream_output_targets(struct pipe_context *pctx,
+                                unsigned num_targets,
+                                struct pipe_stream_output_target **targets,
+                                const unsigned *offsets)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   assert(num_targets <= ARRAY_SIZE(ctx->so_targets));
+
+   d3d12_disable_fake_so_buffers(ctx);
+
+   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+      struct d3d12_stream_output_target *target =
+         i < num_targets ? (struct d3d12_stream_output_target *)targets[i] : NULL;
+
+      if (target) {
+         /* Sub-allocate a new fill buffer each time to avoid GPU/CPU synchronization */
+         u_suballocator_alloc(ctx->so_allocator, sizeof(uint64_t), 4,
+                              &target->fill_buffer_offset, &target->fill_buffer);
+         fill_stream_output_buffer_view(&ctx->so_buffer_views[i], target);
+         pipe_so_target_reference(&ctx->so_targets[i], targets[i]);
+      } else {
+         ctx->so_buffer_views[i].SizeInBytes = 0;
+         pipe_so_target_reference(&ctx->so_targets[i], NULL);
+      }
+   }
+
+   ctx->gfx_pipeline_state.num_so_targets = num_targets;
+   ctx->state_dirty |= D3D12_DIRTY_STREAM_OUTPUT;
+}
+
+/* Replace the bound stream-output targets with temporary ("fake") targets
+ * whose buffers are `factor` times larger.
+ *
+ * Returns true immediately if fake buffers with the same factor are already
+ * active; otherwise any active fake buffers are disabled first.  Returns
+ * false if a fake target cannot be allocated (targets created earlier in
+ * the loop are left in place in that case). */
+bool
+d3d12_enable_fake_so_buffers(struct d3d12_context *ctx, unsigned factor)
+{
+   if (ctx->fake_so_buffer_factor == factor)
+      return true;
+
+   d3d12_disable_fake_so_buffers(ctx);
+
+   for (int i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) {
+      /* NOTE(review): target is dereferenced below without a NULL check;
+       * presumably every slot below num_so_targets is bound - confirm. */
+      struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)ctx->so_targets[i];
+      struct d3d12_stream_output_target *fake_target;
+
+      fake_target = CALLOC_STRUCT(d3d12_stream_output_target);
+      if (!fake_target)
+         return false;
+      pipe_reference_init(&fake_target->base.reference, 1);
+      fake_target->base.context = &ctx->base;
+
+      d3d12_resource_wait_idle(ctx, d3d12_resource(target->base.buffer));
+
+      /* Check if another target is using the same buffer; if so, share the
+       * fake buffer, fill buffer and cached filled size of that earlier slot */
+      for (int j = i - 1; j >= 0; --j) {
+         if (ctx->so_targets[j] && ctx->so_targets[j]->buffer == target->base.buffer) {
+            struct d3d12_stream_output_target *prev_target =
+               (struct d3d12_stream_output_target *)ctx->fake_so_targets[j];
+            pipe_resource_reference(&fake_target->base.buffer, prev_target->base.buffer);
+            pipe_resource_reference(&fake_target->fill_buffer, prev_target->fill_buffer);
+            fake_target->fill_buffer_offset = prev_target->fill_buffer_offset;
+            fake_target->cached_filled_size = prev_target->cached_filled_size;
+            break;
+         }
+      }
+
+      /* Otherwise create a new SO buffer `factor` times the original size
+       * (e.g. 6x when 2 triangles are emitted instead of 1 point), give it
+       * its own fill buffer, and read back how much of the real target has
+       * been filled so far */
+      if (!fake_target->base.buffer) {
+         fake_target->base.buffer = pipe_buffer_create(ctx->base.screen,
+                                                       PIPE_BIND_STREAM_OUTPUT,
+                                                       PIPE_USAGE_STAGING,
+                                                       target->base.buffer->width0 * factor);
+         u_suballocator_alloc(ctx->so_allocator, sizeof(uint64_t), 4,
+                              &fake_target->fill_buffer_offset, &fake_target->fill_buffer);
+         pipe_buffer_read(&ctx->base, target->fill_buffer,
+                          target->fill_buffer_offset, sizeof(uint64_t),
+                          &fake_target->cached_filled_size);
+      }
+
+      /* Offset and remaining size are scaled by the same factor. */
+      fake_target->base.buffer_offset = target->base.buffer_offset * factor;
+      fake_target->base.buffer_size = (target->base.buffer_size - fake_target->cached_filled_size) * factor;
+      ctx->fake_so_targets[i] = &fake_target->base;
+      fill_stream_output_buffer_view(&ctx->fake_so_buffer_views[i], fake_target);
+   }
+
+   ctx->fake_so_buffer_factor = factor;
+   ctx->cmdlist_dirty |= D3D12_DIRTY_STREAM_OUTPUT;
+
+   return true;
+}
+
+bool
+d3d12_disable_fake_so_buffers(struct d3d12_context *ctx)
+{
+   if (ctx->fake_so_buffer_factor == 0)
+      return true;
+
+   d3d12_flush_cmdlist_and_wait(ctx);
+
+   for (int i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) {
+      struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)ctx->so_targets[i];
+      struct d3d12_stream_output_target *fake_target = (struct d3d12_stream_output_target *)ctx->fake_so_targets[i];
+      uint64_t filled_size;
+      struct pipe_transfer *src_transfer, *dst_transfer;
+      uint8_t *src, *dst;
+
+      if (fake_target == NULL)
+         continue;
+
+      pipe_buffer_read(&ctx->base, fake_target->fill_buffer,
+                       fake_target->fill_buffer_offset, sizeof(uint64_t),
+                       &filled_size);
+
+      src = (uint8_t *)pipe_buffer_map_range(&ctx->base, fake_target->base.buffer,
+                                             fake_target->base.buffer_offset,
+                                             fake_target->base.buffer_size,
+                                             PIPE_MAP_READ, &src_transfer);
+      dst = (uint8_t *)pipe_buffer_map_range(&ctx->base, target->base.buffer,
+                                             target->base.buffer_offset,
+                                             target->base.buffer_size,
+                                             PIPE_MAP_READ, &dst_transfer);
+
+      /* Note: This will break once support for gl_SkipComponents is added */
+      uint32_t stride = ctx->gfx_pipeline_state.so_info.stride[i] * 4;
+      uint64_t src_offset = 0, dst_offset = fake_target->cached_filled_size;
+      while (src_offset < filled_size) {
+         memcpy(dst + dst_offset, src + src_offset, stride);
+         src_offset += stride * ctx->fake_so_buffer_factor;
+         dst_offset += stride;
+      }
+
+      pipe_buffer_unmap(&ctx->base, src_transfer);
+      pipe_buffer_unmap(&ctx->base, dst_transfer);
+
+      pipe_so_target_reference(&ctx->fake_so_targets[i], NULL);
+      ctx->fake_so_buffer_views[i].SizeInBytes = 0;
+
+      /* Make sure the buffer is not copied twice */
+      for (int j = i + 1; j <= ctx->gfx_pipeline_state.num_so_targets; ++j) {
+         if (ctx->so_targets[j] && ctx->so_targets[j]->buffer == target->base.buffer)
+            pipe_so_target_reference(&ctx->fake_so_targets[j], NULL);
+      }
+   }
+
+   ctx->fake_so_buffer_factor = 0;
+   ctx->cmdlist_dirty |= D3D12_DIRTY_STREAM_OUTPUT;
+
+   return true;
+}
+
+/* End the current batch, advance to the next entry of ctx->batches
+ * (wrapping to index 0 after the last one) and start that batch. */
+void
+d3d12_flush_cmdlist(struct d3d12_context *ctx)
+{
+   d3d12_end_batch(ctx, d3d12_current_batch(ctx));
+
+   if (++ctx->current_batch_idx == ARRAY_SIZE(ctx->batches))
+      ctx->current_batch_idx = 0;
+
+   d3d12_start_batch(ctx, d3d12_current_batch(ctx));
+}
+
+/* Flush the current command list and reset every batch with an infinite
+ * timeout (presumably waiting for its GPU work to finish): first all
+ * previously submitted batches, then the batch that was just flushed. */
+void
+d3d12_flush_cmdlist_and_wait(struct d3d12_context *ctx)
+{
+   /* Capture the batch before flushing; d3d12_flush_cmdlist() advances the
+    * current batch index. */
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+
+   d3d12_foreach_submitted_batch(ctx, old_batch)
+      d3d12_reset_batch(ctx, old_batch, PIPE_TIMEOUT_INFINITE);
+   d3d12_flush_cmdlist(ctx);
+   d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE);
+}
+
+/* Request a transition of the whole resource `res` to `state` through the
+ * context's resource state manager. */
+void
+d3d12_transition_resource_state(struct d3d12_context *ctx,
+                                struct d3d12_resource *res,
+                                D3D12_RESOURCE_STATES state)
+{
+   ctx->resource_state_manager->TransitionResource(d3d12_resource_state(res),
+                                                   state);
+}
+
+void
+d3d12_transition_subresources_state(struct d3d12_context *ctx,
+                                    struct d3d12_resource *res,
+                                    uint32_t start_level, uint32_t num_levels,
+                                    uint32_t start_layer, uint32_t num_layers,
+                                    uint32_t start_plane, uint32_t num_planes,
+                                    D3D12_RESOURCE_STATES state)
+{
+   TransitionableResourceState *xres = d3d12_resource_state(res);
+
+   for (uint32_t l = 0; l < num_levels; l++) {
+      const uint32_t level = start_level + l;
+      for (uint32_t a = 0; a < num_layers; a++) {
+         const uint32_t layer = start_layer + a;
+         for( uint32_t p = 0; p < num_planes; p++) {
+            const uint32_t plane = start_plane + p;
+            uint32_t subres_id = level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.array_size);
+            assert(subres_id < xres->NumSubresources());
+            ctx->resource_state_manager->TransitionSubresource(xres, subres_id, state);
+         }
+      }
+   }
+}
+
+/* Ask the resource state manager to apply all pending resource transitions,
+ * passing it the context's command list and current fence value. */
+void
+d3d12_apply_resource_states(struct d3d12_context *ctx)
+{
+   ctx->resource_state_manager->ApplyAllResourceTransitions(ctx->cmdlist, ctx->fence_value);
+}
+
+static void
+d3d12_clear_render_target(struct pipe_context *pctx,
+                          struct pipe_surface *psurf,
+                          const union pipe_color_union *color,
+                          unsigned dstx, unsigned dsty,
+                          unsigned width, unsigned height,
+                          bool render_condition_enabled)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_surface *surf = d3d12_surface(psurf);
+
+   if (!render_condition_enabled && ctx->current_predication)
+      ctx->cmdlist->SetPredication(NULL, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+
+   d3d12_transition_resource_state(ctx, d3d12_resource(psurf->texture),
+                                   D3D12_RESOURCE_STATE_RENDER_TARGET);
+   d3d12_apply_resource_states(ctx);
+
+   enum pipe_format format = psurf->texture->format;
+   float clear_color[4];
+
+   if (util_format_is_pure_uint(format)) {
+      for (int c = 0; c < 4; ++c)
+         clear_color[c] = color->ui[c];
+   } else if (util_format_is_pure_sint(format)) {
+      for (int c = 0; c < 4; ++c)
+         clear_color[c] = color->i[c];
+   } else {
+      for (int c = 0; c < 4; ++c)
+         clear_color[c] = color->f[c];
+   }
+
+   D3D12_RECT rect = { dstx, dsty, dstx + width, dsty + height };
+   ctx->cmdlist->ClearRenderTargetView(surf->desc_handle.cpu_handle,
+                                       color->f, 1, &rect);
+
+   d3d12_batch_reference_surface_texture(d3d12_current_batch(ctx), surf);
+
+   if (!render_condition_enabled && ctx->current_predication) {
+      ctx->cmdlist->SetPredication(
+         d3d12_resource_resource(ctx->current_predication), 0,
+         D3D12_PREDICATION_OP_EQUAL_ZERO);
+   }
+}
+
+static void
+d3d12_clear_depth_stencil(struct pipe_context *pctx,
+                          struct pipe_surface *psurf,
+                          unsigned clear_flags,
+                          double depth,
+                          unsigned stencil,
+                          unsigned dstx, unsigned dsty,
+                          unsigned width, unsigned height,
+                          bool render_condition_enabled)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_surface *surf = d3d12_surface(psurf);
+
+   if (!render_condition_enabled && ctx->current_predication)
+      ctx->cmdlist->SetPredication(NULL, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+
+   D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
+   if (clear_flags & PIPE_CLEAR_DEPTH)
+      flags |= D3D12_CLEAR_FLAG_DEPTH;
+   if (clear_flags & PIPE_CLEAR_STENCIL)
+      flags |= D3D12_CLEAR_FLAG_STENCIL;
+
+   d3d12_transition_resource_state(ctx, d3d12_resource(ctx->fb.zsbuf->texture),
+                                   D3D12_RESOURCE_STATE_DEPTH_WRITE);
+   d3d12_apply_resource_states(ctx);
+
+   D3D12_RECT rect = { dstx, dsty, dstx + width, dsty + height };
+   ctx->cmdlist->ClearDepthStencilView(surf->desc_handle.cpu_handle, flags,
+                                       depth, stencil, 1, &rect);
+
+   d3d12_batch_reference_surface_texture(d3d12_current_batch(ctx), surf);
+
+   if (!render_condition_enabled && ctx->current_predication) {
+      ctx->cmdlist->SetPredication(
+         d3d12_resource_resource(ctx->current_predication), 0,
+         D3D12_PREDICATION_OP_EQUAL_ZERO);
+   }
+}
+
+static void
+d3d12_clear(struct pipe_context *pctx,
+            unsigned buffers,
+            const struct pipe_scissor_state *scissor_state,
+            const union pipe_color_union *color,
+            double depth, unsigned stencil)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   if (buffers & PIPE_CLEAR_COLOR) {
+      for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
+         if (buffers & (PIPE_CLEAR_COLOR0 << i)) {
+            struct pipe_surface *psurf = ctx->fb.cbufs[i];
+            d3d12_clear_render_target(pctx, psurf, color,
+                                      0, 0, psurf->width, psurf->height,
+                                      true);
+         }
+      }
+   }
+
+   if (buffers & PIPE_CLEAR_DEPTHSTENCIL && ctx->fb.zsbuf) {
+      struct pipe_surface *psurf = ctx->fb.zsbuf;
+      d3d12_clear_depth_stencil(pctx, psurf,
+                                buffers & PIPE_CLEAR_DEPTHSTENCIL,
+                                depth, stencil,
+                                0, 0, psurf->width, psurf->height,
+                                true);
+   }
+}
+
+/**
+ * pipe_context::flush hook: submit the pending command list and, when the
+ * caller asks for one, hand back a reference to the fence of the batch that
+ * was current when the flush was requested. \p flags is not consulted.
+ */
+static void
+d3d12_flush(struct pipe_context *pipe,
+            struct pipe_fence_handle **fence,
+            unsigned flags)
+{
+   struct d3d12_context *ctx = d3d12_context(pipe);
+
+   /* Must be looked up before flushing. NOTE(review): presumably the flush
+    * moves the context on to another batch -- confirm. */
+   struct d3d12_batch *flushed = d3d12_current_batch(ctx);
+
+   d3d12_flush_cmdlist(ctx);
+
+   if (!fence)
+      return;
+
+   d3d12_fence_reference((struct d3d12_fence **)fence, flushed->fence);
+}
+
+/**
+ * pipe_context::flush_resource hook: request a transition of the resource to
+ * D3D12_RESOURCE_STATE_COMMON and apply the pending resource states.
+ */
+static void
+d3d12_flush_resource(struct pipe_context *pctx,
+                     struct pipe_resource *pres)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   d3d12_transition_resource_state(ctx, d3d12_resource(pres),
+                                   D3D12_RESOURCE_STATE_COMMON);
+   d3d12_apply_resource_states(ctx);
+}
+
+static void
+d3d12_init_null_srvs(struct d3d12_context *ctx)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+
+   for (unsigned i = 0; i < RESOURCE_DIMENSION_COUNT; ++i) {
+      D3D12_SHADER_RESOURCE_VIEW_DESC srv = {};
+
+      srv.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+      srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+      switch (i) {
+      case RESOURCE_DIMENSION_BUFFER:
+      case RESOURCE_DIMENSION_UNKNOWN:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
+         srv.Buffer.FirstElement = 0;
+         srv.Buffer.NumElements = 0;
+         srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
+         srv.Buffer.StructureByteStride = 0;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE1D:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
+         srv.Texture1D.MipLevels = 1;
+         srv.Texture1D.MostDetailedMip = 0;
+         srv.Texture1D.ResourceMinLODClamp = 0.0f;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE1DARRAY:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY;
+         srv.Texture1DArray.MipLevels = 1;
+         srv.Texture1DArray.ArraySize = 1;
+         srv.Texture1DArray.MostDetailedMip = 0;
+         srv.Texture1DArray.FirstArraySlice = 0;
+         srv.Texture1DArray.ResourceMinLODClamp = 0.0f;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE2D:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+         srv.Texture2D.MipLevels = 1;
+         srv.Texture2D.MostDetailedMip = 0;
+         srv.Texture2D.PlaneSlice = 0;
+         srv.Texture2D.ResourceMinLODClamp = 0.0f;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE2DARRAY:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
+         srv.Texture2DArray.MipLevels = 1;
+         srv.Texture2DArray.ArraySize = 1;
+         srv.Texture2DArray.MostDetailedMip = 0;
+         srv.Texture2DArray.FirstArraySlice = 0;
+         srv.Texture2DArray.PlaneSlice = 0;
+         srv.Texture2DArray.ResourceMinLODClamp = 0.0f;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE2DMS:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
+         srv.Texture2DMSArray.ArraySize = 1;
+         srv.Texture2DMSArray.FirstArraySlice = 0;
+         break;
+      case RESOURCE_DIMENSION_TEXTURE3D:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
+         srv.Texture3D.MipLevels = 1;
+         srv.Texture3D.MostDetailedMip = 0;
+         srv.Texture3D.ResourceMinLODClamp = 0.0f;
+         break;
+      case RESOURCE_DIMENSION_TEXTURECUBE:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
+         srv.TextureCube.MipLevels = 1;
+         srv.TextureCube.MostDetailedMip = 0;
+         srv.TextureCube.ResourceMinLODClamp = 0.0f;
+         break;
+      case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
+         srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY;
+         srv.TextureCubeArray.MipLevels = 1;
+         srv.TextureCubeArray.NumCubes = 1;
+         srv.TextureCubeArray.MostDetailedMip = 0;
+         srv.TextureCubeArray.First2DArrayFace = 0;
+         srv.TextureCubeArray.ResourceMinLODClamp = 0.0f;
+         break;
+      }
+
+      if (srv.ViewDimension != D3D12_SRV_DIMENSION_UNKNOWN)
+      {
+         d3d12_descriptor_pool_alloc_handle(ctx->view_pool, &ctx->null_srvs[i]);
+         screen->dev->CreateShaderResourceView(NULL, &srv, ctx->null_srvs[i].cpu_handle);
+      }
+   }
+}
+
+/**
+ * Create ctx->null_rtv: a 2D R8G8B8A8_UNORM render target view (mip 0,
+ * plane 0) created against a NULL resource, allocated from the RTV pool.
+ */
+static void
+d3d12_init_null_rtv(struct d3d12_context *ctx)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   struct d3d12_descriptor_handle *slot = &ctx->null_rtv;
+
+   D3D12_RENDER_TARGET_VIEW_DESC desc = {};
+   desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
+   desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+   desc.Texture2D.PlaneSlice = 0;
+   desc.Texture2D.MipSlice = 0;
+
+   d3d12_descriptor_pool_alloc_handle(ctx->rtv_pool, slot);
+   screen->dev->CreateRenderTargetView(NULL, &desc, slot->cpu_handle);
+}
+
+static void
+d3d12_init_null_sampler(struct d3d12_context *ctx)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+
+   d3d12_descriptor_pool_alloc_handle(ctx->sampler_pool, &ctx->null_sampler);
+
+   D3D12_SAMPLER_DESC desc;
+   desc.Filter = D3D12_FILTER_ANISOTROPIC;
+   desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+   desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+   desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+   desc.MipLODBias = 0.0f;
+   desc.MaxAnisotropy = 0;
+   desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
+   desc.MinLOD = 0.0f;
+   desc.MaxLOD = 0.0f;
+   memset(desc.BorderColor, 0, sizeof(desc.BorderColor));
+   screen->dev->CreateSampler(&desc, ctx->null_sampler.cpu_handle);
+}
+
+/**
+ * pipe_context::get_timestamp hook.
+ *
+ * Ends a PIPE_QUERY_TIMESTAMP query (created on first use and cached in
+ * ctx->timestamp_query) and waits for its result.
+ *
+ * \return the 64-bit query result, or 0 if the query object could not be
+ *         created or no result was written back
+ */
+static uint64_t
+d3d12_get_timestamp(struct pipe_context *pctx)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   if (!ctx->timestamp_query) {
+      ctx->timestamp_query = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
+      /* Do not hand a NULL query to end_query()/get_query_result(). */
+      if (!ctx->timestamp_query)
+         return 0;
+   }
+
+   /* Start from a defined value so a failed read-back does not return
+    * whatever happened to be on the stack. */
+   pipe_query_result result;
+   result.u64 = 0;
+
+   pctx->end_query(pctx, ctx->timestamp_query);
+   pctx->get_query_result(pctx, ctx->timestamp_query, true, &result);
+   return result.u64;
+}
+
+struct pipe_context *
+d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+
+   struct d3d12_context *ctx = CALLOC_STRUCT(d3d12_context);
+   if (!ctx)
+      return NULL;
+
+   ctx->base.screen = pscreen;
+   ctx->base.priv = priv;
+
+   ctx->base.destroy = d3d12_context_destroy;
+
+   ctx->base.create_vertex_elements_state = d3d12_create_vertex_elements_state;
+   ctx->base.bind_vertex_elements_state = d3d12_bind_vertex_elements_state;
+   ctx->base.delete_vertex_elements_state = d3d12_delete_vertex_elements_state;
+
+   ctx->base.create_blend_state = d3d12_create_blend_state;
+   ctx->base.bind_blend_state = d3d12_bind_blend_state;
+   ctx->base.delete_blend_state = d3d12_delete_blend_state;
+
+   ctx->base.create_depth_stencil_alpha_state = d3d12_create_depth_stencil_alpha_state;
+   ctx->base.bind_depth_stencil_alpha_state = d3d12_bind_depth_stencil_alpha_state;
+   ctx->base.delete_depth_stencil_alpha_state = d3d12_delete_depth_stencil_alpha_state;
+
+   ctx->base.create_rasterizer_state = d3d12_create_rasterizer_state;
+   ctx->base.bind_rasterizer_state = d3d12_bind_rasterizer_state;
+   ctx->base.delete_rasterizer_state = d3d12_delete_rasterizer_state;
+
+   ctx->base.create_sampler_state = d3d12_create_sampler_state;
+   ctx->base.bind_sampler_states = d3d12_bind_sampler_states;
+   ctx->base.delete_sampler_state = d3d12_delete_sampler_state;
+
+   ctx->base.create_sampler_view = d3d12_create_sampler_view;
+   ctx->base.set_sampler_views = d3d12_set_sampler_views;
+   ctx->base.sampler_view_destroy = d3d12_destroy_sampler_view;
+
+   ctx->base.create_vs_state = d3d12_create_vs_state;
+   ctx->base.bind_vs_state = d3d12_bind_vs_state;
+   ctx->base.delete_vs_state = d3d12_delete_vs_state;
+
+   ctx->base.create_fs_state = d3d12_create_fs_state;
+   ctx->base.bind_fs_state = d3d12_bind_fs_state;
+   ctx->base.delete_fs_state = d3d12_delete_fs_state;
+
+   ctx->base.create_gs_state = d3d12_create_gs_state;
+   ctx->base.bind_gs_state = d3d12_bind_gs_state;
+   ctx->base.delete_gs_state = d3d12_delete_gs_state;
+
+   ctx->base.set_polygon_stipple = d3d12_set_polygon_stipple;
+   ctx->base.set_vertex_buffers = d3d12_set_vertex_buffers;
+   ctx->base.set_viewport_states = d3d12_set_viewport_states;
+   ctx->base.set_scissor_states = d3d12_set_scissor_states;
+   ctx->base.set_constant_buffer = d3d12_set_constant_buffer;
+   ctx->base.set_framebuffer_state = d3d12_set_framebuffer_state;
+   ctx->base.set_clip_state = d3d12_set_clip_state;
+   ctx->base.set_blend_color = d3d12_set_blend_color;
+   ctx->base.set_sample_mask = d3d12_set_sample_mask;
+   ctx->base.set_stencil_ref = d3d12_set_stencil_ref;
+
+   ctx->base.create_stream_output_target = d3d12_create_stream_output_target;
+   ctx->base.stream_output_target_destroy = d3d12_stream_output_target_destroy;
+   ctx->base.set_stream_output_targets = d3d12_set_stream_output_targets;
+
+   ctx->base.get_timestamp = d3d12_get_timestamp;
+
+   ctx->base.clear = d3d12_clear;
+   ctx->base.clear_render_target = d3d12_clear_render_target;
+   ctx->base.clear_depth_stencil = d3d12_clear_depth_stencil;
+   ctx->base.draw_vbo = d3d12_draw_vbo;
+   ctx->base.flush = d3d12_flush;
+   ctx->base.flush_resource = d3d12_flush_resource;
+
+   ctx->gfx_pipeline_state.sample_mask = ~0;
+
+   d3d12_context_surface_init(&ctx->base);
+   d3d12_context_resource_init(&ctx->base);
+   d3d12_context_query_init(&ctx->base);
+   d3d12_context_blit_init(&ctx->base);
+
+
+   slab_create_child(&ctx->transfer_pool, &d3d12_screen(pscreen)->transfer_pool);
+
+   ctx->base.stream_uploader = u_upload_create_default(&ctx->base);
+   ctx->base.const_uploader = u_upload_create_default(&ctx->base);
+   ctx->so_allocator = u_suballocator_create(&ctx->base, 4096, 0,
+                                             PIPE_USAGE_DEFAULT,
+                                             0, true);
+
+   struct primconvert_config cfg;
+   cfg.primtypes_mask = 1 << PIPE_PRIM_POINTS |
+                        1 << PIPE_PRIM_LINES |
+                        1 << PIPE_PRIM_LINE_STRIP |
+                        1 << PIPE_PRIM_TRIANGLES |
+                        1 << PIPE_PRIM_TRIANGLE_STRIP;
+   cfg.fixed_prim_restart = true;
+   ctx->primconvert = util_primconvert_create_config(&ctx->base, &cfg);
+   if (!ctx->primconvert) {
+      debug_printf("D3D12: failed to create primconvert\n");
+      return NULL;
+   }
+
+   d3d12_gfx_pipeline_state_cache_init(ctx);
+   d3d12_root_signature_cache_init(ctx);
+   d3d12_gs_variant_cache_init(ctx);
+
+   HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
+   if (!hD3D12Mod) {
+      debug_printf("D3D12: failed to load D3D12.DLL\n");
+      return NULL;
+   }
+   ctx->D3D12SerializeVersionedRootSignature =
+      (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
+
+   if (FAILED(screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
+                                       __uuidof(ctx->cmdqueue_fence),
+                                       (void **)&ctx->cmdqueue_fence))) {
+      FREE(ctx);
+      return NULL;
+   }
+
+   for (int i = 0; i < ARRAY_SIZE(ctx->batches); ++i) {
+      if (!d3d12_init_batch(ctx, &ctx->batches[i])) {
+         FREE(ctx);
+         return NULL;
+      }
+   }
+   d3d12_start_batch(ctx, &ctx->batches[0]);
+
+   ctx->rtv_pool = d3d12_descriptor_pool_new(&ctx->base,
+                                             D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
+                                             64);
+   if (!ctx->rtv_pool) {
+      FREE(ctx);
+      return NULL;
+   }
+
+   ctx->dsv_pool = d3d12_descriptor_pool_new(&ctx->base,
+                                             D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
+                                             64);
+   if (!ctx->dsv_pool) {
+      FREE(ctx);
+      return NULL;
+   }
+
+   ctx->sampler_pool = d3d12_descriptor_pool_new(&ctx->base,
+                                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+                                                 64);
+   if (!ctx->sampler_pool) {
+      FREE(ctx);
+      return NULL;
+   }
+
+   ctx->view_pool = d3d12_descriptor_pool_new(&ctx->base,
+                                             D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+                                             1024);
+   if (!ctx->view_pool) {
+      debug_printf("D3D12: failed to create CBV/SRV descriptor pool\n");
+      FREE(ctx);
+      return NULL;
+   }
+
+   d3d12_init_null_srvs(ctx);
+   d3d12_init_null_rtv(ctx);
+   d3d12_init_null_sampler(ctx);
+
+   ctx->validation_tools = d3d12_validator_create();
+
+   ctx->blitter = util_blitter_create(&ctx->base);
+   if (!ctx->blitter)
+      return NULL;
+
+   ctx->resource_state_manager = new ResourceStateManager();
+
+   if (!d3d12_init_polygon_stipple(&ctx->base)) {
+      debug_printf("D3D12: failed to initialize polygon stipple resources\n");
+      FREE(ctx);
+      return NULL;
+   }
+
+   return &ctx->base;
+}
+
+/**
+ * Report whether the bound fragment shader writes FRAG_RESULT_DEPTH.
+ *
+ * Background:
+ *
+ * OpenGL Compatibility spec, section 15.2.3 (Shader Outputs) says
+ * the following:
+ *
+ *    For fixed-point depth buffers, the final fragment depth written by
+ *    a fragment shader is first clamped to [0, 1] and then converted to
+ *    fixed-point as if it were a window z value (see section 13.8.1).
+ *    For floating-point depth buffers, conversion is not performed but
+ *    clamping is. Note that the depth range computation is not applied
+ *    here, only the conversion to fixed-point.
+ *
+ * However, the D3D11.3 Functional Spec, section 17.10 (Depth Clamp) says
+ * the following:
+ *
+ *    Depth values that reach the Output Merger, whether coming from
+ *    interpolation or from Pixel Shader output (replacing the
+ *    interpolated z), are always clamped:
+ *    z = min(Viewport.MaxDepth,max(Viewport.MinDepth,z))
+ *    following the D3D11 Floating Point Rules(3.1) for min/max.
+ *
+ * This means that we can't always use the fixed-function viewport-mapping
+ * D3D provides.
+ *
+ * There's only one case where the difference matters: When the fragment
+ * shader writes a non-implicit value to gl_FragDepth. In all other
+ * cases, the fragment either shouldn't have been rasterized in the
+ * first place, or the implicit gl_FragCoord.z-value should already have
+ * been clamped to the depth-range.
+ *
+ * For simplicity, let's assume that an explicitly written frag-result
+ * doesn't simply forward the value of gl_FragCoord.z. If it does, we'll
+ * end up generating needless code, but the result will be correct.
+ */
+bool
+d3d12_need_zero_one_depth_range(struct d3d12_context *ctx)
+{
+   struct d3d12_shader_selector *frag = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
+
+   if (frag->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      return true;
+
+   return false;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h
new file mode 100644 (file)
index 0000000..b805218
--- /dev/null
@@ -0,0 +1,334 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_CONTEXT_H
+#define D3D12_CONTEXT_H
+
+#include "d3d12_batch.h"
+#include "d3d12_descriptor_pool.h"
+#include "d3d12_pipeline_state.h"
+#include "d3d12_nir_lower_texcmp.h"
+
+#include "dxil_nir_lower_int_samplers.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/list.h"
+#include "util/slab.h"
+#include "util/u_suballoc.h"
+
+#include <d3d12.h>
+
+/* Number of entries in the per-graphics-stage arrays of d3d12_context.
+ * NOTE(review): one fewer than PIPE_SHADER_TYPES -- presumably the compute
+ * stage is the one left out; confirm. */
+#define D3D12_GFX_SHADER_STAGES (PIPE_SHADER_TYPES - 1)
+/* Point size limit; where it is applied is not visible from this header. */
+#define D3D12_MAX_POINT_SIZE 255.0f
+
+/**
+ * Context-level dirty bits: each value is a distinct single bit so the flags
+ * can be OR-ed into one mask (see D3D12_DIRTY_PSO below for such a mask).
+ * NOTE(review): presumably accumulated in d3d12_context::state_dirty and
+ * consumed at draw time -- confirm against d3d12_draw.cpp.
+ */
+enum d3d12_dirty_flags
+{
+   D3D12_DIRTY_NONE             = 0,
+   D3D12_DIRTY_BLEND            = (1 << 0),
+   D3D12_DIRTY_RASTERIZER       = (1 << 1),
+   D3D12_DIRTY_ZSA              = (1 << 2),
+   D3D12_DIRTY_VERTEX_ELEMENTS  = (1 << 3),
+   D3D12_DIRTY_BLEND_COLOR      = (1 << 4),
+   D3D12_DIRTY_STENCIL_REF      = (1 << 5),
+   D3D12_DIRTY_SAMPLE_MASK      = (1 << 6),
+   D3D12_DIRTY_VIEWPORT         = (1 << 7),
+   D3D12_DIRTY_FRAMEBUFFER      = (1 << 8),
+   D3D12_DIRTY_SCISSOR          = (1 << 9),
+   D3D12_DIRTY_VERTEX_BUFFERS   = (1 << 10),
+   D3D12_DIRTY_INDEX_BUFFER     = (1 << 11),
+   D3D12_DIRTY_PRIM_MODE        = (1 << 12),
+   D3D12_DIRTY_SHADER           = (1 << 13),
+   D3D12_DIRTY_ROOT_SIGNATURE   = (1 << 14),
+   D3D12_DIRTY_STREAM_OUTPUT    = (1 << 15),
+   D3D12_DIRTY_STRIP_CUT_VALUE  = (1 << 16),
+};
+
+/**
+ * Per-shader-stage dirty bits, one bit per kind of stage binding.
+ * NOTE(review): presumably stored in d3d12_context::shader_dirty[] -- confirm.
+ */
+enum d3d12_shader_dirty_flags
+{
+   D3D12_SHADER_DIRTY_CONSTBUF      = (1 << 0),
+   D3D12_SHADER_DIRTY_SAMPLER_VIEWS = (1 << 1),
+   D3D12_SHADER_DIRTY_SAMPLERS      = (1 << 2),
+};
+
+/* Union of the d3d12_dirty_flags bits grouped under "PSO".
+ * NOTE(review): presumably the states whose change requires a new pipeline
+ * state object -- confirm against the users of this mask. */
+#define D3D12_DIRTY_PSO (D3D12_DIRTY_BLEND | D3D12_DIRTY_RASTERIZER | D3D12_DIRTY_ZSA | \
+                         D3D12_DIRTY_FRAMEBUFFER | D3D12_DIRTY_SAMPLE_MASK | \
+                         D3D12_DIRTY_VERTEX_ELEMENTS | D3D12_DIRTY_PRIM_MODE | \
+                         D3D12_DIRTY_SHADER | D3D12_DIRTY_ROOT_SIGNATURE | \
+                         D3D12_DIRTY_STRIP_CUT_VALUE)
+
+/* Union of every d3d12_shader_dirty_flags bit. */
+#define D3D12_SHADER_DIRTY_ALL (D3D12_SHADER_DIRTY_CONSTBUF | D3D12_SHADER_DIRTY_SAMPLER_VIEWS | \
+                                D3D12_SHADER_DIRTY_SAMPLERS)
+
+/**
+ * Kinds of shader bindings; D3D12_NUM_BINDING_TYPES is the count of the
+ * values before it, not a binding type of its own.
+ */
+enum d3d12_binding_type {
+   D3D12_BINDING_CONSTANT_BUFFER,
+   D3D12_BINDING_SHADER_RESOURCE_VIEW,
+   D3D12_BINDING_SAMPLER,
+   D3D12_BINDING_STATE_VARS,
+   D3D12_NUM_BINDING_TYPES
+};
+
+/**
+ * Identifiers for driver state variables; D3D12_MAX_STATE_VARS is the count
+ * of the values before it.
+ * NOTE(review): presumably these are the values delivered to shaders through
+ * the D3D12_BINDING_STATE_VARS binding -- confirm.
+ */
+enum d3d12_state_var {
+   D3D12_STATE_VAR_Y_FLIP = 0,
+   D3D12_STATE_VAR_PT_SPRITE,
+   D3D12_STATE_VAR_FIRST_VERTEX,
+   D3D12_STATE_VAR_DEPTH_TRANSFORM,
+   D3D12_MAX_STATE_VARS
+};
+
+/**
+ * Resource dimensions with explicitly pinned numeric values.
+ * RESOURCE_DIMENSION_COUNT sizes d3d12_context::null_srvs[], which holds one
+ * null shader resource view per dimension.
+ */
+enum resource_dimension
+{
+   RESOURCE_DIMENSION_UNKNOWN = 0,
+   RESOURCE_DIMENSION_BUFFER = 1,
+   RESOURCE_DIMENSION_TEXTURE1D = 2,
+   RESOURCE_DIMENSION_TEXTURE2D = 3,
+   RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+   RESOURCE_DIMENSION_TEXTURE3D = 5,
+   RESOURCE_DIMENSION_TEXTURECUBE = 6,
+   RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+   RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+   RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+   RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
+   RESOURCE_DIMENSION_COUNT
+};
+
+/**
+ * Driver-side sampler state: two descriptor handles plus a copy of the
+ * sampler parameters (wrap modes, filter, LOD settings, border colour and
+ * compare function).
+ * NOTE(review): presumably handle_without_shadow is the same sampler with
+ * shadow comparison disabled -- confirm against d3d12_create_sampler_state().
+ */
+struct d3d12_sampler_state {
+   struct d3d12_descriptor_handle handle, handle_without_shadow;
+   bool is_integer_texture;
+   bool is_shadow_sampler;
+   enum pipe_tex_wrap wrap_r;
+   enum pipe_tex_wrap wrap_s;
+   enum pipe_tex_wrap wrap_t;
+   enum pipe_tex_filter filter;
+   float lod_bias;
+   float min_lod, max_lod;
+   float border_color[4];
+   enum pipe_compare_func compare_func;
+};
+
+/**
+ * Bit flags describing blend-factor usage; the non-zero values are distinct
+ * single bits.
+ * NOTE(review): the exact meaning of each bit is defined by the blend state
+ * code, which is not visible here.
+ */
+enum d3d12_blend_factor_flags {
+   D3D12_BLEND_FACTOR_NONE  = 0,
+   D3D12_BLEND_FACTOR_COLOR = 1 << 0,
+   D3D12_BLEND_FACTOR_ALPHA = 1 << 1,
+   D3D12_BLEND_FACTOR_ANY   = 1 << 2,
+};
+
+/**
+ * Driver sampler view. `base` is the first member, which is what allows the
+ * d3d12_sampler_view() cast helper to convert a pipe_sampler_view pointer.
+ */
+struct d3d12_sampler_view {
+   struct pipe_sampler_view base;
+   struct d3d12_descriptor_handle handle;
+   unsigned mip_levels;
+   unsigned array_size;
+   unsigned swizzle_override_r:3;         /**< PIPE_SWIZZLE_x for red component */
+   unsigned swizzle_override_g:3;         /**< PIPE_SWIZZLE_x for green component */
+   unsigned swizzle_override_b:3;         /**< PIPE_SWIZZLE_x for blue component */
+   unsigned swizzle_override_a:3;         /**< PIPE_SWIZZLE_x for alpha component */
+};
+
+/**
+ * Downcast a gallium sampler view to the driver's type. Valid because
+ * struct d3d12_sampler_view starts with its pipe_sampler_view base.
+ */
+static inline struct d3d12_sampler_view *
+d3d12_sampler_view(struct pipe_sampler_view *pview)
+{
+   struct d3d12_sampler_view *view = (struct d3d12_sampler_view *)pview;
+   return view;
+}
+
+/**
+ * Driver stream-output target: the gallium base object plus a fill buffer
+ * reference, an offset into it and a cached filled size.
+ * NOTE(review): presumably fill_buffer receives the number of bytes written
+ * by the GPU -- confirm against the stream-output code.
+ */
+struct d3d12_stream_output_target {
+   struct pipe_stream_output_target base;
+   struct pipe_resource *fill_buffer;
+   unsigned fill_buffer_offset;
+   uint64_t cached_filled_size;
+};
+
+/** Pairs a shader pointer with a dirty mask for it. */
+struct d3d12_shader_state {
+   struct d3d12_shader *current;
+   unsigned state_dirty;
+};
+
+struct blitter_context;
+struct primconvert_context;
+struct d3d12_validation_tools;
+
+#ifdef __cplusplus
+class ResourceStateManager;
+#endif
+
+/**
+ * Per-context driver state. `base` is the first member, which is what lets
+ * d3d12_context() cast a pipe_context pointer to this type.
+ */
+struct d3d12_context {
+   struct pipe_context base;
+   struct slab_child_pool transfer_pool;
+   struct primconvert_context *primconvert;
+   struct blitter_context *blitter;
+   struct u_suballocator *query_allocator;
+   struct u_suballocator *so_allocator;
+   struct hash_table *pso_cache;
+   struct hash_table *root_signature_cache;
+   struct hash_table *gs_variant_cache;
+
+   /* Fixed set of batches; current_batch_idx selects the one returned by
+    * d3d12_current_batch(). */
+   struct d3d12_batch batches[4];
+   unsigned current_batch_idx;
+
+   /* NOTE(review): presumably the currently bound gallium state and the
+    * D3D12 structures derived from it -- confirm against the setters. */
+   struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+   struct pipe_framebuffer_state fb;
+   struct pipe_vertex_buffer vbs[PIPE_MAX_ATTRIBS];
+   D3D12_VERTEX_BUFFER_VIEW vbvs[PIPE_MAX_ATTRIBS];
+   unsigned num_vbs;
+   float flip_y;
+   bool need_zero_one_depth_range;
+   enum pipe_prim_type initial_api_prim;
+   struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS];
+   D3D12_VIEWPORT viewports[PIPE_MAX_VIEWPORTS];
+   unsigned num_viewports;
+   struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS];
+   D3D12_RECT scissors[PIPE_MAX_VIEWPORTS];
+   float blend_factor[4];
+   struct pipe_stencil_ref stencil_ref;
+   struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
+   unsigned has_int_samplers;
+   struct d3d12_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   unsigned num_samplers[PIPE_SHADER_TYPES];
+   D3D12_INDEX_BUFFER_VIEW ibv;
+   dxil_wrap_sampler_state tex_wrap_states[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   dxil_texture_swizzle_state tex_swizzle_state[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   enum compare_func tex_compare_func[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+   /* Polygon stipple pattern and the objects used to sample it. */
+   struct {
+      bool enabled;
+      uint32_t pattern[32];
+      struct pipe_resource *texture;
+      struct pipe_sampler_view *sampler_view;
+      struct d3d12_sampler_state *sampler_cso;
+   } pstipple;
+
+   /* Stream-output targets and buffer views, in a regular and a "fake" set. */
+   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
+   D3D12_STREAM_OUTPUT_BUFFER_VIEW so_buffer_views[PIPE_MAX_SO_BUFFERS];
+   struct pipe_stream_output_target *fake_so_targets[PIPE_MAX_SO_BUFFERS];
+   D3D12_STREAM_OUTPUT_BUFFER_VIEW fake_so_buffer_views[PIPE_MAX_SO_BUFFERS];
+   unsigned fake_so_buffer_factor;
+
+   /* Shader selector per graphics stage, indexed by PIPE_SHADER_*. */
+   struct d3d12_shader_selector *gfx_stages[D3D12_GFX_SHADER_STAGES];
+
+   struct d3d12_gfx_pipeline_state gfx_pipeline_state;
+   unsigned shader_dirty[D3D12_GFX_SHADER_STAGES];
+   unsigned state_dirty;
+   unsigned cmdlist_dirty;
+   ID3D12PipelineState *current_pso;
+   bool reverse_depth_range;
+
+   ID3D12Fence *cmdqueue_fence;
+   uint64_t fence_value;
+   ID3D12GraphicsCommandList *cmdlist;
+
+   struct list_head active_queries;
+   bool queries_disabled;
+
+   /* One descriptor pool per D3D12 descriptor heap type used by the driver. */
+   struct d3d12_descriptor_pool *rtv_pool;
+   struct d3d12_descriptor_pool *dsv_pool;
+   struct d3d12_descriptor_pool *sampler_pool;
+   struct d3d12_descriptor_pool *view_pool;
+
+   /* Descriptors created against a NULL resource at context creation. */
+   struct d3d12_descriptor_handle null_srvs[RESOURCE_DIMENSION_COUNT];
+   struct d3d12_descriptor_handle null_rtv;
+   struct d3d12_descriptor_handle null_sampler;
+
+   /* Entry point resolved from D3D12.DLL at context creation. */
+   PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE D3D12SerializeVersionedRootSignature;
+   struct d3d12_validation_tools *validation_tools;
+
+   /* Resource bound for predication, or NULL when there is none; the clear
+    * paths test it and re-bind it after a clear that ignores the render
+    * condition. */
+   struct d3d12_resource *current_predication;
+
+#ifdef __cplusplus
+   ResourceStateManager *resource_state_manager;
+#else
+   void *resource_state_manager; /* opaque pointer; we don't know about classes in C */
+#endif
+   /* Created on first use by d3d12_get_timestamp(). */
+   struct pipe_query *timestamp_query;
+
+   void *stencil_resolve_vs, *stencil_resolve_fs, *sampler_state; /* used by d3d12_blit.cpp */
+};
+
+/**
+ * Downcast a gallium context to the driver's type. Valid because
+ * struct d3d12_context starts with its pipe_context base.
+ */
+static inline struct d3d12_context *
+d3d12_context(struct pipe_context *context)
+{
+   struct d3d12_context *ctx = (struct d3d12_context *)context;
+   return ctx;
+}
+
+/**
+ * Return the batch selected by ctx->current_batch_idx; the index is asserted
+ * to lie inside ctx->batches.
+ */
+static inline struct d3d12_batch *
+d3d12_current_batch(struct d3d12_context *ctx)
+{
+   assert(ctx->current_batch_idx < ARRAY_SIZE(ctx->batches));
+   return &ctx->batches[ctx->current_batch_idx];
+}
+
+/**
+ * Iterate over the batches other than the current one, in ring order.
+ *
+ * Starts at the slot after the current batch, skips leading slots whose
+ * fence is NULL, then visits every slot up to (but excluding) the current
+ * batch, binding each to `batch`.
+ *
+ * Usage hazards, all visible in the expansion:
+ *  - it expands to several statements, so it must not be the unbraced body
+ *    of an if/else/loop;
+ *  - it declares `unsigned oldest` and `struct d3d12_batch *batch` in the
+ *    enclosing scope, so it can be used only once per scope and `oldest`
+ *    must not already be declared there;
+ *  - `ctx` is expanded unparenthesized and several times, so pass a plain
+ *    identifier.
+ */
+#define d3d12_foreach_submitted_batch(ctx, batch) \
+   unsigned oldest = (ctx->current_batch_idx + 1) % ARRAY_SIZE(ctx->batches); \
+   while (ctx->batches[oldest].fence == NULL && oldest != ctx->current_batch_idx) \
+      oldest = (oldest + 1) % ARRAY_SIZE(ctx->batches); \
+   struct d3d12_batch *batch = &ctx->batches[oldest]; \
+   for (; oldest != ctx->current_batch_idx; \
+        oldest = (oldest + 1) % ARRAY_SIZE(ctx->batches), \
+        batch = &ctx->batches[oldest])
+
+/* Create a context on pscreen; returns NULL on failure. */
+struct pipe_context *
+d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
+
+bool
+d3d12_enable_fake_so_buffers(struct d3d12_context *ctx, unsigned factor);
+
+bool
+d3d12_disable_fake_so_buffers(struct d3d12_context *ctx);
+
+/* Flush the context's command list; used by the pipe_context::flush hook. */
+void
+d3d12_flush_cmdlist(struct d3d12_context *ctx);
+
+void
+d3d12_flush_cmdlist_and_wait(struct d3d12_context *ctx);
+
+
+/* NOTE(review): callers in d3d12_context.cpp follow a transition request
+ * with d3d12_apply_resource_states() -- presumably transitions are only
+ * queued here and emitted by the apply call; confirm. */
+void
+d3d12_transition_resource_state(struct d3d12_context* ctx,
+                                struct d3d12_resource* res,
+                                D3D12_RESOURCE_STATES state);
+
+/* Same as above, restricted to a range of levels, layers and planes. */
+void
+d3d12_transition_subresources_state(struct d3d12_context *ctx,
+                                    struct d3d12_resource *res,
+                                    unsigned start_level, unsigned num_levels,
+                                    unsigned start_layer, unsigned num_layers,
+                                    unsigned start_plane, unsigned num_planes,
+                                    D3D12_RESOURCE_STATES state);
+
+void
+d3d12_apply_resource_states(struct d3d12_context* ctx);
+
+/* pipe_context::draw_vbo implementation. */
+void
+d3d12_draw_vbo(struct pipe_context *pctx,
+               const struct pipe_draw_info *dinfo);
+
+void
+d3d12_blit(struct pipe_context *pctx,
+           const struct pipe_blit_info *info);
+
+/* Called during context creation to set up the query part of the context. */
+void
+d3d12_context_query_init(struct pipe_context *pctx);
+
+/* True when the bound fragment shader writes FRAG_RESULT_DEPTH. */
+bool
+d3d12_need_zero_one_depth_range(struct d3d12_context *ctx);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_debug.h b/src/gallium/drivers/d3d12/d3d12_debug.h
new file mode 100644 (file)
index 0000000..f6601bb
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_DEBUG_H
+#define D3D12_DEBUG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* Debug option bits, each a distinct single bit.
+ * NOTE(review): presumably these are the bits tested against the d3d12_debug
+ * mask declared below -- confirm where the mask is populated. */
+#define D3D12_DEBUG_VERBOSE       (1 << 0)
+#define D3D12_DEBUG_EXPERIMENTAL  (1 << 1)
+#define D3D12_DEBUG_DXIL          (1 << 2)
+#define D3D12_DEBUG_DISASS        (1 << 3)
+#define D3D12_DEBUG_BLIT          (1 << 4)
+#define D3D12_DEBUG_RESOURCE      (1 << 5)
+#define D3D12_DEBUG_DEBUG_LAYER   (1 << 6)
+#define D3D12_DEBUG_GPU_VALIDATOR (1 << 7)
+
+/* Defined elsewhere; this header only declares it. */
+extern uint32_t d3d12_debug;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_descriptor_pool.cpp b/src/gallium/drivers/d3d12/d3d12_descriptor_pool.cpp
new file mode 100644 (file)
index 0000000..c7a2bc1
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_context.h"
+#include "d3d12_descriptor_pool.h"
+#include "d3d12_screen.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "util/list.h"
+#include "util/u_dynarray.h"
+#include "util/u_memory.h"
+
+#include <d3d12.h>
+
+/* A growable collection of descriptor heaps that all share one heap type.
+ * Heaps are created lazily by d3d12_descriptor_pool_alloc_handle(). */
+struct d3d12_descriptor_pool {
+   /* Device used to create each new heap. */
+   ID3D12Device *dev;
+   /* Heap type passed to every heap created for this pool. */
+   D3D12_DESCRIPTOR_HEAP_TYPE type;
+   /* Number of descriptors requested for each individual heap. */
+   uint32_t num_descriptors;
+   /* List of d3d12_descriptor_heap, chained through their 'link' member. */
+   list_head heaps;
+};
+
+/* Wrapper around one ID3D12DescriptorHeap with a simple slot allocator:
+ * a bump pointer ('next') plus a free list of released offsets. */
+struct d3d12_descriptor_heap {
+   /* NOTE(review): never assigned in this file (stays NULL from the zeroing
+    * allocation); confirm whether anything else sets or reads it. */
+   struct d3d12_descriptor_pool *pool;
+
+   /* Description the D3D12 heap was created with (count, type, flags). */
+   D3D12_DESCRIPTOR_HEAP_DESC desc;
+   /* Device that created the heap; also used for CopyDescriptors. */
+   ID3D12Device *dev;
+   /* The underlying D3D12 heap object; released in heap_free. */
+   ID3D12DescriptorHeap *heap;
+   /* Value of GetDescriptorHandleIncrementSize() for the heap type. */
+   uint32_t desc_size;
+   /* 'ptr' of the CPU handle for the start of the heap. */
+   uint64_t cpu_base;
+   /* 'ptr' of the GPU handle for the start of the heap; only filled in for
+    * heaps created with D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE. */
+   uint64_t gpu_base;
+   /* Total extent of the heap: num_descriptors * desc_size. */
+   uint32_t size;
+   /* Offset (same units as 'size') of the next never-used slot. */
+   uint32_t next;
+   /* uint32_t offsets of slots that were freed and can be reused. */
+   util_dynarray free_list;
+   /* Node in d3d12_descriptor_pool::heaps. */
+   list_head link;
+};
+
+struct d3d12_descriptor_heap*
+d3d12_descriptor_heap_new(ID3D12Device *dev,
+                          D3D12_DESCRIPTOR_HEAP_TYPE type,
+                          D3D12_DESCRIPTOR_HEAP_FLAGS flags,
+                          uint32_t num_descriptors)
+{
+   struct d3d12_descriptor_heap *heap = CALLOC_STRUCT(d3d12_descriptor_heap);
+
+   heap->desc.NumDescriptors = num_descriptors;
+   heap->desc.Type = type;
+   heap->desc.Flags = flags;
+   if (FAILED(dev->CreateDescriptorHeap(&heap->desc,
+                                        __uuidof(heap->heap),
+                                        (void **)&heap->heap))) {
+      FREE(heap);
+      return NULL;
+   }
+
+   heap->dev = dev;
+   heap->desc_size = dev->GetDescriptorHandleIncrementSize(type);
+   heap->size = num_descriptors * heap->desc_size;
+   heap->cpu_base = heap->heap->GetCPUDescriptorHandleForHeapStart().ptr;
+   if (flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
+      heap->gpu_base = heap->heap->GetGPUDescriptorHandleForHeapStart().ptr;
+   util_dynarray_init(&heap->free_list, NULL);
+
+   return heap;
+}
+
+/* Tear down a heap: drop the free list, release the D3D12 heap object and
+ * free the wrapper itself. 'heap' must not be used afterwards. */
+void
+d3d12_descriptor_heap_free(struct d3d12_descriptor_heap *heap)
+{
+   ID3D12DescriptorHeap *d3d_heap = heap->heap;
+
+   util_dynarray_fini(&heap->free_list);
+   d3d_heap->Release();
+   FREE(heap);
+}
+
+/* Accessor for the underlying ID3D12DescriptorHeap (no reference is added). */
+ID3D12DescriptorHeap*
+d3d12_descriptor_heap_get(struct d3d12_descriptor_heap *heap)
+{
+   ID3D12DescriptorHeap *d3d_heap = heap->heap;
+   return d3d_heap;
+}
+
+/* Returns 1 if the heap was created with
+ * D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, 0 otherwise. */
+static uint32_t
+d3d12_descriptor_heap_is_online(struct d3d12_descriptor_heap *heap)
+{
+   if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
+      return 1;
+
+   return 0;
+}
+
+/* Returns non-zero when at least one slot can be handed out, either from
+ * the free list or from the unused tail of the heap. */
+static uint32_t
+d3d12_descriptor_heap_can_allocate(struct d3d12_descriptor_heap *heap)
+{
+   /* A previously freed slot is always reusable. */
+   if (heap->free_list.size > 0)
+      return 1;
+
+   /* Otherwise there must be room for one more slot past 'next'. */
+   return (heap->next + heap->desc_size <= heap->size) ? 1 : 0;
+}
+
+/* Number of slots left between the bump pointer and the end of the heap.
+ * Slots sitting on the free list are not counted. */
+uint32_t
+d3d12_descriptor_heap_get_remaining_handles(struct d3d12_descriptor_heap *heap)
+{
+   const uint32_t unused = heap->size - heap->next;
+
+   return unused / heap->desc_size;
+}
+
+void
+d2d12_descriptor_heap_get_next_handle(struct d3d12_descriptor_heap *heap,
+                                      struct d3d12_descriptor_handle *handle)
+{
+   handle->heap = heap;
+   handle->cpu_handle.ptr = heap->cpu_base + heap->next;
+   handle->gpu_handle.ptr = d3d12_descriptor_heap_is_online(heap) ?
+                            heap->gpu_base + heap->next : 0;
+}
+
+uint32_t
+d3d12_descriptor_heap_alloc_handle(struct d3d12_descriptor_heap *heap,
+                                   struct d3d12_descriptor_handle *handle)
+{
+   uint32_t offset = 0;
+
+   assert(handle != NULL);
+
+   if (heap->free_list.size > 0) {
+      offset = util_dynarray_pop(&heap->free_list, uint32_t);
+   } else if (heap->size >= heap->next + heap->desc_size) {
+      offset = heap->next;
+      heap->next += heap->desc_size;
+   } else {
+      /* Todo: we should add a new descriptor heap to get more handles */
+      assert(0 && "No handles available in descriptor heap");
+      return 0;
+   }
+
+   handle->heap = heap;
+   handle->cpu_handle.ptr = heap->cpu_base + offset;
+   handle->gpu_handle.ptr = d3d12_descriptor_heap_is_online(heap) ?
+                            heap->gpu_base + offset : 0;
+
+   return 1;
+}
+
+/* Give a handle's slot back to its heap and reset the handle.
+ *
+ * If the slot is the last one handed out by the bump pointer, the pointer
+ * is simply moved back; otherwise the slot offset goes on the free list.
+ * On return handle->heap is NULL and both descriptor pointers are 0.
+ */
+void
+d3d12_descriptor_handle_free(struct d3d12_descriptor_handle *handle)
+{
+   struct d3d12_descriptor_heap *owner = handle->heap;
+   const uint32_t slot_offset = handle->cpu_handle.ptr - owner->cpu_base;
+
+   if (slot_offset + owner->desc_size != owner->next) {
+      util_dynarray_append(&owner->free_list, uint32_t, slot_offset);
+   } else {
+      owner->next = slot_offset;
+   }
+
+   handle->cpu_handle.ptr = 0;
+   handle->gpu_handle.ptr = 0;
+   handle->heap = NULL;
+}
+
+void
+d3d12_descriptor_heap_append_handles(struct d3d12_descriptor_heap *heap,
+                                     D3D12_CPU_DESCRIPTOR_HANDLE *handles,
+                                     unsigned num_handles)
+{
+   D3D12_CPU_DESCRIPTOR_HANDLE dst;
+
+   assert(heap->next + (num_handles * heap->desc_size) <= heap->size);
+   dst.ptr = heap->cpu_base + heap->next;
+   heap->dev->CopyDescriptors(1, &dst, &num_handles,
+                              num_handles, handles, NULL,
+                              heap->desc.Type);
+   heap->next += num_handles * heap->desc_size;
+}
+
+/* Mark every slot of the heap as unused again: the free list is emptied
+ * and the bump pointer returns to the start of the heap. */
+void
+d3d12_descriptor_heap_clear(struct d3d12_descriptor_heap *heap)
+{
+   util_dynarray_clear(&heap->free_list);
+   heap->next = 0;
+}
+
+struct d3d12_descriptor_pool*
+d3d12_descriptor_pool_new(pipe_context *pctx,
+                          D3D12_DESCRIPTOR_HEAP_TYPE type,
+                          uint32_t num_descriptors)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   struct d3d12_descriptor_pool *pool = CALLOC_STRUCT(d3d12_descriptor_pool);
+   if (!pool)
+      return NULL;
+
+   pool->dev = d3d12_screen(pctx->screen)->dev;
+   pool->type = type;
+   pool->num_descriptors = num_descriptors;
+   list_inithead(&pool->heaps);
+
+   return pool;
+}
+
+/* Destroy a pool: every heap is unlinked and freed, then the pool itself. */
+void
+d3d12_descriptor_pool_free(struct d3d12_descriptor_pool *pool)
+{
+   /* The _safe iterator is required because entries are removed while
+    * walking the list. */
+   list_for_each_entry_safe(struct d3d12_descriptor_heap, entry, &pool->heaps, link) {
+      list_del(&entry->link);
+      d3d12_descriptor_heap_free(entry);
+   }
+
+   FREE(pool);
+}
+
+uint32_t
+d3d12_descriptor_pool_alloc_handle(struct d3d12_descriptor_pool *pool,
+                                   struct d3d12_descriptor_handle *handle)
+{
+   struct d3d12_descriptor_heap *valid_heap = NULL;
+
+   list_for_each_entry(struct d3d12_descriptor_heap, heap, &pool->heaps, link) {
+      if (d3d12_descriptor_heap_can_allocate(heap)) {
+         valid_heap = heap;
+         break;
+      }
+   }
+
+   if (!valid_heap) {
+      valid_heap = d3d12_descriptor_heap_new(pool->dev,
+                                             pool->type,
+                                             D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
+                                             pool->num_descriptors);
+      list_addtail(&valid_heap->link, &pool->heaps);
+   }
+
+   return d3d12_descriptor_heap_alloc_handle(valid_heap, handle);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_descriptor_pool.h b/src/gallium/drivers/d3d12/d3d12_descriptor_pool.h
new file mode 100644 (file)
index 0000000..324e55a
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_DESCRIPTOR_POOL_H
+#define D3D12_DESCRIPTOR_POOL_H
+
+#include "pipe/p_context.h"
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <d3d12.h>
+
+struct d3d12_descriptor_pool;
+struct d3d12_descriptor_heap;
+
+/* One descriptor slot inside a d3d12_descriptor_heap. */
+struct d3d12_descriptor_handle {
+    /* CPU-side address of the slot. */
+    D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle;
+    /* GPU-side address of the slot; ptr is 0 when the owning heap is not
+     * shader visible. */
+    D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle;
+    /* Heap the slot belongs to; NULL once the handle has been freed. */
+    struct d3d12_descriptor_heap *heap;
+};
+
+/* True while the handle refers to a heap, i.e. has not been released with
+ * d3d12_descriptor_handle_free(). */
+inline bool
+d3d12_descriptor_handle_is_allocated(struct d3d12_descriptor_handle *handle)
+{
+    return handle->heap ? true : false;
+}
+
+/* Return the handle's slot to the heap it came from and reset the handle
+ * (heap becomes NULL, both descriptor pointers become 0). */
+void
+d3d12_descriptor_handle_free(struct d3d12_descriptor_handle *handle);
+
+/* Offline Descriptor Pool */
+
+/* Create an empty pool. Heaps holding num_descriptors descriptors each are
+ * created on demand when handles are allocated. Returns NULL on allocation
+ * failure. */
+struct d3d12_descriptor_pool*
+d3d12_descriptor_pool_new(struct pipe_context *pctx,
+                          D3D12_DESCRIPTOR_HEAP_TYPE type,
+                          uint32_t num_descriptors);
+
+/* Free every heap owned by the pool, then the pool itself. */
+void
+d3d12_descriptor_pool_free(struct d3d12_descriptor_pool *pool);
+
+/* Allocate one handle from the first heap with room, adding a new heap
+ * (created with D3D12_DESCRIPTOR_HEAP_FLAG_NONE) when none has room.
+ * Returns non-zero on success. */
+uint32_t
+d3d12_descriptor_pool_alloc_handle(struct d3d12_descriptor_pool *pool,
+                                   struct d3d12_descriptor_handle *handle);
+
+
+/* Online/Offline Descriptor Heaps */
+
+/* Create a heap with room for num_descriptors descriptors. Returns NULL if
+ * the D3D12 heap cannot be created. */
+struct d3d12_descriptor_heap*
+d3d12_descriptor_heap_new(ID3D12Device *device,
+                          D3D12_DESCRIPTOR_HEAP_TYPE type,
+                          D3D12_DESCRIPTOR_HEAP_FLAGS flags,
+                          uint32_t num_descriptors);
+
+/* Release the D3D12 heap and free the wrapper. */
+void
+d3d12_descriptor_heap_free(struct d3d12_descriptor_heap *heap);
+
+/* Return the underlying ID3D12DescriptorHeap. */
+ID3D12DescriptorHeap*
+d3d12_descriptor_heap_get(struct d3d12_descriptor_heap *heap);
+
+/* Fill *handle with the location of the next unused slot without reserving
+ * it. */
+void
+d2d12_descriptor_heap_get_next_handle(struct d3d12_descriptor_heap *heap,
+                                      struct d3d12_descriptor_handle *handle);
+
+/* Number of slots left past the current allocation point; slots on the
+ * free list are not counted. */
+uint32_t
+d3d12_descriptor_heap_get_remaining_handles(struct d3d12_descriptor_heap *heap);
+
+/* Reserve one slot and describe it in *handle. Returns 1 on success and 0
+ * when the heap is full. */
+uint32_t
+d3d12_descriptor_heap_alloc_handle(struct d3d12_descriptor_heap *heap,
+                                   struct d3d12_descriptor_handle *handle);
+
+/* Copy num_handles descriptors from handles[] into consecutive slots at the
+ * current allocation point and advance it past them. */
+void
+d3d12_descriptor_heap_append_handles(struct d3d12_descriptor_heap *heap,
+                                     D3D12_CPU_DESCRIPTOR_HANDLE *handles,
+                                     unsigned num_handles);
+
+/* Reset the heap to empty: allocation point back to 0, free list emptied. */
+void
+d3d12_descriptor_heap_clear(struct d3d12_descriptor_heap *heap);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp
new file mode 100644 (file)
index 0000000..44a5f01
--- /dev/null
@@ -0,0 +1,723 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_compiler.h"
+#include "d3d12_context.h"
+#include "d3d12_format.h"
+#include "d3d12_query.h"
+#include "d3d12_resource.h"
+#include "d3d12_root_signature.h"
+#include "d3d12_screen.h"
+#include "d3d12_surface.h"
+
+#include "util/u_debug.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_prim_restart.h"
+#include "util/u_math.h"
+
+extern "C" {
+#include "indices/u_primconvert.h"
+}
+
+/* Scissor rectangle spanning the full D3D12 viewport bounds. d3d12_draw_vbo()
+ * sets it when the rasterizer state has scissoring disabled or there are no
+ * viewports. */
+static const D3D12_RECT MAX_SCISSOR = { D3D12_VIEWPORT_BOUNDS_MIN,
+                                        D3D12_VIEWPORT_BOUNDS_MIN,
+                                        D3D12_VIEWPORT_BOUNDS_MAX,
+                                        D3D12_VIEWPORT_BOUNDS_MAX };
+
+static D3D12_GPU_DESCRIPTOR_HANDLE
+fill_cbv_descriptors(struct d3d12_context *ctx,
+                     struct d3d12_shader *shader,
+                     int stage)
+{
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   struct d3d12_descriptor_handle table_start;
+   d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
+
+   for (unsigned i = 0; i < shader->num_cb_bindings; i++) {
+      unsigned binding = shader->cb_bindings[i].binding;
+      struct pipe_constant_buffer *buffer = &ctx->cbufs[stage][binding];
+
+      assert(buffer->buffer_size > 0);
+      assert(buffer->buffer);
+
+      struct d3d12_resource *res = d3d12_resource(buffer->buffer);
+      d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
+      D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {};
+      cbv_desc.BufferLocation = d3d12_resource_gpu_virtual_address(res) + buffer->buffer_offset;
+      cbv_desc.SizeInBytes = min(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, 
+                                 align(buffer->buffer_size, 256));
+      d3d12_batch_reference_resource(batch, res);
+
+      struct d3d12_descriptor_handle handle;
+      d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
+      d3d12_screen(ctx->base.screen)->dev->CreateConstantBufferView(&cbv_desc, handle.cpu_handle);
+   }
+
+   return table_start.gpu_handle;
+}
+
+static D3D12_GPU_DESCRIPTOR_HANDLE
+fill_srv_descriptors(struct d3d12_context *ctx,
+                     struct d3d12_shader *shader,
+                     unsigned stage)
+{
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   struct d3d12_descriptor_handle table_start;
+
+   d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
+
+   for (int i = 0; i < shader->num_srv_bindings; i++)
+   {
+      struct d3d12_sampler_view *view;
+
+      if (shader->srv_bindings[i].binding == shader->pstipple_binding) {
+         view = (struct d3d12_sampler_view*)ctx->pstipple.sampler_view;
+      } else {
+         int index = shader->srv_bindings[i].index;
+         view = (struct d3d12_sampler_view*)ctx->sampler_views[stage][index];
+      }
+
+      if (view != NULL) {
+         descs[i] = view->handle.cpu_handle ;
+         d3d12_batch_reference_sampler_view(batch, view);
+
+         D3D12_RESOURCE_STATES state = (stage == PIPE_SHADER_FRAGMENT) ?
+                                       D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE :
+                                       D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+         if (view->base.texture->target == PIPE_BUFFER) {
+            d3d12_transition_resource_state(ctx, d3d12_resource(view->base.texture),
+                                            state);
+         } else {
+            d3d12_transition_subresources_state(ctx, d3d12_resource(view->base.texture),
+                                                view->base.u.tex.first_level, view->mip_levels,
+                                                view->base.u.tex.first_layer, view->array_size,
+                                                0, d3d12_get_format_num_planes(view->base.format),
+                                                state);
+         }
+      } else {
+         descs[i] = ctx->null_srvs[shader->srv_bindings[i].dimension].cpu_handle;
+      }
+   }
+
+   d3d12_descriptor_heap_append_handles(batch->view_heap, descs, shader->num_srv_bindings);
+
+   return table_start.gpu_handle;
+}
+
+static D3D12_GPU_DESCRIPTOR_HANDLE
+fill_sampler_descriptors(struct d3d12_context *ctx,
+                         const struct d3d12_shader_selector *shader_sel,
+                         unsigned stage)
+{
+   const struct d3d12_shader *shader = shader_sel->current;
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   struct d3d12_descriptor_handle table_start;
+
+   d2d12_descriptor_heap_get_next_handle(batch->sampler_heap, &table_start);
+
+   for (int i = 0; i < shader->num_srv_bindings; i++)
+   {
+      struct d3d12_sampler_state *sampler;
+
+      if (shader->srv_bindings[i].binding == shader->pstipple_binding) {
+         sampler = ctx->pstipple.sampler_cso;
+      } else {
+         int index = shader->srv_bindings[i].index;
+         sampler = ctx->samplers[stage][index];
+      }
+
+      if (sampler != NULL) {
+         if (sampler->is_shadow_sampler && shader_sel->compare_with_lod_bias_grad)
+            descs[i] = sampler->handle_without_shadow.cpu_handle;
+         else
+            descs[i] = sampler->handle.cpu_handle;
+      } else
+         descs[i] = ctx->null_sampler.cpu_handle;
+   }
+
+   d3d12_descriptor_heap_append_handles(batch->sampler_heap, descs, shader->num_srv_bindings);
+   return table_start.gpu_handle;
+}
+
+static unsigned
+fill_state_vars(struct d3d12_context *ctx,
+                const struct pipe_draw_info *dinfo,
+                struct d3d12_shader *shader,
+                uint32_t *values)
+{
+   unsigned size = 0;
+
+   for (unsigned j = 0; j < shader->num_state_vars; ++j) {
+      uint32_t *ptr = values + size;
+
+      switch (shader->state_vars[j].var) {
+      case D3D12_STATE_VAR_Y_FLIP:
+         ptr[0] = fui(ctx->flip_y);
+         size += 4;
+         break;
+      case D3D12_STATE_VAR_PT_SPRITE:
+         ptr[0] = fui(1.0 / ctx->viewports[0].Width);
+         ptr[1] = fui(1.0 / ctx->viewports[0].Height);
+         ptr[2] = fui(ctx->gfx_pipeline_state.rast->base.point_size);
+         ptr[3] = fui(D3D12_MAX_POINT_SIZE);
+         size += 4;
+         break;
+      case D3D12_STATE_VAR_FIRST_VERTEX:
+         ptr[0] = dinfo->index_size ? dinfo->index_bias : dinfo->start;
+         size += 4;
+         break;
+      case D3D12_STATE_VAR_DEPTH_TRANSFORM:
+         ptr[0] = fui(2.0f * ctx->viewport_states[0].scale[2]);
+         ptr[1] = fui(ctx->viewport_states[0].translate[2] - ctx->viewport_states[0].scale[2]);
+         size += 4;
+         break;
+      default:
+         unreachable("unknown state variable");
+      }
+   }
+
+   return size;
+}
+
+static bool
+check_descriptors_left(struct d3d12_context *ctx)
+{
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   unsigned needed_descs = 0;
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      struct d3d12_shader_selector *shader = ctx->gfx_stages[i];
+
+      if (!shader)
+         continue;
+
+      needed_descs += shader->current->num_cb_bindings;
+      needed_descs += shader->current->num_srv_bindings;
+   }
+
+   if (d3d12_descriptor_heap_get_remaining_handles(batch->view_heap) < needed_descs)
+      return false;
+
+   needed_descs = 0;
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      struct d3d12_shader_selector *shader = ctx->gfx_stages[i];
+
+      if (!shader)
+         continue;
+
+      needed_descs += shader->current->num_srv_bindings;
+   }
+
+   if (d3d12_descriptor_heap_get_remaining_handles(batch->sampler_heap) < needed_descs)
+      return false;
+
+   return true;
+}
+
+static void
+set_graphics_root_parameters(struct d3d12_context *ctx,
+                             const struct pipe_draw_info *dinfo)
+{
+   unsigned num_params = 0;
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      if (!ctx->gfx_stages[i])
+         continue;
+
+      struct d3d12_shader_selector *shader_sel = ctx->gfx_stages[i];
+      struct d3d12_shader *shader = shader_sel->current;
+      uint64_t dirty = ctx->shader_dirty[i];
+      assert(shader);
+
+      if (shader->num_cb_bindings > 0) {
+         if (dirty & D3D12_SHADER_DIRTY_CONSTBUF)
+            ctx->cmdlist->SetGraphicsRootDescriptorTable(num_params, fill_cbv_descriptors(ctx, shader, i));
+         num_params++;
+      }
+      if (shader->num_srv_bindings > 0) {
+         if (dirty & D3D12_SHADER_DIRTY_SAMPLER_VIEWS)
+            ctx->cmdlist->SetGraphicsRootDescriptorTable(num_params, fill_srv_descriptors(ctx, shader, i));
+         num_params++;
+         if (dirty & D3D12_SHADER_DIRTY_SAMPLERS)
+            ctx->cmdlist->SetGraphicsRootDescriptorTable(num_params, fill_sampler_descriptors(ctx, shader_sel, i));
+         num_params++;
+      }
+      /* TODO Don't always update state vars */
+      if (shader->num_state_vars > 0) {
+         uint32_t constants[D3D12_MAX_STATE_VARS * 4];
+         unsigned size = fill_state_vars(ctx, dinfo, shader, constants);
+         ctx->cmdlist->SetGraphicsRoot32BitConstants(num_params, size, constants, 0);
+         num_params++;
+      }
+   }
+}
+
+/* Enable or disable the fake stream-output buffers according to the
+ * geometry shader's stream_output_factor.
+ *
+ * The factor is only taken into account when stream-output targets are
+ * bound and a geometry shader is part of the pipeline state; a factor
+ * above 1 enables the fake buffers, anything else disables them. Returns
+ * the result of the enable/disable call.
+ */
+static bool
+validate_stream_output_targets(struct d3d12_context *ctx)
+{
+   struct d3d12_shader *gs = ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY];
+   unsigned factor = 0;
+
+   if (ctx->gfx_pipeline_state.num_so_targets && gs)
+      factor = gs->key.gs.stream_output_factor;
+
+   if (factor <= 1)
+      return d3d12_disable_fake_so_buffers(ctx);
+
+   return d3d12_enable_fake_so_buffers(ctx, factor);
+}
+
+static D3D_PRIMITIVE_TOPOLOGY
+topology(enum pipe_prim_type prim_type)
+{
+   switch (prim_type) {
+   case PIPE_PRIM_POINTS:
+      return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
+
+   case PIPE_PRIM_LINES:
+      return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
+
+   case PIPE_PRIM_LINE_STRIP:
+      return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
+
+   case PIPE_PRIM_TRIANGLES:
+      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
+
+   case PIPE_PRIM_LINES_ADJACENCY:
+      return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
+
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
+
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
+
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
+
+/*
+   case PIPE_PRIM_PATCHES:
+      return D3D_PRIMITIVE_TOPOLOGY_PATCHLIST;
+*/
+
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_QUAD_STRIP:
+      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; /* HACK: this is just wrong! */
+
+   default:
+      debug_printf("pipe_prim_type: %s\n", u_prim_name(prim_type));
+      unreachable("unexpected enum pipe_prim_type");
+   }
+}
+
+/* Translate an index size in bytes (1, 2 or 4) to the matching unsigned
+ * integer DXGI format. Any other size is unreachable. */
+static DXGI_FORMAT
+ib_format(unsigned index_size)
+{
+   if (index_size == 1)
+      return DXGI_FORMAT_R8_UINT;
+   if (index_size == 2)
+      return DXGI_FORMAT_R16_UINT;
+   if (index_size == 4)
+      return DXGI_FORMAT_R32_UINT;
+
+   unreachable("unexpected index-buffer size");
+}
+
+/* Issue an extra draw with the rasterizer's 'twoface_back' state bound,
+ * then rebind the original rasterizer state 'rast'. */
+static void
+twoface_emulation(struct d3d12_context *ctx,
+                  struct d3d12_rasterizer_state *rast,
+                  const struct pipe_draw_info *dinfo)
+{
+   struct pipe_context *pctx = &ctx->base;
+
+   /* draw backfaces */
+   pctx->bind_rasterizer_state(pctx, rast->twoface_back);
+   d3d12_draw_vbo(pctx, dinfo);
+
+   /* restore real state */
+   pctx->bind_rasterizer_state(pctx, rast);
+}
+
+static void
+transition_surface_subresources_state(struct d3d12_context *ctx,
+                                      struct pipe_surface *psurf,
+                                      struct pipe_resource *pres,
+                                      D3D12_RESOURCE_STATES state)
+{
+   struct d3d12_resource *res = d3d12_resource(pres);
+   unsigned start_layer, num_layers;
+   if (!d3d12_subresource_id_uses_layer(res->base.target)) {
+      start_layer = 0;
+      num_layers = 1;
+   } else {
+      start_layer = psurf->u.tex.first_layer;
+      num_layers = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
+   }
+   d3d12_transition_subresources_state(ctx, res,
+                                       psurf->u.tex.level, 1,
+                                       start_layer, num_layers,
+                                       0, d3d12_get_format_num_planes(psurf->format),
+                                       state);
+}
+
+/* True for the primitive types d3d12_draw_vbo() draws directly; every
+ * other type is routed through the primconvert path there. */
+static bool
+prim_supported(enum pipe_prim_type prim_type)
+{
+   bool supported = false;
+
+   switch (prim_type) {
+   case PIPE_PRIM_POINTS:
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_TRIANGLES:
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_LINES_ADJACENCY:
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      supported = true;
+      break;
+
+   default:
+      break;
+   }
+
+   return supported;
+}
+
+/* Return the geometry-shader selector when one is bound and it is not
+ * flagged as a GS variant; otherwise return the vertex-shader selector
+ * (which may be NULL). */
+static inline struct d3d12_shader_selector *
+d3d12_last_vertex_stage(struct d3d12_context *ctx)
+{
+   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
+
+   if (gs && !gs->is_gs_variant)
+      return gs;
+
+   return ctx->gfx_stages[PIPE_SHADER_VERTEX];
+}
+
+void
+d3d12_draw_vbo(struct pipe_context *pctx,
+               const struct pipe_draw_info *dinfo)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_batch *batch;
+   struct pipe_resource *index_buffer = NULL;
+   unsigned index_offset = 0;
+   enum d3d12_surface_conversion_mode conversion_modes[PIPE_MAX_COLOR_BUFS] = {0};
+
+   if (!prim_supported(dinfo->mode) ||
+       dinfo->index_size == 1 ||
+       (dinfo->primitive_restart && dinfo->restart_index != 0xffff &&
+        dinfo->restart_index != 0xffffffff)) {
+
+      if (!dinfo->primitive_restart &&
+          !u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
+         return;
+
+      ctx->initial_api_prim = dinfo->mode;
+      util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->gfx_pipeline_state.rast->base);
+      util_primconvert_draw_vbo(ctx->primconvert, dinfo);
+      return;
+   }
+
+   for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
+      if (ctx->fb.cbufs[i]) {
+         struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
+         conversion_modes[i] = d3d12_surface_update_pre_draw(surface, d3d12_rtv_format(ctx, i));
+         if (conversion_modes[i] != D3D12_SURFACE_CONVERSION_NONE)
+            ctx->cmdlist_dirty |= D3D12_DIRTY_FRAMEBUFFER;
+      }
+   }
+
+   struct d3d12_rasterizer_state *rast = ctx->gfx_pipeline_state.rast;
+   if (rast->twoface_back) {
+      enum pipe_prim_type saved_mode = ctx->initial_api_prim;
+      twoface_emulation(ctx, rast, dinfo);
+      ctx->initial_api_prim = saved_mode;
+   }
+
+   if (ctx->pstipple.enabled)
+      ctx->shader_dirty[PIPE_SHADER_FRAGMENT] |= D3D12_SHADER_DIRTY_SAMPLER_VIEWS |
+                                                 D3D12_SHADER_DIRTY_SAMPLERS;
+
+   /* this should *really* be fixed at a higher level than here! */
+   enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode);
+   if (reduced_prim == PIPE_PRIM_TRIANGLES &&
+       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT_AND_BACK)
+      return;
+
+   if (ctx->gfx_pipeline_state.prim_type != dinfo->mode) {
+      ctx->gfx_pipeline_state.prim_type = dinfo->mode;
+      ctx->state_dirty |= D3D12_DIRTY_PRIM_MODE;
+   }
+
+   d3d12_select_shader_variants(ctx, dinfo);
+   d3d12_validate_queries(ctx);
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      struct d3d12_shader *shader = ctx->gfx_stages[i] ? ctx->gfx_stages[i]->current : NULL;
+      if (ctx->gfx_pipeline_state.stages[i] != shader) {
+         ctx->gfx_pipeline_state.stages[i] = shader;
+         ctx->state_dirty |= D3D12_DIRTY_SHADER;
+      }
+   }
+
+   /* Reset to an invalid value after it's been used */
+   ctx->initial_api_prim = PIPE_PRIM_MAX;
+
+   /* Copy the stream output info from the current vertex/geometry shader */
+   if (ctx->state_dirty & D3D12_DIRTY_SHADER) {
+      struct d3d12_shader_selector *sel = d3d12_last_vertex_stage(ctx);
+      if (sel) {
+         ctx->gfx_pipeline_state.so_info = sel->so_info;
+      } else {
+         memset(&ctx->gfx_pipeline_state.so_info, 0, sizeof(sel->so_info));
+      }
+   }
+   if (!validate_stream_output_targets(ctx)) {
+      debug_printf("validate_stream_output_targets() failed\n");
+      return;
+   }
+
+   D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value =
+      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
+   if (dinfo->index_size > 0) {
+      assert(dinfo->index_size != 1);
+
+      if (dinfo->has_user_indices) {
+         if (!util_upload_index_buffer(pctx, dinfo, &index_buffer,
+             &index_offset, 4)) {
+            debug_printf("util_upload_index_buffer() failed\n");
+            return;
+         }
+      } else {
+         index_buffer = dinfo->index.resource;
+      }
+
+      if (dinfo->primitive_restart) {
+         assert(dinfo->restart_index == 0xffff ||
+                dinfo->restart_index == 0xffffffff);
+         ib_strip_cut_value = dinfo->restart_index == 0xffff ?
+            D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF :
+            D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
+      }
+   }
+
+   if (ctx->gfx_pipeline_state.ib_strip_cut_value != ib_strip_cut_value) {
+      ctx->gfx_pipeline_state.ib_strip_cut_value = ib_strip_cut_value;
+      ctx->state_dirty |= D3D12_DIRTY_STRIP_CUT_VALUE;
+   }
+
+   if (!ctx->gfx_pipeline_state.root_signature || ctx->state_dirty & D3D12_DIRTY_SHADER) {
+      ID3D12RootSignature *root_signature = d3d12_get_root_signature(ctx);
+      if (ctx->gfx_pipeline_state.root_signature != root_signature) {
+         ctx->gfx_pipeline_state.root_signature = root_signature;
+         ctx->state_dirty |= D3D12_DIRTY_ROOT_SIGNATURE;
+         for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
+            ctx->shader_dirty[i] |= D3D12_SHADER_DIRTY_ALL;
+      }
+   }
+
+   if (!ctx->current_pso || ctx->state_dirty & D3D12_DIRTY_PSO) {
+      ctx->current_pso = d3d12_get_gfx_pipeline_state(ctx);
+      assert(ctx->current_pso);
+   }
+
+   ctx->cmdlist_dirty |= ctx->state_dirty;
+
+   if (!check_descriptors_left(ctx))
+      d3d12_flush_cmdlist(ctx);
+   batch = d3d12_current_batch(ctx);
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_ROOT_SIGNATURE) {
+      d3d12_batch_reference_object(batch, ctx->gfx_pipeline_state.root_signature);
+      ctx->cmdlist->SetGraphicsRootSignature(ctx->gfx_pipeline_state.root_signature);
+   }
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_PSO) {
+      assert(ctx->current_pso);
+      d3d12_batch_reference_object(batch, ctx->current_pso);
+      ctx->cmdlist->SetPipelineState(ctx->current_pso);
+   }
+
+   set_graphics_root_parameters(ctx, dinfo);
+
+   bool need_zero_one_depth_range = d3d12_need_zero_one_depth_range(ctx);
+   if (need_zero_one_depth_range != ctx->need_zero_one_depth_range) {
+      ctx->cmdlist_dirty |= D3D12_DIRTY_VIEWPORT;
+      ctx->need_zero_one_depth_range = need_zero_one_depth_range;
+   }
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_VIEWPORT) {
+      if (ctx->need_zero_one_depth_range) {
+         D3D12_VIEWPORT viewports[PIPE_MAX_VIEWPORTS];
+         for (int i = 0; i < ctx->num_viewports; ++i) {
+            viewports[i] = ctx->viewports[i];
+            viewports[i].MinDepth = 0.0f;
+            viewports[i].MaxDepth = 1.0f;
+         }
+         ctx->cmdlist->RSSetViewports(ctx->num_viewports, viewports);
+      } else
+         ctx->cmdlist->RSSetViewports(ctx->num_viewports, ctx->viewports);
+   }
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_SCISSOR) {
+      if (ctx->gfx_pipeline_state.rast->base.scissor && ctx->num_viewports > 0)
+         ctx->cmdlist->RSSetScissorRects(ctx->num_viewports, ctx->scissors);
+      else
+         ctx->cmdlist->RSSetScissorRects(1, &MAX_SCISSOR);
+   }
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_BLEND_COLOR) {
+      unsigned blend_factor_flags = ctx->gfx_pipeline_state.blend->blend_factor_flags;
+      if (blend_factor_flags & (D3D12_BLEND_FACTOR_COLOR | D3D12_BLEND_FACTOR_ANY)) {
+         ctx->cmdlist->OMSetBlendFactor(ctx->blend_factor);
+      } else if (blend_factor_flags & D3D12_BLEND_FACTOR_ALPHA) {
+         float alpha_const[4] = { ctx->blend_factor[3], ctx->blend_factor[3],
+                                 ctx->blend_factor[3], ctx->blend_factor[3] };
+         ctx->cmdlist->OMSetBlendFactor(alpha_const);
+      }
+   }
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_STENCIL_REF)
+      ctx->cmdlist->OMSetStencilRef(ctx->stencil_ref.ref_value[0]);
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_PRIM_MODE)
+      ctx->cmdlist->IASetPrimitiveTopology(topology(dinfo->mode));
+
+   for (unsigned i = 0; i < ctx->num_vbs; ++i) {
+      if (ctx->vbs[i].buffer.resource) {
+         struct d3d12_resource *res = d3d12_resource(ctx->vbs[i].buffer.resource);
+         d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
+         if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS)
+            d3d12_batch_reference_resource(batch, res);
+      }
+   }
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS)
+      ctx->cmdlist->IASetVertexBuffers(0, ctx->num_vbs, ctx->vbvs);
+
+   if (index_buffer) {
+      D3D12_INDEX_BUFFER_VIEW ibv;
+      struct d3d12_resource *res = d3d12_resource(index_buffer);
+      ibv.BufferLocation = d3d12_resource_gpu_virtual_address(res) + index_offset;
+      ibv.SizeInBytes = res->base.width0 - index_offset;
+      ibv.Format = ib_format(dinfo->index_size);
+      d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER);
+      if (ctx->cmdlist_dirty & D3D12_DIRTY_INDEX_BUFFER ||
+          memcmp(&ctx->ibv, &ibv, sizeof(D3D12_INDEX_BUFFER_VIEW)) != 0) {
+         ctx->ibv = ibv;
+         d3d12_batch_reference_resource(batch, res);
+         ctx->cmdlist->IASetIndexBuffer(&ibv);
+      }
+
+      if (dinfo->has_user_indices)
+         pipe_resource_reference(&index_buffer, NULL);
+   }
+
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_FRAMEBUFFER) {
+      D3D12_CPU_DESCRIPTOR_HANDLE render_targets[PIPE_MAX_COLOR_BUFS] = {};
+      D3D12_CPU_DESCRIPTOR_HANDLE *depth_desc = NULL, tmp_desc;
+      for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
+         if (ctx->fb.cbufs[i]) {
+            struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
+            render_targets[i] = d3d12_surface_get_handle(surface, conversion_modes[i]);
+            d3d12_batch_reference_surface_texture(batch, surface);
+         } else
+            render_targets[i] = ctx->null_rtv.cpu_handle;
+      }
+      if (ctx->fb.zsbuf) {
+         struct d3d12_surface *surface = d3d12_surface(ctx->fb.zsbuf);
+         tmp_desc = surface->desc_handle.cpu_handle;
+         d3d12_batch_reference_surface_texture(batch, surface);
+         depth_desc = &tmp_desc;
+      }
+      ctx->cmdlist->OMSetRenderTargets(ctx->fb.nr_cbufs, render_targets, FALSE, depth_desc);
+   }
+
+   struct pipe_stream_output_target **so_targets = ctx->fake_so_buffer_factor ? ctx->fake_so_targets
+                                                                              : ctx->so_targets;
+   D3D12_STREAM_OUTPUT_BUFFER_VIEW *so_buffer_views = ctx->fake_so_buffer_factor ? ctx->fake_so_buffer_views
+                                                                                 : ctx->so_buffer_views;
+   for (int i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) {
+      struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)so_targets[i];
+
+      if (!target)
+         continue;
+
+      struct d3d12_resource *so_buffer = d3d12_resource(target->base.buffer);
+      struct d3d12_resource *fill_buffer = d3d12_resource(target->fill_buffer);
+
+      d3d12_resource_make_writeable(pctx, target->base.buffer);
+
+      if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT) {
+         d3d12_batch_reference_resource(batch, so_buffer);
+         d3d12_batch_reference_resource(batch, fill_buffer);
+      }
+
+      d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT);
+      d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT);
+   }
+   if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT)
+      ctx->cmdlist->SOSetTargets(0, 4, so_buffer_views);
+
+   for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
+      struct pipe_surface *psurf = ctx->fb.cbufs[i];
+      if (!psurf)
+         continue;
+
+      struct pipe_resource *pres = conversion_modes[i] == D3D12_SURFACE_CONVERSION_BGRA_UINT ?
+                                      d3d12_surface(psurf)->rgba_texture : psurf->texture;
+      transition_surface_subresources_state(ctx, psurf, pres,
+         D3D12_RESOURCE_STATE_RENDER_TARGET);
+   }
+   if (ctx->fb.zsbuf) {
+      struct pipe_surface *psurf = ctx->fb.zsbuf;
+      transition_surface_subresources_state(ctx, psurf, psurf->texture,
+         D3D12_RESOURCE_STATE_DEPTH_WRITE);
+   }
+
+   d3d12_apply_resource_states(ctx);
+
+   if (dinfo->index_size > 0)
+      ctx->cmdlist->DrawIndexedInstanced(dinfo->count, dinfo->instance_count,
+                                         dinfo->start, dinfo->index_bias,
+                                         dinfo->start_instance);
+   else
+      ctx->cmdlist->DrawInstanced(dinfo->count, dinfo->instance_count,
+                                  dinfo->start, dinfo->start_instance);
+
+   ctx->state_dirty = 0;
+
+   if (index_buffer)
+      ctx->cmdlist_dirty = 0;
+   else
+      ctx->cmdlist_dirty &= D3D12_DIRTY_INDEX_BUFFER;
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
+      ctx->shader_dirty[i] = 0;
+
+   for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
+      if (ctx->fb.cbufs[i]) {
+         struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
+         d3d12_surface_update_post_draw(surface, conversion_modes[i]);
+      }
+   }
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_fence.cpp b/src/gallium/drivers/d3d12/d3d12_fence.cpp
new file mode 100644 (file)
index 0000000..7b6f6a6
--- /dev/null
@@ -0,0 +1,93 @@
+
+#include "d3d12_fence.h"
+
+#include "d3d12_context.h"
+#include "d3d12_screen.h"
+
+#include "util/u_memory.h"
+
+/* Tear down a fence: close its completion event, if one was created, and
+ * release the fence's memory. */
+static void
+destroy_fence(struct d3d12_fence *fence)
+{
+   HANDLE completion_event = fence->event;
+
+   if (completion_event != NULL)
+      CloseHandle(completion_event);
+
+   FREE(fence);
+}
+
+struct d3d12_fence *
+d3d12_create_fence(struct d3d12_screen *screen, struct d3d12_context *ctx)
+{
+   struct d3d12_fence *ret = CALLOC_STRUCT(d3d12_fence);
+   if (!ret) {
+      debug_printf("CALLOC_STRUCT failed\n");
+      return NULL;
+   }
+
+   ret->cmdqueue_fence = ctx->cmdqueue_fence;
+   ret->value = ++ctx->fence_value;
+   ret->event = CreateEvent(NULL, FALSE, FALSE, NULL);
+   if (FAILED(ctx->cmdqueue_fence->SetEventOnCompletion(ret->value, ret->event)))
+      goto fail;
+   if (FAILED(screen->cmdqueue->Signal(ctx->cmdqueue_fence, ret->value)))
+      goto fail;
+
+   pipe_reference_init(&ret->reference, 1);
+   return ret;
+
+fail:
+   destroy_fence(ret);
+   return NULL;
+}
+
+/* Point *ptr at fence. Reference counts are adjusted through
+ * pipe_reference(); when it reports that the fence previously stored in
+ * *ptr should be destroyed, that fence is destroyed here. */
+void
+d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence)
+{
+   struct d3d12_fence *old_fence = *ptr;
+
+   if (pipe_reference(&old_fence->reference, &fence->reference))
+      destroy_fence(old_fence);
+
+   *ptr = fence;
+}
+
+/* pipe_screen::fence_reference hook: forwards to d3d12_fence_reference()
+ * after converting the opaque gallium handles to driver fences. */
+static void
+fence_reference(struct pipe_screen *pscreen,
+                struct pipe_fence_handle **pptr,
+                struct pipe_fence_handle *pfence)
+{
+   struct d3d12_fence **dst = (struct d3d12_fence **)pptr;
+   struct d3d12_fence *src = d3d12_fence(pfence);
+
+   d3d12_fence_reference(dst, src);
+}
+
+bool
+d3d12_fence_finish(struct d3d12_fence *fence, uint64_t timeout_ns)
+{
+   if (fence->signaled)
+      return true;
+   
+   bool complete = fence->cmdqueue_fence->GetCompletedValue() >= fence->value;
+   if (!complete && timeout_ns) {
+      DWORD timeout_ms = (timeout_ns == PIPE_TIMEOUT_INFINITE) ? INFINITE : timeout_ns * 1000;
+      complete = WaitForSingleObject(fence->event, timeout_ms) == WAIT_OBJECT_0;
+   }
+
+   fence->signaled = complete;
+   return complete;
+}
+
+/* pipe_screen::fence_finish hook. Returns the result of
+ * d3d12_fence_finish(); when the fence completed and a context was
+ * supplied, every submitted batch of that context is reset as well. */
+static bool
+fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx,
+             struct pipe_fence_handle *pfence, uint64_t timeout_ns)
+{
+   struct d3d12_fence *fence = d3d12_fence(pfence);
+
+   if (!d3d12_fence_finish(fence, timeout_ns))
+      return false;
+
+   if (pctx) {
+      struct d3d12_context *ctx = d3d12_context(pctx);
+      d3d12_foreach_submitted_batch(ctx, batch)
+         d3d12_reset_batch(ctx, batch, 0);
+   }
+
+   return true;
+}
+
+/* Install the fence entry points of this driver on the given screen. */
+void
+d3d12_screen_fence_init(struct pipe_screen *pscreen)
+{
+   pscreen->fence_finish = fence_finish;
+   pscreen->fence_reference = fence_reference;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_fence.h b/src/gallium/drivers/d3d12/d3d12_fence.h
new file mode 100644 (file)
index 0000000..d14204e
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_FENCE_H
+#define D3D12_FENCE_H
+
+#include "util/u_inlines.h"
+
+#include <d3d12.h>
+
+struct pipe_screen;
+struct d3d12_screen;
+
+/* A point on a command-queue fence timeline that can be polled or waited
+ * on from the CPU (see d3d12_fence_finish()). */
+struct d3d12_fence {
+   /* Reference count; d3d12_create_fence() initializes it to 1. */
+   struct pipe_reference reference;
+   /* Fence object copied from the creating context; d3d12_fence.cpp neither
+    * takes nor releases a COM reference on it. */
+   ID3D12Fence *cmdqueue_fence;
+   /* Win32 event registered with SetEventOnCompletion() for `value`;
+    * closed when the fence is destroyed. */
+   HANDLE event;
+   /* Value of cmdqueue_fence that marks this fence as complete. */
+   uint64_t value;
+   /* Cached result of the last d3d12_fence_finish() call; once true,
+    * later calls return immediately. */
+   bool signaled;
+};
+
+/* Convert an opaque gallium fence handle to the driver's fence type. */
+static inline struct d3d12_fence *
+d3d12_fence(struct pipe_fence_handle *pfence)
+{
+   struct d3d12_fence *fence = (struct d3d12_fence *)pfence;
+
+   return fence;
+}
+
+struct d3d12_fence *
+d3d12_create_fence(struct d3d12_screen *screen, struct d3d12_context *ctx);
+
+void
+d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence);
+
+bool
+d3d12_fence_finish(struct d3d12_fence *fence, uint64_t timeout_ns);
+
+void
+d3d12_screen_fence_init(struct pipe_screen *pscreen);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_format.c b/src/gallium/drivers/d3d12/d3d12_format.c
new file mode 100644 (file)
index 0000000..302b182
--- /dev/null
@@ -0,0 +1,298 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_format.h"
+
+#include "pipe/p_format.h"
+#include "util/format/u_format.h"
+#include "util/u_math.h"
+
+/* Lookup table from gallium pipe_format to the DXGI format used for the
+ * resource. It is indexed by enum pipe_format; formats without an explicit
+ * entry are left zero-initialized. */
+static const DXGI_FORMAT formats[PIPE_FORMAT_COUNT] = {
+#define MAP_FORMAT_NORM(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _UNORM] = DXGI_FORMAT_ ## FMT ## _UNORM, \
+   [PIPE_FORMAT_ ## FMT ## _SNORM] = DXGI_FORMAT_ ## FMT ## _SNORM,
+
+#define MAP_FORMAT_INT(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _UINT] = DXGI_FORMAT_ ## FMT ## _UINT, \
+   [PIPE_FORMAT_ ## FMT ## _SINT] = DXGI_FORMAT_ ## FMT ## _SINT,
+
+#define MAP_FORMAT_SRGB(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _SRGB] = DXGI_FORMAT_ ## FMT ## _UNORM_SRGB,
+
+#define MAP_FORMAT_FLOAT(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _FLOAT] = DXGI_FORMAT_ ## FMT ## _FLOAT,
+
+/* Luminance, intensity and luminance-alpha formats are stored in R / RG
+ * formats of the same width. */
+#define MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE) \
+   [PIPE_FORMAT_L ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+   [PIPE_FORMAT_I ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+   [PIPE_FORMAT_L ## BITS ## A ## BITS ## _ ## TYPE] = \
+          DXGI_FORMAT_R ## BITS ## G ## BITS ## _ ## TYPE,
+
+/* Same as above, plus the alpha-only format stored in an R format. */
+#define MAP_EMU_FORMAT(BITS, TYPE) \
+   [PIPE_FORMAT_A ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+   MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE)
+
+   MAP_FORMAT_NORM(R8)
+   MAP_FORMAT_INT(R8)
+
+   MAP_FORMAT_NORM(R8G8)
+   MAP_FORMAT_INT(R8G8)
+
+   MAP_FORMAT_NORM(R8G8B8A8)
+   MAP_FORMAT_INT(R8G8B8A8)
+   MAP_FORMAT_SRGB(R8G8B8A8)
+
+   [PIPE_FORMAT_B8G8R8X8_UNORM] = DXGI_FORMAT_B8G8R8X8_UNORM,
+   [PIPE_FORMAT_B8G8R8A8_UNORM] = DXGI_FORMAT_B8G8R8A8_UNORM,
+
+   MAP_FORMAT_SRGB(B8G8R8A8)
+
+   MAP_FORMAT_INT(R32)
+   MAP_FORMAT_FLOAT(R32)
+   MAP_FORMAT_INT(R32G32)
+   MAP_FORMAT_FLOAT(R32G32)
+   MAP_FORMAT_INT(R32G32B32)
+   MAP_FORMAT_FLOAT(R32G32B32)
+   MAP_FORMAT_INT(R32G32B32A32)
+   MAP_FORMAT_FLOAT(R32G32B32A32)
+
+   MAP_FORMAT_NORM(R16)
+   MAP_FORMAT_INT(R16)
+   MAP_FORMAT_FLOAT(R16)
+
+   MAP_FORMAT_NORM(R16G16)
+   MAP_FORMAT_INT(R16G16)
+   MAP_FORMAT_FLOAT(R16G16)
+
+   MAP_FORMAT_NORM(R16G16B16A16)
+   MAP_FORMAT_INT(R16G16B16A16)
+   MAP_FORMAT_FLOAT(R16G16B16A16)
+
+   /* A8_UNORM has a direct DXGI equivalent, so only the L/I/LA variants of
+    * the 8-bit UNORM family go through the emulation mapping. */
+   [PIPE_FORMAT_A8_UNORM] = DXGI_FORMAT_A8_UNORM,
+   MAP_EMU_FORMAT_NO_ALPHA(8, UNORM)
+   MAP_EMU_FORMAT(8, SNORM)
+   MAP_EMU_FORMAT(8, SINT)
+   MAP_EMU_FORMAT(8, UINT)
+   MAP_EMU_FORMAT(16, UNORM)
+   MAP_EMU_FORMAT(16, SNORM)
+   MAP_EMU_FORMAT(16, SINT)
+   MAP_EMU_FORMAT(16, UINT)
+   MAP_EMU_FORMAT(16, FLOAT)
+   MAP_EMU_FORMAT(32, SINT)
+   MAP_EMU_FORMAT(32, UINT)
+   MAP_EMU_FORMAT(32, FLOAT)
+
+   [PIPE_FORMAT_R9G9B9E5_FLOAT] = DXGI_FORMAT_R9G9B9E5_SHAREDEXP,
+   [PIPE_FORMAT_R11G11B10_FLOAT] = DXGI_FORMAT_R11G11B10_FLOAT,
+   [PIPE_FORMAT_R10G10B10A2_UINT] = DXGI_FORMAT_R10G10B10A2_UINT,
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = DXGI_FORMAT_R10G10B10A2_UNORM,
+
+   [PIPE_FORMAT_DXT1_RGB] = DXGI_FORMAT_BC1_UNORM,
+   [PIPE_FORMAT_DXT1_RGBA] = DXGI_FORMAT_BC1_UNORM,
+   [PIPE_FORMAT_DXT3_RGBA] = DXGI_FORMAT_BC2_UNORM,
+   [PIPE_FORMAT_DXT5_RGBA] = DXGI_FORMAT_BC3_UNORM,
+
+   [PIPE_FORMAT_DXT1_SRGB] = DXGI_FORMAT_BC1_UNORM_SRGB,
+   [PIPE_FORMAT_DXT1_SRGBA] = DXGI_FORMAT_BC1_UNORM_SRGB,
+   [PIPE_FORMAT_DXT3_SRGBA] = DXGI_FORMAT_BC2_UNORM_SRGB,
+   [PIPE_FORMAT_DXT5_SRGBA] = DXGI_FORMAT_BC3_UNORM_SRGB,
+
+   [PIPE_FORMAT_RGTC1_UNORM] = DXGI_FORMAT_BC4_UNORM,
+   [PIPE_FORMAT_RGTC1_SNORM] = DXGI_FORMAT_BC4_SNORM,
+   [PIPE_FORMAT_RGTC2_UNORM] = DXGI_FORMAT_BC5_UNORM,
+   [PIPE_FORMAT_RGTC2_SNORM] = DXGI_FORMAT_BC5_SNORM,
+
+   /* Depth/stencil formats map to typeless formats here; the typed view
+    * formats are chosen by d3d12_get_resource_rt_format() and
+    * d3d12_get_resource_srv_format(). */
+   [PIPE_FORMAT_Z32_FLOAT] = DXGI_FORMAT_R32_TYPELESS,
+   [PIPE_FORMAT_Z16_UNORM] = DXGI_FORMAT_R16_TYPELESS,
+   [PIPE_FORMAT_Z24X8_UNORM] = DXGI_FORMAT_R24G8_TYPELESS,
+   [PIPE_FORMAT_X24S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS,
+
+   [PIPE_FORMAT_Z24_UNORM_S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS,
+   [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS,
+   [PIPE_FORMAT_X32_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS,
+};
+
+/* Look up the DXGI resource format for a gallium format. Formats that have
+ * no explicit entry in the `formats` table yield the table's
+ * zero-initialized value (0, i.e. DXGI_FORMAT_UNKNOWN). */
+DXGI_FORMAT
+d3d12_get_format(enum pipe_format format)
+{
+   const DXGI_FORMAT dxgi_format = formats[format];
+
+   return dxgi_format;
+}
+
+/* Return the DXGI format used when a resource of gallium format `f` is
+ * bound as a render target or depth/stencil target. Depth/stencil formats
+ * get their typed D* equivalent; everything else uses d3d12_get_format(). */
+DXGI_FORMAT
+d3d12_get_resource_rt_format(enum pipe_format f)
+{
+   if (f == PIPE_FORMAT_Z16_UNORM)
+      return DXGI_FORMAT_D16_UNORM;
+
+   if (f == PIPE_FORMAT_Z32_FLOAT)
+      return DXGI_FORMAT_D32_FLOAT;
+
+   if (f == PIPE_FORMAT_Z24X8_UNORM ||
+       f == PIPE_FORMAT_X24S8_UINT ||
+       f == PIPE_FORMAT_Z24_UNORM_S8_UINT)
+      return DXGI_FORMAT_D24_UNORM_S8_UINT;
+
+   if (f == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
+       f == PIPE_FORMAT_X32_S8X24_UINT)
+      return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
+
+   return d3d12_get_format(f);
+}
+
+/* Return the DXGI format used when a resource of gallium format `f` is
+ * read through a shader resource view. Depth/stencil formats select the
+ * typed view of the plane being read; all other formats use
+ * d3d12_get_format(), except A8_UNORM on buffers. */
+DXGI_FORMAT
+d3d12_get_resource_srv_format(enum pipe_format f, enum pipe_texture_target target)
+{
+   /* A8_UNORM is not supported for buffer SRV */
+   if (f == PIPE_FORMAT_A8_UNORM && target == PIPE_BUFFER)
+      return DXGI_FORMAT_R8_UNORM;
+
+   switch (f) {
+   case PIPE_FORMAT_Z16_UNORM:
+      return DXGI_FORMAT_R16_UNORM;
+
+   case PIPE_FORMAT_Z32_FLOAT:
+      return DXGI_FORMAT_R32_FLOAT;
+
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
+
+   case PIPE_FORMAT_X24S8_UINT:
+      return DXGI_FORMAT_X24_TYPELESS_G8_UINT;
+
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+
+   case PIPE_FORMAT_X32_S8X24_UINT:
+      return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT;
+
+   default:
+      return d3d12_get_format(f);
+   }
+}
+
+/* Declare a static table <name>_SWIZZLE with PIPE_SWIZZLE_MAX entries. The
+ * first four entries are the selectors given as X, Y, Z and W; the
+ * following three are always PIPE_SWIZZLE_0, PIPE_SWIZZLE_1 and
+ * PIPE_SWIZZLE_NONE. */
+#define DEF_SWIZZLE(name, X, Y, Z, W) \
+   static const enum pipe_swizzle name ## _SWIZZLE[PIPE_SWIZZLE_MAX] = \
+      { PIPE_SWIZZLE_ ## X, PIPE_SWIZZLE_ ## Y, PIPE_SWIZZLE_ ## Z, PIPE_SWIZZLE_ ## W, \
+        PIPE_SWIZZLE_0, PIPE_SWIZZLE_1, PIPE_SWIZZLE_NONE }
+
+/* Pick the swizzle table and plane slice to use when sampling a resource
+ * of the given gallium format.
+ *
+ * The default is the identity swizzle on plane 0. DXT1 RGB formats force
+ * alpha to one. For non-sRGB formats, emulated alpha / intensity /
+ * luminance formats, depth formats and stencil-only formats get a
+ * dedicated swizzle; stencil-only formats additionally select plane 1.
+ * The returned swizzle points at a static table. */
+struct d3d12_format_info
+d3d12_get_format_info(enum pipe_format pformat, enum pipe_texture_target target)
+{
+   DEF_SWIZZLE(IDENTITY, X, Y, Z, W);
+   DEF_SWIZZLE(RGB1, X, Y, Z, 1);
+   DEF_SWIZZLE(ALPHA, 0, 0, 0, W);
+   DEF_SWIZZLE(BUFFER, 0, 0, 0, X);
+   DEF_SWIZZLE(INTENSITY, X, X, X, X);
+   DEF_SWIZZLE(LUMINANCE, X, X, X, 1);
+   DEF_SWIZZLE(LUMINANCE_ALPHA, X, X, X, Y);
+   DEF_SWIZZLE(DEPTH, X, X, X, X);
+   DEF_SWIZZLE(STENCIL, Y, Y, Y, Y);
+
+   struct d3d12_format_info info = { .swizzle = IDENTITY_SWIZZLE, .plane_slice = 0 };
+
+   if (pformat == PIPE_FORMAT_DXT1_RGB || pformat == PIPE_FORMAT_DXT1_SRGB)
+      info.swizzle = RGB1_SWIZZLE;
+
+   const struct util_format_description *desc = util_format_description(pformat);
+
+   /* sRGB formats never get one of the emulation swizzles below. */
+   if (util_format_is_srgb(pformat))
+      return info;
+
+   if (target == PIPE_BUFFER && util_format_is_alpha(pformat)) {
+      info.swizzle = BUFFER_SWIZZLE;
+   } else if (pformat == PIPE_FORMAT_A8_UNORM) {
+      /* no need to swizzle, it's natively supported */
+   } else if (util_format_is_intensity(pformat)) {
+      info.swizzle = INTENSITY_SWIZZLE;
+   } else if (util_format_is_luminance(pformat)) {
+      info.swizzle = LUMINANCE_SWIZZLE;
+   } else if (util_format_is_luminance_alpha(pformat)) {
+      info.swizzle = LUMINANCE_ALPHA_SWIZZLE;
+   } else if (util_format_is_alpha(pformat)) {
+      info.swizzle = ALPHA_SWIZZLE;
+   } else if (util_format_has_depth(desc)) {
+      info.swizzle = DEPTH_SWIZZLE;
+   } else if (util_format_has_stencil(desc)) {
+      /* When reading from a stencil texture we have to use plane 1, and
+       * the formats X24S8 and X32_S8X24 have the actual data in the y-channel
+       * but the shader will read the x component so we need to adjust the swizzle. */
+      info.plane_slice = 1;
+      info.swizzle = STENCIL_SWIZZLE;
+   }
+
+   return info;
+}
+
+/* Map a vertex format to the format that is actually used to fetch it.
+ * The listed 10_10_10_2 variants are fetched as R32_UINT, three-component
+ * 8- and 16-bit integer formats are widened to four components, and every
+ * other format is returned unchanged. */
+enum pipe_format
+d3d12_emulated_vtx_format(enum pipe_format fmt)
+{
+   enum pipe_format emulated = fmt;
+
+   switch (fmt) {
+   case PIPE_FORMAT_R10G10B10A2_SNORM:
+   case PIPE_FORMAT_R10G10B10A2_SSCALED:
+   case PIPE_FORMAT_R10G10B10A2_USCALED:
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+   case PIPE_FORMAT_B10G10R10A2_SNORM:
+   case PIPE_FORMAT_B10G10R10A2_SSCALED:
+   case PIPE_FORMAT_B10G10R10A2_USCALED:
+      emulated = PIPE_FORMAT_R32_UINT;
+      break;
+
+   case PIPE_FORMAT_R8G8B8_SINT:
+      emulated = PIPE_FORMAT_R8G8B8A8_SINT;
+      break;
+   case PIPE_FORMAT_R8G8B8_UINT:
+      emulated = PIPE_FORMAT_R8G8B8A8_UINT;
+      break;
+
+   case PIPE_FORMAT_R16G16B16_SINT:
+      emulated = PIPE_FORMAT_R16G16B16A16_SINT;
+      break;
+   case PIPE_FORMAT_R16G16B16_UINT:
+      emulated = PIPE_FORMAT_R16G16B16A16_UINT;
+      break;
+
+   default:
+      break;
+   }
+
+   return emulated;
+}
+
+
+/* Return the plane count this driver assumes for a DXGI format: 3 for
+ * V208/V408, 2 for the listed YUV formats, 1 for everything else.
+ * NOTE(review): YUY2/Y210/Y216 are usually described as packed formats;
+ * confirm that reporting 2 planes for them is intended. */
+unsigned
+d3d12_non_opaque_plane_count(DXGI_FORMAT format)
+{
+   switch (format) {
+   case DXGI_FORMAT_V208:
+   case DXGI_FORMAT_V408:
+      return 3;
+
+   case DXGI_FORMAT_NV12:
+   case DXGI_FORMAT_P010:
+   case DXGI_FORMAT_P016:
+   case DXGI_FORMAT_YUY2:
+   case DXGI_FORMAT_Y210:
+   case DXGI_FORMAT_Y216:
+   case DXGI_FORMAT_NV11:
+      return 2;
+
+   default:
+      return 1;
+   }
+}
+
+/* Number of planes of a gallium format: for depth/stencil formats the
+ * number of bits set in util_format_get_mask(), otherwise 1. */
+unsigned
+d3d12_get_format_num_planes(enum pipe_format fmt)
+{
+   if (!util_format_is_depth_or_stencil(fmt))
+      return 1;
+
+   return util_bitcount(util_format_get_mask(fmt));
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_format.h b/src/gallium/drivers/d3d12/d3d12_format.h
new file mode 100644 (file)
index 0000000..b578f4c
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_FORMATS_H
+#define D3D12_FORMATS_H
+
+#include <dxgiformat.h>
+
+#include "pipe/p_format.h"
+#include "pipe/p_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+DXGI_FORMAT
+d3d12_get_format(enum pipe_format format);
+
+DXGI_FORMAT
+d3d12_get_resource_srv_format(enum pipe_format f, enum pipe_texture_target target);
+
+DXGI_FORMAT
+d3d12_get_resource_rt_format(enum pipe_format f);
+
+unsigned
+d3d12_non_opaque_plane_count(DXGI_FORMAT f);
+
+/* Result of d3d12_get_format_info(): how a format has to be sampled. */
+struct d3d12_format_info {
+   /* Swizzle table with PIPE_SWIZZLE_MAX entries; d3d12_get_format_info()
+    * returns a pointer to static storage. */
+   const enum pipe_swizzle *swizzle;
+   /* Plane to read from: 1 for stencil-only formats, 0 otherwise. */
+   int plane_slice;
+};
+
+struct d3d12_format_info
+d3d12_get_format_info(enum pipe_format format, enum pipe_texture_target);
+
+enum pipe_format
+d3d12_emulated_vtx_format(enum pipe_format fmt);
+
+unsigned
+d3d12_get_format_num_planes(enum pipe_format fmt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_gs_variant.cpp b/src/gallium/drivers/d3d12/d3d12_gs_variant.cpp
new file mode 100644 (file)
index 0000000..8064085
--- /dev/null
@@ -0,0 +1,516 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_compiler.h"
+#include "d3d12_context.h"
+#include "d3d12_debug.h"
+#include "d3d12_screen.h"
+#include "nir_to_dxil.h"
+
+#include "nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_builtin_builder.h"
+
+#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
+
+/* Append an emit_vertex intrinsic for the given stream at the builder's
+ * current insertion point. */
+static void
+nir_emit_vertex(nir_builder *b, unsigned stream_id)
+{
+   nir_intrinsic_instr *emit =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex);
+
+   nir_intrinsic_set_stream_id(emit, stream_id);
+   nir_builder_instr_insert(b, &emit->instr);
+}
+
+/* Append an end_primitive intrinsic for the given stream at the builder's
+ * current insertion point.
+ *
+ * The stream id is now forwarded to the intrinsic; previously it was
+ * ignored and stream 0 was always used. (The misspelled function name is
+ * kept because callers use it.) */
+static void
+nir_end_primitve(nir_builder *b, unsigned stream_id)
+{
+   nir_intrinsic_instr *instr;
+
+   instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive);
+   nir_intrinsic_set_stream_id(instr, stream_id);
+   nir_builder_instr_insert(b, &instr->instr);
+}
+
+/* Emit NIR that evaluates a facing test for a triangle.
+ *
+ * `vertices` is indexed with 0, 1 and 2 to load the three input vertices.
+ * `dir` is the dot product of cross(v1 - v0, v2 - v0) with (0, 0, -1, 0),
+ * i.e. the negated z component of the cross product. The returned SSA value
+ * is the comparison `0 >= dir` when ccw is true and `0 < dir` otherwise. */
+static nir_ssa_def *
+nir_cull_face(nir_builder *b, nir_variable *vertices, bool ccw)
+{
+   nir_ssa_def *v0 =
+       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 0)));
+   nir_ssa_def *v1 =
+       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 1)));
+   nir_ssa_def *v2 =
+       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 2)));
+
+   nir_ssa_def *dir = nir_fdot(b, nir_cross4(b, nir_fsub(b, v1, v0),
+                                               nir_fsub(b, v2, v0)),
+                                   nir_imm_vec4(b, 0.0, 0.0, -1.0, 0.0));
+   /* NOTE(review): the zero operand is built with nir_imm_int() although the
+    * comparison is a float one; integer 0 and float 0.0 share the same bit
+    * pattern, so this presumably works as intended - confirm. */
+   if (ccw)
+       return nir_fge(b, nir_imm_int(b, 0), dir);
+   else
+       return nir_flt(b, nir_imm_int(b, 0), dir);
+}
+
+static d3d12_shader_selector*
+d3d12_make_passthrough_gs(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   struct d3d12_shader_selector *gs;
+   uint64_t varyings = key->varyings.mask;
+   nir_builder b;
+   nir_shader *nir;
+   nir_intrinsic_instr *instr;
+   struct pipe_shader_state templ;
+
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_GEOMETRY,
+                                  dxil_get_nir_compiler_options());
+
+   nir = b.shader;
+   nir->info.inputs_read = varyings;
+   nir->info.outputs_written = varyings;
+   nir->info.gs.input_primitive = GL_POINTS;
+   nir->info.gs.output_primitive = GL_POINTS;
+   nir->info.gs.vertices_in = 1;
+   nir->info.gs.vertices_out = 1;
+   nir->info.gs.invocations = 1;
+   nir->info.gs.active_stream_mask = 1;
+   nir->info.name = ralloc_strdup(nir, "passthrough");
+
+   /* Copy inputs to outputs. */
+   while (varyings) {
+      nir_variable *in, *out;
+      char tmp[100];
+      const int i = u_bit_scan64(&varyings);
+
+      snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", key->varyings.vars[i].driver_location);
+      in = nir_variable_create(nir,
+                               nir_var_shader_in,
+                               glsl_array_type(key->varyings.vars[i].type, 1, false),
+                               tmp);
+      in->data.location = i;
+      in->data.driver_location = key->varyings.vars[i].driver_location;
+      in->data.interpolation = key->varyings.vars[i].interpolation;
+
+      snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", key->varyings.vars[i].driver_location);
+      out = nir_variable_create(nir,
+                                nir_var_shader_out,
+                                key->varyings.vars[i].type,
+                                tmp);
+      out->data.location = i;
+      out->data.driver_location = key->varyings.vars[i].driver_location;
+      out->data.interpolation = key->varyings.vars[i].interpolation;
+
+      nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in),
+                                                            nir_imm_int(&b, 0));
+      nir_copy_deref(&b, nir_build_deref_var(&b, out), in_value);
+   }
+
+   nir_emit_vertex(&b, 0);
+   nir_end_primitve(&b, 0);
+
+   NIR_PASS_V(nir, nir_lower_var_copies);
+   nir_validate_shader(nir, "in d3d12_create_passthrough_gs");
+
+   templ.type = PIPE_SHADER_IR_NIR;
+   templ.ir.nir = nir;
+   templ.stream_output.num_outputs = 0;
+
+   gs = d3d12_create_shader(ctx, PIPE_SHADER_GEOMETRY, &templ);
+
+   return gs;
+}
+
+/* State shared while building a geometry shader with
+ * d3d12_begin_emit_primitives_gs(). */
+struct emit_primitives_context
+{
+   /* Context the shader is built for. */
+   struct d3d12_context *ctx;
+   /* NIR builder that owns the shader under construction. */
+   nir_builder b;
+
+   /* Number of input/output variable pairs recorded in in[] / out[].
+    * NOTE(review): d3d12_begin_emit_primitives_gs() uses this as an index
+    * without resetting it in the visible code - presumably the caller
+    * zero-initializes the struct; confirm. */
+   unsigned num_vars;
+   /* Per-varying shader inputs (arrays of 3, one element per vertex). */
+   nir_variable *in[MAX_VARYING];
+   /* Matching shader outputs; no output is created for the edge flag. */
+   nir_variable *out[MAX_VARYING];
+   /* Extra "gl_FrontFacing" output, created only if key->has_front_face. */
+   nir_variable *front_facing_var;
+
+   /* The remaining members are not written in the part of the file visible
+    * here; judging by their names they hold the loop over the input
+    * vertices and values computed for it - TODO confirm. */
+   nir_loop *loop;
+   nir_deref_instr *loop_index_deref;
+   nir_ssa_def *loop_index;
+   nir_ssa_def *edgeflag_cmp;
+   nir_ssa_def *front_facing;
+};
+
+/**
+ * Start building a geometry shader that takes triangles as input and walks
+ * over their three vertices in a loop.
+ *
+ * For every bit set in key->varyings.mask an input (array of 3) is declared;
+ * all of them except the edge flag also get an output with the same
+ * location, driver location and interpolation. Depending on the key, a
+ * front-facing output and a cull / edge-flag condition are set up as well.
+ *
+ * On return the loop "while (loop_index < 3)" is open and emit_ctx->loop_index
+ * holds the current index; the caller emits the loop body and then calls
+ * d3d12_finish_emit_primitives_gs() to close the loop.
+ *
+ * emit_ctx is expected to be zero-initialized by the caller (num_vars and
+ * edgeflag_cmp are read here before being written). Always returns true.
+ */
+static bool
+d3d12_begin_emit_primitives_gs(struct emit_primitives_context *emit_ctx,
+                               struct d3d12_context *ctx,
+                               struct d3d12_gs_variant_key *key,
+                               uint16_t output_primitive,
+                               unsigned vertices_out)
+{
+   nir_builder *b = &emit_ctx->b;
+   nir_intrinsic_instr *instr;
+   nir_variable *edgeflag_var = NULL;
+   nir_variable *pos_var = NULL;
+   uint64_t varyings = key->varyings.mask;
+
+   emit_ctx->ctx = ctx;
+
+   nir_builder_init_simple_shader(b, NULL, MESA_SHADER_GEOMETRY,
+                                  dxil_get_nir_compiler_options());
+
+   nir_shader *nir = b->shader;
+   nir->info.inputs_read = varyings;
+   nir->info.outputs_written = varyings;
+   nir->info.gs.input_primitive = GL_TRIANGLES;
+   nir->info.gs.output_primitive = output_primitive;
+   nir->info.gs.vertices_in = 3;
+   nir->info.gs.vertices_out = vertices_out;
+   nir->info.gs.invocations = 1;
+   nir->info.gs.active_stream_mask = 1;
+   nir->info.name = ralloc_strdup(nir, "edgeflags");
+
+   /* Declare one input/output pair per varying bit in the key */
+   while (varyings) {
+      char tmp[100];
+      const int i = u_bit_scan64(&varyings);
+
+      snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", emit_ctx->num_vars);
+      emit_ctx->in[emit_ctx->num_vars] = nir_variable_create(nir,
+                                                             nir_var_shader_in,
+                                                             glsl_array_type(key->varyings.vars[i].type, 3, 0),
+                                                             tmp);
+      emit_ctx->in[emit_ctx->num_vars]->data.location = i;
+      emit_ctx->in[emit_ctx->num_vars]->data.driver_location = key->varyings.vars[i].driver_location;
+      emit_ctx->in[emit_ctx->num_vars]->data.interpolation = key->varyings.vars[i].interpolation;
+
+      /* Don't create an output for the edge flag variable */
+      if (i == VARYING_SLOT_EDGE) {
+         edgeflag_var = emit_ctx->in[emit_ctx->num_vars];
+         /* num_vars is not advanced, so the next varying reuses this in[] slot */
+         continue;
+      } else if (i == VARYING_SLOT_POS) {
+          pos_var = emit_ctx->in[emit_ctx->num_vars];
+      }
+
+      snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", emit_ctx->num_vars);
+      emit_ctx->out[emit_ctx->num_vars] = nir_variable_create(nir,
+                                                              nir_var_shader_out,
+                                                              key->varyings.vars[i].type,
+                                                              tmp);
+      emit_ctx->out[emit_ctx->num_vars]->data.location = i;
+      emit_ctx->out[emit_ctx->num_vars]->data.driver_location = key->varyings.vars[i].driver_location;
+      emit_ctx->out[emit_ctx->num_vars]->data.interpolation = key->varyings.vars[i].interpolation;
+
+      emit_ctx->num_vars++;
+   }
+
+   /* The front-facing value is passed on as a flat uint output in VAR12 */
+   if (key->has_front_face) {
+      emit_ctx->front_facing_var = nir_variable_create(nir,
+                                                       nir_var_shader_out,
+                                                       glsl_uint_type(),
+                                                       "gl_FrontFacing");
+      emit_ctx->front_facing_var->data.location = VARYING_SLOT_VAR12;
+      emit_ctx->front_facing_var->data.driver_location = emit_ctx->num_vars;
+      emit_ctx->front_facing_var->data.interpolation = INTERP_MODE_FLAT;
+   }
+
+   /* Temporary variable "loop_index" to loop over input vertices */
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_variable *loop_index_var =
+      nir_local_variable_create(impl, glsl_uint_type(), "loop_index");
+   emit_ctx->loop_index_deref = nir_build_deref_var(b, loop_index_var);
+   nir_store_deref(b, emit_ctx->loop_index_deref, nir_imm_int(b, 0), 1);
+
+   /* With edge_flag_fix one vertex per triangle is treated as "not an edge":
+    * vertex 2 for odd primitive ids, vertex 1 for even ones. */
+   nir_ssa_def *diagonal_vertex = NULL;
+   if (key->edge_flag_fix) {
+      nir_ssa_def *prim_id = nir_load_primitive_id(b);
+      nir_ssa_def *odd = nir_build_alu(b, nir_op_imod,
+                                       prim_id,
+                                       nir_imm_int(b, 2),
+                                       NULL, NULL);
+      diagonal_vertex = nir_bcsel(b, nir_i2b(b, odd),
+                                  nir_imm_int(b, 2),
+                                  nir_imm_int(b, 1));
+   }
+
+   /* Face tests are computed once, outside the loop.
+    * NOTE(review): pos_var is only set when VARYING_SLOT_POS is part of the
+    * key's varyings; this path presumably relies on that - confirm. */
+   if (key->cull_mode != PIPE_FACE_NONE || key->has_front_face) {
+      if (key->cull_mode == PIPE_FACE_BACK)
+         emit_ctx->edgeflag_cmp = nir_cull_face(b, pos_var, key->front_ccw);
+      else if (key->cull_mode == PIPE_FACE_FRONT)
+         emit_ctx->edgeflag_cmp = nir_cull_face(b, pos_var, !key->front_ccw);
+
+      if (key->has_front_face) {
+         /* Back-face culling already computed the same test; reuse it */
+         if (key->cull_mode == PIPE_FACE_BACK)
+            emit_ctx->front_facing = emit_ctx->edgeflag_cmp;
+         else
+            emit_ctx->front_facing = nir_cull_face(b, pos_var, key->front_ccw);
+         emit_ctx->front_facing = nir_i2i32(b, emit_ctx->front_facing);
+      }
+   }
+
+   /**
+    *  while {
+    *     if (loop_index >= 3)
+    *        break;
+    */
+   emit_ctx->loop = nir_push_loop(b);
+
+   emit_ctx->loop_index = nir_load_deref(b, emit_ctx->loop_index_deref);
+   nir_ssa_def *cmp = nir_ige(b, emit_ctx->loop_index,
+                              nir_imm_int(b, 3));
+   nir_if *loop_check = nir_push_if(b, cmp);
+   nir_jump(b, nir_jump_break);
+   nir_pop_if(b, loop_check);
+
+   /* Per-vertex edge flag (== 1.0), ANDed with the cull condition if any */
+   if (edgeflag_var) {
+      nir_ssa_def *edge_flag =
+         nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, edgeflag_var), emit_ctx->loop_index));
+      nir_ssa_def *is_edge = nir_feq(b, nir_channel(b, edge_flag, 0), nir_imm_float(b, 1.0));
+      if (emit_ctx->edgeflag_cmp)
+         emit_ctx->edgeflag_cmp = nir_iand(b, emit_ctx->edgeflag_cmp, is_edge);
+      else
+         emit_ctx->edgeflag_cmp = is_edge;
+   }
+
+   /* Additionally mask out the vertex selected as diagonal_vertex above */
+   if (key->edge_flag_fix) {
+      nir_ssa_def *is_edge = nir_ine(b, emit_ctx->loop_index, diagonal_vertex);
+      if (emit_ctx->edgeflag_cmp)
+         emit_ctx->edgeflag_cmp = nir_iand(b, emit_ctx->edgeflag_cmp, is_edge);
+      else
+         emit_ctx->edgeflag_cmp = is_edge;
+   }
+
+   return true;
+}
+
+static struct d3d12_shader_selector *
+d3d12_finish_emit_primitives_gs(struct emit_primitives_context *emit_ctx, bool end_primitive)
+{
+   struct d3d12_shader_selector *gs;
+   struct pipe_shader_state templ;
+   nir_builder *b = &emit_ctx->b;
+   nir_shader *nir = b->shader;
+
+   /**
+    *     loop_index++;
+    *  }
+    */
+   nir_store_deref(b, emit_ctx->loop_index_deref, nir_iadd_imm(b, emit_ctx->loop_index, 1), 1);
+   nir_pop_loop(b, emit_ctx->loop);
+
+   if (end_primitive)
+      nir_end_primitve(b, 0);
+
+   nir_validate_shader(nir, "in d3d12_lower_edge_flags");
+
+   NIR_PASS_V(nir, nir_lower_var_copies);
+
+   templ.type = PIPE_SHADER_IR_NIR;
+   templ.ir.nir = nir;
+   templ.stream_output.num_outputs = 0;
+
+   return d3d12_create_shader(emit_ctx->ctx, PIPE_SHADER_GEOMETRY, &templ);
+}
+
+static d3d12_shader_selector*
+d3d12_emit_points(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   struct emit_primitives_context emit_ctx = {0};
+   nir_builder *b = &emit_ctx.b;
+   nir_intrinsic_instr *instr;
+
+   d3d12_begin_emit_primitives_gs(&emit_ctx, ctx, key, GL_POINTS, 3);
+
+   /**
+    *  if (edge_flag)
+    *     out_position = in_position;
+    *  else
+    *     out_position = vec4(-2.0, -2.0, 0.0, 1.0); // Invalid position
+    *
+    *  [...] // Copy other variables
+    *
+    *  EmitVertex();
+    */
+   for (unsigned i = 0; i < emit_ctx.num_vars; ++i) {
+      nir_ssa_def *index = (key->flat_varyings & (1 << emit_ctx.in[i]->data.location))  ?
+                              nir_imm_int(b, (key->flatshade_first ? 0 : 2)) : emit_ctx.loop_index;
+      nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index);
+      if (emit_ctx.in[i]->data.location == VARYING_SLOT_POS && emit_ctx.edgeflag_cmp) {
+         nir_if *edge_check = nir_push_if(b, emit_ctx.edgeflag_cmp);
+         nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value);
+         nir_if *edge_else = nir_push_else(b, edge_check);
+         nir_store_deref(b, nir_build_deref_var(b, emit_ctx.out[i]),
+                         nir_imm_vec4(b, -2.0, -2.0, 0.0, 1.0), 0xf);
+         nir_pop_if(b, edge_else);
+      } else {
+         nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value);
+      }
+   }
+   if (key->has_front_face)
+       nir_store_var(b, emit_ctx.front_facing_var, emit_ctx.front_facing, 0x1);
+   nir_emit_vertex(b, 0);
+
+   return d3d12_finish_emit_primitives_gs(&emit_ctx, false);
+}
+
+static d3d12_shader_selector*
+d3d12_emit_lines(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   struct emit_primitives_context emit_ctx = {0};
+   nir_builder *b = &emit_ctx.b;
+
+   d3d12_begin_emit_primitives_gs(&emit_ctx, ctx, key, GL_LINE_STRIP, 6);
+
+   nir_ssa_def *next_index = nir_imod(b, nir_iadd_imm(b, emit_ctx.loop_index, 1), nir_imm_int(b, 3));
+
+   /* First vertex */
+   for (unsigned i = 0; i < emit_ctx.num_vars; ++i) {
+      nir_ssa_def *index = (key->flat_varyings & (1 << emit_ctx.in[i]->data.location)) ?
+                              nir_imm_int(b, (key->flatshade_first ? 0 : 2)) : emit_ctx.loop_index;
+      nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index);
+      nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value);
+   }
+   if (key->has_front_face)
+       nir_store_var(b, emit_ctx.front_facing_var, emit_ctx.front_facing, 0x1);
+   nir_emit_vertex(b, 0);
+
+   /* Second vertex. If not an edge, use same position as first vertex */
+   for (unsigned i = 0; i < emit_ctx.num_vars; ++i) {
+      nir_ssa_def *index = next_index;
+      if (emit_ctx.in[i]->data.location == VARYING_SLOT_POS)
+         index = nir_bcsel(b, emit_ctx.edgeflag_cmp, next_index, emit_ctx.loop_index);
+      else if (key->flat_varyings & (1 << emit_ctx.in[i]->data.location))
+         index = nir_imm_int(b, 2);
+      nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]),
+                     nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index));
+   }
+   if (key->has_front_face)
+       nir_store_var(b, emit_ctx.front_facing_var, emit_ctx.front_facing, 0x1);
+   nir_emit_vertex(b, 0);
+
+   nir_end_primitve(b, 0);
+
+   return d3d12_finish_emit_primitives_gs(&emit_ctx, false);
+}
+
+static d3d12_shader_selector*
+d3d12_emit_triangles(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   struct emit_primitives_context emit_ctx = {0};
+   nir_builder *b = &emit_ctx.b;
+   nir_intrinsic_instr *instr;
+
+   d3d12_begin_emit_primitives_gs(&emit_ctx, ctx, key, GL_TRIANGLE_STRIP, 3);
+
+   /**
+    *  [...] // Copy variables
+    *
+    *  EmitVertex();
+    */
+
+   nir_ssa_def *incr = NULL;
+
+   if (key->provoking_vertex > 0)
+      incr = nir_imm_int(b, key->provoking_vertex);
+   else
+      incr = nir_imm_int(b, 3);
+
+   if (key->alternate_tri) {
+      nir_ssa_def *odd = nir_imod(b, nir_load_primitive_id(b), nir_imm_int(b, 2));
+      incr = nir_isub(b, incr, odd);
+   }
+
+   assert(incr != NULL);
+   nir_ssa_def *index = nir_imod(b, nir_iadd(b, emit_ctx.loop_index, incr), nir_imm_int(b, 3));
+   for (unsigned i = 0; i < emit_ctx.num_vars; ++i) {
+      nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index);
+      nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value);
+   }
+   nir_emit_vertex(b, 0);
+
+   return d3d12_finish_emit_primitives_gs(&emit_ctx, true);
+}
+
+/* Hash callback: hashes the raw bytes of a struct d3d12_gs_variant_key. */
+static uint32_t
+hash_gs_variant_key(const void *key)
+{
+   const size_t key_size = sizeof(struct d3d12_gs_variant_key);
+
+   return _mesa_hash_data(key, key_size);
+}
+
+/* Equality callback: two keys match when their bytes are identical. */
+static bool
+equals_gs_variant_key(const void *a, const void *b)
+{
+   const size_t key_size = sizeof(struct d3d12_gs_variant_key);
+
+   return !memcmp(a, b, key_size);
+}
+
+/**
+ * Create the per-context cache of geometry shader variants.
+ *
+ * Only the key-equality callback is supplied here; d3d12_get_gs_variant()
+ * computes the hash itself with hash_gs_variant_key() and uses the
+ * *_pre_hashed search/insert entry points.
+ */
+void
+d3d12_gs_variant_cache_init(struct d3d12_context *ctx)
+{
+   ctx->gs_variant_cache = _mesa_hash_table_create(NULL, NULL, equals_gs_variant_key);
+}
+
+/* Destroy callback for the variant cache: frees the shader stored in the entry. */
+static void
+delete_entry(struct hash_entry *entry)
+{
+   d3d12_shader_selector *variant = (d3d12_shader_selector *)entry->data;
+
+   d3d12_shader_free(variant);
+}
+
+/**
+ * Destroy the variant cache of the context; delete_entry() is passed as the
+ * per-entry callback so the cached shaders are freed with it.
+ */
+void
+d3d12_gs_variant_cache_destroy(struct d3d12_context *ctx)
+{
+   _mesa_hash_table_destroy(ctx->gs_variant_cache, delete_entry);
+}
+
+static struct d3d12_shader_selector *
+create_geometry_shader_variant(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   d3d12_shader_selector *gs = NULL;
+
+   if (key->passthrough)
+      gs = d3d12_make_passthrough_gs(ctx, key);
+   else if (key->provoking_vertex > 0 || key->alternate_tri)
+      gs = d3d12_emit_triangles(ctx, key);
+   else if (key->fill_mode == PIPE_POLYGON_MODE_POINT)
+      gs = d3d12_emit_points(ctx, key);
+   else if (key->fill_mode == PIPE_POLYGON_MODE_LINE)
+      gs = d3d12_emit_lines(ctx, key);
+
+   if (gs) {
+      gs->is_gs_variant = true;
+      gs->gs_key = *key;
+   }
+
+   return gs;
+}
+
+d3d12_shader_selector *
+d3d12_get_gs_variant(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   uint32_t hash = hash_gs_variant_key(key);
+   struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->gs_variant_cache,
+                                                                 hash, key);
+   if (!entry) {
+      d3d12_shader_selector *gs = create_geometry_shader_variant(ctx, key);
+      entry = _mesa_hash_table_insert_pre_hashed(ctx->gs_variant_cache,
+                                                 hash, &gs->gs_key, gs);
+      assert(entry);
+   }
+
+   return (d3d12_shader_selector *)entry->data;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_lower_int_cubemap_to_array.c b/src/gallium/drivers/d3d12/d3d12_lower_int_cubemap_to_array.c
new file mode 100644 (file)
index 0000000..d71689f
--- /dev/null
@@ -0,0 +1,273 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "d3d12_nir_passes.h"
+
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+/* Filter for nir_shader_lower_instructions(): selects tex/txb/txd/txl/txs/lod
+ * instructions on cube samplers whose result type is an integer type. */
+static bool
+lower_int_cubmap_to_array_filter(const nir_instr *instr,
+                                 UNUSED const void *_options)
+{
+   if (instr->type != nir_instr_type_tex)
+      return false;
+
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+   if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+      return false;
+
+   const bool handled_op = tex->op == nir_texop_tex ||
+                           tex->op == nir_texop_txb ||
+                           tex->op == nir_texop_txd ||
+                           tex->op == nir_texop_txl ||
+                           tex->op == nir_texop_txs ||
+                           tex->op == nir_texop_lod;
+   if (!handled_op)
+      return false;
+
+   /* Find the sampler variable behind the deref to inspect its result type */
+   int src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+   assert(src_idx >= 0);
+
+   nir_deref_instr *sampler_deref =
+      nir_instr_as_deref(tex->src[src_idx].src.ssa->parent_instr);
+   nir_variable *sampler_var = nir_deref_instr_get_variable(sampler_deref);
+
+   return glsl_base_type_is_integer(glsl_get_sampler_result_type(sampler_var->type));
+}
+
+/* Components of a cube map lookup vector, as filled in by lower_cube_sample() */
+typedef struct {
+   nir_ssa_def *rx; /* channel 0 of the coordinate */
+   nir_ssa_def *ry; /* channel 1 of the coordinate */
+   nir_ssa_def *rz; /* channel 2 of the coordinate */
+   nir_ssa_def *arx; /* fabs(rx) */
+   nir_ssa_def *ary; /* fabs(ry) */
+   nir_ssa_def *arz; /* fabs(rz) */
+} coord_t;
+
+
+/* This is taken from sp_tex_sample:convert_cube */
+static nir_ssa_def *
+evaluate_face_x(nir_builder *b, coord_t *coord)
+{
+   /* X is the major axis: rx >= 0 selects face 0, otherwise face 1 */
+   nir_ssa_def *rx_sign = nir_fsign(b, coord->rx);
+   nir_ssa_def *is_positive = nir_fge(b, coord->rx, nir_imm_float(b, 0.0));
+   nir_ssa_def *scale = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arx);
+
+   /* u = sign * scale * rz + 0.5 */
+   nir_ssa_def *signed_scale = nir_fmul(b, rx_sign, scale);
+   nir_ssa_def *u = nir_fmul(b, signed_scale, coord->rz);
+   u = nir_fadd(b, u, nir_imm_float(b, 0.5));
+
+   /* v = scale * ry + 0.5 */
+   nir_ssa_def *v = nir_fmul(b, scale, coord->ry);
+   v = nir_fadd(b, v, nir_imm_float(b, 0.5));
+
+   nir_ssa_def *layer = nir_bcsel(b, is_positive, nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
+
+   return nir_vec3(b, u, v, layer);
+}
+
+static nir_ssa_def *
+evaluate_face_y(nir_builder *b, coord_t *coord)
+{
+   /* Y is the major axis: ry >= 0 selects face 2, otherwise face 3 */
+   nir_ssa_def *ry_sign = nir_fsign(b, coord->ry);
+   nir_ssa_def *is_positive = nir_fge(b, coord->ry, nir_imm_float(b, 0.0));
+   nir_ssa_def *scale = nir_fdiv(b, nir_imm_float(b, 0.5), coord->ary);
+
+   /* u = scale * rx + 0.5 */
+   nir_ssa_def *u = nir_fmul(b, scale, coord->rx);
+   u = nir_fadd(b, u, nir_imm_float(b, 0.5));
+
+   /* v = sign * scale * rz + 0.5 */
+   nir_ssa_def *signed_scale = nir_fmul(b, ry_sign, scale);
+   nir_ssa_def *v = nir_fmul(b, signed_scale, coord->rz);
+   v = nir_fadd(b, v, nir_imm_float(b, 0.5));
+
+   nir_ssa_def *layer = nir_bcsel(b, is_positive, nir_imm_float(b, 2.0), nir_imm_float(b, 3.0));
+
+   return nir_vec3(b, u, v, layer);
+}
+
+static nir_ssa_def *
+evaluate_face_z(nir_builder *b, coord_t *coord)
+{
+   /* Z is the major axis: rz >= 0 selects face 4, otherwise face 5 */
+   nir_ssa_def *rz_sign = nir_fsign(b, coord->rz);
+   nir_ssa_def *is_positive = nir_fge(b, coord->rz, nir_imm_float(b, 0.0));
+   nir_ssa_def *scale = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arz);
+
+   /* u = sign * scale * (-rx) + 0.5 */
+   nir_ssa_def *signed_scale = nir_fmul(b, rz_sign, scale);
+   nir_ssa_def *u = nir_fmul(b, signed_scale, nir_fneg(b, coord->rx));
+   u = nir_fadd(b, u, nir_imm_float(b, 0.5));
+
+   /* v = scale * ry + 0.5 */
+   nir_ssa_def *v = nir_fmul(b, scale, coord->ry);
+   v = nir_fadd(b, v, nir_imm_float(b, 0.5));
+
+   nir_ssa_def *layer = nir_bcsel(b, is_positive, nir_imm_float(b, 4.0), nir_imm_float(b, 5.0));
+
+   return nir_vec3(b, u, v, layer);
+}
+
+/* Emit a 2D-array texture instruction that mirrors the given cube texture
+ * instruction, with its coordinate source replaced by coord (expected to
+ * carry x, y and the face index). Returns the new instruction's result. */
+static nir_ssa_def *
+create_array_tex_from_cube_tex(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coord)
+{
+   nir_tex_instr *array_tex = nir_tex_instr_create(b->shader, tex->num_srcs);
+
+   /* Same operation and bindings, but on a 2D array with 3 coordinates */
+   array_tex->op = tex->op;
+   array_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+   array_tex->is_array = true;
+   array_tex->coord_components = 3;
+   array_tex->is_shadow = tex->is_shadow;
+   array_tex->is_new_style_shadow = tex->is_new_style_shadow;
+   array_tex->texture_index = tex->texture_index;
+   array_tex->sampler_index = tex->sampler_index;
+   array_tex->dest_type = tex->dest_type;
+
+   nir_src coord_src = nir_src_for_ssa(coord);
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      /* Every source is taken over as is, except for the coordinate */
+      nir_src *from = &tex->src[i].src;
+      if (tex->src[i].src_type == nir_tex_src_coord)
+         from = &coord_src;
+
+      nir_src_copy(&array_tex->src[i].src, from, array_tex);
+      array_tex->src[i].src_type = tex->src[i].src_type;
+   }
+
+   nir_ssa_dest_init(&array_tex->instr, &array_tex->dest,
+                     nir_tex_instr_dest_size(array_tex), 32, NULL);
+   nir_builder_instr_insert(b, &array_tex->instr);
+
+   return &array_tex->dest.ssa;
+}
+
+/* Replace a sampling operation on a cube map by one on a 2D array: the
+ * face is selected from the component of the coordinate with the largest
+ * magnitude (x is preferred over y, y over z on ties), the 2D coordinates
+ * within that face are computed, and the lookup is re-emitted with
+ * (x, y, face) as coordinate. Returns the result of the new instruction. */
+static nir_ssa_def *
+lower_cube_sample(nir_builder *b, nir_tex_instr *tex)
+{
+   /* We don't support cube map arrays yet */
+   assert(!tex->is_array);
+
+   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+   assert(coord_index >= 0);
+
+   /* Evaluate the face and the xy coordinates for a 2D tex op */
+   nir_ssa_def *coord = tex->src[coord_index].src.ssa;
+
+   coord_t coords;
+   coords.rx = nir_channel(b, coord, 0);
+   coords.ry = nir_channel(b, coord, 1);
+   coords.rz = nir_channel(b, coord, 2);
+   coords.arx = nir_fabs(b, coords.rx);
+   coords.ary = nir_fabs(b, coords.ry);
+   coords.arz = nir_fabs(b, coords.rz);
+
+   /* if (|x| >= |y| && |x| >= |z|) -> X faces */
+   nir_ssa_def *use_face_x = nir_iand(b,
+                                      nir_fge(b, coords.arx, coords.ary),
+                                      nir_fge(b, coords.arx, coords.arz));
+
+   nir_if *use_face_x_if = nir_push_if(b, use_face_x);
+   nir_ssa_def *face_x_coord = evaluate_face_x(b, &coords);
+   nir_if *use_face_x_else = nir_push_else(b, use_face_x_if);
+
+   /* else if (|y| >= |x| && |y| >= |z|) -> Y faces */
+   nir_ssa_def *use_face_y = nir_iand(b,
+                                      nir_fge(b, coords.ary, coords.arx),
+                                      nir_fge(b, coords.ary, coords.arz));
+
+   nir_if *use_face_y_if = nir_push_if(b, use_face_y);
+   nir_ssa_def *face_y_coord = evaluate_face_y(b, &coords);
+   nir_if *use_face_y_else = nir_push_else(b, use_face_y_if);
+
+   /* else -> Z faces */
+   nir_ssa_def *face_z_coord = evaluate_face_z(b, &coords);
+
+   /* Merge the results of the branches, innermost if first */
+   nir_pop_if(b, use_face_y_else);
+   nir_ssa_def *face_y_or_z_coord = nir_if_phi(b, face_y_coord, face_z_coord);
+   nir_pop_if(b, use_face_x_else);
+
+   // This contains in xy the normalized sample coordinates, and in z the face index
+   nir_ssa_def *coord_and_face = nir_if_phi(b, face_x_coord, face_y_or_z_coord);
+
+   return create_array_tex_from_cube_tex(b, tex, coord_and_face);
+}
+
+/* We don't expect the array size here: only the first two components of
+ * the size query result are passed on */
+static nir_ssa_def *
+lower_cube_txs(nir_builder *b, nir_tex_instr *tex)
+{
+   const unsigned xy_mask = 3;
+
+   b->cursor = nir_after_instr(&tex->instr);
+
+   return nir_channels(b, &tex->dest.ssa, xy_mask);
+}
+
+/* Map a cube sampler type to the 2D array sampler type with the same
+ * result type; any other type is returned unchanged. */
+static const struct glsl_type *
+make_2darray_from_cubemap(const struct glsl_type *type)
+{
+   if (glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_CUBE)
+      return type;
+
+   return glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+                            false, true,
+                            glsl_get_sampler_result_type(type));
+}
+
+/* Like make_2darray_from_cubemap(), but also handles arrays of samplers:
+ * the element type is converted and wrapped in an array of the same length. */
+static const struct glsl_type *
+make_2darray_from_cubemap_with_array(const struct glsl_type *type)
+{
+   if (!glsl_type_is_array(type))
+      return make_2darray_from_cubemap(type);
+
+   /* While we don't (yet) support cube map arrays, there still may be arrays
+    * of cube maps */
+   const struct glsl_type *element_type = glsl_without_array(type);
+   if (element_type == type)
+      return type;
+
+   return glsl_array_type(make_2darray_from_cubemap(element_type),
+                          glsl_get_length(type), 0);
+}
+
+/* Lowering callback for nir_shader_lower_instructions(): retypes the sampler
+ * variable (and the deref used by the instruction) from cube to 2D array and
+ * dispatches to the sampling or the size-query lowering. */
+static nir_ssa_def *
+lower_int_cubmap_to_array_impl(nir_builder *b, nir_instr *instr,
+                               UNUSED void *_options)
+{
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+   int src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+   assert(src_idx >= 0);
+
+   nir_deref_instr *deref = nir_instr_as_deref(tex->src[src_idx].src.ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   var->type = make_2darray_from_cubemap_with_array(var->type);
+   deref->type = var->type;
+
+   switch (tex->op) {
+   case nir_texop_txs:
+      return lower_cube_txs(b, tex);
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txl:
+   case nir_texop_lod:
+      return lower_cube_sample(b, tex);
+   default:
+      unreachable("Unsupported cupe map texture operation");
+   }
+}
+
+/* Lower texture operations on integer cube maps to operations on 2D arrays.
+ * If anything was lowered, every remaining integer cube sampler uniform is
+ * retyped as well. Returns true if the shader was changed. */
+bool
+d3d12_lower_int_cubmap_to_array(nir_shader *s)
+{
+   bool progress =
+         nir_shader_lower_instructions(s,
+                                       lower_int_cubmap_to_array_filter,
+                                       lower_int_cubmap_to_array_impl,
+                                       NULL);
+   if (!progress)
+      return false;
+
+   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
+      if (!glsl_type_is_sampler(var->type))
+         continue;
+      if (glsl_get_sampler_dim(var->type) != GLSL_SAMPLER_DIM_CUBE)
+         continue;
+      if (!glsl_base_type_is_integer(glsl_get_sampler_result_type(var->type)))
+         continue;
+
+      var->type = make_2darray_from_cubemap_with_array(var->type);
+   }
+
+   return true;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c b/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c
new file mode 100644 (file)
index 0000000..8b8a5a4
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "d3d12_compiler.h"
+#include "program/prog_statevars.h"
+
+/* State of the point sprite lowering pass, see d3d12_lower_point_sprite() */
+struct lower_state {
+   nir_variable *uniform; /* (1/w, 1/h, pt_sz, max_sz) */
+   nir_variable *pos_out; /* output at VARYING_SLOT_POS */
+   nir_variable *psiz_out; /* output at VARYING_SLOT_PSIZ */
+   nir_variable *point_coord_out[9]; /* outputs created for point coordinates */
+   unsigned num_point_coords; /* number of valid point_coord_out[] entries */
+   nir_variable *varying_out[VARYING_SLOT_MAX]; /* all other outputs, by location */
+
+   /* Lazily created constants, one per corner of the emitted quad */
+   nir_ssa_def *point_dir_imm[4];
+   nir_ssa_def *point_coord_imm[4];
+
+   /* Current point primitive */
+   nir_ssa_def *point_pos; /* last value stored to the position output */
+   nir_ssa_def *point_size; /* last value stored to the point size output */
+   nir_ssa_def *varying[VARYING_SLOT_MAX]; /* last value stored per other output */
+   unsigned varying_write_mask[VARYING_SLOT_MAX]; /* write mask of that store */
+
+   bool sprite_origin_lower_left;
+   bool point_size_per_vertex;
+   bool aa_point; /* not referenced in the visible part of this file */
+};
+
+/* Record the shader's output variables in the state: position and point
+ * size get dedicated fields, everything else is indexed by its location. */
+static void
+find_outputs(nir_shader *shader, struct lower_state *state)
+{
+   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
+      if (var->data.location == VARYING_SLOT_POS)
+         state->pos_out = var;
+      else if (var->data.location == VARYING_SLOT_PSIZ)
+         state->psiz_out = var;
+      else
+         state->varying_out[var->data.location] = var;
+   }
+}
+
+/* Return the direction (-1/+1 in x and y) from the point center to corner i
+ * of the quad. The four constants are created on first use and cached. */
+static nir_ssa_def *
+get_point_dir(nir_builder *b, struct lower_state *state, unsigned i)
+{
+   static const float corner_dir[4][2] = {
+      { -1, -1 },
+      { -1,  1 },
+      {  1, -1 },
+      {  1,  1 },
+   };
+
+   if (state->point_dir_imm[0] == NULL) {
+      for (unsigned c = 0; c < 4; ++c)
+         state->point_dir_imm[c] = nir_imm_vec2(b, corner_dir[c][0], corner_dir[c][1]);
+   }
+
+   return state->point_dir_imm[i];
+}
+
+/* Return the point coordinate (s, t, 0, 1) for corner i of the quad. The t
+ * component is flipped unless the sprite origin is lower-left. The four
+ * constants are created on first use and cached. */
+static nir_ssa_def *
+get_point_coord(nir_builder *b, struct lower_state *state, unsigned i)
+{
+   if (state->point_coord_imm[0] == NULL) {
+      for (unsigned c = 0; c < 4; ++c) {
+         /* Corners are ordered (0,0), (0,1), (1,0), (1,1) for lower-left */
+         const float s = (float)(c >> 1);
+         const float t_lower_left = (float)(c & 1);
+         const float t = state->sprite_origin_lower_left ?
+                            t_lower_left : 1.0f - t_lower_left;
+
+         state->point_coord_imm[c] = nir_imm_vec4(b, s, t, 0, 1);
+      }
+   }
+
+   return state->point_coord_imm[i];
+}
+
+/**
+ * scaled_point_size = pointSize * pos.w * ViewportSizeRcp
+ *
+ * The width is returned in *x and the height in *y.
+ */
+static void
+get_scaled_point_size(nir_builder *b, struct lower_state *state,
+                      nir_ssa_def **x, nir_ssa_def **y)
+{
+   /* State uniform contains: (1/ViewportWidth, 1/ViewportHeight, PointSize, MaxPointSize) */
+   nir_ssa_def *params = nir_load_var(b, state->uniform);
+   nir_ssa_def *size = state->point_size;
+
+   if (size && state->point_size_per_vertex) {
+      /* clamp point-size to valid range */
+      size = nir_fmax(b, size, nir_imm_float(b, 1.0f));
+      size = nir_fmin(b, size, nir_imm_float(b, D3D12_MAX_POINT_SIZE));
+   } else {
+      /* Use static point size (from uniform) if the shader output was not set */
+      size = nir_channel(b, params, 2);
+   }
+
+   nir_ssa_def *pos_w = nir_channel(b, state->point_pos, 3);
+   size = nir_fmul(b, size, pos_w);
+
+   *x = nir_fmul(b, size, nir_channel(b, params, 0));
+   *y = nir_fmul(b, size, nir_channel(b, params, 1));
+}
+
+/* Handle a store_deref: stores to shader outputs are removed and the stored
+ * value is remembered in the state, so that lower_emit_vertex() can write it
+ * for each emitted vertex. Returns true if the instruction was removed. */
+static bool
+lower_store(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state *state)
+{
+   nir_deref_instr *dst = nir_src_as_deref(instr->src[0]);
+   if (!nir_deref_mode_is(dst, nir_var_shader_out))
+      return false;
+
+   nir_variable *var = nir_deref_instr_get_variable(dst);
+   nir_ssa_def *value = instr->src[1].ssa;
+
+   if (var->data.location == VARYING_SLOT_POS) {
+      state->point_pos = value;
+   } else if (var->data.location == VARYING_SLOT_PSIZ) {
+      state->point_size = value;
+   } else {
+      state->varying[var->data.location] = value;
+      state->varying_write_mask[var->data.location] = nir_intrinsic_write_mask(instr);
+   }
+
+   nir_instr_remove(&instr->instr);
+   return true;
+}
+
+/* Replace an emit_vertex by four vertices followed by an end_primitive on
+ * the same stream. For each of the four vertices all recorded varyings are
+ * written, the position is offset from the recorded point position by the
+ * scaled point size along one of the four corner directions, and the point
+ * coordinate outputs get the matching corner coordinate. The recorded
+ * position, size and varyings are cleared afterwards. Always returns true. */
+static bool
+lower_emit_vertex(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state *state)
+{
+   unsigned stream_id = nir_intrinsic_stream_id(instr);
+
+   nir_ssa_def *point_width, *point_height;
+   get_scaled_point_size(b, state, &point_width, &point_height);
+
+   nir_instr_remove(&instr->instr);
+
+   for (unsigned i = 0; i < 4; i++) {
+      /* All outputs need to be emitted for each vertex */
+      for (unsigned slot = 0; slot < VARYING_SLOT_MAX; ++slot) {
+         if (state->varying[slot] != NULL) {
+            nir_store_var(b, state->varying_out[slot], state->varying[slot],
+                          state->varying_write_mask[slot]);
+         }
+      }
+
+      /* pos = scaled_point_size * point_dir + point_pos */
+      nir_ssa_def *point_dir = get_point_dir(b, state, i);
+      nir_ssa_def *pos = nir_vec4(b,
+                                  nir_ffma(b,
+                                           point_width,
+                                           nir_channel(b, point_dir, 0),
+                                           nir_channel(b, state->point_pos, 0)),
+                                  nir_ffma(b,
+                                           point_height,
+                                           nir_channel(b, point_dir, 1),
+                                           nir_channel(b, state->point_pos, 1)),
+                                  nir_channel(b, state->point_pos, 2),
+                                  nir_channel(b, state->point_pos, 3));
+      nir_store_var(b, state->pos_out, pos, 0xf);
+
+      /* point coord */
+      nir_ssa_def *point_coord = get_point_coord(b, state, i);
+      for (unsigned j = 0; j < state->num_point_coords; ++j)
+         nir_store_var(b, state->point_coord_out[j], point_coord, 0xf);
+
+      /* EmitVertex */
+      instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex);
+      nir_intrinsic_set_stream_id(instr, stream_id);
+      nir_builder_instr_insert(b, &instr->instr);
+   }
+
+   /* EndPrimitive */
+   instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive);
+   nir_intrinsic_set_stream_id(instr, stream_id);
+   nir_builder_instr_insert(b, &instr->instr);
+
+   /* Reset everything */
+   /* NOTE(review): the cached point_dir_imm/point_coord_imm constants are
+    * kept and reused by later calls - confirm the place they were first
+    * created at is valid for every later emit_vertex. */
+   state->point_pos = NULL;
+   state->point_size = NULL;
+   for (unsigned i = 0; i < VARYING_SLOT_MAX; ++i)
+      state->varying[i] = NULL;
+
+   return true;
+}
+
+/* Lower a single intrinsic: output stores are recorded and removed,
+ * emit_vertex is expanded, and the shader's own end_primitive is dropped.
+ * Returns true if the instruction was changed. */
+static bool
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state *state)
+{
+   b->cursor = nir_before_instr(&instr->instr);
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_store_deref:
+      return lower_store(instr, b, state);
+   case nir_intrinsic_emit_vertex:
+      return lower_emit_vertex(instr, b, state);
+   case nir_intrinsic_end_primitive:
+      nir_instr_remove(&instr->instr);
+      return true;
+   default:
+      return false;
+   }
+}
+
+/**
+ * Lower a geometry shader emitting points so that it emits a quad (a
+ * triangle strip of four vertices) for every point instead.
+ *
+ * sprite_origin_lower_left selects the orientation of the generated point
+ * coordinates, point_size_per_vertex whether the size written by the shader
+ * is used instead of the static one from the state uniform. For each bit i
+ * (0..8) set in point_coord_enable a vec4 output at VARYING_SLOT_VAR0 + i is
+ * created and written with the point coordinate. next_inputs_read is passed
+ * on to d3d12_reassign_driver_locations() when such outputs were requested.
+ *
+ * The shader's output primitive is changed to GL_TRIANGLE_STRIP and its
+ * vertices_out is multiplied by four. Returns true if any instruction was
+ * changed.
+ */
+bool
+d3d12_lower_point_sprite(nir_shader *shader,
+                         bool sprite_origin_lower_left,
+                         bool point_size_per_vertex,
+                         unsigned point_coord_enable,
+                         uint64_t next_inputs_read)
+{
+   const gl_state_index16 tokens[5] = { STATE_INTERNAL,
+                                        STATE_INTERNAL_DRIVER,
+                                        D3D12_STATE_VAR_PT_SPRITE };
+   struct lower_state state;
+   bool progress = false;
+
+   assert(shader->info.gs.output_primitive == GL_POINTS);
+
+   memset(&state, 0, sizeof(state));
+   find_outputs(shader, &state);
+   state.sprite_origin_lower_left = sprite_origin_lower_left;
+   state.point_size_per_vertex = point_size_per_vertex;
+
+   /* Create uniform to retrieve inverse of viewport size and point size:
+    * (1/ViewportWidth, 1/ViewportHeight, PointSize, MaxPointSize) */
+   state.uniform = nir_variable_create(shader,
+                                       nir_var_uniform,
+                                       glsl_vec4_type(),
+                                       "d3d12_ViewportSizeRcp");
+   state.uniform->num_state_slots = 1;
+   state.uniform->state_slots = ralloc_array(state.uniform, nir_state_slot, 1);
+   /* NOTE(review): the copy is sized by the destination array; this assumes
+    * it has the same 5 elements as the local tokens[] - confirm. */
+   memcpy(state.uniform->state_slots[0].tokens, tokens,
+          sizeof(state.uniform->state_slots[0].tokens));
+   shader->num_uniforms++;
+
+   /* Create new outputs for point tex coordinates */
+   unsigned count = 0;
+   for (unsigned int sem = 0; sem < 9; sem++) {
+      if (point_coord_enable & BITFIELD64_BIT(sem)) {
+         char tmp[100];
+         unsigned location = VARYING_SLOT_VAR0 + sem;
+
+         snprintf(tmp, ARRAY_SIZE(tmp), "gl_TexCoord%dMESA", count);
+
+         nir_variable *var = nir_variable_create(shader,
+                                                 nir_var_shader_out,
+                                                 glsl_vec4_type(),
+                                                 tmp);
+         var->data.location = location;
+         state.point_coord_out[count++] = var;
+      }
+   }
+   state.num_point_coords = count;
+   if (point_coord_enable) {
+      d3d12_reassign_driver_locations(shader, nir_var_shader_out,
+                                      next_inputs_read);
+   }
+
+   /* Rewrite output stores, emit_vertex and end_primitive in every function */
+   nir_foreach_function(function, shader) {
+      if (function->impl) {
+         nir_builder builder;
+         nir_builder_init(&builder, function->impl);
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               if (instr->type == nir_instr_type_intrinsic)
+                  progress |= lower_instr(nir_instr_as_intrinsic(instr),
+                                          &builder,
+                                          &state);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   /* Each point became a four-vertex strip */
+   shader->info.gs.output_primitive = GL_TRIANGLE_STRIP;
+   shader->info.gs.vertices_out *= 4;
+
+   return progress;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.c b/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.c
new file mode 100644 (file)
index 0000000..0002b4c
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "d3d12_nir_lower_texcmp.h"
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+/**
+ * Filter callback selecting the texture instructions whose shadow
+ * comparison has to be lowered.
+ *
+ * tex, txb, txl and txd are all treated alike: to be consistent every one
+ * of them is lowered when anything is lowered, otherwise differences in how
+ * the shadow value is evaluated might lead to artifacts.
+ *
+ * Returns true only for the shadow variants of those four operations.
+ */
+bool
+lower_sample_tex_compare_filter(const nir_instr *instr,
+                                UNUSED const void *_options)
+{
+   if (instr->type != nir_instr_type_tex)
+      return false;
+
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+   switch (tex->op) {
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txd:
+      return tex->is_shadow;
+   default:
+      return false;
+   }
+}
+
+/* Return the non-shadow float sampler type that has the same dimensionality
+ * and arrayness as the given sampler type. */
+static const struct glsl_type *
+strip_shadow(const struct glsl_type *type)
+{
+   enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
+   bool is_array = glsl_sampler_type_is_array(type);
+
+   return glsl_sampler_type(dim, false, is_array, GLSL_TYPE_FLOAT);
+}
+
+
+/* Same as strip_shadow(), but also accepts an array of samplers: the element
+ * type is stripped and wrapped again in an array of the original length. */
+static const struct glsl_type *
+strip_shadow_with_array(const struct glsl_type *type)
+{
+   if (!glsl_type_is_array(type))
+      return strip_shadow(type);
+
+   const struct glsl_type *element = strip_shadow(glsl_without_array(type));
+   return glsl_array_type(element, glsl_get_length(type), 0);
+}
+
+/* Options handed to lower_sample_tex_compare_impl() through the opaque
+ * "options" pointer of the lowering callback. */
+typedef struct {
+   /* Bound used when indexing compare_func by sampler binding */
+   unsigned n_states;
+   /* Comparison function per sampler binding */
+   enum compare_func *compare_func;
+   /* Swizzle state per sampler binding (indexed without a bound check) */
+   dxil_texture_swizzle_state *tex_swizzles;
+} sampler_state;
+
+/*
+ * Lowering callback for the instructions accepted by
+ * lower_sample_tex_compare_filter().
+ *
+ * The texture instruction is turned into a non-shadow sample (the comparator
+ * source is removed and the sampler variable/deref types lose their shadow
+ * flag), and the comparison is emitted explicitly after it, using the
+ * compare function recorded for the sampler's binding. The comparison result
+ * is then routed into the output channels according to the swizzle state of
+ * that binding.
+ *
+ * @param b        builder used to emit the replacement code
+ * @param instr    the texture instruction (must be a nir_tex_instr)
+ * @param options  pointer to a sampler_state
+ * @return the vector that replaces the original texture result
+ */
+static nir_ssa_def *
+lower_sample_tex_compare_impl(nir_builder *b, nir_instr *instr,
+                              void *options)
+
+{
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+   sampler_state *state = (sampler_state *)options;
+
+   /* All new code goes after the (modified) texture instruction */
+   b->cursor = nir_after_instr(instr);
+   tex->is_shadow = false;
+
+   /* NOTE(review): comp_index is used unchecked below; the filter only lets
+    * shadow instructions through, which presumably always carry a
+    * comparator source - confirm. */
+   int comp_index = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
+
+   nir_deref_instr *sampler_deref = NULL;
+   nir_variable *sampler = NULL;
+
+   int sampler_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+   assert(sampler_index >= 0);
+
+   sampler_deref = nir_instr_as_deref(tex->src[sampler_index].src.ssa->parent_instr);
+   sampler = nir_deref_instr_get_variable(sampler_deref);
+
+   /* NIR expects a vec4 result from the above texture instructions */
+   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
+
+   /* The sampled depth is taken from the first channel of the new result */
+   nir_ssa_def *tex_r = nir_channel(b, &tex->dest.ssa, 0);
+   nir_ssa_def *cmp = tex->src[comp_index].src.ssa;
+
+   /* For projective lookups the reference value is divided by the projector */
+   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
+   if (proj_index >= 0)
+      cmp = nir_fmul(b, cmp, nir_frcp(b, tex->src[proj_index].src.ssa));
+
+   /* Bindings beyond n_states fall back to COMPARE_FUNC_ALWAYS */
+   nir_ssa_def * result =
+         nir_compare_func(b,
+                          sampler->data.binding < state->n_states ?
+                             state->compare_func[sampler->data.binding] : COMPARE_FUNC_ALWAYS,
+                          cmp, tex_r);
+
+   result = nir_b2f32(b, result);
+   nir_ssa_def *one = nir_imm_float(b, 1.0);
+   nir_ssa_def *zero = nir_imm_float(b, 0.0);
+
+   /* Swizzle value -> source: 0 selects the comparison result, 4 selects 0.0
+    * and 5 selects 1.0. Entries 1..3 are NULL.
+    * NOTE(review): presumably the swizzle of a depth texture never selects
+    * 1..3 (Y/Z/W) - confirm, a NULL would be passed on to nir_vec().
+    * NOTE(review): unlike compare_func above, tex_swizzles is indexed by the
+    * binding without checking it against n_states - confirm the array is
+    * large enough for every binding. */
+   nir_ssa_def *lookup[6] = {result, NULL, NULL, NULL, zero, one};
+   nir_ssa_def *r[4] = {lookup[state->tex_swizzles[sampler->data.binding].swizzle_r],
+                        lookup[state->tex_swizzles[sampler->data.binding].swizzle_g],
+                        lookup[state->tex_swizzles[sampler->data.binding].swizzle_b],
+                        lookup[state->tex_swizzles[sampler->data.binding].swizzle_a]
+                       };
+
+   result = nir_vec(b, r, nir_dest_num_components(tex->dest));
+
+   /* The variable and its deref must agree on the new non-shadow type */
+   sampler->type = strip_shadow_with_array(sampler->type);
+   sampler_deref->type = sampler->type;
+
+   /* (is_shadow was already cleared above; this second store is redundant) */
+   tex->is_shadow = false;
+   nir_tex_instr_remove_src(tex, comp_index);
+
+   return result;
+}
+
+/**
+ * Lower shadow texture operations of a shader to a plain sample followed by
+ * an explicit comparison.
+ *
+ * @param s             shader to process
+ * @param n_states      bound used when indexing compare_func by binding
+ * @param compare_func  comparison function per sampler binding
+ * @param tex_swizzles  swizzle state per sampler binding
+ * @return the value returned by nir_shader_lower_instructions()
+ */
+bool
+d3d12_lower_sample_tex_compare(nir_shader *s,
+                               unsigned n_states,
+                               enum compare_func *compare_func,
+                               dxil_texture_swizzle_state *tex_swizzles)
+{
+   sampler_state options = {n_states, compare_func, tex_swizzles};
+
+   return nir_shader_lower_instructions(s,
+                                        lower_sample_tex_compare_filter,
+                                        lower_sample_tex_compare_impl,
+                                        &options);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.h b/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.h
new file mode 100644 (file)
index 0000000..2bb3707
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_NIR_LOWER_TEXCOMP_H
+#define D3D12_NIR_LOWER_TEXCOMP_H
+
+#include "dxil_nir_lower_int_samplers.h"
+
+#include "pipe/p_state.h"
+#include "compiler/shader_enums.h"
+#include "nir.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Lower shadow texture operations (tex, txb, txl, txd) to a non-shadow
+ * sample followed by an explicit comparison in the shader.
+ *
+ * @param s             shader to process
+ * @param n_states      bound for compare_func; samplers whose binding is not
+ *                      below it are compared with COMPARE_FUNC_ALWAYS
+ * @param compare_func  comparison function, indexed by sampler binding
+ * @param tex_swizzles  swizzle state, indexed by sampler binding; it is
+ *                      not range-checked against n_states
+ * @return the result of the underlying NIR lowering helper (presumably true
+ *         when the shader was changed)
+ */
+bool
+d3d12_lower_sample_tex_compare(nir_shader *s,
+                               unsigned n_states,
+                               enum compare_func *compare_func,
+                               dxil_texture_swizzle_state *tex_swizzles);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // D3D12_NIR_LOWER_TEXCOMP_H
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_lower_vs_vertex_conversion.c b/src/gallium/drivers/d3d12/d3d12_nir_lower_vs_vertex_conversion.c
new file mode 100644 (file)
index 0000000..eec0ee6
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "d3d12_nir_passes.h"
+
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+/* Look up the emulated target format of a vertex input. "options" is the
+ * per-attribute format table, indexed by the variable's driver location. */
+static enum pipe_format
+get_input_target_format(nir_variable *var, const void *options)
+{
+   enum pipe_format *formats_by_location = (enum pipe_format *)options;
+   unsigned slot = var->data.driver_location;
+
+   return formats_by_location[slot];
+}
+
+/* Filter callback: accept load_deref instructions that read a shader input
+ * whose entry in the format table is not PIPE_FORMAT_NONE. */
+static bool
+lower_vs_vertex_conversion_filter(const nir_instr *instr, const void *options)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+   if (load->intrinsic != nir_intrinsic_load_deref)
+      return false;
+
+   nir_variable *input = nir_intrinsic_get_var(load, 0);
+   if (input->data.mode != nir_var_shader_in)
+      return false;
+
+   return get_input_target_format(input, options) != PIPE_FORMAT_NONE;
+}
+
+/* Signature shared by the NIR shift-right builders (nir_ishr / nir_ushr are
+ * passed in this file), so the caller can pick arithmetic or logical
+ * shifting. */
+typedef  nir_ssa_def *
+(*shift_right_func)(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1);
+
+/* Unpack a 10/10/10/2 value into a float vec4 without normalization.
+ *
+ * Each field is first moved to the top of its 32 bit lane with "lshift" and
+ * then shifted back down by 22 bits (30 bits for the 2 bit field). Whether
+ * the result is decoded as signed or unsigned is decided purely by the
+ * shift right function passed in "shr". */
+static nir_ssa_def *
+from_10_10_10_2_scaled(nir_builder *b, nir_ssa_def *src,
+                       nir_ssa_def *lshift, shift_right_func shr)
+{
+   nir_ssa_def *rshift = nir_imm_ivec4(b, 22, 22, 22, 30);
+   nir_ssa_def *top_aligned = nir_ishl(b, src, lshift);
+   nir_ssa_def *fields = shr(b, top_aligned, rshift);
+
+   return nir_i2f32(b, fields);
+}
+
+/* Unpack a signed normalized 10/10/10/2 value: the fields are sign extended
+ * and the three 10 bit fields are divided by 0x1ff, the 2 bit field is
+ * passed through unscaled. */
+static nir_ssa_def *
+from_10_10_10_2_snorm(nir_builder *b, nir_ssa_def *src, nir_ssa_def *lshift)
+{
+   const float rgb_scale = 1.0f / 0x1ff;
+
+   nir_ssa_def *fields = from_10_10_10_2_scaled(b, src, lshift, nir_ishr);
+   nir_ssa_def *scale = nir_imm_vec4(b, rgb_scale, rgb_scale, rgb_scale, 1.0f);
+
+   return nir_fmul(b, fields, scale);
+}
+
+/* Unpack an unsigned normalized 10/10/10/2 value: the fields are zero
+ * extended, the three 10 bit fields are divided by 0x3ff and the 2 bit
+ * field by 3. */
+static nir_ssa_def *
+from_10_10_10_2_unorm(nir_builder *b, nir_ssa_def *src, nir_ssa_def *lshift)
+{
+   const float rgb_scale = 1.0f / 0x3ff;
+   const float alpha_scale = 1.0f / 3.0f;
+
+   nir_ssa_def *fields = from_10_10_10_2_scaled(b, src, lshift, nir_ushr);
+   nir_ssa_def *scale = nir_imm_vec4(b, rgb_scale, rgb_scale, rgb_scale,
+                                     alpha_scale);
+
+   return nir_fmul(b, fields, scale);
+}
+
+/* Left shift amounts for the R10G10B10A2 layout. Combined with the right
+ * shift of (22, 22, 22, 30) applied in from_10_10_10_2_scaled() this selects
+ * bits 0-9 for x, 10-19 for y, 20-29 for z and 30-31 for w. */
+inline static nir_ssa_def *
+lshift_rgba(nir_builder *b)
+{
+   return nir_imm_ivec4(b, 22, 12, 2, 0);
+}
+
+/* Left shift amounts for the B10G10R10A2 layout. Combined with the right
+ * shift of (22, 22, 22, 30) applied in from_10_10_10_2_scaled() this selects
+ * bits 20-29 for x, 10-19 for y, 0-9 for z and 30-31 for w, i.e. x and z
+ * are swapped compared to lshift_rgba(). */
+inline static nir_ssa_def *
+lshift_bgra(nir_builder *b)
+{
+   return nir_imm_ivec4(b, 2, 12, 22, 0);
+}
+
+/* Lowering callback: emit the conversion for one emulated vertex input load.
+ *
+ * Formats without alpha get their fourth component replaced by integer 1.
+ * The 10/10/10/2 formats are unpacked from the first channel of the loaded
+ * value according to their layout and signedness. The returned value
+ * replaces the result of the original load. */
+static nir_ssa_def *
+lower_vs_vertex_conversion_impl(nir_builder *b, nir_instr *instr, void *options)
+{
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+   nir_variable *input = nir_intrinsic_get_var(load, 0);
+   enum pipe_format fmt = get_input_target_format(input, options);
+
+   if (!util_format_has_alpha(fmt)) {
+      /* these formats need the alpha channel replaced with 1: */
+      assert(fmt == PIPE_FORMAT_R8G8B8_SINT ||
+             fmt == PIPE_FORMAT_R8G8B8_UINT ||
+             fmt == PIPE_FORMAT_R16G16B16_SINT ||
+             fmt == PIPE_FORMAT_R16G16B16_UINT);
+      return nir_vector_insert_imm(b, &load->dest.ssa, nir_imm_int(b, 1), 3);
+   }
+
+   /* The packed 32 bit word arrives in the first channel */
+   nir_ssa_def *packed = nir_channel(b, &load->dest.ssa, 0);
+
+   switch (fmt) {
+   case PIPE_FORMAT_R10G10B10A2_SNORM:
+      return from_10_10_10_2_snorm(b, packed, lshift_rgba(b));
+   case PIPE_FORMAT_B10G10R10A2_SNORM:
+      return from_10_10_10_2_snorm(b, packed, lshift_bgra(b));
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+      return from_10_10_10_2_unorm(b, packed, lshift_bgra(b));
+   case PIPE_FORMAT_R10G10B10A2_SSCALED:
+      return from_10_10_10_2_scaled(b, packed, lshift_rgba(b), nir_ishr);
+   case PIPE_FORMAT_B10G10R10A2_SSCALED:
+      return from_10_10_10_2_scaled(b, packed, lshift_bgra(b), nir_ishr);
+   case PIPE_FORMAT_R10G10B10A2_USCALED:
+      return from_10_10_10_2_scaled(b, packed, lshift_rgba(b), nir_ushr);
+   case PIPE_FORMAT_B10G10R10A2_USCALED:
+      return from_10_10_10_2_scaled(b, packed, lshift_bgra(b), nir_ushr);
+
+   default:
+      unreachable("Unsupported emulated vertex format");
+   }
+}
+
+/* Lower emulated vertex attribute inputs of a vertex shader.
+ *
+ * Attributes in one of the emulated RGB10A2 formats are passed as R32_UINT
+ * and are unpacked in the shader; the three-component 8/16 bit integer
+ * formats only get their alpha component replaced by 1.
+ *
+ * @param s shader to process, must be a vertex shader
+ * @param target_formats contains the per attribute format to convert to
+ * or PIPE_FORMAT_NONE if no conversion is needed
+ * @return the value returned by nir_shader_lower_instructions()
+ */
+bool
+d3d12_nir_lower_vs_vertex_conversion(nir_shader *s,
+                                     enum pipe_format target_formats[])
+{
+   assert(s->info.stage == MESA_SHADER_VERTEX);
+
+   return nir_shader_lower_instructions(s,
+                                        lower_vs_vertex_conversion_filter,
+                                        lower_vs_vertex_conversion_impl,
+                                        target_formats);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_passes.c b/src/gallium/drivers/d3d12/d3d12_nir_passes.c
new file mode 100644 (file)
index 0000000..e8f1a1f
--- /dev/null
@@ -0,0 +1,998 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "d3d12_nir_passes.h"
+#include "d3d12_compiler.h"
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+#include "nir_format_convert.h"
+#include "program/prog_instruction.h"
+#include "dxil_nir.h"
+
+/**
+ * Lower Y Flip:
+ *
+ * We can't do a Y flip simply by negating the viewport height,
+ * so we need to lower the flip into the NIR shader.
+ */
+
+/* Emit a load of a driver internal state variable.
+ *
+ * The backing uniform is created lazily: when *out_var is still NULL a
+ * hidden uniform tagged with the STATE_INTERNAL / STATE_INTERNAL_DRIVER /
+ * var_enum state tokens is added to the shader and cached in *out_var, so
+ * later calls with the same out_var reuse it. */
+static nir_ssa_def *
+get_state_var(nir_builder *b,
+              enum d3d12_state_var var_enum,
+              const char *var_name,
+              const struct glsl_type *var_type,
+              nir_variable **out_var)
+{
+   const gl_state_index16 tokens[5] = { STATE_INTERNAL, STATE_INTERNAL_DRIVER, var_enum };
+   nir_variable *state_var = *out_var;
+
+   if (state_var == NULL) {
+      state_var = nir_variable_create(b->shader, nir_var_uniform,
+                                      var_type, var_name);
+
+      state_var->num_state_slots = 1;
+      state_var->state_slots = ralloc_array(state_var, nir_state_slot, 1);
+      memcpy(state_var->state_slots[0].tokens, tokens,
+             sizeof(state_var->state_slots[0].tokens));
+      state_var->data.how_declared = nir_var_hidden;
+      b->shader->num_uniforms++;
+
+      *out_var = state_var;
+   }
+
+   return nir_load_var(b, state_var);
+}
+
+/* If "instr" stores to the position output, multiply the y component of the
+ * stored value by the d3d12_FlipY state variable. All other instructions are
+ * left alone. */
+static void
+lower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
+   if (store->intrinsic != nir_intrinsic_store_deref)
+      return;
+
+   nir_variable *out = nir_intrinsic_get_var(store, 0);
+   bool is_position = out->data.mode == nir_var_shader_out &&
+                      out->data.location == VARYING_SLOT_POS;
+   if (!is_position)
+      return;
+
+   b->cursor = nir_before_instr(&store->instr);
+
+   nir_ssa_def *pos = nir_ssa_for_src(b, store->src[1], 4);
+   nir_ssa_def *flip_y = get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
+                                       glsl_float_type(), flip);
+
+   nir_ssa_def *x = nir_channel(b, pos, 0);
+   nir_ssa_def *y = nir_fmul(b, nir_channel(b, pos, 1), flip_y);
+   nir_ssa_def *z = nir_channel(b, pos, 2);
+   nir_ssa_def *w = nir_channel(b, pos, 3);
+   nir_ssa_def *flipped = nir_vec4(b, x, y, z, w);
+
+   nir_instr_rewrite_src(&store->instr, &store->src[1],
+                         nir_src_for_ssa(flipped));
+}
+
+/* Apply the y flip lowering to every position write of a vertex or geometry
+ * shader; other stages are left untouched. */
+void
+d3d12_lower_yflip(nir_shader *nir)
+{
+   bool writes_position = nir->info.stage == MESA_SHADER_VERTEX ||
+                          nir->info.stage == MESA_SHADER_GEOMETRY;
+   if (!writes_position)
+      return;
+
+   /* Created on first use and shared by all position writes */
+   nir_variable *flip_var = NULL;
+
+   nir_foreach_function(function, nir) {
+      if (!function->impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            lower_pos_write(&builder, instr, &flip_var);
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+}
+
+/* Replace a load_front_face intrinsic by a load of the given variable and
+ * remove the intrinsic. Other instructions are ignored. */
+static void
+lower_load_face(nir_builder *b, struct nir_instr *instr, nir_variable *var)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *face = nir_instr_as_intrinsic(instr);
+   if (face->intrinsic != nir_intrinsic_load_front_face)
+      return;
+
+   b->cursor = nir_before_instr(&face->instr);
+
+   nir_ssa_def *replacement = nir_load_var(b, var);
+   nir_ssa_def_rewrite_uses(&face->dest.ssa, nir_src_for_ssa(replacement));
+
+   nir_instr_remove(instr);
+}
+
+/* Make a fragment shader read the front face flag from a flat interpolated
+ * bool input at VARYING_SLOT_VAR12 instead of the load_front_face
+ * intrinsic. The input variable is created unconditionally. */
+void
+d3d12_forward_front_face(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+   nir_variable *face_in = nir_variable_create(nir, nir_var_shader_in,
+                                               glsl_bool_type(),
+                                               "gl_FrontFacing");
+   face_in->data.location = VARYING_SLOT_VAR12;
+   face_in->data.interpolation = INTERP_MODE_FLAT;
+
+   nir_foreach_function(function, nir) {
+      if (!function->impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            lower_load_face(&builder, instr, face_in);
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+}
+
+/* If "instr" loads the position input, replace the z component seen by all
+ * later users with "z * transform.x + transform.y", where transform is the
+ * d3d12_DepthTransform state variable. Other instructions are ignored. */
+static void
+lower_pos_read(nir_builder *b, struct nir_instr *instr,
+               nir_variable **depth_transform_var)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+   if (load->intrinsic != nir_intrinsic_load_deref)
+      return;
+
+   nir_variable *in = nir_intrinsic_get_var(load, 0);
+   bool is_position = in->data.mode == nir_var_shader_in &&
+                      in->data.location == VARYING_SLOT_POS;
+   if (!is_position)
+      return;
+
+   b->cursor = nir_after_instr(instr);
+
+   nir_ssa_def *orig_pos = nir_instr_ssa_def(instr);
+   nir_ssa_def *orig_depth = nir_channel(b, orig_pos, 2);
+
+   assert(depth_transform_var);
+   nir_ssa_def *transform = get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
+                                          "d3d12_DepthTransform",
+                                          glsl_vec_type(2),
+                                          depth_transform_var);
+   nir_ssa_def *scale = nir_channel(b, transform, 0);
+   nir_ssa_def *bias = nir_channel(b, transform, 1);
+   nir_ssa_def *new_depth = nir_fmad(b, orig_depth, scale, bias);
+
+   nir_ssa_def *new_pos = nir_vector_insert_imm(b, orig_pos, new_depth, 2);
+
+   /* Only users after the newly built vector are redirected, the code
+    * emitted above keeps reading the original load */
+   assert(load->dest.is_ssa);
+   nir_ssa_def_rewrite_uses_after(&load->dest.ssa, nir_src_for_ssa(new_pos),
+                                  new_pos->parent_instr);
+}
+
+/* Apply the depth transform lowering to every position read of a fragment
+ * shader. */
+void
+d3d12_lower_depth_range(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+   /* Created on first use and shared by all position reads */
+   nir_variable *transform_var = NULL;
+
+   nir_foreach_function(function, nir) {
+      if (!function->impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            lower_pos_read(&builder, instr, &transform_var);
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+}
+
+/* True for shader outputs located at FRAG_RESULT_COLOR or at
+ * FRAG_RESULT_DATA0 and above. */
+static bool
+is_color_output(nir_variable *var)
+{
+   if (var->data.mode != nir_var_shader_out)
+      return false;
+
+   return var->data.location == FRAG_RESULT_COLOR ||
+          var->data.location >= FRAG_RESULT_DATA0;
+}
+
+/* If "instr" stores to a color output, convert the stored float value to an
+ * 8 bit per channel normalized integer. For the signed variant negative
+ * results get 1 << 8 added, which yields their 8 bit two's complement
+ * pattern as a non-negative integer. */
+static void
+lower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
+{
+   const unsigned NUM_BITS = 8;
+   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
+   if (store->intrinsic != nir_intrinsic_store_deref)
+      return;
+
+   if (!is_color_output(nir_intrinsic_get_var(store, 0)))
+      return;
+
+   b->cursor = nir_before_instr(&store->instr);
+
+   nir_ssa_def *color = nir_ssa_for_src(b, store->src[1], store->num_components);
+   nir_ssa_def *converted;
+
+   if (is_signed) {
+      nir_ssa_def *snorm = nir_format_float_to_snorm(b, color, bits);
+      nir_ssa_def *is_negative = nir_ilt(b, snorm, nir_imm_int(b, 0));
+      nir_ssa_def *wrapped = nir_iadd(b, snorm, nir_imm_int(b, 1 << NUM_BITS));
+      converted = nir_bcsel(b, is_negative, wrapped, snorm);
+   } else {
+      converted = nir_format_float_to_unorm(b, color, bits);
+   }
+
+   nir_instr_rewrite_src(&store->instr, &store->src[1],
+                         nir_src_for_ssa(converted));
+}
+
+/* Convert all color output writes of a fragment shader to 8 bit normalized
+ * integers (signed or unsigned according to "is_signed"). Shaders of other
+ * stages are left untouched. */
+void
+d3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
+{
+   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   nir_foreach_function(function, nir) {
+      if (!function->impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            lower_uint_color_write(&builder, instr, is_signed);
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+}
+
+/* Replace a load_first_vertex intrinsic by a load of the d3d12_FirstVertex
+ * state variable. Returns true when the instruction was replaced. */
+static bool
+lower_load_first_vertex(nir_builder *b, nir_instr *instr, nir_variable **first_vertex)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *old_load = nir_instr_as_intrinsic(instr);
+   if (old_load->intrinsic != nir_intrinsic_load_first_vertex)
+      return false;
+
+   b->cursor = nir_before_instr(&old_load->instr);
+
+   nir_ssa_def *replacement = get_state_var(b, D3D12_STATE_VAR_FIRST_VERTEX,
+                                            "d3d12_FirstVertex",
+                                            glsl_uint_type(), first_vertex);
+   nir_ssa_def_rewrite_uses(&old_load->dest.ssa, nir_src_for_ssa(replacement));
+   nir_instr_remove(instr);
+
+   return true;
+}
+
+/* Replace every load_first_vertex intrinsic of a vertex shader by a state
+ * variable load. Returns true if at least one instruction was replaced;
+ * shaders of other stages are not touched and yield false. */
+bool
+d3d12_lower_load_first_vertex(struct nir_shader *nir)
+{
+   if (nir->info.stage != MESA_SHADER_VERTEX)
+      return false;
+
+   /* Created on first use and shared by all replaced loads */
+   nir_variable *first_vertex_var = NULL;
+   bool changed = false;
+
+   nir_foreach_function(function, nir) {
+      if (!function->impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (lower_load_first_vertex(&builder, instr, &first_vertex_var))
+               changed = true;
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+
+   return changed;
+}
+
+/* If "instr" stores to the position output, negate the z component of the
+ * stored value. Other instructions are ignored. */
+static void
+invert_depth(nir_builder *b, struct nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
+   if (store->intrinsic != nir_intrinsic_store_deref)
+      return;
+
+   nir_variable *out = nir_intrinsic_get_var(store, 0);
+   bool is_position = out->data.mode == nir_var_shader_out &&
+                      out->data.location == VARYING_SLOT_POS;
+   if (!is_position)
+      return;
+
+   b->cursor = nir_before_instr(&store->instr);
+
+   nir_ssa_def *pos = nir_ssa_for_src(b, store->src[1], 4);
+
+   nir_ssa_def *x = nir_channel(b, pos, 0);
+   nir_ssa_def *y = nir_channel(b, pos, 1);
+   nir_ssa_def *z = nir_fneg(b, nir_channel(b, pos, 2));
+   nir_ssa_def *w = nir_channel(b, pos, 3);
+   nir_ssa_def *inverted = nir_vec4(b, x, y, z, w);
+
+   nir_instr_rewrite_src(&store->instr, &store->src[1],
+                         nir_src_for_ssa(inverted));
+}
+
+/* In OpenGL the window space depth value z_w is evaluated according to
+ * "s * z_d + b" with "s = (far - near) / 2" (depth clip: minus_one_to_one)
+ * [OpenGL 3.3, 2.13.1]. When we switch the far and near values to satisfy
+ * DirectX requirements we have to compensate by inverting "z_d' = -z_d"
+ * with this lowering pass.
+ */
+void
+d3d12_nir_invert_depth(nir_shader *shader)
+{
+   /* Only vertex and geometry shaders are processed */
+   bool writes_position = shader->info.stage == MESA_SHADER_VERTEX ||
+                          shader->info.stage == MESA_SHADER_GEOMETRY;
+   if (!writes_position)
+      return;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            invert_depth(&builder, instr);
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+}
+
+
+/**
+ * Lower State Vars:
+ *
+ * All uniforms related to internal D3D12 variables are
+ * condensed into a UBO that is appended at the end of the
+ * current ones.
+ */
+
+/* Return the offset of a state variable inside the shader's state variable
+ * block. A variable seen for the first time is appended to the
+ * shader's table and gets the next free slot. */
+static unsigned
+get_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
+{
+   const unsigned known = shader->num_state_vars;
+
+   for (unsigned idx = 0; idx < known; ++idx) {
+      if (shader->state_vars[idx].var == var)
+         return shader->state_vars[idx].offset;
+   }
+
+   /* Not registered yet: append a new entry at the current end */
+   unsigned new_offset = shader->state_vars_size;
+
+   shader->state_vars[known].offset = new_offset;
+   shader->state_vars[known].var = var;
+   shader->num_state_vars++;
+   shader->state_vars_size += 4; /* Use 4-words slots no matter the variable size */
+
+   return new_offset;
+}
+
+/*
+ * Replace a load of a driver internal state variable by a load_ubo from the
+ * state variable UBO.
+ *
+ * Handles load_uniform (the variable is looked up by driver location) and
+ * load_deref. Loads of anything that is not a variable with exactly one
+ * state slot tagged STATE_INTERNAL_DRIVER are left untouched.
+ *
+ * @param instr    intrinsic to inspect
+ * @param b        builder used to emit the replacement
+ * @param shader   shader whose state variable table assigns the offsets
+ * @param binding  UBO binding index of the state variable buffer
+ * @return true if the instruction was replaced
+ */
+static bool
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b,
+            struct d3d12_shader *shader, unsigned binding)
+{
+   nir_variable *variable = NULL;
+   nir_deref_instr *deref = NULL;
+
+   b->cursor = nir_before_instr(&instr->instr);
+
+   if (instr->intrinsic == nir_intrinsic_load_uniform) {
+      /* Find the uniform whose driver location matches the intrinsic base */
+      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
+         if (var->data.driver_location == nir_intrinsic_base(instr)) {
+            variable = var;
+            break;
+         }
+      }
+   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
+      /* Remember the deref so that it can be cleaned up further down */
+      deref = nir_src_as_deref(instr->src[0]);
+      variable = nir_intrinsic_get_var(instr, 0);
+   }
+
+   if (variable == NULL ||
+       variable->num_state_slots != 1 ||
+       variable->state_slots[0].tokens[1] != STATE_INTERNAL_DRIVER)
+      return false;
+
+   /* The third token carries the d3d12_state_var enum value */
+   enum d3d12_state_var var = variable->state_slots[0].tokens[2];
+   nir_ssa_def *ubo_idx = nir_imm_int(b, binding);
+   /* The table offset is multiplied by 4 (presumably 32 bit words -> bytes) */
+   nir_ssa_def *ubo_offset =  nir_imm_int(b, get_state_var_offset(shader, var) * 4);
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+   load->num_components = instr->num_components;
+   load->src[0] = nir_src_for_ssa(ubo_idx);
+   load->src[1] = nir_src_for_ssa(ubo_offset);
+   assert(instr->dest.ssa.bit_size >= 8);
+   nir_intrinsic_set_align(load, instr->dest.ssa.bit_size / 8, 0);
+   nir_intrinsic_set_range_base(load, 0);
+   nir_intrinsic_set_range(load, ~0);
+
+   /* The new load mirrors component count, bit size and name of the old one */
+   nir_ssa_dest_init(&load->instr, &load->dest,
+                     load->num_components, instr->dest.ssa.bit_size,
+                     instr->dest.ssa.name);
+   nir_builder_instr_insert(b, &load->instr);
+   nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
+
+   /* Remove the old load_* instruction and any parent derefs */
+   nir_instr_remove(&instr->instr);
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+      /* If anyone is using this deref, leave it alone */
+      assert(d->dest.is_ssa);
+      if (!list_is_empty(&d->dest.ssa.uses))
+         break;
+
+      nir_instr_remove(&d->instr);
+   }
+
+   return true;
+}
+
+/*
+ * Move all driver internal state variables of a shader into a single UBO.
+ *
+ * Every load of such a variable is rewritten to a load_ubo (see
+ * lower_instr()). If anything was rewritten, the state variable uniforms are
+ * removed from the shader and a "d3d12_state_vars" UBO variable, an array of
+ * vec4 sized from shader->state_vars_size, is created at the chosen binding.
+ *
+ * @param nir     shader to process
+ * @param shader  driver shader that records which state variables are used
+ *                and at which offset
+ * @return true if at least one load was rewritten
+ */
+bool
+d3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
+{
+   bool progress = false;
+
+   /* The state var UBO is added after all the other UBOs. If it already
+    * exists, it is replaced by reusing the same binding.
+    * In the event there are no other UBOs, use binding slot 1 to
+    * be consistent with other non-default UBOs. */
+   unsigned binding = max(nir->info.num_ubos, 1);
+
+   /* NOTE(review): this loop visits variables selected with nir_var_uniform
+    * but then tests data.mode against nir_var_mem_ubo. If the iteration
+    * macro filters on data.mode, the inner condition can never hold and an
+    * existing state var UBO is never found - confirm the intent. */
+   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
+      if (var->num_state_slots == 1 &&
+          var->state_slots[0].tokens[1] == STATE_INTERNAL_DRIVER) {
+         if (var->data.mode == nir_var_mem_ubo) {
+            binding = var->data.binding;
+         }
+      }
+   }
+
+   /* Rewrite all state variable loads in all functions */
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder builder;
+         nir_builder_init(&builder, function->impl);
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               if (instr->type == nir_instr_type_intrinsic)
+                  progress |= lower_instr(nir_instr_as_intrinsic(instr),
+                                          &builder,
+                                          shader,
+                                          binding);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   if (progress) {
+      assert(shader->num_state_vars > 0);
+
+      shader->state_vars_used = true;
+
+      /* Remove state variables */
+      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
+         if (var->num_state_slots == 1 &&
+             var->state_slots[0].tokens[1] == STATE_INTERNAL_DRIVER) {
+            exec_node_remove(&var->node);
+            nir->num_uniforms--;
+         }
+      }
+
+      /* The UBO itself is tagged as driver internal state as well; the
+       * remaining tokens are zero initialized */
+      const gl_state_index16 tokens[5] = { STATE_INTERNAL, STATE_INTERNAL_DRIVER };
+      /* state_vars_size grows in steps of 4, one vec4 per state variable */
+      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
+                                                     shader->state_vars_size / 4, 0);
+      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
+                                                  "d3d12_state_vars");
+      /* Make sure the UBO count covers the binding that is used */
+      if (binding >= nir->info.num_ubos)
+         nir->info.num_ubos = binding + 1;
+      ubo->data.binding = binding;
+      ubo->num_state_slots = 1;
+      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
+      memcpy(ubo->state_slots[0].tokens, tokens,
+              sizeof(ubo->state_slots[0].tokens));
+
+      /* Wrap the array in an interface block with a single "data" member */
+      struct glsl_struct_field field = {
+          .type = type,
+          .name = "data",
+          .location = -1,
+      };
+      ubo->interface_type =
+              glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
+                                  false, "__d3d12_state_vars_interface");
+   }
+
+   return progress;
+}
+
+/**
+ * Translate a sampler type, or a (possibly nested) array of samplers, into
+ * the corresponding bare sampler type with the same array dimensions.
+ *
+ * Shadow samplers map to the bare shadow sampler type and all other
+ * samplers to the plain bare sampler type. Any other input asserts and
+ * returns NULL.
+ */
+static const struct glsl_type *
+get_bare_samplers_for_type(const struct glsl_type *type)
+{
+   if (glsl_type_is_sampler(type)) {
+      return glsl_sampler_type_is_shadow(type) ? glsl_bare_shadow_sampler_type()
+                                               : glsl_bare_sampler_type();
+   }
+
+   if (!glsl_type_is_array(type)) {
+      assert(!"Unexpected type");
+      return NULL;
+   }
+
+   /* Recurse into the element type and rebuild the array around it. */
+   const struct glsl_type *bare_element =
+      get_bare_samplers_for_type(glsl_get_array_element(type));
+   return glsl_array_type(bare_element, glsl_get_length(type),
+                          0 /* no explicit stride */);
+}
+
+void
+d3d12_create_bare_samplers(nir_shader *nir)
+{
+   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
+      const struct glsl_type *type = glsl_without_array(var->type);
+      if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
+         /* Since samplers are already lowered to be accessed by index, all we need to do
+          * here is create a bare sampler with the same binding: clone the variable and
+          * replace the clone's type with the bare equivalent (array dimensions are kept
+          * by get_bare_samplers_for_type). The original variable stays in the shader. */
+         nir_variable *clone = nir_variable_clone(var, nir);
+         clone->type = get_bare_samplers_for_type(var->type);
+         nir_shader_add_variable(nir, clone);
+      }
+   }
+}
+
+/**
+ * Instruction filter for d3d12_lower_bool_input().
+ *
+ * Selects load_front_face intrinsics and load_deref intrinsics that read a
+ * shader input variable whose base type is bool.
+ *
+ * Only used as a callback inside this file, so it has internal linkage like
+ * the matching lower_bool_input_impl().
+ */
+static bool
+lower_bool_input_filter(const nir_instr *instr,
+                        UNUSED const void *_options)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic == nir_intrinsic_load_front_face)
+      return true;
+
+   if (intr->intrinsic == nir_intrinsic_load_deref) {
+      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+      nir_variable *var = nir_deref_instr_get_variable(deref);
+      /* A deref chain that is not rooted in a variable yields NULL here;
+       * such a load cannot be a boolean shader input, so reject it instead
+       * of dereferencing NULL. */
+      return var &&
+             var->data.mode == nir_var_shader_in &&
+             glsl_get_base_type(var->type) == GLSL_TYPE_BOOL;
+   }
+
+   return false;
+}
+
+/**
+ * Lowering callback for d3d12_lower_bool_input().
+ *
+ * For a load_deref, the loaded variable and its deref are retyped to a uint
+ * vector with the same number of components. In every case the load's
+ * destination is widened to 32 bits, and the replacement value returned to
+ * the caller is an integer-to-bool conversion of that destination.
+ */
+static nir_ssa_def *
+lower_bool_input_impl(nir_builder *b, nir_instr *instr,
+                      UNUSED void *_options)
+{
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+
+   if (load->intrinsic == nir_intrinsic_load_deref) {
+      nir_deref_instr *src_deref =
+         nir_instr_as_deref(load->src[0].ssa->parent_instr);
+      nir_variable *input = nir_deref_instr_get_variable(src_deref);
+
+      /* Same component count as before, but unsigned integer components. */
+      const struct glsl_type *uint_type =
+         glsl_vector_type(GLSL_TYPE_UINT,
+                          glsl_get_vector_elements(input->type));
+
+      input->type = uint_type;
+      src_deref->type = uint_type;
+   }
+
+   load->dest.ssa.bit_size = 32;
+   return nir_i2b1(b, &load->dest.ssa);
+}
+
+/**
+ * Run the boolean-input lowering over every instruction of @s.
+ *
+ * Returns whatever nir_shader_lower_instructions() reports for the
+ * filter/callback pair defined above.
+ */
+bool
+d3d12_lower_bool_input(struct nir_shader *s)
+{
+   const bool progress =
+      nir_shader_lower_instructions(s,
+                                    lower_bool_input_filter,
+                                    lower_bool_input_impl,
+                                    NULL);
+   return progress;
+}
+
+/**
+ * Broadcast a write to FRAG_RESULT_COLOR to all color buffers.
+ *
+ * If @instr is a store_deref to a shader output located at
+ * FRAG_RESULT_COLOR, the variable is retargeted to FRAG_RESULT_DATA0 and,
+ * for every additional color buffer in [1, nr_cbufs), a new output variable
+ * is created and the same value is stored to it right after the original
+ * store.
+ *
+ * Returns true when the instruction was lowered, false otherwise.
+ *
+ * NOTE(review): the variable's location is rewritten on the first matching
+ * store, so a later store to the same variable no longer matches. This
+ * presumably relies on each output being written by a single store --
+ * confirm against the pass ordering in the caller.
+ */
+static bool
+lower_color_write(nir_builder *b, struct nir_instr *instr, unsigned nr_cbufs)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   if (var->data.mode != nir_var_shader_out ||
+       var->data.location != FRAG_RESULT_COLOR)
+      return false;
+
+   /* lower the original write to data #0 */
+   var->name = ralloc_strdup(var, "gl_FragData[0]");
+   var->data.location = FRAG_RESULT_DATA0;
+   var->data.driver_location = 0;
+
+   b->cursor = nir_after_instr(&intr->instr);
+
+   /* Then create new variables and write them as well */
+   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1],
+                                        nir_src_num_components(intr->src[1]));
+   unsigned writemask = nir_intrinsic_write_mask(intr);
+   /* The index is unsigned to match nr_cbufs and avoid a mixed-sign compare. */
+   for (unsigned i = 1; i < nr_cbufs; ++i) {
+      char name[256];
+      snprintf(name, sizeof(name), "gl_FragData[%u]", i);
+      nir_variable *new_var = nir_variable_create(b->shader,
+                                                  nir_var_shader_out,
+                                                  var->type, name);
+      new_var->data.location = FRAG_RESULT_DATA0 + i;
+      new_var->data.driver_location = i;
+      nir_store_var(b, new_var, value, writemask);
+   }
+
+   return true;
+}
+
+/**
+ * Apply lower_color_write() to every instruction of a fragment shader.
+ *
+ * Shaders of any other stage are left alone and false is returned. For
+ * fragment shaders the return value tells whether any store was lowered.
+ */
+bool
+d3d12_lower_frag_result(struct nir_shader *nir, unsigned nr_cbufs)
+{
+   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+      return false;
+
+   bool changed = false;
+
+   nir_foreach_function(func, nir) {
+      nir_function_impl *impl = func->impl;
+      if (!impl)
+         continue;
+
+      nir_builder builder;
+      nir_builder_init(&builder, impl);
+
+      nir_foreach_block(blk, impl) {
+         nir_foreach_instr_safe(cur, blk) {
+            if (lower_color_write(&builder, cur, nr_cbufs))
+               changed = true;
+         }
+      }
+
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return changed;
+}
+
+/**
+ * Add zero-written outputs for the targets selected by @missing_mask.
+ *
+ * Bit i of @missing_mask (i = 0 or 1) requests a vec4 output at
+ * FRAG_RESULT_DATA0 with index and driver_location i. Each requested output
+ * is stored with an all-zero vec4 at the very start of the entry point.
+ * @missing_mask must not be 0.
+ */
+void
+d3d12_add_missing_dual_src_target(struct nir_shader *s,
+                                  unsigned missing_mask)
+{
+   static const char *const target_names[2] = {
+      "gl_FragData[0]",
+      "gl_SecondaryFragDataEXT[0]",
+   };
+
+   assert(missing_mask != 0);
+
+   nir_function_impl *entry = nir_shader_get_entrypoint(s);
+   nir_builder builder;
+   nir_builder_init(&builder, entry);
+   builder.cursor = nir_before_cf_list(&entry->body);
+
+   nir_ssa_def *zero_vec4 = nir_imm_zero(&builder, 4, 32);
+
+   for (unsigned target = 0; target < 2; ++target) {
+      const bool is_missing = (missing_mask & (1u << target)) != 0;
+      if (is_missing) {
+         nir_variable *out_var =
+            nir_variable_create(s, nir_var_shader_out, glsl_vec4_type(),
+                                target_names[target]);
+         out_var->data.location = FRAG_RESULT_DATA0;
+         out_var->data.driver_location = target;
+         out_var->data.index = target;
+
+         nir_store_var(&builder, out_var, zero_vec4, 0xf);
+      }
+   }
+
+   nir_metadata_preserve(entry, nir_metadata_block_index |
+                                nir_metadata_dominance);
+}
+
+/**
+ * Retype the first variable of the given @modes at location @slot to uint
+ * and copy the new type to every deref instruction whose var is that
+ * variable.
+ *
+ * Asserts that such a variable exists. Always returns true.
+ */
+static bool
+fix_io_uint_type(struct nir_shader *s, nir_variable_mode modes, int slot)
+{
+   nir_variable *target = NULL;
+
+   nir_foreach_variable_with_modes(candidate, s, modes) {
+      if (candidate->data.location != slot)
+         continue;
+
+      candidate->type = glsl_uint_type();
+      target = candidate;
+      break;
+   }
+
+   assert(target);
+
+   nir_foreach_function(func, s) {
+      if (!func->impl)
+         continue;
+
+      nir_foreach_block(blk, func->impl) {
+         nir_foreach_instr_safe(cur, blk) {
+            if (cur->type != nir_instr_type_deref)
+               continue;
+
+            nir_deref_instr *deref = nir_instr_as_deref(cur);
+            if (deref->var == target)
+               deref->type = target->type;
+         }
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Retype selected shader inputs and outputs to uint.
+ *
+ * @in_mask / @out_mask are slot bitmasks. A slot is only processed when the
+ * shader actually reads (inputs) or writes (outputs) it according to
+ * s->info. Returns true if at least one slot was processed.
+ */
+bool
+d3d12_fix_io_uint_type(struct nir_shader *s, uint64_t in_mask, uint64_t out_mask)
+{
+   const bool touches_outputs = (s->info.outputs_written & out_mask) != 0;
+   const bool touches_inputs = (s->info.inputs_read & in_mask) != 0;
+   if (!touches_outputs && !touches_inputs)
+      return false;
+
+   bool progress = false;
+
+   while (in_mask) {
+      const int slot = u_bit_scan64(&in_mask);
+      if ((s->info.inputs_read & (1ull << slot)) &&
+          fix_io_uint_type(s, nir_var_shader_in, slot))
+         progress = true;
+   }
+
+   while (out_mask) {
+      const int slot = u_bit_scan64(&out_mask);
+      if ((s->info.outputs_written & (1ull << slot)) &&
+          fix_io_uint_type(s, nir_var_shader_out, slot))
+         progress = true;
+   }
+
+   return progress;
+}
+
+/**
+ * Instruction filter for nir_lower_packed_ubo_loads(): matches every
+ * load_ubo intrinsic and nothing else.
+ *
+ * File-local callback, hence static like the other helpers in this file.
+ */
+static bool
+lower_load_ubo_packed_filter(const nir_instr *instr,
+                             UNUSED const void *_options) {
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   return intr->intrinsic == nir_intrinsic_load_ubo;
+}
+
+/**
+ * Lowering callback for nir_lower_packed_ubo_loads().
+ *
+ * Rebuilds the load through build_load_ubo_dxil() from the original buffer
+ * (source 0) and offset (source 1), using the destination's component count
+ * and bit size, and returns the replacement value.
+ *
+ * The options pointer is non-const so that the signature agrees with
+ * lower_bool_input_impl(), which is passed to the same callback parameter
+ * of nir_shader_lower_instructions().
+ */
+static nir_ssa_def *
+lower_load_ubo_packed_impl(nir_builder *b, nir_instr *instr,
+                           UNUSED void *_options) {
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   nir_ssa_def *buffer = intr->src[0].ssa;
+   nir_ssa_def *offset = intr->src[1].ssa;
+
+   nir_ssa_def *result =
+      build_load_ubo_dxil(b, buffer,
+                          offset,
+                          nir_dest_num_components(intr->dest),
+                          nir_dest_bit_size(intr->dest));
+   return result;
+}
+
+/**
+ * Replace every load_ubo intrinsic in @nir with the sequence produced by
+ * build_load_ubo_dxil().
+ *
+ * Returns the progress flag reported by nir_shader_lower_instructions().
+ */
+bool
+nir_lower_packed_ubo_loads(nir_shader *nir) {
+   const bool progress =
+      nir_shader_lower_instructions(nir,
+                                    lower_load_ubo_packed_filter,
+                                    lower_load_ubo_packed_impl,
+                                    NULL);
+   return progress;
+}
+
+/**
+ * Make the shader write the primitive ID to a dedicated output.
+ *
+ * A flat uint output named "primitive_id" is created at
+ * VARYING_SLOT_PRIMITIVE_ID. In the entry point, the primitive ID is loaded
+ * once at the top of each block and stored to that output right before
+ * every emit_vertex intrinsic of the block. No metadata is preserved.
+ */
+void
+d3d12_lower_primitive_id(nir_shader *shader)
+{
+   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+   nir_builder builder;
+   nir_builder_init(&builder, entry);
+
+   nir_variable *out_var =
+      nir_variable_create(shader, nir_var_shader_out,
+                          glsl_uint_type(), "primitive_id");
+   out_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
+   out_var->data.interpolation = INTERP_MODE_FLAT;
+
+   nir_foreach_block(blk, entry) {
+      /* One load per block, shared by all emit_vertex stores in it. */
+      builder.cursor = nir_before_block(blk);
+      nir_ssa_def *prim_id = nir_load_primitive_id(&builder);
+
+      nir_foreach_instr_safe(cur, blk) {
+         const bool is_emit_vertex =
+            cur->type == nir_instr_type_intrinsic &&
+            nir_instr_as_intrinsic(cur)->intrinsic == nir_intrinsic_emit_vertex;
+
+         if (is_emit_vertex) {
+            builder.cursor = nir_before_instr(cur);
+            nir_store_var(&builder, out_var, prim_id, 0x1);
+         }
+      }
+   }
+
+   nir_metadata_preserve(entry, 0);
+}
+
+/**
+ * Redirect a store to a shader output into the per-slot temporary array.
+ *
+ * Emitted code (at the builder's current cursor):
+ *
+ *    tmp_varying[slot][min(vertex_count, 2)] = src
+ *
+ * @intr              the store_deref being lowered; removed on success
+ * @vertex_count_var  variable holding the running vertex count
+ * @varyings          temporaries indexed by the output's data.location
+ *
+ * Stores to anything other than a shader output are left untouched.
+ */
+static void
+lower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
+                           nir_variable *vertex_count_var,
+                           nir_variable **varyings)
+{
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+
+   /* Decide before emitting anything, so that stores which are not lowered
+    * do not leave an unused vertex-count load and min behind. */
+   if (var->data.mode != nir_var_shader_out)
+      return;
+
+   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
+   nir_ssa_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
+
+   nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, varyings[var->data.location]), index);
+   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1], intr->num_components);
+   nir_store_deref(b, deref, value, 0xf);
+   nir_instr_remove(&intr->instr);
+}
+
+/**
+ * Insert an emit_vertex intrinsic for @stream_id at the builder's cursor.
+ */
+static void
+nir_emit_vertex(nir_builder *b, unsigned stream_id)
+{
+   nir_intrinsic_instr *emit =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex);
+
+   nir_intrinsic_set_stream_id(emit, stream_id);
+   nir_builder_instr_insert(b, &emit->instr);
+}
+
+static void
+lower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
+                                 nir_variable *vertex_count_var,
+                                 nir_variable **varyings,
+                                 nir_variable **out_varyings)
+{
+   // TODO xfb + flat shading + last_pv
+   /**
+    * Replace one emit_vertex of a triangle strip with code that emits a
+    * whole triangle once enough vertices have been buffered.
+    *
+    * varyings[] holds the per-slot temporaries (three elements each, see
+    * d3d12_lower_triangle_strip) and out_varyings[] the real outputs; both
+    * are indexed by slot, and slots whose varyings[] entry is NULL are
+    * skipped. The original instruction is removed at the end.
+    *
+    * Emitted code, in this order:
+    *
+    * if (vertex_count >= 2) {
+    *    for (i = 0; i < 3; i++) {
+    *       foreach(slot)
+    *          out[slot] = tmp_varying[slot][i];
+    *       EmitVertex();
+    *    }
+    *    foreach(slot)
+    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
+    *    EndPrimitive();
+    * }
+    * vertex_count++;
+    */
+
+   nir_intrinsic_instr *instr;
+   nir_ssa_def *two = nir_imm_int(b, 2);
+   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
+   nir_ssa_def *count_cmp = nir_uge(b, vertex_count, two);
+   nir_if *count_check = nir_push_if(b, count_cmp);
+   /* The j loop runs at build time: three copy-outputs + emit groups are generated. */
+   for (int j = 0; j < 3; ++j) {
+      for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
+         if (!varyings[i])
+            continue;
+         nir_copy_deref(b, nir_build_deref_var(b, out_varyings[i]),
+                        nir_build_deref_array_imm(b, nir_build_deref_var(b, varyings[i]), j));
+      }
+      nir_emit_vertex(b, 0);
+   }
+   /* Move the newest buffered vertex (element 2) into element vertex_count % 2. */
+   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
+      if (!varyings[i])
+         continue;
+      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), nir_umod(b, vertex_count, two)),
+                        nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), two));
+   }
+   /* Close the triangle; like the emits above this always uses stream 0. */
+   instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive);
+   nir_intrinsic_set_stream_id(instr, 0);
+   nir_builder_instr_insert(b, &instr->instr);
+
+   nir_pop_if(b, count_check);
+   /* Outside the if: count this vertex using the value loaded before it. */
+   vertex_count = nir_iadd(b, vertex_count, nir_imm_int(b, 1));
+   nir_store_var(b, vertex_count_var, vertex_count, 0x1);
+
+   nir_instr_remove(&intr->instr);
+}
+
+/**
+ * Replace an end_primitive intrinsic with a reset of the vertex counter.
+ *
+ * Emitted code:
+ *
+ *    vertex_count = 0;
+ */
+static void
+lower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
+                                   nir_variable *vertex_count_var)
+{
+   nir_ssa_def *zero = nir_imm_int(b, 0);
+
+   nir_store_var(b, vertex_count_var, zero, 0x1);
+   nir_instr_remove(&intr->instr);
+}
+
+void
+d3d12_lower_triangle_strip(nir_shader *shader)
+{
+   nir_builder b;
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_variable *tmp_vars[VARYING_SLOT_MAX] = {0};
+   nir_variable *out_vars[VARYING_SLOT_MAX] = {0};
+   nir_builder_init(&b, impl);
+   /* Rewrite triangle-strip output as independent triangles: output stores
+    * are redirected into per-slot temporaries holding three vertices, each
+    * emit_vertex replays them as a full triangle once three are buffered,
+    * and end_primitive resets the running vertex count.
+    *
+    * vertices_out is rescaled first: (n - 2) triangles times 3 vertices. */
+   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;
+
+   nir_variable *vertex_count_var =
+      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+   /* One 3-element temporary array per output variable, indexed by location.
+    * NOTE(review): outputs sharing a location would overwrite each other's
+    * table entry here -- confirm that cannot happen at this point. */
+   nir_block *first = nir_start_block(impl);
+   b.cursor = nir_before_block(first);
+   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
+      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
+      tmp_vars[var->data.location] =  nir_local_variable_create(impl, type, "tmp_var");
+      out_vars[var->data.location] = var;
+   }
+   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1); /* vertex_count = 0 at shader start */
+   /* Dispatch every store / emit / end-primitive intrinsic to its lowering helper. */
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_store_deref:
+            b.cursor = nir_before_instr(instr);
+            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
+            break;
+         case nir_intrinsic_emit_vertex_with_counter:
+         case nir_intrinsic_emit_vertex:
+            b.cursor = nir_before_instr(instr);
+            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var,
+                                             tmp_vars, out_vars);
+            break;
+         case nir_intrinsic_end_primitive:
+         case nir_intrinsic_end_primitive_with_counter:
+            b.cursor = nir_before_instr(instr);
+            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
+            break;
+         default:
+            break;
+         }
+      }
+   }
+   /* No metadata is preserved; the copy_deref instructions created by the
+    * emit-vertex lowering are then lowered by nir_lower_var_copies. */
+   nir_metadata_preserve(impl, 0);
+   NIR_PASS_V(shader, nir_lower_var_copies);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_passes.h b/src/gallium/drivers/d3d12/d3d12_nir_passes.h
new file mode 100644 (file)
index 0000000..65af5a5
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_NIR_PASSES_H
+#define D3D12_NIR_PASSES_H
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct d3d12_shader;
+
+bool
+d3d12_lower_point_sprite(nir_shader *shader,
+                         bool sprite_origin_lower_left,
+                         bool point_size_per_vertex,
+                         unsigned point_coord_enable,
+                         uint64_t next_inputs_read);
+
+bool
+d3d12_lower_state_vars(struct nir_shader *s, struct d3d12_shader *shader);
+
+void
+d3d12_lower_yflip(nir_shader *s);
+
+void
+d3d12_forward_front_face(nir_shader *nir);
+
+void
+d3d12_lower_depth_range(nir_shader *nir);
+
+bool
+d3d12_lower_load_first_vertex(nir_shader *nir);
+/* For every sampler uniform whose result type is not void, add a clone of
+ * the variable typed as the matching bare sampler. */
+void
+d3d12_create_bare_samplers(nir_shader *s);
+/* Turn front-face loads and loads of boolean shader inputs into 32-bit
+ * loads followed by an integer-to-bool conversion. */
+bool
+d3d12_lower_bool_input(struct nir_shader *s);
+
+void
+d3d12_lower_uint_cast(nir_shader *nir, bool is_signed);
+/* Fragment shaders only: retarget FRAG_RESULT_COLOR stores to
+ * FRAG_RESULT_DATA0 and replicate them to color buffers 1..nr_cbufs-1. */
+bool
+d3d12_lower_frag_result(struct nir_shader *s, unsigned nr_cbufs);
+/* Create zero-written FRAG_RESULT_DATA0 outputs for each index (bit 0 and
+ * bit 1) set in missing_mask, which must be non-zero. */
+void
+d3d12_add_missing_dual_src_target(struct nir_shader *s,
+                                  unsigned missing_mask);
+/* Retype the inputs/outputs selected by the slot masks to uint, but only
+ * those the shader actually reads/writes. Returns true on any change. */
+bool
+d3d12_fix_io_uint_type(struct nir_shader *s, uint64_t in_mask, uint64_t out_mask);
+
+void
+d3d12_nir_invert_depth(nir_shader *s);
+
+bool
+d3d12_lower_int_cubmap_to_array(nir_shader *s);
+/* Replace every load_ubo intrinsic with the sequence built by
+ * build_load_ubo_dxil(). */
+bool
+nir_lower_packed_ubo_loads(struct nir_shader *nir);
+
+bool
+d3d12_nir_lower_vs_vertex_conversion(nir_shader *s, enum pipe_format target_formats[]);
+/* Add a flat uint output at VARYING_SLOT_PRIMITIVE_ID and store the
+ * primitive ID to it before every emit_vertex. */
+void
+d3d12_lower_primitive_id(nir_shader *shader);
+/* Rewrite triangle-strip emission as independent triangles; scales
+ * info.gs.vertices_out from n to (n - 2) * 3. */
+void
+d3d12_lower_triangle_strip(nir_shader *shader);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // D3D12_NIR_PASSES_H
diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp b/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp
new file mode 100644 (file)
index 0000000..0498c0b
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_pipeline_state.h"
+#include "d3d12_compiler.h"
+#include "d3d12_context.h"
+#include "d3d12_screen.h"
+
+#include "util/hash_table.h"
+#include "util/set.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+struct d3d12_pso_entry { /* payload of one ctx->pso_cache entry */
+   struct d3d12_gfx_pipeline_state key; /* copy of the state the PSO was built from; its address is the hash-table key */
+   ID3D12PipelineState *pso; /* pipeline state object created for that key; released in delete_entry() */
+};
+
+static const char *slot_to_varying[] = { /* 64 generic semantic names, indexed by (slot - VARYING_SLOT_POS) in get_semantic_name() */
+   "VARYINGAA", "VARYINGAB", "VARYINGAC", "VARYINGAD", "VARYINGAE", "VARYINGAF", "VARYINGAG", "VARYINGAH",
+   "VARYINGAI", "VARYINGAJ", "VARYINGAK", "VARYINGAL", "VARYINGAM", "VARYINGAN", "VARYINGAO", "VARYINGAP",
+   "VARYINGBA", "VARYINGBB", "VARYINGBC", "VARYINGBD", "VARYINGBE", "VARYINGBF", "VARYINGBG", "VARYINGBH",
+   "VARYINGBI", "VARYINGBJ", "VARYINGBK", "VARYINGBL", "VARYINGBM", "VARYINGBN", "VARYINGBO", "VARYINGBP",
+   "VARYINGCA", "VARYINGCB", "VARYINGCC", "VARYINGCD", "VARYINGCE", "VARYINGCF", "VARYINGCG", "VARYINGCH",
+   "VARYINGCI", "VARYINGCJ", "VARYINGCK", "VARYINGCL", "VARYINGCM", "VARYINGCN", "VARYINGCO", "VARYINGCP",
+   "VARYINGDA", "VARYINGDB", "VARYINGDC", "VARYINGDD", "VARYINGDE", "VARYINGDF", "VARYINGDG", "VARYINGDH",
+   "VARYINGDI", "VARYINGDJ", "VARYINGDK", "VARYINGDL", "VARYINGDM", "VARYINGDN", "VARYINGDO", "VARYINGDP",
+};
+
+static const char *
+get_semantic_name(int slot, unsigned *index)
+{
+   *index = 0; /* Default index */
+
+   switch (slot) {
+
+   case VARYING_SLOT_POS:
+      return "SV_Position";
+
+    case VARYING_SLOT_FACE:
+      return "SV_IsFrontFace";
+
+   case VARYING_SLOT_CLIP_DIST1:
+      *index = 1;
+      /* fallthrough */
+   case VARYING_SLOT_CLIP_DIST0:
+      return "SV_ClipDistance";
+
+   case VARYING_SLOT_PRIMITIVE_ID:
+      return "SV_PrimitiveID";
+
+   default: {
+         int index = slot - VARYING_SLOT_POS;
+         return slot_to_varying[index];
+      }
+   }
+}
+
+static void
+fill_so_declaration(const struct pipe_stream_output_info *info,
+                    D3D12_SO_DECLARATION_ENTRY *entries, UINT *num_entries,
+                    UINT *strides, UINT *num_strides)
+{
+   int next_offset[MAX_VERTEX_STREAMS] = { 0 };
+
+   *num_entries = 0;
+
+   for (unsigned i = 0; i < info->num_outputs; i++) {
+      const struct pipe_stream_output *output = &info->output[i];
+      const int buffer = output->output_buffer;
+      const int varying = output->register_index;
+      unsigned index;
+
+      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
+       * array.  Instead, it simply increments DstOffset for the following
+       * input by the number of components that should be skipped.
+       *
+       * DirectX12 requires that we create gap entries.
+       */
+      int skip_components = output->dst_offset - next_offset[buffer];
+
+      if (skip_components > 0) {
+         entries[*num_entries].Stream = output->stream;
+         entries[*num_entries].SemanticName = NULL;
+         entries[*num_entries].ComponentCount = skip_components;
+         entries[*num_entries].OutputSlot = buffer;
+         (*num_entries)++;
+      }
+
+      next_offset[buffer] = output->dst_offset + output->num_components;
+
+      entries[*num_entries].Stream = output->stream;
+      entries[*num_entries].SemanticName = get_semantic_name(output->register_index, &index);
+      entries[*num_entries].SemanticIndex = index;
+      entries[*num_entries].StartComponent = output->start_component;
+      entries[*num_entries].ComponentCount = output->num_components;
+      entries[*num_entries].OutputSlot = buffer;
+      (*num_entries)++;
+   }
+
+   for (unsigned i = 0; i < MAX_VERTEX_STREAMS; i++)
+      strides[i] = info->stride[i] * 4;
+   *num_strides = MAX_VERTEX_STREAMS;
+}
+
+static bool
+depth_bias(struct d3d12_rasterizer_state *state, enum pipe_prim_type reduced_prim)
+{
+   /* glPolygonOffset is supposed to be only enabled when rendering polygons.
+    * In d3d12 case, all polygons (and quads) are lowered to triangles */
+   if (reduced_prim != PIPE_PRIM_TRIANGLES)
+      return false;
+
+   unsigned fill_mode = state->base.cull_face == PIPE_FACE_FRONT ? state->base.fill_back
+                                                                 : state->base.fill_front;
+
+   switch (fill_mode) {
+   case PIPE_POLYGON_MODE_FILL:
+      return state->base.offset_tri;
+
+   case PIPE_POLYGON_MODE_LINE:
+      return state->base.offset_line;
+
+   case PIPE_POLYGON_MODE_POINT:
+      return state->base.offset_point;
+
+   default:
+      unreachable("unexpected fill mode");
+   }
+}
+
+/**
+ * Convert a reduced gallium primitive type (points, lines, triangles or
+ * patches) into the matching D3D12 primitive topology type.
+ *
+ * Any other value is reported through debug_printf() and treated as
+ * unreachable.
+ */
+static D3D12_PRIMITIVE_TOPOLOGY_TYPE
+topology_type(enum pipe_prim_type reduced_prim)
+{
+   if (reduced_prim == PIPE_PRIM_POINTS)
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
+
+   if (reduced_prim == PIPE_PRIM_LINES)
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
+
+   if (reduced_prim == PIPE_PRIM_TRIANGLES)
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+
+   if (reduced_prim == PIPE_PRIM_PATCHES)
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
+
+   debug_printf("pipe_prim_type: %s\n", u_prim_name(reduced_prim));
+   unreachable("unexpected enum pipe_prim_type");
+}
+
+DXGI_FORMAT
+d3d12_rtv_format(struct d3d12_context *ctx, unsigned index)
+{
+   DXGI_FORMAT fmt = ctx->gfx_pipeline_state.rtv_formats[index];
+
+   if (ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
+       !ctx->gfx_pipeline_state.has_float_rtv) {
+      switch (fmt) {
+      case DXGI_FORMAT_R8G8B8A8_SNORM:
+      case DXGI_FORMAT_R8G8B8A8_UNORM:
+      case DXGI_FORMAT_B8G8R8A8_UNORM:
+      case DXGI_FORMAT_B8G8R8X8_UNORM:
+         return DXGI_FORMAT_R8G8B8A8_UINT;
+      default:
+         unreachable("unsupported logic-op format");
+      }
+   }
+
+   return fmt;
+}
+
+static ID3D12PipelineState *
+create_gfx_pipeline_state(struct d3d12_context *ctx)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   struct d3d12_gfx_pipeline_state *state = &ctx->gfx_pipeline_state;
+   enum pipe_prim_type reduced_prim = u_reduced_prim(state->prim_type);
+   D3D12_SO_DECLARATION_ENTRY entries[PIPE_MAX_SO_OUTPUTS] = { 0 };
+   UINT strides[PIPE_MAX_SO_OUTPUTS] = { 0 };
+   UINT num_entries = 0, num_strides = 0;
+   /* Build a D3D12 graphics pipeline state object from the context's current
+    * gfx_pipeline_state. Returns the new PSO, or NULL (after a debug
+    * message) when CreateGraphicsPipelineState fails.
+    *
+    * NOTE(review): fill_so_declaration() may write a gap entry plus a real
+    * entry per output -- confirm 'entries' above cannot overflow. */
+   D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 };
+   pso_desc.pRootSignature = state->root_signature;
+
+   bool last_vertex_stage_writes_pos = false;
+   /* Vertex shader bytecode; also record whether it writes position. */
+   if (state->stages[PIPE_SHADER_VERTEX]) {
+      auto shader = state->stages[PIPE_SHADER_VERTEX];
+      pso_desc.VS.BytecodeLength = shader->bytecode_length;
+      pso_desc.VS.pShaderBytecode = shader->bytecode;
+      last_vertex_stage_writes_pos = (shader->nir->info.outputs_written & VARYING_BIT_POS) != 0;
+   }
+   /* A bound geometry shader overrides the position flag set by the VS. */
+   if (state->stages[PIPE_SHADER_GEOMETRY]) {
+      auto shader = state->stages[PIPE_SHADER_GEOMETRY];
+      pso_desc.GS.BytecodeLength = shader->bytecode_length;
+      pso_desc.GS.pShaderBytecode = shader->bytecode;
+      last_vertex_stage_writes_pos = (shader->nir->info.outputs_written & VARYING_BIT_POS) != 0;
+   }
+   /* The fragment shader is only attached when position is written and
+    * rasterization is not discarded. */
+   if (last_vertex_stage_writes_pos && state->stages[PIPE_SHADER_FRAGMENT] &&
+       !state->rast->base.rasterizer_discard) {
+      auto shader = state->stages[PIPE_SHADER_FRAGMENT];
+      pso_desc.PS.BytecodeLength = shader->bytecode_length;
+      pso_desc.PS.pShaderBytecode = shader->bytecode;
+   }
+   /* Stream output: without SO targets the counts stay 0 and the arrays zeroed. */
+   if (state->num_so_targets)
+      fill_so_declaration(&state->so_info, entries, &num_entries,
+                          strides, &num_strides);
+   pso_desc.StreamOutput.NumEntries = num_entries;
+   pso_desc.StreamOutput.pSODeclaration = entries;
+   pso_desc.StreamOutput.RasterizedStream = state->rast->base.rasterizer_discard ? D3D12_SO_NO_RASTERIZED_STREAM : 0;
+   pso_desc.StreamOutput.NumStrides = num_strides;
+   pso_desc.StreamOutput.pBufferStrides = strides;
+
+   pso_desc.BlendState = state->blend->desc;
+   if (state->has_float_rtv)
+      pso_desc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; /* logic ops are dropped when a float RTV is bound */
+
+   pso_desc.DepthStencilState = state->zsa->desc;
+   pso_desc.SampleMask = state->sample_mask;
+   pso_desc.RasterizerState = state->rast->desc;
+   /* Culling is only kept for triangles. */
+   if (reduced_prim != PIPE_PRIM_TRIANGLES)
+      pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
+   /* NOTE(review): offset_units is scaled by 2 here; the rationale is not
+    * visible in this file -- confirm. */
+   if (depth_bias(state->rast, reduced_prim)) {
+      pso_desc.RasterizerState.DepthBias = state->rast->base.offset_units * 2;
+      pso_desc.RasterizerState.DepthBiasClamp = state->rast->base.offset_clamp;
+      pso_desc.RasterizerState.SlopeScaledDepthBias = state->rast->base.offset_scale;
+   }
+
+   pso_desc.InputLayout.pInputElementDescs = state->ves->elements;
+   pso_desc.InputLayout.NumElements = state->ves->num_elements;
+
+   pso_desc.IBStripCutValue = state->ib_strip_cut_value;
+
+   pso_desc.PrimitiveTopologyType = topology_type(reduced_prim);
+   /* RTV formats go through d3d12_rtv_format(), which may substitute a UINT
+    * format when logic ops are enabled. */
+   pso_desc.NumRenderTargets = state->num_cbufs;
+   for (int i = 0; i < state->num_cbufs; ++i)
+      pso_desc.RTVFormats[i] = d3d12_rtv_format(ctx, i);
+   pso_desc.DSVFormat = state->dsv_format;
+
+   pso_desc.SampleDesc.Count = state->samples;
+   pso_desc.SampleDesc.Quality = 0;
+
+   pso_desc.NodeMask = 0;
+
+   pso_desc.CachedPSO.pCachedBlob = NULL;
+   pso_desc.CachedPSO.CachedBlobSizeInBytes = 0;
+
+   pso_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
+   /* 'ret' is only returned on success; on failure NULL is returned instead. */
+   ID3D12PipelineState *ret;
+   if (FAILED(screen->dev->CreateGraphicsPipelineState(&pso_desc,
+                                                       __uuidof(ret),
+                                                       (void **)&ret))) {
+      debug_printf("D3D12: CreateGraphicsPipelineState failed!\n");
+      return NULL;
+   }
+
+   return ret;
+}
+
+/**
+ * Hash a d3d12_gfx_pipeline_state by hashing the raw bytes of the struct.
+ *
+ * NOTE(review): this covers any padding bytes as well, so keys presumably
+ * need to be fully zero-initialized -- confirm where the state is set up.
+ */
+static uint32_t
+hash_gfx_pipeline_state(const void *key)
+{
+   const size_t key_size = sizeof(struct d3d12_gfx_pipeline_state);
+
+   return _mesa_hash_data(key, key_size);
+}
+
+/**
+ * Key comparison for the PSO cache: two pipeline states are equal when the
+ * raw bytes of both structs are identical.
+ */
+static bool
+equals_gfx_pipeline_state(const void *a, const void *b)
+{
+   const int diff = memcmp(a, b, sizeof(struct d3d12_gfx_pipeline_state));
+
+   return diff == 0;
+}
+
+ID3D12PipelineState *
+d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx)
+{
+   uint32_t hash = hash_gfx_pipeline_state(&ctx->gfx_pipeline_state);
+   struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->pso_cache, hash,
+                                                                 &ctx->gfx_pipeline_state);
+   if (!entry) {
+      struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)MALLOC(sizeof(struct d3d12_pso_entry));
+      if (!data)
+         return NULL;
+
+      data->key = ctx->gfx_pipeline_state;
+      data->pso = create_gfx_pipeline_state(ctx);
+      if (!data->pso)
+         return NULL;
+
+      entry = _mesa_hash_table_insert_pre_hashed(ctx->pso_cache, hash, &data->key, data);
+      assert(entry);
+   }
+
+   return ((struct d3d12_pso_entry *)(entry->data))->pso;
+}
+
+/**
+ * Create the context's PSO cache.
+ *
+ * No key-hash callback is installed; the lookups and inserts in this file
+ * use the *_pre_hashed entry points with hash_gfx_pipeline_state().
+ */
+void
+d3d12_gfx_pipeline_state_cache_init(struct d3d12_context *ctx)
+{
+   struct hash_table *cache =
+      _mesa_hash_table_create(NULL, NULL, equals_gfx_pipeline_state);
+
+   ctx->pso_cache = cache;
+}
+
+/**
+ * Destroy the payload of a PSO cache entry: release the pipeline state
+ * object and free the d3d12_pso_entry that holds it.
+ */
+static void
+delete_entry(struct hash_entry *entry)
+{
+   struct d3d12_pso_entry *cached = (struct d3d12_pso_entry *)entry->data;
+
+   cached->pso->Release();
+   FREE(cached);
+}
+
+/**
+ * Remove one entry from the PSO cache and destroy its payload.
+ *
+ * If the context currently points at this entry's PSO, that pointer is
+ * cleared first.
+ */
+static void
+remove_entry(struct d3d12_context *ctx, struct hash_entry *entry)
+{
+   /* Fetch the payload once, before the entry leaves the table. */
+   struct d3d12_pso_entry *cached = (struct d3d12_pso_entry *)entry->data;
+
+   if (ctx->current_pso == cached->pso)
+      ctx->current_pso = NULL;
+
+   /* The table's key points into 'cached', so unlink before freeing. */
+   _mesa_hash_table_remove(ctx->pso_cache, entry);
+
+   cached->pso->Release();
+   FREE(cached);
+}
+
+/**
+ * Destroy the context's PSO cache, running delete_entry() on every entry
+ * that is still stored in it.
+ */
+void
+d3d12_gfx_pipeline_state_cache_destroy(struct d3d12_context *ctx)
+{
+   struct hash_table *cache = ctx->pso_cache;
+
+   _mesa_hash_table_destroy(cache, delete_entry);
+}
+
+/**
+ * Drop every cached PSO whose key references @state as its blend,
+ * depth/stencil/alpha or rasterizer state.
+ */
+void
+d3d12_gfx_pipeline_state_cache_invalidate(struct d3d12_context *ctx, const void *state)
+{
+   hash_table_foreach(ctx->pso_cache, it) {
+      const struct d3d12_gfx_pipeline_state *cached_key =
+         (struct d3d12_gfx_pipeline_state *)it->key;
+
+      const bool uses_state = cached_key->blend == state ||
+                              cached_key->zsa == state ||
+                              cached_key->rast == state;
+      if (uses_state)
+         remove_entry(ctx, it);
+   }
+}
+
+void
+d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx,
+                                                 enum pipe_shader_type stage,
+                                                 struct d3d12_shader_selector *selector)
+{
+   struct d3d12_shader *shader = selector->first;
+
+   while (shader) {
+      hash_table_foreach(ctx->pso_cache, entry) {
+         const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key;
+         if (key->stages[stage] == shader)
+            remove_entry(ctx, entry);
+      }
+      shader = shader->next_variant;
+   }
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.h b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h
new file mode 100644 (file)
index 0000000..015b1a1
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_PIPELINE_STATE_H
+#define D3D12_PIPELINE_STATE_H
+
+#include "pipe/p_state.h"
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <d3d12.h>
+
+struct d3d12_context;
+struct d3d12_root_signature;
+
+/* Vertex-element state: one D3D12 input element description per attribute,
+ * plus a per-attribute pipe_format (presumably the format an emulated
+ * attribute is converted to -- confirm against the code that fills it in). */
+struct d3d12_vertex_elements_state {
+   D3D12_INPUT_ELEMENT_DESC elements[PIPE_MAX_ATTRIBS];
+   enum pipe_format format_conversion[PIPE_MAX_ATTRIBS];
+   unsigned num_elements:6; // <= PIPE_MAX_ATTRIBS
+   unsigned needs_format_emulation:1;
+   unsigned unused:25; /* pads the bit-fields above to 32 bits */
+};
+
+/* Rasterizer state: the gallium state it was created from together with the
+ * D3D12 rasterizer description. */
+struct d3d12_rasterizer_state {
+   struct pipe_rasterizer_state base;
+   D3D12_RASTERIZER_DESC desc;
+   /* NOTE(review): opaque pointer; by its name presumably a companion state
+    * for back faces when two-sided rendering is emulated -- confirm. */
+   void *twoface_back;
+};
+
+/* Blend state: the D3D12 blend description plus driver-side flags.
+ * NOTE(review): the meaning of the individual blend_factor_flags bits is not
+ * visible here -- confirm against the code that sets them. */
+struct d3d12_blend_state {
+   D3D12_BLEND_DESC desc;
+   unsigned blend_factor_flags;
+   bool is_dual_src;
+};
+
+/* Depth/stencil/alpha state: wraps the D3D12 depth-stencil description. */
+struct d3d12_depth_stencil_alpha_state {
+   D3D12_DEPTH_STENCIL_DESC desc;
+};
+
+/* Description of a graphics pipeline. The keys of the context's PSO cache are
+ * cast to this struct by the cache invalidation functions. */
+struct d3d12_gfx_pipeline_state {
+   ID3D12RootSignature *root_signature;
+   /* One shader variant per stage, indexed by enum pipe_shader_type.
+    * NOTE(review): sized PIPE_SHADER_TYPES - 1, presumably because the compute
+    * stage is excluded -- confirm. */
+   struct d3d12_shader *stages[PIPE_SHADER_TYPES - 1];
+   struct pipe_stream_output_info so_info;
+
+   struct d3d12_vertex_elements_state *ves;
+   /* blend, zsa and rast are compared by pointer when a state object is
+    * invalidated (d3d12_gfx_pipeline_state_cache_invalidate) */
+   struct d3d12_blend_state *blend;
+   struct d3d12_depth_stencil_alpha_state *zsa;
+   struct d3d12_rasterizer_state *rast;
+
+   unsigned samples;
+   unsigned sample_mask;
+   unsigned num_cbufs;
+   /* Non-zero is treated as "stream output active" by d3d12_validate_queries */
+   unsigned num_so_targets;
+   bool has_float_rtv;
+   DXGI_FORMAT rtv_formats[8];
+   DXGI_FORMAT dsv_format;
+   D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value;
+   enum pipe_prim_type prim_type;
+};
+
+/* NOTE(review): implementation not shown here; presumably returns the DXGI
+ * format of the color buffer at `index` -- confirm. */
+DXGI_FORMAT
+d3d12_rtv_format(struct d3d12_context *ctx, unsigned index);
+
+/* Create the context's pipeline-state cache (ctx->pso_cache). */
+void
+d3d12_gfx_pipeline_state_cache_init(struct d3d12_context *ctx);
+
+/* Destroy the cache, releasing every pipeline-state object stored in it. */
+void
+d3d12_gfx_pipeline_state_cache_destroy(struct d3d12_context *ctx);
+
+/* Return a pipeline-state object taken from a cache entry.
+ * NOTE(review): lookup/creation logic not shown here; presumably it matches
+ * the context's current graphics pipeline state -- confirm. */
+ID3D12PipelineState *
+d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx);
+
+/* Remove all cached PSOs whose key references `state` as its blend,
+ * depth/stencil/alpha or rasterizer state (pointer comparison). */
+void
+d3d12_gfx_pipeline_state_cache_invalidate(struct d3d12_context *ctx, const void *state);
+
+/* Remove all cached PSOs whose shader for `stage` is any variant of
+ * `selector`. */
+void
+d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx,
+                                                 enum pipe_shader_type stage,
+                                                 struct d3d12_shader_selector *selector);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_public.h b/src/gallium/drivers/d3d12/d3d12_public.h
new file mode 100644 (file)
index 0000000..bd485b3
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_PUBLIC_H
+#define D3D12_PUBLIC_H
+
+struct pipe_screen;
+struct sw_winsys;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Create a d3d12 pipe_screen.
+ * NOTE(review): implementation not shown here; adapter_luid presumably
+ * selects the adapter to use, and NULL handling should be confirmed. */
+struct pipe_screen *
+d3d12_create_screen(struct sw_winsys *winsys, LUID *adapter_luid);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_query.cpp b/src/gallium/drivers/d3d12/d3d12_query.cpp
new file mode 100644 (file)
index 0000000..c6da498
--- /dev/null
@@ -0,0 +1,524 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_query.h"
+#include "d3d12_context.h"
+#include "d3d12_resource.h"
+#include "d3d12_screen.h"
+
+#include "util/u_dump.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+/* Driver-side representation of a gallium query object. */
+struct d3d12_query {
+   /* Gallium query type passed to d3d12_create_query */
+   enum pipe_query_type type;
+
+   /* Query heap with num_queries slots */
+   ID3D12QueryHeap *query_heap;
+   /* curr_query: number of results recorded so far / next result to record
+    * (TIME_ELAPSED uses two heap slots per result);
+    * num_queries: number of slots in query_heap */
+   unsigned curr_query, num_queries;
+   /* Size in bytes of one resolved slot in the readback buffer */
+   size_t query_size;
+   /* Helper PIPELINE_STATISTICS query that d3d12_validate_queries creates for
+    * PRIMITIVES_GENERATED queries while no stream-output target is bound */
+   struct d3d12_query *subquery;
+
+   /* D3D12 query type derived from `type` */
+   D3D12_QUERY_TYPE d3d12qtype;
+
+   /* Range suballocated from ctx->query_allocator that receives the resolved
+    * query data: buffer plus byte offset into it */
+   pipe_resource *buffer;
+   unsigned buffer_offset;
+   /* ctx->fence_value captured by d3d12_end_query; d3d12_get_query_result
+    * compares it against the command queue fence's completed value */
+   uint64_t fence_value;
+
+   /* Link in ctx->active_queries */
+   struct list_head active_list;
+   /* Buffer created on first use by d3d12_render_condition */
+   struct d3d12_resource *predicate;
+};
+
+static D3D12_QUERY_HEAP_TYPE
+d3d12_query_heap_type(unsigned query_type)
+{
+   switch (query_type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+      return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+      return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
+
+   default:
+      debug_printf("unknown query: %s\n",
+                   util_str_query_type(query_type, true));
+      unreachable("d3d12: unknown query type");
+   }
+}
+
+static D3D12_QUERY_TYPE
+d3d12_query_type(unsigned query_type)
+{
+   switch (query_type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      return D3D12_QUERY_TYPE_OCCLUSION;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      return D3D12_QUERY_TYPE_BINARY_OCCLUSION;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+      return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+      return D3D12_QUERY_TYPE_TIMESTAMP;
+   default:
+      debug_printf("unknown query: %s\n",
+                   util_str_query_type(query_type, true));
+      unreachable("d3d12: unknown query type");
+   }
+}
+
+static struct pipe_query *
+d3d12_create_query(struct pipe_context *pctx,
+                   unsigned query_type, unsigned index)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+   struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);
+   D3D12_QUERY_HEAP_DESC desc = {};
+   D3D12_RESOURCE_DESC res_desc = {};
+
+   if (!query)
+      return NULL;
+
+   query->type = (pipe_query_type)query_type;
+   query->d3d12qtype = d3d12_query_type(query_type);
+   query->num_queries = 16;
+
+   /* With timer queries we want a few more queries, especially since we need two slots
+    * per query for TIME_ELAPSED queries */
+   if (unlikely(query->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP))
+      query->num_queries = 64;
+
+   query->curr_query = 0;
+
+   switch (query->d3d12qtype) {
+   case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
+      query->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
+      break;
+   case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
+      query->query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);
+      break;
+   default:
+      query->query_size = sizeof(uint64_t);
+      break;
+   }
+
+   desc.Count = query->num_queries;
+   desc.Type = d3d12_query_heap_type(query_type);
+   if (FAILED(screen->dev->CreateQueryHeap(&desc,
+                                           __uuidof(query->query_heap),
+                                           (void **)&query->query_heap))) {
+      FREE(query);
+      return NULL;
+   }
+
+   /* Query result goes into a readback buffer */
+   size_t buffer_size = query->query_size * query->num_queries;
+   u_suballocator_alloc(ctx->query_allocator, buffer_size, 256,
+                        &query->buffer_offset, &query->buffer);
+
+   return (struct pipe_query *)query;
+}
+
+/**
+ * pipe_context::destroy_query implementation.
+ *
+ * Destroys the helper subquery (if any), drops the references held on the
+ * predicate buffer and on the readback buffer, releases the query heap and
+ * frees the query object.
+ */
+static void
+d3d12_destroy_query(struct pipe_context *pctx,
+                    struct pipe_query *q)
+{
+   struct d3d12_query *query = (struct d3d12_query *)q;
+
+   if (query->subquery)
+      d3d12_destroy_query(pctx, (struct pipe_query *)query->subquery);
+
+   /* The predicate only exists once the query was used for a render condition */
+   if (query->predicate) {
+      pipe_resource *predicate = &query->predicate->base;
+      pipe_resource_reference(&predicate, NULL);
+   }
+
+   /* Drop the reference on the readback buffer taken at creation time */
+   pipe_resource_reference(&query->buffer, NULL);
+
+   query->query_heap->Release();
+   FREE(query);
+}
+
+static bool
+accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q,
+                  union pipe_query_result *result, bool write)
+{
+   struct pipe_transfer *transfer = NULL;
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   unsigned access = PIPE_MAP_READ;
+   void *results;
+
+   if (write)
+      access |= PIPE_MAP_WRITE;
+   results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
+                                   q->num_queries * q->query_size,
+                                   access, &transfer);
+
+   if (results == NULL)
+      return false;
+
+   uint64_t *results_u64 = (uint64_t *)results;
+   D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;
+   D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;
+
+   util_query_clear_result(result, q->type);
+   for (int i = 0; i < q->curr_query; ++i) {
+      switch (q->type) {
+      case PIPE_QUERY_OCCLUSION_PREDICATE:
+      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+         result->b |= results_u64[i] != 0;
+         break;
+
+      case PIPE_QUERY_OCCLUSION_COUNTER:
+         result->u64 += results_u64[i];
+         break;
+      case PIPE_QUERY_TIMESTAMP:
+         result->u64 = results_u64[i];
+         break;
+
+      case PIPE_QUERY_PIPELINE_STATISTICS:
+         result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;
+         result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;
+         result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;
+         result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;
+         result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;
+         result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;
+         result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;
+         result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;
+         result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;
+         result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;
+         result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;
+         break;
+
+      case PIPE_QUERY_PRIMITIVES_GENERATED:
+         result->u64 += results_so[i].PrimitivesStorageNeeded;
+         break;
+
+      case PIPE_QUERY_PRIMITIVES_EMITTED:
+         result->u64 += results_so[i].NumPrimitivesWritten;
+         break;
+
+      case PIPE_QUERY_TIME_ELAPSED:
+         result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];
+         break;
+
+      case PIPE_QUERY_SO_STATISTICS:
+         result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
+         result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
+         break;
+
+      default:
+         debug_printf("unsupported query type: %s\n",
+                      util_str_query_type(q->type, true));
+         unreachable("unexpected query type");
+      }
+   }
+
+   if (q->subquery) {
+      union pipe_query_result subresult;
+
+      accumulate_result(ctx, q->subquery, &subresult, false);
+      q->subquery->curr_query = 0;
+      if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+         result->u64 += subresult.pipeline_statistics.ia_primitives;
+   }
+
+   if (write) {
+      if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
+         results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices;
+         results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives;
+         results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations;
+         results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations;
+         results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives;
+         results_stats[0].CInvocations = result->pipeline_statistics.c_invocations;
+         results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives;
+         results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations;
+         results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations;
+         results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations;
+         results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations;
+      } else if (q->type == PIPE_QUERY_SO_STATISTICS) {
+         results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written;
+         results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed;
+      } else {
+         if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) {
+            results_u64[0] = 0;
+            results_u64[1] = result->u64;
+         } else {
+            results_u64[0] = result->u64;
+         }
+      }
+   }
+
+   pipe_buffer_unmap(&ctx->base, transfer);
+
+   if (q->type == PIPE_QUERY_TIME_ELAPSED ||
+       q->type == PIPE_QUERY_TIMESTAMP)
+      result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);
+
+   return true;
+}
+
+static void
+begin_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
+{
+   if (restart) {
+      q->curr_query = 0;
+   } else if (q->curr_query == q->num_queries) {
+      union pipe_query_result result;
+
+      /* Accumulate current results and store in first slot */
+      d3d12_flush_cmdlist_and_wait(ctx);
+      accumulate_result(ctx, q, &result, true);
+      q->curr_query = 1;
+   }
+
+   if (q->subquery)
+      begin_query(ctx, q->subquery, restart);
+
+   ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);
+}
+
+
+static void
+begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
+{
+   /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in
+    * EndQuery, so we need two query slots */
+   unsigned query_index = 2 * q->curr_query;
+
+   if (restart) {
+      q->curr_query = 0;
+      query_index = 0;
+   } else if (query_index == q->num_queries) {
+      union pipe_query_result result;
+
+      /* Accumulate current results and store in first slot */
+      d3d12_flush_cmdlist_and_wait(ctx);
+      accumulate_result(ctx, q, &result, true);
+      q->curr_query = 2;
+   }
+
+   ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
+}
+
+static bool
+d3d12_begin_query(struct pipe_context *pctx,
+                  struct pipe_query *q)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_query *query = (struct d3d12_query *)q;
+
+   assert(query->type != PIPE_QUERY_TIMESTAMP);
+
+   if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))
+      begin_timer_query(ctx, query, true);
+   else {
+      begin_query(ctx, query, true);
+      list_addtail(&query->active_list, &ctx->active_queries);
+   }
+
+   return true;
+}
+
+/**
+ * Record the end of a query on the current command list and resolve the
+ * result of the current slot into the query's readback buffer.
+ *
+ * The query heap and the readback resource are referenced by the current
+ * batch, and q->curr_query is advanced to the next result.
+ */
+static void
+end_query(struct d3d12_context *ctx, struct d3d12_query *q)
+{
+   uint64_t offset = 0;
+   struct d3d12_batch *batch = d3d12_current_batch(ctx);
+   struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;
+   /* offset receives the position of res inside the underlying resource */
+   ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
+
+   /* End subquery first so that we can use fence value from parent */
+   if (q->subquery)
+      end_query(ctx, q->subquery);
+
+   /* With QUERY_TIME_ELAPSED we have recorded one value at
+    * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)
+    * and when accumulating the results we subtract the former from the
+    * latter */
+
+   unsigned resolve_count = q->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;
+   unsigned resolve_index = resolve_count * q->curr_query;
+   unsigned end_index = resolve_index + resolve_count - 1;
+
+   /* Destination: start of this query's range plus the slot being resolved */
+   offset += q->buffer_offset + resolve_index * q->query_size;
+   ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);
+   d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST);
+   d3d12_apply_resource_states(ctx);
+   ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,
+                                  resolve_count, d3d12_res, offset);
+
+   d3d12_batch_reference_object(batch, q->query_heap);
+   d3d12_batch_reference_resource(batch, res);
+
+   assert(q->curr_query < q->num_queries);
+   q->curr_query++;
+}
+
+/**
+ * pipe_context::end_query implementation.
+ *
+ * Ends the query, removes it from the active list (TIMESTAMP and
+ * TIME_ELAPSED queries are not unlinked here) and remembers the context's
+ * current fence value so that result availability can be checked later.
+ *
+ * \return always true
+ */
+static bool
+d3d12_end_query(struct pipe_context *pctx,
+               struct pipe_query *q)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_query *query = (struct d3d12_query *)q;
+
+   end_query(ctx, query);
+
+   const bool is_timer = query->type == PIPE_QUERY_TIMESTAMP ||
+                         query->type == PIPE_QUERY_TIME_ELAPSED;
+   if (!is_timer)
+      list_delinit(&query->active_list);
+
+   query->fence_value = ctx->fence_value;
+   return true;
+}
+
+static bool
+d3d12_get_query_result(struct pipe_context *pctx,
+                      struct pipe_query *q,
+                      bool wait,
+                      union pipe_query_result *result)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_query *query = (struct d3d12_query *)q;
+
+   if (ctx->cmdqueue_fence->GetCompletedValue() < query->fence_value) {
+      if (!wait)
+         return false;
+      d3d12_flush_cmdlist_and_wait(ctx);
+   }
+
+   return accumulate_result(ctx, query, result, false);
+}
+
+/**
+ * End every query on the context's active list. The queries stay on the list
+ * so that d3d12_resume_queries() can begin them again.
+ */
+void
+d3d12_suspend_queries(struct d3d12_context *ctx)
+{
+   list_for_each_entry(struct d3d12_query, active, &ctx->active_queries, active_list)
+      end_query(ctx, active);
+}
+
+/**
+ * Begin every query on the context's active list again, continuing with the
+ * next slot instead of restarting.
+ */
+void
+d3d12_resume_queries(struct d3d12_context *ctx)
+{
+   list_for_each_entry(struct d3d12_query, active, &ctx->active_queries, active_list)
+      begin_query(ctx, active, false);
+}
+
+void
+d3d12_validate_queries(struct d3d12_context *ctx)
+{
+   bool have_xfb = !!ctx->gfx_pipeline_state.num_so_targets;
+
+   list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
+      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED && !have_xfb && !query->subquery) {
+         struct pipe_query *subquery = d3d12_create_query(&ctx->base, PIPE_QUERY_PIPELINE_STATISTICS, 0);
+         query->subquery = (struct d3d12_query *)subquery;
+         if (!ctx->queries_disabled)
+            begin_query(ctx, query->subquery, true);
+      }
+   }
+}
+
+/**
+ * pipe_context::set_active_query_state implementation: records whether
+ * queries are disabled and resumes or suspends all active queries
+ * accordingly.
+ */
+static void
+d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+
+   ctx->queries_disabled = !enable;
+
+   if (!enable) {
+      d3d12_suspend_queries(ctx);
+      return;
+   }
+
+   d3d12_resume_queries(ctx);
+}
+
+static void
+d3d12_render_condition(struct pipe_context *pctx,
+                       struct pipe_query *pquery,
+                       bool condition,
+                       enum pipe_render_cond_flag mode)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_query *query = (struct d3d12_query *)pquery;
+
+   if (query == nullptr) {
+      ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+      ctx->current_predication = nullptr;
+      return;
+   }
+
+   if (!query->predicate)
+      query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
+                                                           PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
+
+   if (mode == PIPE_RENDER_COND_WAIT) {
+      d3d12_flush_cmdlist_and_wait(ctx);
+      union pipe_query_result result;
+      accumulate_result(ctx, (d3d12_query *)pquery, &result, true);
+   }
+
+   struct d3d12_resource *res = (struct d3d12_resource *)query->buffer;
+   d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE);
+   d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST);
+   d3d12_apply_resource_states(ctx);
+   ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0,
+                                  d3d12_resource_resource(res), 0,
+                                  sizeof(uint64_t));
+
+   d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION);
+   d3d12_apply_resource_states(ctx);
+
+   ctx->current_predication = query->predicate;
+   /* documentation of ID3D12GraphicsCommandList::SetPredication method:
+    * "resource manipulation commands are _not_ actually performed
+    *  if the resulting predicate data of the predicate is equal to
+    *  the operation specified."
+    */
+   ctx->cmdlist->SetPredication(d3d12_resource_resource(query->predicate), 0,
+                                condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :
+                                D3D12_PREDICATION_OP_EQUAL_ZERO);
+}
+
+void
+d3d12_context_query_init(struct pipe_context *pctx)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   list_inithead(&ctx->active_queries);
+
+   ctx->query_allocator =
+       u_suballocator_create(&ctx->base, 4096, 0, PIPE_USAGE_STAGING,
+                             0, true);
+
+   pctx->create_query = d3d12_create_query;
+   pctx->destroy_query = d3d12_destroy_query;
+   pctx->begin_query = d3d12_begin_query;
+   pctx->end_query = d3d12_end_query;
+   pctx->get_query_result = d3d12_get_query_result;
+   pctx->set_active_query_state = d3d12_set_active_query_state;
+   pctx->render_condition = d3d12_render_condition;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_query.h b/src/gallium/drivers/d3d12/d3d12_query.h
new file mode 100644 (file)
index 0000000..61ce225
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_QUERY_H
+#define D3D12_QUERY_H
+
+struct d3d12_context;
+
+/* End every query on the context's active list; the queries stay listed. */
+void
+d3d12_suspend_queries(struct d3d12_context *ctx);
+
+/* Begin every query on the context's active list again without restarting
+ * it. */
+void
+d3d12_resume_queries(struct d3d12_context *ctx);
+
+/* Create (and, unless queries are disabled, start) the pipeline-statistics
+ * subquery of active PRIMITIVES_GENERATED queries while no stream-output
+ * target is bound. */
+void
+d3d12_validate_queries(struct d3d12_context *ctx);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_resource.cpp b/src/gallium/drivers/d3d12/d3d12_resource.cpp
new file mode 100644 (file)
index 0000000..52adedc
--- /dev/null
@@ -0,0 +1,999 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_resource.h"
+
+#include "d3d12_blit.h"
+#include "d3d12_context.h"
+#include "d3d12_format.h"
+#include "d3d12_screen.h"
+#include "d3d12_debug.h"
+
+#include "pipebuffer/pb_bufmgr.h"
+#include "util/slab.h"
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/format/u_format_zs.h"
+
+#include "frontend/sw_winsys.h"
+
+#include <d3d12.h>
+#include <memory>
+
+/**
+ * Tell whether a resource is a buffer or carries one of the SCANOUT, SHARED
+ * or LINEAR bind flags.
+ */
+static bool
+can_map_directly(struct pipe_resource *pres)
+{
+   const unsigned direct_binds = PIPE_BIND_SCANOUT |
+                                 PIPE_BIND_SHARED |
+                                 PIPE_BIND_LINEAR;
+
+   if (pres->bind & direct_binds)
+      return true;
+
+   return pres->target == PIPE_BUFFER;
+}
+
+/**
+ * Initialize the valid-range tracking of a resource; only done for resources
+ * for which can_map_directly() holds.
+ */
+static void
+init_valid_range(struct d3d12_resource *res)
+{
+   if (!can_map_directly(&res->base))
+      return;
+
+   util_range_init(&res->valid_buffer_range);
+}
+
+/**
+ * Destroy a resource: tears down the valid-range tracking (where it was set
+ * up), drops the reference on the backing buffer object if there is one and
+ * frees the resource structure.
+ */
+static void
+d3d12_resource_destroy(struct pipe_screen *pscreen,
+                       struct pipe_resource *presource)
+{
+   struct d3d12_resource *res = d3d12_resource(presource);
+
+   /* Mirrors init_valid_range(): the range only exists for these resources */
+   if (can_map_directly(presource))
+      util_range_destroy(&res->valid_buffer_range);
+
+   if (res->bo)
+      d3d12_bo_unreference(res->bo);
+
+   FREE(res);
+}
+
+/**
+ * Check whether any of the context's batches references the resource's
+ * buffer object. Every batch is inspected.
+ */
+static bool
+resource_is_busy(struct d3d12_context *ctx,
+                 struct d3d12_resource *res)
+{
+   bool referenced = false;
+
+   for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->batches); ++idx) {
+      if (d3d12_batch_has_references(&ctx->batches[idx], res->bo))
+         referenced = true;
+   }
+
+   return referenced;
+}
+
+void
+d3d12_resource_wait_idle(struct d3d12_context *ctx,
+                         struct d3d12_resource *res)
+{
+   if (d3d12_batch_has_references(d3d12_current_batch(ctx), res->bo)) {
+      d3d12_flush_cmdlist_and_wait(ctx);
+   } else {
+      d3d12_foreach_submitted_batch(ctx, batch) {
+         d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE);
+         if (!resource_is_busy(ctx, res))
+            break;
+      }
+   }
+}
+
+/**
+ * Drop the resource's reference on its buffer object, if it has one, and
+ * clear the pointer. The resource structure itself is kept.
+ */
+void
+d3d12_resource_release(struct d3d12_resource *resource)
+{
+   if (resource->bo) {
+      d3d12_bo_unreference(resource->bo);
+      resource->bo = NULL;
+   }
+}
+
+static bool
+init_buffer(struct d3d12_screen *screen,
+            struct d3d12_resource *res,
+            const struct pipe_resource *templ)
+{
+   struct pb_desc buf_desc;
+   struct pb_manager *bufmgr;
+   struct pb_buffer *buf;
+
+   /* Assert that we don't want to create a buffer with one of the emulated
+    * formats, these are (currently) only supported when passing the vertex
+    * element state */
+   assert(templ->format == d3d12_emulated_vtx_format(templ->format));
+
+   /* Don't use slab buffer manager for GPU writable buffers */
+   bufmgr = templ->bind & PIPE_BIND_STREAM_OUTPUT ? screen->cache_bufmgr
+                                                  : screen->slab_bufmgr;
+   buf_desc.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+   buf_desc.usage = (pb_usage_flags)PB_USAGE_ALL;
+   res->dxgi_format = DXGI_FORMAT_UNKNOWN;
+   buf = bufmgr->create_buffer(bufmgr, templ->width0, &buf_desc);
+   if (!buf)
+      return false;
+   res->bo = d3d12_bo_wrap_buffer(buf);
+
+   return true;
+}
+
+static bool
+init_texture(struct d3d12_screen *screen,
+             struct d3d12_resource *res,
+             const struct pipe_resource *templ)
+{
+   ID3D12Resource *d3d12_res;
+
+   res->mip_levels = templ->last_level + 1;
+   res->dxgi_format = d3d12_get_format(templ->format);
+
+   D3D12_RESOURCE_DESC desc;
+   desc.Format = res->dxgi_format;
+   desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+   desc.Width = templ->width0;
+   desc.Height = templ->height0;
+   desc.DepthOrArraySize = templ->array_size;
+   desc.MipLevels = templ->last_level + 1;
+
+   desc.SampleDesc.Count = MAX2(templ->nr_samples, 1);
+   desc.SampleDesc.Quality = 0; /* TODO: figure this one out */
+
+   switch (templ->target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D;
+      break;
+
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      desc.DepthOrArraySize *= 6;
+      /* fall-through */
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+      desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+      break;
+
+   case PIPE_TEXTURE_3D:
+      desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
+      desc.DepthOrArraySize = templ->depth0;
+      break;
+   }
+
+   desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+   if (templ->bind & PIPE_BIND_SHADER_BUFFER)
+      desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+   if (templ->bind & PIPE_BIND_RENDER_TARGET)
+      desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+
+   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
+      desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+
+      /* Sadly, we can't set D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE in the
+       * case where PIPE_BIND_SAMPLER_VIEW isn't set, because that would
+       * prevent us from using the resource with u_blitter, which requires
+       * sneaking in sampler-usage throught the back-door.
+       */
+   }
+
+   desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+   if (templ->bind & (PIPE_BIND_SCANOUT |
+                      PIPE_BIND_SHARED | PIPE_BIND_LINEAR))
+      desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+
+   D3D12_HEAP_TYPE heap_type = D3D12_HEAP_TYPE_DEFAULT;
+
+   if (templ->bind & (PIPE_BIND_DISPLAY_TARGET |
+                      PIPE_BIND_SCANOUT |
+                      PIPE_BIND_SHARED))
+      heap_type = D3D12_HEAP_TYPE_READBACK;
+   else if (templ->usage == PIPE_USAGE_STAGING)
+      heap_type = D3D12_HEAP_TYPE_UPLOAD;
+
+   D3D12_HEAP_PROPERTIES heap_pris = screen->dev->GetCustomHeapProperties(0, heap_type);
+
+   HRESULT hres = screen->dev->CreateCommittedResource(&heap_pris,
+                                                   D3D12_HEAP_FLAG_NONE,
+                                                   &desc,
+                                                   D3D12_RESOURCE_STATE_COMMON,
+                                                   NULL,
+                                                   __uuidof(ID3D12Resource),
+                                                   (void **)&d3d12_res);
+   if (FAILED(hres))
+      return false;
+
+   if (screen->winsys && (templ->bind & (PIPE_BIND_DISPLAY_TARGET |
+                                         PIPE_BIND_SCANOUT |
+                                         PIPE_BIND_SHARED))) {
+      struct sw_winsys *winsys = screen->winsys;
+      res->dt = winsys->displaytarget_create(screen->winsys,
+                                             res->base.bind,
+                                             res->base.format,
+                                             templ->width0,
+                                             templ->height0,
+                                             64, NULL,
+                                             &res->dt_stride);
+   }
+
+   res->bo = d3d12_bo_wrap_res(d3d12_res, templ->format);
+
+   return true;
+}
+
+static struct pipe_resource *
+d3d12_resource_create(struct pipe_screen *pscreen,
+                      const struct pipe_resource *templ)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+   struct d3d12_resource *res = CALLOC_STRUCT(d3d12_resource);
+   bool ret;
+
+   res->base = *templ;
+
+   if (D3D12_DEBUG_RESOURCE & d3d12_debug) {
+      debug_printf("D3D12: Create %sresource %s@%d %dx%dx%d as:%d mip:%d\n",
+                   templ->usage == PIPE_USAGE_STAGING ? "STAGING " :"",
+                   util_format_name(templ->format), templ->nr_samples,
+                   templ->width0, templ->height0, templ->depth0,
+                   templ->array_size, templ->last_level, templ);
+   }
+
+   pipe_reference_init(&res->base.reference, 1);
+   res->base.screen = pscreen;
+
+   if (templ->target == PIPE_BUFFER) {
+      ret = init_buffer(screen, res, templ);
+   } else {
+      ret = init_texture(screen, res, templ);
+   }
+
+   if (!ret) {
+      FREE(res);
+      return NULL;
+   }
+
+   init_valid_range(res);
+
+   return &res->base;
+}
+
+/* pipe_screen::resource_from_handle hook. Importing a resource from a winsys
+ * handle is not implemented: all arguments are ignored and NULL is returned.
+ */
+static struct pipe_resource *
+d3d12_resource_from_handle(struct pipe_screen *pscreen,
+                          const struct pipe_resource *templ,
+                          struct winsys_handle *handle, unsigned usage)
+{
+   return NULL;
+}
+
+/* pipe_screen::resource_get_handle hook. Exporting a resource as a winsys
+ * handle is not implemented: all arguments are ignored and false is returned.
+ */
+static bool
+d3d12_resource_get_handle(struct pipe_screen *pscreen,
+                          struct pipe_context *pcontext,
+                          struct pipe_resource *pres,
+                          struct winsys_handle *handle,
+                          unsigned usage)
+{
+   return false;
+}
+
+/* Install the resource-related entry points of this driver on a screen. */
+void
+d3d12_screen_resource_init(struct pipe_screen *pscreen)
+{
+   /* Lifetime: creation and destruction. */
+   pscreen->resource_create = d3d12_resource_create;
+   pscreen->resource_destroy = d3d12_resource_destroy;
+
+   /* Import/export through winsys handles. */
+   pscreen->resource_from_handle = d3d12_resource_from_handle;
+   pscreen->resource_get_handle = d3d12_resource_get_handle;
+}
+
+unsigned int
+get_subresource_id(struct d3d12_resource *res, unsigned resid,
+                   unsigned z, unsigned base_level)
+{
+   unsigned resource_stride = res->base.last_level + 1;
+   if (res->base.target == PIPE_TEXTURE_1D_ARRAY ||
+       res->base.target == PIPE_TEXTURE_2D_ARRAY)
+      resource_stride *= res->base.array_size;
+
+   if (res->base.target == PIPE_TEXTURE_CUBE)
+      resource_stride *= 6;
+
+   if (res->base.target == PIPE_TEXTURE_CUBE_ARRAY)
+      resource_stride *= 6 * res->base.array_size;
+
+   unsigned layer_stride = res->base.last_level + 1;
+
+   return resid * resource_stride + z * layer_stride +
+         base_level;
+}
+
+/* Build a subresource-index copy location that addresses layer z of the
+ * transfer's mip level in the texture res.
+ */
+static D3D12_TEXTURE_COPY_LOCATION
+fill_texture_location(struct d3d12_resource *res,
+                      struct d3d12_transfer *trans, unsigned resid, unsigned z)
+{
+   D3D12_TEXTURE_COPY_LOCATION location = {0};
+   int index = get_subresource_id(res, resid, z, trans->base.level);
+
+   location.pResource = d3d12_resource_resource(res);
+   location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+   location.SubresourceIndex = index;
+
+   return location;
+}
+
+static D3D12_TEXTURE_COPY_LOCATION
+fill_buffer_location(struct d3d12_context *ctx,
+                     struct d3d12_resource *res,
+                     struct d3d12_resource *staging_res,
+                     struct d3d12_transfer *trans,
+                     unsigned depth,
+                     unsigned resid, unsigned z)
+{
+   D3D12_TEXTURE_COPY_LOCATION buf_loc = {0};
+   D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
+   uint64_t offset = 0;
+   auto descr = d3d12_resource_underlying(res, &offset)->GetDesc();
+   ID3D12Device* dev = d3d12_screen(ctx->base.screen)->dev;
+
+   unsigned sub_resid = get_subresource_id(res, resid, z, trans->base.level);
+   dev->GetCopyableFootprints(&descr, sub_resid, 1, 0, &footprint, nullptr, nullptr, nullptr);
+
+   buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+   buf_loc.pResource = d3d12_resource_underlying(staging_res, &offset);
+   buf_loc.PlacedFootprint = footprint;
+   buf_loc.PlacedFootprint.Offset += offset;
+
+   buf_loc.PlacedFootprint.Footprint.Width = ALIGN(trans->base.box.width,
+                                                   util_format_get_blockwidth(res->base.format));
+   buf_loc.PlacedFootprint.Footprint.Height = ALIGN(trans->base.box.height,
+                                                    util_format_get_blockheight(res->base.format));
+   buf_loc.PlacedFootprint.Footprint.Depth = ALIGN(depth,
+                                                   util_format_get_blockdepth(res->base.format));
+
+   buf_loc.PlacedFootprint.Footprint.RowPitch = trans->base.stride;
+
+   return buf_loc;
+}
+
+/* Arguments for one CopyTextureRegion call issued by copy_texture_region(). */
+struct copy_info {
+   /* Destination resource; referenced by the batch and transitioned to
+    * COPY_DEST before the copy. */
+   struct d3d12_resource *dst;
+   /* Destination copy location handed to CopyTextureRegion. */
+   D3D12_TEXTURE_COPY_LOCATION dst_loc;
+   /* Destination coordinates handed to CopyTextureRegion. */
+   UINT dst_x, dst_y, dst_z;
+   /* Source resource; referenced by the batch and transitioned to
+    * COPY_SOURCE before the copy. */
+   struct d3d12_resource *src;
+   /* Source copy location handed to CopyTextureRegion. */
+   D3D12_TEXTURE_COPY_LOCATION src_loc;
+   /* Optional source box; callers leave it nullptr when no sub-box is
+    * needed. */
+   D3D12_BOX *src_box;
+};
+
+
+/* Record a CopyTextureRegion described by info on the context's command list.
+ *
+ * Both resources are referenced by the current batch, transitioned to the
+ * COPY_SOURCE / COPY_DEST states and the pending state changes are applied
+ * before the copy itself is recorded.
+ */
+static void
+copy_texture_region(struct d3d12_context *ctx,
+                    struct copy_info& info)
+{
+   auto batch = d3d12_current_batch(ctx);
+
+   /* Register both resources with the batch that will contain the copy. */
+   d3d12_batch_reference_resource(batch, info.src);
+   d3d12_batch_reference_resource(batch, info.dst);
+
+   /* Request the copy states, then apply them before recording the copy. */
+   d3d12_transition_resource_state(ctx, info.src, D3D12_RESOURCE_STATE_COPY_SOURCE);
+   d3d12_transition_resource_state(ctx, info.dst, D3D12_RESOURCE_STATE_COPY_DEST);
+   d3d12_apply_resource_states(ctx);
+   ctx->cmdlist->CopyTextureRegion(&info.dst_loc, info.dst_x, info.dst_y, info.dst_z,
+                                   &info.src_loc, info.src_box);
+}
+
+static void
+transfer_buf_to_image_part(struct d3d12_context *ctx,
+                           struct d3d12_resource *res,
+                           struct d3d12_resource *staging_res,
+                           struct d3d12_transfer *trans,
+                           int z, int depth, int start_z, int dest_z,
+                           int resid)
+{
+   if (D3D12_DEBUG_RESOURCE & d3d12_debug) {
+      debug_printf("D3D12: Copy %dx%dx%d + %dx%dx%d from buffer %s to image %s\n",
+                   trans->base.box.x, trans->base.box.y, trans->base.box.z,
+                   trans->base.box.width, trans->base.box.height, trans->base.box.depth,
+                   util_format_name(staging_res->base.format),
+                   util_format_name(res->base.format));
+   }
+
+   struct copy_info copy_info;
+   copy_info.src = staging_res;
+   copy_info.src_loc = fill_buffer_location(ctx, res, staging_res, trans, depth, resid, z);
+   copy_info.src_loc.PlacedFootprint.Offset = (z  - start_z) * trans->base.layer_stride;
+   copy_info.src_box = nullptr;
+   copy_info.dst = res;
+   copy_info.dst_loc = fill_texture_location(res, trans, resid, z);
+   copy_info.dst_x = trans->base.box.x;
+   copy_info.dst_y = trans->base.box.y;
+   copy_info.dst_z = res->base.target == PIPE_TEXTURE_CUBE ? 0 : dest_z;
+   copy_info.src_box = nullptr;
+
+   copy_texture_region(ctx, copy_info);
+}
+
+static bool
+transfer_buf_to_image(struct d3d12_context *ctx,
+                      struct d3d12_resource *res,
+                      struct d3d12_resource *staging_res,
+                      struct d3d12_transfer *trans, int resid)
+{
+   if (res->base.target == PIPE_TEXTURE_3D) {
+      assert(resid == 0);
+      transfer_buf_to_image_part(ctx, res, staging_res, trans,
+                                 0, trans->base.box.depth, 0,
+                                 trans->base.box.z, 0);
+   } else {
+      int num_layers = trans->base.box.depth;
+      int start_z = trans->base.box.z;
+
+      for (int z = start_z; z < start_z + num_layers; ++z) {
+         transfer_buf_to_image_part(ctx, res, staging_res, trans,
+                                           z, 1, start_z, 0, resid);
+      }
+   }
+   return true;
+}
+
+static void
+transfer_image_part_to_buf(struct d3d12_context *ctx,
+                           struct d3d12_resource *res,
+                           struct d3d12_resource *staging_res,
+                           struct d3d12_transfer *trans,
+                           unsigned resid, int z, int start_layer,
+                           int start_box_z, int depth)
+{
+   struct pipe_box *box = &trans->base.box;
+   D3D12_BOX src_box = {};
+
+   struct copy_info copy_info;
+   copy_info.src_box = nullptr;
+   copy_info.src = res;
+   copy_info.src_loc = fill_texture_location(res, trans, resid, z);
+   copy_info.dst = staging_res;
+   copy_info.dst_loc = fill_buffer_location(ctx, res, staging_res, trans,
+                                            depth, resid, z);
+   copy_info.dst_loc.PlacedFootprint.Offset = (z  - start_layer) * trans->base.layer_stride;
+   copy_info.dst_x = copy_info.dst_y = copy_info.dst_z = 0;
+
+   if (!util_texrange_covers_whole_level(&res->base, trans->base.level,
+                                         box->x, box->y, start_box_z,
+                                         box->width, box->height, depth)) {
+      src_box.left = box->x;
+      src_box.right = box->x + box->width;
+      src_box.top = box->y;
+      src_box.bottom = box->y + box->height;
+      src_box.front = start_box_z;
+      src_box.back = start_box_z + depth;
+      copy_info.src_box = &src_box;
+   }
+
+   copy_texture_region(ctx, copy_info);
+}
+
+static bool
+transfer_image_to_buf(struct d3d12_context *ctx,
+                            struct d3d12_resource *res,
+                            struct d3d12_resource *staging_res,
+                            struct d3d12_transfer *trans,
+                            unsigned resid)
+{
+
+   /* We only suppport loading from either an texture array
+    * or a ZS texture, so either resid is zero, or num_layers == 1)
+    */
+   assert(resid == 0 || trans->base.box.depth == 1);
+
+   if (D3D12_DEBUG_RESOURCE & d3d12_debug) {
+      debug_printf("D3D12: Copy %dx%dx%d + %dx%dx%d from %s@%d to %s\n",
+                   trans->base.box.x, trans->base.box.y, trans->base.box.z,
+                   trans->base.box.width, trans->base.box.height, trans->base.box.depth,
+                   util_format_name(res->base.format), resid,
+                   util_format_name(staging_res->base.format));
+   }
+
+   struct pipe_resource *resolved_resource = nullptr;
+   if (res->base.nr_samples > 1) {
+      struct pipe_resource tmpl = res->base;
+      tmpl.nr_samples = 0;
+      resolved_resource = d3d12_resource_create(ctx->base.screen, &tmpl);
+      struct pipe_blit_info resolve_info = {0};
+      struct pipe_box box = {0,0,0, (int)res->base.width0, (int16_t)res->base.height0, (int16_t)res->base.depth0};
+      resolve_info.dst.resource = resolved_resource;
+      resolve_info.dst.box = box;
+      resolve_info.dst.format = res->base.format;
+      resolve_info.src.resource = &res->base;
+      resolve_info.src.box = box;
+      resolve_info.src.format = res->base.format;
+      resolve_info.filter = PIPE_TEX_FILTER_NEAREST;
+      resolve_info.mask = util_format_get_mask(tmpl.format);
+
+
+
+      d3d12_blit(&ctx->base, &resolve_info);
+      res = (struct d3d12_resource *)resolved_resource;
+   }
+
+
+   if (res->base.target == PIPE_TEXTURE_3D) {
+      transfer_image_part_to_buf(ctx, res, staging_res, trans, resid,
+                                 0, 0, trans->base.box.z, trans->base.box.depth);
+   } else {
+      int start_layer = trans->base.box.z;
+      for (int z = start_layer; z < start_layer + trans->base.box.depth; ++z) {
+         transfer_image_part_to_buf(ctx, res, staging_res, trans, resid,
+                                    z, start_layer, 0, 1);
+      }
+   }
+
+   pipe_resource_reference(&resolved_resource, NULL);
+
+   return true;
+}
+
+/* Offset of element (x, y, z) in a linear layout: x plus y rows of 'stride'
+ * plus z slices of 'layer_stride'.
+ */
+static unsigned
+linear_offset(int x, int y, int z, unsigned stride, unsigned layer_stride)
+{
+   const unsigned row_part = y * stride;
+   const unsigned slice_part = z * layer_stride;
+
+   return x + row_part + slice_part;
+}
+
+static D3D12_RANGE
+linear_range(const struct pipe_box *box, unsigned stride, unsigned layer_stride)
+{
+   D3D12_RANGE range;
+
+   range.Begin = linear_offset(box->x, box->y, box->z,
+                               stride, layer_stride);
+   range.End = linear_offset(box->x + box->width,
+                             box->y + box->height - 1,
+                             box->z + box->depth - 1,
+                             stride, layer_stride);
+
+   return range;
+}
+
+static bool
+synchronize(struct d3d12_context *ctx,
+            struct d3d12_resource *res,
+            unsigned usage,
+            D3D12_RANGE *range)
+{
+   assert(can_map_directly(&res->base));
+
+   /* Check whether that range contains valid data; if not, we might not need to sync */
+   if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
+       usage & PIPE_MAP_WRITE &&
+       !util_ranges_intersect(&res->valid_buffer_range, range->Begin, range->End)) {
+      usage |= PIPE_MAP_UNSYNCHRONIZED;
+   }
+
+   if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && resource_is_busy(ctx, res)) {
+      if (usage & PIPE_MAP_DONTBLOCK)
+         return false;
+
+      d3d12_resource_wait_idle(ctx, res);
+   }
+
+   if (usage & PIPE_MAP_WRITE)
+      util_range_add(&res->base, &res->valid_buffer_range,
+                     range->Begin, range->End);
+
+   return true;
+}
+
+/* A wrapper to make sure local resources are freed and unmapped with
+ * any exit path */
+struct local_resource {
+   local_resource(pipe_screen *s, struct pipe_resource *tmpl)
+   {
+      res = d3d12_resource(d3d12_resource_create(s, tmpl));
+   }
+
+   ~local_resource() {
+      if (res) {
+         if (mapped)
+            d3d12_bo_unmap(res->bo, nullptr);
+         pipe_resource_reference((struct pipe_resource **)&res, NULL);
+      }
+   }
+
+   void *
+   map() {
+      void *ptr;
+      ptr = d3d12_bo_map(res->bo, nullptr);
+      if (ptr)
+         mapped = true;
+      return ptr;
+   }
+
+   void unmap()
+   {
+      if (mapped)
+         d3d12_bo_unmap(res->bo, nullptr);
+      mapped = false;
+   }
+
+   operator struct d3d12_resource *() {
+      return res;
+   }
+
+   bool operator !() {
+      return !res;
+   }
+private:
+   struct d3d12_resource *res;
+   bool mapped;
+};
+
+/* Combined depth-stencil needs a special handling for reading back: DX handled
+ * depth and stencil parts as separate resources and handles copying them only
+ * by using seperate texture copy calls with different formats. So create two
+ * buffers, read back both resources and interleave the data.
+ */
+static void
+prepare_zs_layer_strides(struct d3d12_resource *res,
+                         const struct pipe_box *box,
+                         struct d3d12_transfer *trans)
+{
+   trans->base.stride = align(util_format_get_stride(res->base.format, box->width),
+                              D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+   trans->base.layer_stride = util_format_get_2d_size(res->base.format,
+                                                      trans->base.stride,
+                                                      box->height);
+}
+
+/* Read back a combined depth-stencil texture into a malloc'ed buffer.
+ *
+ * Depth (resid 0) and stencil (resid 1) are copied into two temporary staging
+ * buffers, the command list is flushed and waited for, both buffers are
+ * mapped and their contents are packed into a buffer of layer_stride bytes
+ * that is stored in trans->data and returned.
+ *
+ * Returns NULL if a staging buffer cannot be created or mapped, if a copy
+ * fails or if the allocation fails. The staging buffers are released by the
+ * local_resource destructors on every exit path.
+ */
+static void *
+read_zs_surface(struct d3d12_context *ctx, struct d3d12_resource *res,
+                const struct pipe_box *box,
+                struct d3d12_transfer *trans)
+{
+   pipe_screen *pscreen = ctx->base.screen;
+
+   prepare_zs_layer_strides(res, box, trans);
+
+   /* Template for the staging buffers: one layer_stride-sized buffer each. */
+   struct pipe_resource tmpl;
+   memset(&tmpl, 0, sizeof tmpl);
+   tmpl.target = PIPE_BUFFER;
+   tmpl.format = PIPE_FORMAT_R32_UNORM;
+   tmpl.bind = 0;
+   tmpl.usage = PIPE_USAGE_STAGING;
+   tmpl.flags = 0;
+   tmpl.width0 = trans->base.layer_stride;
+   tmpl.height0 = 1;
+   tmpl.depth0 = 1;
+   tmpl.array_size = 1;
+
+   local_resource depth_buffer(pscreen, &tmpl);
+   if (!depth_buffer) {
+      debug_printf("Allocating staging buffer for depth failed\n");
+      return NULL;
+   }
+
+   /* resid 0: depth */
+   if (!transfer_image_to_buf(ctx, res, depth_buffer, trans, 0))
+      return NULL;
+
+   tmpl.format = PIPE_FORMAT_R8_UINT;
+
+   local_resource stencil_buffer(pscreen, &tmpl);
+   if (!stencil_buffer) {
+      debug_printf("Allocating staging buffer for stencilfailed\n");
+      return NULL;
+   }
+
+   /* resid 1: stencil */
+   if (!transfer_image_to_buf(ctx, res, stencil_buffer, trans, 1))
+      return NULL;
+
+   /* Both copies must have finished before the buffers are mapped. */
+   d3d12_flush_cmdlist_and_wait(ctx);
+
+   void *depth_ptr = depth_buffer.map();
+   if (!depth_ptr) {
+      debug_printf("Mapping staging depth buffer failed\n");
+      return NULL;
+   }
+
+   uint8_t *stencil_ptr =  (uint8_t *)stencil_buffer.map();
+   if (!stencil_ptr) {
+      debug_printf("Mapping staging stencil buffer failed\n");
+      return NULL;
+   }
+
+   uint8_t *buf = (uint8_t *)malloc(trans->base.layer_stride);
+   if (!buf)
+      return NULL;
+
+   /* Stored in the transfer; d3d12_transfer_unmap() frees it. */
+   trans->data = buf;
+
+   /* Interleave the separately read depth and stencil data. */
+   switch (res->base.format) {
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      util_format_z24_unorm_s8_uint_pack_separate(buf, trans->base.stride,
+                                                  (uint32_t *)depth_ptr, trans->base.stride,
+                                                  stencil_ptr, trans->base.stride,
+                                                  trans->base.box.width, trans->base.box.height);
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      util_format_z32_float_s8x24_uint_pack_z_float(buf, trans->base.stride,
+                                                    (float *)depth_ptr, trans->base.stride,
+                                                    trans->base.box.width, trans->base.box.height);
+      util_format_z32_float_s8x24_uint_pack_s_8uint(buf, trans->base.stride,
+                                                    stencil_ptr, trans->base.stride,
+                                                    trans->base.box.width, trans->base.box.height);
+      break;
+   default:
+      unreachable("Unsupported depth steancil format");
+   };
+
+   return trans->data;
+}
+
+/* Prepare a write-only map of a combined depth-stencil texture: compute the
+ * transfer strides and allocate a layer_stride-sized buffer for the caller
+ * to fill. On success the buffer is stored in trans->data and returned; on
+ * allocation failure NULL is returned and trans->data is left untouched.
+ */
+static void *
+prepare_write_zs_surface(struct d3d12_resource *res,
+                         const struct pipe_box *box,
+                         struct d3d12_transfer *trans)
+{
+   prepare_zs_layer_strides(res, box, trans);
+
+   void *storage = malloc(trans->base.layer_stride);
+   if (storage)
+      trans->data = storage;
+
+   return storage;
+}
+
+static void
+write_zs_surface(struct pipe_context *pctx, struct d3d12_resource *res,
+                 struct d3d12_transfer *trans)
+{
+   struct pipe_resource tmpl;
+   memset(&tmpl, 0, sizeof tmpl);
+   tmpl.target = PIPE_BUFFER;
+   tmpl.format = PIPE_FORMAT_R32_UNORM;
+   tmpl.bind = 0;
+   tmpl.usage = PIPE_USAGE_STAGING;
+   tmpl.flags = 0;
+   tmpl.width0 = trans->base.layer_stride;
+   tmpl.height0 = 1;
+   tmpl.depth0 = 1;
+   tmpl.array_size = 1;
+
+   local_resource depth_buffer(pctx->screen, &tmpl);
+   if (!depth_buffer) {
+      debug_printf("Allocating staging buffer for depth failed\n");
+      return;
+   }
+
+   local_resource stencil_buffer(pctx->screen, &tmpl);
+   if (!stencil_buffer) {
+      debug_printf("Allocating staging buffer for depth failed\n");
+      return;
+   }
+
+   void *depth_ptr = depth_buffer.map();
+   if (!depth_ptr) {
+      debug_printf("Mapping staging depth buffer failed\n");
+      return;
+   }
+
+   uint8_t *stencil_ptr =  (uint8_t *)stencil_buffer.map();
+   if (!stencil_ptr) {
+      debug_printf("Mapping staging stencil buffer failed\n");
+      return;
+   }
+
+   switch (res->base.format) {
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      util_format_z32_unorm_unpack_z_32unorm((uint32_t *)depth_ptr, trans->base.stride, (uint8_t*)trans->data,
+                                             trans->base.stride, trans->base.box.width,
+                                             trans->base.box.height);
+      util_format_z24_unorm_s8_uint_unpack_s_8uint(stencil_ptr, trans->base.stride, (uint8_t*)trans->data,
+                                                   trans->base.stride, trans->base.box.width,
+                                                   trans->base.box.height);
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      util_format_z32_float_s8x24_uint_unpack_z_float((float *)depth_ptr, trans->base.stride, (uint8_t*)trans->data,
+                                                      trans->base.stride, trans->base.box.width,
+                                                      trans->base.box.height);
+      util_format_z32_float_s8x24_uint_unpack_s_8uint(stencil_ptr, trans->base.stride, (uint8_t*)trans->data,
+                                                      trans->base.stride, trans->base.box.width,
+                                                      trans->base.box.height);
+      break;
+   default:
+      unreachable("Unsupported depth steancil format");
+   };
+
+   stencil_buffer.unmap();
+   depth_buffer.unmap();
+
+   transfer_buf_to_image(d3d12_context(pctx), res, depth_buffer, trans, 0);
+   transfer_buf_to_image(d3d12_context(pctx), res, stencil_buffer, trans, 1);
+}
+
+static void *
+d3d12_transfer_map(struct pipe_context *pctx,
+                   struct pipe_resource *pres,
+                   unsigned level,
+                   unsigned usage,
+                   const struct pipe_box *box,
+                   struct pipe_transfer **transfer)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_resource *res = d3d12_resource(pres);
+
+   if (usage & PIPE_MAP_DIRECTLY || !res->bo)
+      return NULL;
+
+   struct d3d12_transfer *trans = (struct d3d12_transfer *)slab_alloc(&ctx->transfer_pool);
+   struct pipe_transfer *ptrans = &trans->base;
+   if (!trans)
+      return NULL;
+
+   memset(trans, 0, sizeof(*trans));
+   pipe_resource_reference(&ptrans->resource, pres);
+
+   ptrans->resource = pres;
+   ptrans->level = level;
+   ptrans->usage = (enum pipe_map_flags)usage;
+   ptrans->box = *box;
+
+   D3D12_RANGE range;
+   range.Begin = 0;
+
+   void *ptr;
+   if (can_map_directly(&res->base)) {
+      if (pres->target == PIPE_BUFFER) {
+         ptrans->stride = 0;
+         ptrans->layer_stride = 0;
+      } else {
+         ptrans->stride = util_format_get_stride(pres->format, box->width);
+         ptrans->layer_stride = util_format_get_2d_size(pres->format,
+                                                        ptrans->stride,
+                                                        box->height);
+      }
+
+      range = linear_range(box, ptrans->stride, ptrans->layer_stride);
+      if (!synchronize(ctx, res, usage, &range))
+         return NULL;
+      ptr = d3d12_bo_map(res->bo, &range);
+   } else if (unlikely(pres->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+                       pres->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+      if (usage & PIPE_MAP_READ) {
+         ptr = read_zs_surface(ctx, res, box, trans);
+      } else if (usage & PIPE_MAP_WRITE){
+         ptr = prepare_write_zs_surface(res, box, trans);
+      }
+   } else {
+      ptrans->stride = align(util_format_get_stride(pres->format, box->width),
+                              D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+      ptrans->layer_stride = util_format_get_2d_size(pres->format,
+                                                     ptrans->stride,
+                                                     box->height);
+
+      if (res->base.target != PIPE_TEXTURE_3D)
+         ptrans->layer_stride = align(ptrans->layer_stride,
+                                      D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
+
+      trans->staging_res = pipe_buffer_create(pctx->screen, 0,
+                                              PIPE_USAGE_STAGING,
+                                              ptrans->layer_stride * box->depth);
+      if (!trans->staging_res)
+         return NULL;
+
+      struct d3d12_resource *staging_res = d3d12_resource(trans->staging_res);
+
+      if (usage & PIPE_MAP_READ) {
+         bool ret = transfer_image_to_buf(ctx, res, staging_res, trans, 0);
+         if (ret == false)
+            return NULL;
+         d3d12_flush_cmdlist_and_wait(ctx);
+      }
+
+      range.Begin = 0;
+      range.End = ptrans->layer_stride * box->depth;
+
+      ptr = d3d12_bo_map(staging_res->bo, &range);
+   }
+
+   *transfer = ptrans;
+   return ptr;
+}
+
+static void
+d3d12_transfer_unmap(struct pipe_context *pctx,
+                     struct pipe_transfer *ptrans)
+{
+   struct d3d12_resource *res = d3d12_resource(ptrans->resource);
+   struct d3d12_transfer *trans = (struct d3d12_transfer *)ptrans;
+   D3D12_RANGE range = { 0, 0 };
+
+   if (trans->data != nullptr) {
+      if (trans->base.usage & PIPE_MAP_WRITE)
+         write_zs_surface(pctx, res, trans);
+      free(trans->data);
+   } else if (trans->staging_res) {
+      struct d3d12_resource *staging_res = d3d12_resource(trans->staging_res);
+
+      if (trans->base.usage & PIPE_MAP_WRITE) {
+         range.Begin = 0;
+         range.End = ptrans->layer_stride * ptrans->box.depth;
+      }
+      d3d12_bo_unmap(staging_res->bo, &range);
+
+      if (trans->base.usage & PIPE_MAP_WRITE) {
+         struct d3d12_context *ctx = d3d12_context(pctx);
+         transfer_buf_to_image(ctx, res, staging_res, trans, 0);
+      }
+
+      pipe_resource_reference(&trans->staging_res, NULL);
+   } else {
+      if (trans->base.usage & PIPE_MAP_WRITE) {
+         range.Begin = ptrans->box.x;
+         range.End = ptrans->box.x + ptrans->box.width;
+      }
+      d3d12_bo_unmap(res->bo, &range);
+   }
+
+   pipe_resource_reference(&ptrans->resource, NULL);
+   slab_free(&d3d12_context(pctx)->transfer_pool, ptrans);
+}
+
+void
+d3d12_resource_make_writeable(struct pipe_context *pctx,
+                              struct pipe_resource *pres)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_resource *res = d3d12_resource(pres);
+   struct d3d12_resource *dup_res;
+
+   if (!res->bo || !d3d12_bo_is_suballocated(res->bo))
+      return;
+
+   dup_res = d3d12_resource(pipe_buffer_create(pres->screen,
+                                               pres->bind & PIPE_BIND_STREAM_OUTPUT,
+                                               (pipe_resource_usage) pres->usage,
+                                               pres->width0));
+
+   if (res->valid_buffer_range.end > res->valid_buffer_range.start) {
+      struct pipe_box box;
+
+      box.x = res->valid_buffer_range.start;
+      box.y = 0;
+      box.z = 0;
+      box.width = res->valid_buffer_range.end - res->valid_buffer_range.start;
+      box.height = 1;
+      box.depth = 1;
+
+      d3d12_direct_copy(ctx, dup_res, 0, &box, res, 0, &box, PIPE_MASK_RGBAZS);
+   }
+
+   /* Move new BO to old resource */
+   d3d12_bo_unreference(res->bo);
+   res->bo = dup_res->bo;
+   d3d12_bo_reference(res->bo);
+
+   d3d12_resource_destroy(dup_res->base.screen, &dup_res->base);
+}
+
+/* Install the resource-related entry points of this driver on a context. */
+void
+d3d12_context_resource_init(struct pipe_context *pctx)
+{
+   /* Generic gallium helpers. */
+   pctx->texture_subdata = u_default_texture_subdata;
+   pctx->buffer_subdata = u_default_buffer_subdata;
+   pctx->transfer_flush_region = u_default_transfer_flush_region;
+
+   /* Driver-specific mapping. */
+   pctx->transfer_unmap = d3d12_transfer_unmap;
+   pctx->transfer_map = d3d12_transfer_map;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_resource.h b/src/gallium/drivers/d3d12/d3d12_resource.h
new file mode 100644 (file)
index 0000000..62ecf6b
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_RESOURCE_H
+#define D3D12_RESOURCE_H
+
+struct pipe_screen;
+#include "d3d12_bufmgr.h"
+#include "util/u_range.h"
+#include "util/u_transfer.h"
+
+#include <d3d12.h>
+
+/* Driver-side representation of a gallium resource. */
+struct d3d12_resource {
+   /* Gallium base object; must stay first so that pipe_resource pointers
+    * can be cast to d3d12_resource pointers. */
+   struct pipe_resource base;
+   /* Backing buffer object; may be NULL, which the accessors below check. */
+   struct d3d12_bo *bo;
+   /* DXGI format of the resource; DXGI_FORMAT_UNKNOWN for buffers. */
+   DXGI_FORMAT dxgi_format;
+   /* Number of mip levels (last_level + 1), set for textures. */
+   unsigned mip_levels;
+   /* Winsys display target, created for display-target/scanout/shared
+    * textures when the screen has a winsys. */
+   struct sw_displaytarget *dt;
+   /* Stride reported by the winsys when 'dt' was created. */
+   unsigned dt_stride;
+   /* Range that has been written through a map; used to skip
+    * synchronization for writes to untouched ranges. */
+   struct util_range valid_buffer_range;
+};
+
+/* Driver-side state of one transfer (map/unmap pair). */
+struct d3d12_transfer {
+   /* Gallium base object; must stay first so that pipe_transfer pointers
+    * can be cast to d3d12_transfer pointers. */
+   struct pipe_transfer base;
+   /* Staging buffer used when a texture cannot be mapped directly;
+    * NULL otherwise. */
+   struct pipe_resource *staging_res;
+   /* malloc'ed interleaved buffer used for combined depth-stencil maps;
+    * NULL otherwise. */
+   void *data;
+};
+
+/* Downcast a pipe_resource pointer to the driver's d3d12_resource. This is a
+ * plain cast that relies on 'base' being the first member of
+ * struct d3d12_resource; a NULL pointer stays NULL.
+ */
+static inline struct d3d12_resource *
+d3d12_resource(struct pipe_resource *r)
+{
+   return (struct d3d12_resource *)r;
+}
+
+/* Look up the ID3D12Resource that backs this resource and report, through
+ * 'offset', where the resource starts inside it. Returns NULL, leaving
+ * 'offset' untouched, when the resource has no bo.
+ */
+static inline ID3D12Resource *
+d3d12_resource_underlying(struct d3d12_resource *res, uint64_t *offset)
+{
+   return res->bo ? d3d12_bo_get_base(res->bo, offset)->res : NULL;
+}
+
+/* Same as d3d12_resource_underlying(), for callers that do not care about
+ * the offset. Returns NULL when the resource has no bo.
+ */
+static inline ID3D12Resource *
+d3d12_resource_resource(struct d3d12_resource *res)
+{
+   uint64_t ignored_offset;
+
+   return d3d12_resource_underlying(res, &ignored_offset);
+}
+
+/* Returns the state-tracking object stored on the base bo of this resource,
+ * or NULL when the resource has no bo.
+ */
+static inline struct TransitionableResourceState *
+d3d12_resource_state(struct d3d12_resource *res)
+{
+   uint64_t ignored_offset;
+
+   return res->bo ? d3d12_bo_get_base(res->bo, &ignored_offset)->trans_state
+                  : NULL;
+}
+
+/* Returns the GPU virtual address of this resource: the address of the
+ * underlying ID3D12Resource plus the resource's offset inside it.
+ *
+ * Returns 0 when the resource has no backing bo, instead of dereferencing
+ * the NULL pointer d3d12_resource_underlying() returns in that case.
+ */
+static inline D3D12_GPU_VIRTUAL_ADDRESS
+d3d12_resource_gpu_virtual_address(struct d3d12_resource *res)
+{
+   uint64_t offset = 0;
+   ID3D12Resource *base_res = d3d12_resource_underlying(res, &offset);
+   if (!base_res)
+      return 0;
+   return base_res->GetGPUVirtualAddress() + offset;
+}
+
+/* True for cube, 1D-array and 2D-array texture targets, false for every
+ * other target.
+ */
+static inline bool
+d3d12_subresource_id_uses_layer(enum pipe_texture_target target)
+{
+   switch (target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+      return true;
+   default:
+      return false;
+   }
+}
+
+void
+d3d12_resource_release(struct d3d12_resource *res);
+
+void
+d3d12_resource_wait_idle(struct d3d12_context *ctx,
+                         struct d3d12_resource *res);
+
+void
+d3d12_resource_make_writeable(struct pipe_context *pctx,
+                              struct pipe_resource *pres);
+
+void
+d3d12_screen_resource_init(struct pipe_screen *pscreen);
+
+void
+d3d12_context_resource_init(struct pipe_context *pctx);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_root_signature.cpp b/src/gallium/drivers/d3d12/d3d12_root_signature.cpp
new file mode 100644 (file)
index 0000000..26229f5
--- /dev/null
@@ -0,0 +1,255 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_root_signature.h"
+#include "d3d12_compiler.h"
+#include "d3d12_screen.h"
+
+#include "util/u_memory.h"
+
+#include <wrl.h>
+using Microsoft::WRL::ComPtr;
+
+/* One entry of the per-context root signature cache. */
+struct d3d12_root_signature {
+   /* Copy of the lookup key; its address is what gets inserted into the
+    * hash table as the entry's key in d3d12_get_root_signature(). */
+   struct d3d12_root_signature_key key;
+   /* The created root signature; released in delete_entry(). */
+   ID3D12RootSignature *sig;
+};
+
+/* Maps a gallium graphics shader stage to the matching D3D12 shader
+ * visibility. Any stage other than the five handled here is treated as
+ * unreachable. */
+static D3D12_SHADER_VISIBILITY
+get_shader_visibility(enum pipe_shader_type stage)
+{
+   if (stage == PIPE_SHADER_VERTEX)
+      return D3D12_SHADER_VISIBILITY_VERTEX;
+   if (stage == PIPE_SHADER_FRAGMENT)
+      return D3D12_SHADER_VISIBILITY_PIXEL;
+   if (stage == PIPE_SHADER_GEOMETRY)
+      return D3D12_SHADER_VISIBILITY_GEOMETRY;
+   if (stage == PIPE_SHADER_TESS_CTRL)
+      return D3D12_SHADER_VISIBILITY_HULL;
+   if (stage == PIPE_SHADER_TESS_EVAL)
+      return D3D12_SHADER_VISIBILITY_DOMAIN;
+
+   unreachable("unknown shader stage");
+}
+
+static inline void
+init_constant_root_param(D3D12_ROOT_PARAMETER1 *param,
+                         unsigned reg,
+                         unsigned size,
+                         D3D12_SHADER_VISIBILITY visibility)
+{
+   param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+   param->ShaderVisibility = visibility;
+   param->Constants.RegisterSpace = 0;
+   param->Constants.ShaderRegister = reg;
+   param->Constants.Num32BitValues = size;
+}
+
+static inline void
+init_range_root_param(D3D12_ROOT_PARAMETER1 *param,
+                      D3D12_DESCRIPTOR_RANGE1 *range,
+                      D3D12_DESCRIPTOR_RANGE_TYPE type,
+                      uint32_t num_descs,
+                      D3D12_SHADER_VISIBILITY visibility,
+                      uint32_t base_shader_register)
+{
+   range->RangeType = type;
+   range->NumDescriptors = num_descs;
+   range->BaseShaderRegister = base_shader_register;
+   range->RegisterSpace = 0;
+   if (type == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER)
+      range->Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
+   else
+      range->Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
+   range->OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+
+   param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+   param->DescriptorTable.NumDescriptorRanges = 1;
+   param->DescriptorTable.pDescriptorRanges = range;
+   param->ShaderVisibility = visibility;
+}
+
+static ID3D12RootSignature *
+create_root_signature(struct d3d12_context *ctx, struct d3d12_root_signature_key *key)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   D3D12_ROOT_PARAMETER1 root_params[D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES];
+   D3D12_DESCRIPTOR_RANGE1 desc_ranges[D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES];
+   unsigned num_params = 0;
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      D3D12_SHADER_VISIBILITY visibility = get_shader_visibility((enum pipe_shader_type)i);
+
+      if (key->stages[i].num_cb_bindings > 0) {
+         assert(num_params < PIPE_SHADER_TYPES * D3D12_NUM_BINDING_TYPES);
+         init_range_root_param(&root_params[num_params],
+                               &desc_ranges[num_params],
+                               D3D12_DESCRIPTOR_RANGE_TYPE_CBV,
+                               key->stages[i].num_cb_bindings,
+                               visibility,
+                               key->stages[i].has_default_ubo0 ? 0 : 1);
+         num_params++;
+      }
+
+      if (key->stages[i].num_srv_bindings > 0) {
+         init_range_root_param(&root_params[num_params],
+                               &desc_ranges[num_params],
+                               D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
+                               key->stages[i].num_srv_bindings,
+                               visibility,
+                               0);
+         num_params++;
+      }
+
+      if (key->stages[i].num_srv_bindings > 0) {
+         init_range_root_param(&root_params[num_params],
+                               &desc_ranges[num_params],
+                               D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
+                               key->stages[i].num_srv_bindings,
+                               visibility,
+                               0);
+         num_params++;
+      }
+
+      if (key->stages[i].state_vars_size > 0) {
+         init_constant_root_param(&root_params[num_params],
+                                  key->stages[i].num_cb_bindings + (key->stages[i].has_default_ubo0 ? 0 : 1),
+                                  key->stages[i].state_vars_size,
+                                  visibility);
+         num_params++;
+      }
+   }
+
+   D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc;
+   root_sig_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
+   root_sig_desc.Desc_1_1.NumParameters = num_params;
+   root_sig_desc.Desc_1_1.pParameters = (num_params > 0) ? root_params : NULL;
+   root_sig_desc.Desc_1_1.NumStaticSamplers = 0;
+   root_sig_desc.Desc_1_1.pStaticSamplers = NULL;
+   root_sig_desc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
+
+   /* TODO Only enable this flag when needed (optimization) */
+   root_sig_desc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
+
+   if (key->has_stream_output)
+      root_sig_desc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
+
+   ComPtr<ID3DBlob> sig, error;
+   if (FAILED(ctx->D3D12SerializeVersionedRootSignature(&root_sig_desc,
+                                                        &sig, &error))) {
+      debug_printf("D3D12SerializeRootSignature failed\n");
+      return NULL;
+   }
+
+   ID3D12RootSignature *ret;
+   if (FAILED(screen->dev->CreateRootSignature(0,
+                                               sig->GetBufferPointer(),
+                                               sig->GetBufferSize(),
+                                               __uuidof(ret),
+                                               (void **)&ret))) {
+      debug_printf("CreateRootSignature failed\n");
+      return NULL;
+   }
+   return ret;
+}
+
+static void
+fill_key(struct d3d12_context *ctx, struct d3d12_root_signature_key *key)
+{
+   memset(key, 0, sizeof(struct d3d12_root_signature_key));
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      struct d3d12_shader *shader = ctx->gfx_pipeline_state.stages[i];
+
+      if (shader) {
+         key->stages[i].num_cb_bindings = shader->num_cb_bindings;
+         key->stages[i].num_srv_bindings = shader->num_srv_bindings;
+         key->stages[i].state_vars_size = shader->state_vars_size;
+         key->stages[i].has_default_ubo0 = shader->has_default_ubo0;
+
+         if (ctx->gfx_stages[i]->so_info.num_outputs > 0)
+            key->has_stream_output = true;
+      }
+   }
+}
+
+ID3D12RootSignature *
+d3d12_get_root_signature(struct d3d12_context *ctx)
+{
+   struct d3d12_root_signature_key key;
+
+   fill_key(ctx, &key);
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->root_signature_cache, &key);
+   if (!entry) {
+      struct d3d12_root_signature *data =
+         (struct d3d12_root_signature *)MALLOC(sizeof(struct d3d12_root_signature));
+      if (!data)
+         return NULL;
+
+      data->key = key;
+      data->sig = create_root_signature(ctx, &key);
+      if (!data->sig)
+         return NULL;
+
+      entry = _mesa_hash_table_insert(ctx->root_signature_cache, &data->key, data);
+      assert(entry);
+   }
+
+   return ((struct d3d12_root_signature *)entry->data)->sig;
+}
+
+/* Hash callback of the root signature cache: hashes the raw bytes of a
+ * struct d3d12_root_signature_key. */
+static uint32_t
+hash_root_signature_key(const void *key)
+{
+   const size_t key_size = sizeof(struct d3d12_root_signature_key);
+
+   return _mesa_hash_data(key, key_size);
+}
+
+/* Equality callback of the root signature cache: two keys are equal when
+ * all bytes of struct d3d12_root_signature_key match. */
+static bool
+equals_root_signature_key(const void *a, const void *b)
+{
+   const size_t key_size = sizeof(struct d3d12_root_signature_key);
+
+   return !memcmp(a, b, key_size);
+}
+
+void
+d3d12_root_signature_cache_init(struct d3d12_context *ctx)
+{
+   ctx->root_signature_cache = _mesa_hash_table_create(NULL,
+                                                       hash_root_signature_key,
+                                                       equals_root_signature_key);
+}
+
+/* Per-entry destructor of the root signature cache: releases the cached
+ * root signature, then frees the entry's data. */
+static void
+delete_entry(struct hash_entry *entry)
+{
+   struct d3d12_root_signature *cached = (struct d3d12_root_signature *)entry->data;
+   ID3D12RootSignature *sig = cached->sig;
+
+   sig->Release();
+   FREE(cached);
+}
+
+/* Destroys the context's root signature cache, running delete_entry() on
+ * every cached entry. */
+void
+d3d12_root_signature_cache_destroy(struct d3d12_context *ctx)
+{
+   struct hash_table *cache = ctx->root_signature_cache;
+
+   _mesa_hash_table_destroy(cache, delete_entry);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_root_signature.h b/src/gallium/drivers/d3d12/d3d12_root_signature.h
new file mode 100644 (file)
index 0000000..1a821a5
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_ROOT_SIGNATURE_H
+#define D3D12_ROOT_SIGNATURE_H
+
+#include "d3d12_context.h"
+
+/* Lookup key of the root signature cache.
+ *
+ * The key is hashed and compared as raw bytes, padding included, so it must
+ * be zeroed completely before the fields are filled in.
+ */
+struct d3d12_root_signature_key {
+   /* Set when a bound stage has stream outputs; enables the
+    * ALLOW_STREAM_OUTPUT root signature flag. */
+   bool has_stream_output;
+   /* One slot per graphics shader stage. */
+   struct {
+      /* Number of descriptors in the stage's CBV range. */
+      unsigned num_cb_bindings;
+      /* Number of descriptors in the stage's SRV range; the sampler range
+       * uses the same count. */
+      unsigned num_srv_bindings;
+      /* Number of 32-bit root constants for the stage's state variables. */
+      unsigned state_vars_size;
+      /* When set the CBV range starts at shader register 0, otherwise at 1. */
+      bool has_default_ubo0;
+   } stages[D3D12_GFX_SHADER_STAGES];
+};
+
+/* Creates the root signature cache of 'ctx'. */
+void
+d3d12_root_signature_cache_init(struct d3d12_context *ctx);
+
+/* Destroys the root signature cache of 'ctx' and releases every root
+ * signature stored in it. */
+void
+d3d12_root_signature_cache_destroy(struct d3d12_context *ctx);
+
+/* Returns the root signature matching the graphics shaders currently bound
+ * in 'ctx', creating and caching it on first use. Returns NULL on failure. */
+ID3D12RootSignature *
+d3d12_get_root_signature(struct d3d12_context *ctx);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_screen.cpp b/src/gallium/drivers/d3d12/d3d12_screen.cpp
new file mode 100644 (file)
index 0000000..f273e55
--- /dev/null
@@ -0,0 +1,970 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_screen.h"
+
+#include "d3d12_bufmgr.h"
+#include "d3d12_compiler.h"
+#include "d3d12_context.h"
+#include "d3d12_debug.h"
+#include "d3d12_fence.h"
+#include "d3d12_format.h"
+#include "d3d12_public.h"
+#include "d3d12_resource.h"
+#include "d3d12_nir_passes.h"
+
+#include "pipebuffer/pb_bufmgr.h"
+#include "util/debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_screen.h"
+
+#include "nir.h"
+#include "frontend/sw_winsys.h"
+
+#include <dxgi1_4.h>
+#include <d3d12sdklayers.h>
+
+static const struct debug_named_value
+debug_options[] = {
+   { "verbose",      D3D12_DEBUG_VERBOSE,       NULL },
+   { "blit",         D3D12_DEBUG_BLIT,          "Trace blit and copy resource calls" },
+   { "experimental", D3D12_DEBUG_EXPERIMENTAL,  "Enable experimental shader models feature" },
+   { "dxil",         D3D12_DEBUG_DXIL,          "Dump DXIL during program compile" },
+   { "disass",       D3D12_DEBUG_DISASS,        "Dump disassambly of created DXIL shader" },
+   { "res",          D3D12_DEBUG_RESOURCE,      "Debug resources" },
+   { "debuglayer",   D3D12_DEBUG_DEBUG_LAYER,   "Enable debug layer" },
+   { "gpuvalidator", D3D12_DEBUG_GPU_VALIDATOR, "Enable GPU validator" },
+   DEBUG_NAMED_VALUE_END
+};
+
+/* Ties the "D3D12_DEBUG" option to the debug_options table with a default
+ * of 0. Presumably this expands to an accessor that parses the option once
+ * -- confirm against the macro's definition. */
+DEBUG_GET_ONCE_FLAGS_OPTION(d3d12_debug, "D3D12_DEBUG", debug_options, 0)
+
+/* Driver-wide debug flag mask (D3D12_DEBUG_* bits). */
+uint32_t
+d3d12_debug;
+
+/* Vendor IDs that d3d12_get_device_vendor() compares against
+ * adapter_desc.VendorId (presumably PCI vendor IDs -- confirm). */
+enum {
+    HW_VENDOR_AMD                   = 0x1002,
+    HW_VENDOR_INTEL                 = 0x8086,
+    HW_VENDOR_MICROSOFT             = 0x1414,
+    HW_VENDOR_NVIDIA                = 0x10de,
+};
+
+/* Returns the fixed vendor string of this driver; 'pscreen' is not used. */
+static const char *
+d3d12_get_vendor(struct pipe_screen *pscreen)
+{
+   const char *vendor = "Microsoft Corporation";
+
+   return vendor;
+}
+
+/* Returns a human-readable vendor name for the adapter behind 'pscreen',
+ * chosen by the adapter's VendorId; "Unknown" for any ID not listed. */
+static const char *
+d3d12_get_device_vendor(struct pipe_screen *pscreen)
+{
+   struct d3d12_screen* screen = d3d12_screen(pscreen);
+   const unsigned vendor_id = screen->adapter_desc.VendorId;
+
+   if (vendor_id == HW_VENDOR_MICROSOFT)
+      return "Microsoft";
+   if (vendor_id == HW_VENDOR_AMD)
+      return "AMD";
+   if (vendor_id == HW_VENDOR_NVIDIA)
+      return "NVIDIA";
+   if (vendor_id == HW_VENDOR_INTEL)
+      return "Intel";
+
+   return "Unknown";
+}
+
+/* Returns the renderer name, "D3D12 (<adapter description>)", or
+ * "D3D12 (Unknown)" when the adapter description is empty.
+ *
+ * The formatted name lives in a function-local static buffer, so the
+ * returned pointer is overwritten by the next call that formats a name.
+ */
+static const char *
+d3d12_get_name(struct pipe_screen *pscreen)
+{
+   static char buf[1000];
+   struct d3d12_screen* screen = d3d12_screen(pscreen);
+
+   if (screen->adapter_desc.Description[0] != '\0') {
+      /* %S: the description is formatted as a wide string. */
+      snprintf(buf, sizeof(buf), "D3D12 (%S)", screen->adapter_desc.Description);
+      return buf;
+   }
+
+   return "D3D12 (Unknown)";
+}
+
+/* Returns the adapter's total memory in MB: dedicated video memory plus
+ * dedicated system memory plus shared system memory. */
+static int
+d3d12_get_video_mem(struct pipe_screen *pscreen)
+{
+   struct d3d12_screen* screen = d3d12_screen(pscreen);
+
+   /* The sizes are byte counts held in size_t, which may be capped at 4GB.
+    * Each one is converted to MB before summing, because adding the raw
+    * byte counts first could easily overflow. */
+   const size_t dedicated_video_mb = screen->adapter_desc.DedicatedVideoMemory >> 20;
+   const size_t dedicated_system_mb = screen->adapter_desc.DedicatedSystemMemory >> 20;
+   const size_t shared_system_mb = screen->adapter_desc.SharedSystemMemory >> 20;
+
+   return dedicated_video_mb + dedicated_system_mb + shared_system_mb;
+}
+
+/* Answers integer/boolean pipe_cap queries for this screen.
+ *
+ * Several limits depend on screen->max_feature_level, and a few are read
+ * from the queried device data (screen->architecture, screen->opts,
+ * screen->adapter_desc). Caps without a case here are answered by
+ * u_pipe_screen_get_param_defaults(). Cases inside "#if 0" are compiled out,
+ * so those caps currently take the default path too.
+ */
+static int
+d3d12_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+
+   switch (param) {
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1;
+
+   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+      /* D3D12 only supports dual-source blending for a single
+       * render-target. From the D3D11 functional spec (which also defines
+       * this for D3D12):
+       *
+       * "When Dual Source Color Blending is enabled, the Pixel Shader must
+       *  have only a single RenderTarget bound, at slot 0, and must output
+       *  both o0 and o1. Writing to other outputs (o2, o3 etc.) produces
+       *  undefined results for the corresponding RenderTargets, if bound
+       *  illegally."
+       *
+       * Source: https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#17.6%20Dual%20Source%20Color%20Blending
+       */
+      return 1;
+
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 1;
+
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0)
+         return 8;
+      else if (screen->max_feature_level == D3D_FEATURE_LEVEL_9_3)
+         return 4;
+      return 1;
+
+   case PIPE_CAP_TEXTURE_SWIZZLE:
+      return 1;
+
+   case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
+      if (screen->max_feature_level >= D3D_FEATURE_LEVEL_11_0)
+         return 16384;
+      else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0)
+         return 8192;
+      else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_9_3)
+         return 4096;
+      return 2048;
+
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0)
+         return 11;
+      return 9;
+
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      if (screen->max_feature_level >= D3D_FEATURE_LEVEL_11_0)
+         return 14;
+      else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0)
+         return 13;
+      else if (screen->max_feature_level == D3D_FEATURE_LEVEL_9_3)
+         return 12;
+      return 9;
+
+   case PIPE_CAP_PRIMITIVE_RESTART:
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+   case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
+   case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
+   case PIPE_CAP_VERTEX_SHADER_SATURATE:
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
+      return 1;
+
+   /* We need to do some lowering that requires a link to the sampler */
+   case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+      return 1;
+
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      if (screen->max_feature_level >= D3D_FEATURE_LEVEL_11_0)
+         return 1 << 14;
+      else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0)
+         return 1 << 13;
+      return 0;
+
+   case PIPE_CAP_DEPTH_CLIP_DISABLE:
+      return 1;
+
+   case PIPE_CAP_TGSI_TEXCOORD:
+      return 0;
+
+   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+      return 1;
+
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+      return 1;
+
+   case PIPE_CAP_GLSL_FEATURE_LEVEL:
+      return 330;
+   case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
+      return 140;
+
+#if 0 /* TODO: Enable me */
+   case PIPE_CAP_COMPUTE:
+      return 0;
+#endif
+
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return 1;
+
+#if 0 /* TODO: Enable me */
+   case PIPE_CAP_CUBE_MAP_ARRAY:
+      return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_1;
+#endif
+
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return 1;
+
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return 0; /* unsure */
+
+   case PIPE_CAP_ENDIANNESS:
+      return PIPE_ENDIAN_NATIVE; /* unsure */
+
+   case PIPE_CAP_MAX_VIEWPORTS:
+      return 1; /* probably wrong */
+
+   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+      return 1;
+
+#if 0 /* TODO: Enable me. Enables ARB_texture_gather */
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+      return 4;
+#endif
+
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+      return 1;
+
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+      return 1;
+
+   case PIPE_CAP_ACCELERATED:
+      return 1;
+
+   case PIPE_CAP_VIDEO_MEMORY:
+      return d3d12_get_video_mem(pscreen);
+
+   case PIPE_CAP_UMA:
+      return screen->architecture.UMA;
+
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+      return 2048; /* FIXME: no clue how to query this */
+
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+      return 1;
+
+   case PIPE_CAP_SHAREABLE_SHADERS:
+      return 1;
+
+#if 0 /* TODO: Enable me. Enables GL_ARB_shader_storage_buffer_object */
+   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+      return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0;
+#endif
+
+   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+      return 256;
+
+   case PIPE_CAP_PCI_GROUP:
+   case PIPE_CAP_PCI_BUS:
+   case PIPE_CAP_PCI_DEVICE:
+   case PIPE_CAP_PCI_FUNCTION:
+      return 0; /* TODO: figure these out */
+
+   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+      return 0; /* not sure */
+
+   case PIPE_CAP_FLATSHADE:
+   case PIPE_CAP_ALPHA_TEST:
+   case PIPE_CAP_TWO_SIDED_COLOR:
+   case PIPE_CAP_CLIP_PLANES:
+      return 0;
+
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+      return screen->opts.PSSpecifiedStencilRefSupported;
+
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_TGSI_TEX_TXF_LZ:
+   case PIPE_CAP_OCCLUSION_QUERY:
+   case PIPE_CAP_POINT_SPRITE:
+   case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:
+   case PIPE_CAP_PSIZ_CLAMPED:
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+   case PIPE_CAP_CONDITIONAL_RENDER:
+   case PIPE_CAP_QUERY_TIMESTAMP:
+   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+      return 1;
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return 4;
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return 16 * 4;
+
+   /* Geometry shader output. */
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+      return 256;
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return 256 * 4;
+
+   case PIPE_CAP_MAX_VARYINGS:
+      return 32;
+
+   default:
+      return u_pipe_screen_get_param_defaults(pscreen, param);
+   }
+}
+
+static float
+d3d12_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+      return 1.0f; /* no clue */
+
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+      return D3D12_MAX_POINT_SIZE;
+
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0 ? 16.0f : 2.0f;
+
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+      return 15.99f;
+
+   case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
+   case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
+   case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
+      return 0.0f; /* not implemented */
+
+   default:
+      unreachable("unknown pipe_capf");
+   }
+
+   return 0.0;
+}
+
+/* Answers per-stage shader cap queries for this screen.
+ *
+ * Most answers are fixed. A few depend on the shader stage, on
+ * screen->max_feature_level or on the resource binding tier in screen->opts.
+ * The switch has no default case: a cap without a case (including the one
+ * compiled out with "#if 0") falls out of the switch and yields 0.
+ */
+static int
+d3d12_get_shader_param(struct pipe_screen *pscreen,
+                       enum pipe_shader_type shader,
+                       enum pipe_shader_cap param)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+
+   switch (param) {
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      /* Only vertex, fragment and geometry stages report non-zero limits. */
+      if (shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_FRAGMENT ||
+          shader == PIPE_SHADER_GEOMETRY)
+         return INT_MAX;
+      return 0;
+
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_1 ? 32 : 16;
+
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      if (shader == PIPE_SHADER_FRAGMENT) {
+         /* same as max MRTs (not sure if this is correct) */
+         if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0)
+            return 8;
+         else if (screen->max_feature_level == D3D_FEATURE_LEVEL_9_3)
+            return 4;
+         return 1;
+      }
+      return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_1 ? 32 : 16;
+
+   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+      if (screen->opts.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1)
+         return 16;
+      return PIPE_MAX_SAMPLERS;
+
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return 65536;
+
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      return 13; /* 15 - 2 for lowered uniforms and state vars*/
+
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return INT_MAX;
+
+   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_SUBROUTINES:
+      return 0; /* not implemented */
+
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 1;
+
+   case PIPE_SHADER_CAP_INT64_ATOMICS:
+   case PIPE_SHADER_CAP_FP16:
+      return 0; /* not implemented */
+
+   case PIPE_SHADER_CAP_PREFERRED_IR:
+      return PIPE_SHADER_IR_NIR;
+
+   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+      return 0; /* not implemented */
+
+   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+      if (screen->opts.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1)
+         return 128;
+      return PIPE_MAX_SHADER_SAMPLER_VIEWS;
+
+   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      return 0; /* not implemented */
+
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+      return 0; /* no idea */
+
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32; /* arbitrary */
+
+#if 0
+   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+      return 8; /* no clue */
+#endif
+
+   case PIPE_SHADER_CAP_SUPPORTED_IRS:
+      return 1 << PIPE_SHADER_IR_NIR;
+
+   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+      return 0; /* TODO: enable me */
+
+   case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+   case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+      return 0; /* unsure */
+
+   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+      return 0; /* not implemented */
+   }
+
+   /* should only get here on unhandled cases */
+   return 0;
+}
+
+/* Reports whether 'format' can be used for 'target' with the given sample
+ * counts and bind flags.
+ *
+ * The checks run in this order:
+ *  - sample_count and storage_sample_count must agree (0 counts as 1);
+ *  - 3-component 32-bit formats are rejected for everything but buffers;
+ *  - alpha and luminance-alpha formats other than A8_UNORM are rejected;
+ *  - the format must map to a known DXGI format;
+ *  - the device must report support for the target's dimension and for
+ *    every requested bind flag;
+ *  - for multisampling, the sample count must be a power of two and the
+ *    device must report at least one quality level.
+ */
+static bool
+d3d12_is_format_supported(struct pipe_screen *pscreen,
+                          enum pipe_format format,
+                          enum pipe_texture_target target,
+                          unsigned sample_count,
+                          unsigned storage_sample_count,
+                          unsigned bind)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+
+   if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+      return false;
+
+   if (target == PIPE_BUFFER) {
+      /* Replace emulated vertex element formats for the tests */
+      format = d3d12_emulated_vtx_format(format);
+   } else {
+      /* Allow 3-comp 32 bit formats only for BOs (needed for ARB_tbo_rgb32) */
+      if ((format == PIPE_FORMAT_R32G32B32_FLOAT ||
+           format == PIPE_FORMAT_R32G32B32_SINT ||
+           format == PIPE_FORMAT_R32G32B32_UINT))
+         return false;
+   }
+
+   /* Don't advertise alpha/luminance_alpha formats because they can't be used
+    * for render targets (except A8_UNORM) and can't be emulated by R/RG formats.
+    * Let the state tracker choose an RGBA format instead. */
+   if (format != PIPE_FORMAT_A8_UNORM &&
+       (util_format_is_alpha(format) ||
+        util_format_is_luminance_alpha(format)))
+      return false;
+
+   DXGI_FORMAT dxgi_format = d3d12_get_format(format);
+   if (dxgi_format == DXGI_FORMAT_UNKNOWN)
+      return false;
+
+   /* Pick the dimension-support bit that matches the target. */
+   enum D3D12_FORMAT_SUPPORT1 dim_support = D3D12_FORMAT_SUPPORT1_NONE;
+   switch (target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      dim_support = D3D12_FORMAT_SUPPORT1_TEXTURE1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+      dim_support = D3D12_FORMAT_SUPPORT1_TEXTURE2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      dim_support = D3D12_FORMAT_SUPPORT1_TEXTURE3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      dim_support = D3D12_FORMAT_SUPPORT1_TEXTURECUBE;
+      break;
+   case PIPE_BUFFER:
+      dim_support = D3D12_FORMAT_SUPPORT1_BUFFER;
+      break;
+   default:
+      unreachable("Unknown target");
+   }
+
+   /* The capability query uses the format's render-target variant. */
+   D3D12_FEATURE_DATA_FORMAT_SUPPORT fmt_info;
+   fmt_info.Format = d3d12_get_resource_rt_format(format);
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT,
+                                               &fmt_info, sizeof(fmt_info))))
+      return false;
+
+   if (!(fmt_info.Support1 & dim_support))
+      return false;
+
+   if (target == PIPE_BUFFER) {
+      if (bind & PIPE_BIND_VERTEX_BUFFER &&
+          !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER))
+         return false;
+
+      if (bind & PIPE_BIND_INDEX_BUFFER &&
+          !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER))
+         return false;
+
+      /* Buffers are never multisampled. */
+      if (sample_count > 0)
+         return false;
+   } else {
+      /* all other targets are texture-targets */
+      if (bind & PIPE_BIND_RENDER_TARGET &&
+          !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET))
+         return false;
+
+      if (bind & PIPE_BIND_BLENDABLE &&
+         !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE))
+         return false;
+
+      /* Depth/stencil formats are queried a second time through their SRV
+       * format; every other format reuses the result from above. */
+      D3D12_FEATURE_DATA_FORMAT_SUPPORT fmt_info_sv;
+      if (util_format_is_depth_or_stencil(format)) {
+         fmt_info_sv.Format = d3d12_get_resource_srv_format(format, target);
+         if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT,
+                                                     &fmt_info_sv, sizeof(fmt_info_sv))))
+            return false;
+      } else
+         fmt_info_sv = fmt_info;
+
+      if (bind & PIPE_BIND_DISPLAY_TARGET &&
+         (!(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DISPLAY) ||
+            // Disable formats that don't support flip model
+            dxgi_format == DXGI_FORMAT_B8G8R8X8_UNORM ||
+            dxgi_format == DXGI_FORMAT_B5G5R5A1_UNORM ||
+            dxgi_format == DXGI_FORMAT_B5G6R5_UNORM ||
+            dxgi_format == DXGI_FORMAT_B4G4R4A4_UNORM))
+         return false;
+
+      if (bind & PIPE_BIND_DEPTH_STENCIL &&
+          !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL))
+            return false;
+
+      if (sample_count > 0) {
+         if (!(fmt_info_sv.Support1 & D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD))
+            return false;
+
+         if (!util_is_power_of_two_nonzero(sample_count))
+            return false;
+
+         /* The device must offer at least one quality level for this
+          * format/sample-count pair. */
+         D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = {};
+         ms_info.Format = dxgi_format;
+         ms_info.SampleCount = sample_count;
+         if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS,
+                                                     &ms_info,
+                                                     sizeof(ms_info))) ||
+             !ms_info.NumQualityLevels)
+            return false;
+      }
+   }
+   return true;
+}
+
+/* Tears down the screen: destroys the transfer slab pool, then the slab,
+ * cache and base buffer managers in that order, and finally frees the
+ * screen itself.
+ *
+ * NOTE(review): nothing here releases screen->dev or screen->cmdqueue, both
+ * of which are used elsewhere in this file -- confirm whether they are
+ * released somewhere else. */
+static void
+d3d12_destroy_screen(struct pipe_screen *pscreen)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+   slab_destroy_parent(&screen->transfer_pool);
+   /* Presumably the managers are layered on each other, hence this order
+    * -- TODO confirm. */
+   screen->slab_bufmgr->destroy(screen->slab_bufmgr);
+   screen->cache_bufmgr->destroy(screen->cache_bufmgr);
+   screen->bufmgr->destroy(screen->bufmgr);
+   FREE(screen);
+}
+
+static void
+d3d12_flush_frontbuffer(struct pipe_screen * pscreen,
+                        struct pipe_resource *pres,
+                        unsigned level, unsigned layer,
+                        void *winsys_drawable_handle,
+                        struct pipe_box *sub_box)
+{
+   struct d3d12_screen *screen = d3d12_screen(pscreen);
+   struct sw_winsys *winsys = screen->winsys;
+   struct d3d12_resource *res = d3d12_resource(pres);
+   ID3D12Resource *d3d12_res = d3d12_resource_resource(res);
+
+   if (!winsys)
+     return;
+
+   assert(res->dt);
+   void *map = winsys->displaytarget_map(winsys, res->dt, 0);
+
+   if (map) {
+      d3d12_res->ReadFromSubresource(map, res->dt_stride, 0, 0, NULL);
+      winsys->displaytarget_unmap(winsys, res->dt);
+   }
+
+   ID3D12SharingContract *sharing_contract;
+   if (SUCCEEDED(screen->cmdqueue->QueryInterface(__uuidof(sharing_contract),
+                                                  (void **)&sharing_contract)))
+      sharing_contract->Present(d3d12_res, 0, WindowFromDC((HDC)winsys_drawable_handle));
+
+   winsys->displaytarget_display(winsys, res->dt, winsys_drawable_handle, sub_box);
+}
+
+static ID3D12Debug *
+get_debug_interface()
+{
+   typedef HRESULT(WINAPI *PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid, void **ppFactory);
+   PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface;
+
+   HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
+   if (!hD3D12Mod) {
+      debug_printf("D3D12: failed to load D3D12.DLL\n");
+      return NULL;
+   }
+
+   D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
+   if (!D3D12GetDebugInterface) {
+      debug_printf("D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
+      return NULL;
+   }
+
+   ID3D12Debug *debug;
+   if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)&debug))) {
+      debug_printf("D3D12: D3D12GetDebugInterface failed\n");
+      return NULL;
+   }
+
+   return debug;
+}
+
+/**
+ * Turns on the D3D12 debug layer if the debug interface can be obtained;
+ * silently does nothing otherwise (get_debug_interface() already logs).
+ */
+static void
+enable_d3d12_debug_layer()
+{
+   ID3D12Debug *debug = get_debug_interface();
+   if (!debug)
+      return;
+
+   debug->EnableDebugLayer();
+   /* The interface is only needed for the call above; release our reference
+    * instead of leaking it. */
+   debug->Release();
+}
+
+/**
+ * Enables GPU-based validation through ID3D12Debug3, when both the debug
+ * interface and its ID3D12Debug3 extension are available.
+ */
+static void
+enable_gpu_validation()
+{
+   ID3D12Debug *debug = get_debug_interface();
+   if (!debug)
+      return;
+
+   /* Query for ID3D12Debug3 explicitly.  Asking for the IID of `debug`
+    * (ID3D12Debug) would succeed but hand back a plain ID3D12Debug pointer,
+    * and calling SetEnableGPUBasedValidation() through it would go through
+    * the wrong vtable. */
+   ID3D12Debug3 *debug3 = NULL;
+   if (SUCCEEDED(debug->QueryInterface(__uuidof(ID3D12Debug3), (void **)&debug3))) {
+      debug3->SetEnableGPUBasedValidation(true);
+      debug3->Release();
+   }
+
+   debug->Release();
+}
+
+static IDXGIFactory4 *
+get_dxgi_factory()
+{
+   static const GUID IID_IDXGIFactory4 = {
+      0x1bc6ea02, 0xef36, 0x464f,
+      { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
+   };
+
+   typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory);
+   PFN_CREATE_DXGI_FACTORY CreateDXGIFactory;
+
+   HMODULE hDXGIMod = LoadLibrary("DXGI.DLL");
+   if (!hDXGIMod) {
+      debug_printf("D3D12: failed to load DXGI.DLL\n");
+      return NULL;
+   }
+
+   CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory");
+   if (!CreateDXGIFactory) {
+      debug_printf("D3D12: failed to load CreateDXGIFactory from DXGI.DLL\n");
+      return NULL;
+   }
+
+   IDXGIFactory4 *factory = NULL;
+   HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory);
+   if (FAILED(hr)) {
+      debug_printf("D3D12: CreateDXGIFactory failed: %08x\n", hr);
+      return NULL;
+   }
+
+   return factory;
+}
+
+static IDXGIAdapter1 *
+choose_adapter(IDXGIFactory4 *factory, LUID *adapter)
+{
+   IDXGIAdapter1 *ret;
+   if (adapter) {
+      if (SUCCEEDED(factory->EnumAdapterByLuid(*adapter,
+                                               __uuidof(IDXGIAdapter1),
+                                               (void**)&ret)))
+         return ret;
+      debug_printf("D3D12: requested adapter missing, falling back to auto-detection...\n");
+   }
+
+   bool want_warp = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false);
+   if (want_warp) {
+      if (SUCCEEDED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
+                                             (void**)&ret)))
+         return ret;
+      debug_printf("D3D12: failed to enum warp adapter\n");
+      return NULL;
+   }
+
+   // The first adapter is the default
+   if (SUCCEEDED(factory->EnumAdapters1(0, &ret)))
+      return ret;
+
+   return NULL;
+}
+
+static ID3D12Device *
+create_device(IDXGIAdapter1 *adapter)
+{
+   typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**);
+   typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID*, void*, UINT*);
+   PFN_D3D12CREATEDEVICE D3D12CreateDevice;
+   PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
+
+   HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
+   if (!hD3D12Mod) {
+      debug_printf("D3D12: failed to load D3D12.DLL\n");
+      return NULL;
+   }
+
+   if (d3d12_debug & D3D12_DEBUG_EXPERIMENTAL) {
+      D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
+      D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL);
+   }
+
+   D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
+   if (!D3D12CreateDevice) {
+      debug_printf("D3D12: failed to load D3D12CreateDevice from D3D12.DLL\n");
+      return NULL;
+   }
+
+   ID3D12Device *dev;
+   if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0,
+                 __uuidof(ID3D12Device), (void **)&dev)))
+      return dev;
+
+   debug_printf("D3D12: D3D12CreateDevice failed\n");
+   return NULL;
+}
+
+/**
+ * Decides the value stored in d3d12_screen::have_load_at_vertex: always true
+ * for adapters whose vendor id is HW_VENDOR_MICROSOFT, otherwise whatever the
+ * device reports in opts3.BarycentricsSupported.
+ */
+static bool
+can_attribute_at_vertex(struct d3d12_screen *screen)
+{
+   if (screen->adapter_desc.VendorId == HW_VENDOR_MICROSOFT)
+      return true;
+
+   return screen->opts3.BarycentricsSupported;
+}
+
+struct pipe_screen *
+d3d12_create_screen(struct sw_winsys *winsys, LUID *adapter_luid)
+{
+   struct d3d12_screen *screen = CALLOC_STRUCT(d3d12_screen);
+   if (!screen)
+      return NULL;
+
+   d3d12_debug = debug_get_option_d3d12_debug();
+
+   screen->winsys = winsys;
+
+   screen->base.get_name = d3d12_get_name;
+   screen->base.get_vendor = d3d12_get_vendor;
+   screen->base.get_device_vendor = d3d12_get_device_vendor;
+   screen->base.get_param = d3d12_get_param;
+   screen->base.get_paramf = d3d12_get_paramf;
+   screen->base.get_shader_param = d3d12_get_shader_param;
+   screen->base.is_format_supported = d3d12_is_format_supported;
+   screen->base.get_compiler_options = d3d12_get_compiler_options;
+   screen->base.context_create = d3d12_context_create;
+   screen->base.flush_frontbuffer = d3d12_flush_frontbuffer;
+   screen->base.destroy = d3d12_destroy_screen;
+
+#ifndef DEBUG
+   if (d3d12_debug & D3D12_DEBUG_DEBUG_LAYER)
+#endif
+      enable_d3d12_debug_layer();
+
+   if (d3d12_debug & D3D12_DEBUG_GPU_VALIDATOR)
+      enable_gpu_validation();
+
+   screen->factory = get_dxgi_factory();
+   if (!screen->factory) {
+      debug_printf("D3D12: failed to create DXGI factory\n");
+      goto failed;
+   }
+
+   screen->adapter = choose_adapter(screen->factory, adapter_luid);
+   if (!screen->adapter) {
+      debug_printf("D3D12: no suitable adapter\n");
+      return NULL;
+   }
+
+   if (FAILED(screen->adapter->GetDesc1(&screen->adapter_desc))) {
+      debug_printf("D3D12: failed to retrieve adapter description\n");
+      return NULL;
+   }
+
+   screen->dev = create_device(screen->adapter);
+   if (!screen->dev) {
+      debug_printf("D3D12: failed to create device\n");
+      goto failed;
+   }
+
+   ID3D12InfoQueue *info_queue;
+   if (SUCCEEDED(screen->dev->QueryInterface(__uuidof(info_queue),
+                                             (void **)&info_queue))) {
+      D3D12_MESSAGE_SEVERITY severities[] = {
+         D3D12_MESSAGE_SEVERITY_INFO,
+         D3D12_MESSAGE_SEVERITY_WARNING,
+      };
+
+      D3D12_MESSAGE_ID msg_ids[] = {
+         D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
+      };
+
+      D3D12_INFO_QUEUE_FILTER NewFilter = {};
+      NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities);
+      NewFilter.DenyList.pSeverityList = severities;
+      NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids);
+      NewFilter.DenyList.pIDList = msg_ids;
+
+      info_queue->PushStorageFilter(&NewFilter);
+   }
+
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
+                                               &screen->opts,
+                                               sizeof(screen->opts)))) {
+      debug_printf("D3D12: failed to get device options\n");
+      goto failed;
+   }
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2,
+                                               &screen->opts2,
+                                               sizeof(screen->opts2)))) {
+      debug_printf("D3D12: failed to get device options\n");
+      goto failed;
+   }
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3,
+                                               &screen->opts3,
+                                               sizeof(screen->opts3)))) {
+      debug_printf("D3D12: failed to get device options\n");
+      goto failed;
+   }
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4,
+                                               &screen->opts4,
+                                               sizeof(screen->opts4)))) {
+      debug_printf("D3D12: failed to get device options\n");
+      goto failed;
+   }
+
+   screen->architecture.NodeIndex = 0;
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE,
+                                               &screen->architecture,
+                                               sizeof(screen->architecture)))) {
+      debug_printf("D3D12: failed to get device architecture\n");
+      goto failed;
+   }
+
+   D3D12_FEATURE_DATA_FEATURE_LEVELS feature_levels;
+   static const D3D_FEATURE_LEVEL levels[] = {
+      D3D_FEATURE_LEVEL_11_0,
+      D3D_FEATURE_LEVEL_11_1,
+      D3D_FEATURE_LEVEL_12_0,
+      D3D_FEATURE_LEVEL_12_1,
+   };
+   feature_levels.NumFeatureLevels = ARRAY_SIZE(levels);
+   feature_levels.pFeatureLevelsRequested = levels;
+   if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS,
+                                               &feature_levels,
+                                               sizeof(feature_levels)))) {
+      debug_printf("D3D12: failed to get device feature levels\n");
+      goto failed;
+   }
+   screen->max_feature_level = feature_levels.MaxSupportedFeatureLevel;
+
+   D3D12_COMMAND_QUEUE_DESC queue_desc;
+   queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+   queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
+   queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
+   queue_desc.NodeMask = 0;
+   if (FAILED(screen->dev->CreateCommandQueue(&queue_desc,
+                                              __uuidof(screen->cmdqueue),
+                                              (void **)&screen->cmdqueue)))
+      goto failed;
+
+   UINT64 timestamp_freq;
+   if (FAILED(screen->cmdqueue->GetTimestampFrequency(&timestamp_freq)))
+       timestamp_freq = 10000000;
+   screen->timestamp_multiplier = 1000000000.0 / timestamp_freq;
+
+   d3d12_screen_fence_init(&screen->base);
+   d3d12_screen_resource_init(&screen->base);
+   slab_create_parent(&screen->transfer_pool, sizeof(struct d3d12_transfer), 16);
+
+   struct pb_desc desc;
+   desc.alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
+   desc.usage = (pb_usage_flags)PB_USAGE_ALL;
+
+   screen->bufmgr = d3d12_bufmgr_create(screen);
+   screen->cache_bufmgr = pb_cache_manager_create(screen->bufmgr, 0xfffff, 2, 0, 64 * 1024 * 1024);
+   screen->slab_bufmgr = pb_slab_range_manager_create(screen->cache_bufmgr, 16, 512,
+                                                      D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+                                                      &desc);
+
+   screen->have_load_at_vertex = can_attribute_at_vertex(screen);
+   return &screen->base;
+
+failed:
+   FREE(screen);
+   return NULL;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_screen.h b/src/gallium/drivers/d3d12/d3d12_screen.h
new file mode 100644 (file)
index 0000000..8370939
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_SCREEN_H
+#define D3D12_SCREEN_H
+
+#include "pipe/p_screen.h"
+
+#include "util/slab.h"
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <d3d12.h>
+#include <dxgi1_4.h>
+
+struct pb_manager;
+
+struct d3d12_screen { /* driver-private screen: a pipe_screen plus the DXGI/D3D12 objects and cached device caps */
+   struct pipe_screen base; /* d3d12_screen() casts a pipe_screen pointer straight to this struct */
+   struct sw_winsys *winsys; /* as passed to d3d12_create_screen(); d3d12_flush_frontbuffer() returns early if NULL */
+
+   IDXGIFactory4 *factory; /* from get_dxgi_factory() */
+   IDXGIAdapter1 *adapter; /* from choose_adapter() */
+   ID3D12Device *dev; /* from create_device() */
+   ID3D12CommandQueue *cmdqueue; /* created with D3D12_COMMAND_LIST_TYPE_DIRECT */
+
+   struct slab_parent_pool transfer_pool; /* slab parent created for struct d3d12_transfer items */
+   struct pb_manager *bufmgr; /* base manager from d3d12_bufmgr_create() */
+   struct pb_manager *cache_bufmgr; /* pb_cache_manager_create() on top of bufmgr */
+   struct pb_manager *slab_bufmgr; /* pb_slab_range_manager_create() on top of cache_bufmgr */
+
+   /* capabilities */
+   D3D_FEATURE_LEVEL max_feature_level; /* MaxSupportedFeatureLevel among 11_0, 11_1, 12_0, 12_1 */
+   D3D12_FEATURE_DATA_ARCHITECTURE architecture; /* queried with NodeIndex 0 */
+   D3D12_FEATURE_DATA_D3D12_OPTIONS opts; /* D3D12_FEATURE_D3D12_OPTIONS */
+   D3D12_FEATURE_DATA_D3D12_OPTIONS2 opts2; /* D3D12_FEATURE_D3D12_OPTIONS2 */
+   D3D12_FEATURE_DATA_D3D12_OPTIONS3 opts3; /* D3D12_FEATURE_D3D12_OPTIONS3; BarycentricsSupported feeds have_load_at_vertex */
+   D3D12_FEATURE_DATA_D3D12_OPTIONS4 opts4; /* D3D12_FEATURE_D3D12_OPTIONS4 */
+
+   /* description */
+   DXGI_ADAPTER_DESC1 adapter_desc; /* filled by IDXGIAdapter1::GetDesc1() */
+   double timestamp_multiplier; /* 1e9 / command-queue timestamp frequency, i.e. nanoseconds per tick */
+   bool have_load_at_vertex; /* result of can_attribute_at_vertex() at screen creation */
+};
+
+/* Downcast a pipe_screen pointer to the d3d12_screen it is embedded in. */
+static inline struct d3d12_screen *
+d3d12_screen(struct pipe_screen *pipe)
+{
+   struct d3d12_screen *screen = (struct d3d12_screen *)pipe;
+   return screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_surface.cpp b/src/gallium/drivers/d3d12/d3d12_surface.cpp
new file mode 100644 (file)
index 0000000..9ddbb7e
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_context.h"
+#include "d3d12_format.h"
+#include "d3d12_resource.h"
+#include "d3d12_screen.h"
+#include "d3d12_surface.h"
+
+#include "util/format/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+static D3D12_DSV_DIMENSION
+view_dsv_dimension(enum pipe_texture_target target, unsigned samples)
+{
+   switch (target) {
+   case PIPE_TEXTURE_1D: return D3D12_DSV_DIMENSION_TEXTURE1D;
+   case PIPE_TEXTURE_1D_ARRAY: return D3D12_DSV_DIMENSION_TEXTURE1DARRAY;
+
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      return samples > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMS :
+                           D3D12_DSV_DIMENSION_TEXTURE2D;
+
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE:
+      return samples > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY :
+                           D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
+
+   default:
+      unreachable("unexpected target");
+   }
+}
+
+static D3D12_RTV_DIMENSION
+view_rtv_dimension(enum pipe_texture_target target, unsigned samples)
+{
+   switch (target) {
+   case PIPE_BUFFER: return D3D12_RTV_DIMENSION_BUFFER;
+   case PIPE_TEXTURE_1D: return D3D12_RTV_DIMENSION_TEXTURE1D;
+   case PIPE_TEXTURE_1D_ARRAY: return D3D12_RTV_DIMENSION_TEXTURE1DARRAY;
+
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      return samples > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMS :
+                           D3D12_RTV_DIMENSION_TEXTURE2D;
+
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE:
+      return samples > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY :
+                           D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
+
+   case PIPE_TEXTURE_3D: return D3D12_RTV_DIMENSION_TEXTURE3D;
+
+   default:
+      unreachable("unexpected target");
+   }
+}
+
+static void
+initialize_dsv(struct pipe_context *pctx,
+               struct pipe_resource *pres,
+               const struct pipe_surface *tpl,
+               struct d3d12_descriptor_handle *handle,
+               DXGI_FORMAT dxgi_format)
+{
+   struct d3d12_resource *res = d3d12_resource(pres);
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+
+   D3D12_DEPTH_STENCIL_VIEW_DESC desc;
+   desc.Format = dxgi_format;
+   desc.Flags = D3D12_DSV_FLAG_NONE;
+
+   desc.ViewDimension = view_dsv_dimension(pres->target, pres->nr_samples);
+   switch (desc.ViewDimension) {
+   case D3D12_DSV_DIMENSION_TEXTURE1D:
+      if (tpl->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 1D DSV from layer %d\n",
+                      tpl->u.tex.first_layer);
+
+      desc.Texture1D.MipSlice = tpl->u.tex.level;
+      break;
+
+   case D3D12_DSV_DIMENSION_TEXTURE1DARRAY:
+      desc.Texture1DArray.MipSlice = tpl->u.tex.level;
+      desc.Texture1DArray.FirstArraySlice = tpl->u.tex.first_layer;
+      desc.Texture1DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      break;
+
+   case D3D12_DSV_DIMENSION_TEXTURE2DMS:
+      if (tpl->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 2DMS DSV from layer %d\n",
+                      tpl->u.tex.first_layer);
+
+      break;
+
+   case D3D12_DSV_DIMENSION_TEXTURE2D:
+      if (tpl->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 2D DSV from layer %d\n",
+                      tpl->u.tex.first_layer);
+
+      desc.Texture2D.MipSlice = tpl->u.tex.level;
+      break;
+
+   case D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY:
+      desc.Texture2DMSArray.FirstArraySlice = tpl->u.tex.first_layer;
+      desc.Texture2DMSArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      break;
+
+   case D3D12_DSV_DIMENSION_TEXTURE2DARRAY:
+      desc.Texture2DArray.MipSlice = tpl->u.tex.level;
+      desc.Texture2DArray.FirstArraySlice = tpl->u.tex.first_layer;
+      desc.Texture2DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      break;
+   }
+
+   d3d12_descriptor_pool_alloc_handle(ctx->dsv_pool, handle);
+   screen->dev->CreateDepthStencilView(d3d12_resource_resource(res), &desc,
+                                       handle->cpu_handle);
+}
+
+static void
+initialize_rtv(struct pipe_context *pctx,
+               struct pipe_resource *pres,
+               const struct pipe_surface *tpl,
+               struct d3d12_descriptor_handle *handle,
+               DXGI_FORMAT dxgi_format)
+{
+   struct d3d12_resource *res = d3d12_resource(pres);
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+
+   D3D12_RENDER_TARGET_VIEW_DESC desc;
+   desc.Format = dxgi_format;
+
+   desc.ViewDimension = view_rtv_dimension(pres->target, pres->nr_samples);
+   switch (desc.ViewDimension) {
+   case D3D12_RTV_DIMENSION_BUFFER:
+      desc.Buffer.FirstElement = 0;
+      desc.Buffer.NumElements = pres->width0 / util_format_get_blocksize(tpl->format);
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE1D:
+      if (tpl->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 1D RTV from layer %d\n",
+                      tpl->u.tex.first_layer);
+
+      desc.Texture1D.MipSlice = tpl->u.tex.level;
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE1DARRAY:
+      desc.Texture1DArray.MipSlice = tpl->u.tex.level;
+      desc.Texture1DArray.FirstArraySlice = tpl->u.tex.first_layer;
+      desc.Texture1DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE2DMS:
+      if (tpl->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 2DMS RTV from layer %d\n",
+                      tpl->u.tex.first_layer);
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE2D:
+      if (tpl->u.tex.first_layer > 0)
+         debug_printf("D3D12: can't create 2D RTV from layer %d\n",
+                      tpl->u.tex.first_layer);
+
+      desc.Texture2D.MipSlice = tpl->u.tex.level;
+      desc.Texture2D.PlaneSlice = 0;
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY:
+      desc.Texture2DMSArray.FirstArraySlice = tpl->u.tex.first_layer;
+      desc.Texture2DMSArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE2DARRAY:
+      desc.Texture2DArray.MipSlice = tpl->u.tex.level;
+      desc.Texture2DArray.FirstArraySlice = tpl->u.tex.first_layer;
+      desc.Texture2DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      desc.Texture2DArray.PlaneSlice = 0;
+      break;
+
+   case D3D12_RTV_DIMENSION_TEXTURE3D:
+      desc.Texture3D.MipSlice = tpl->u.tex.level;
+      desc.Texture3D.FirstWSlice = tpl->u.tex.first_layer;
+      desc.Texture3D.WSize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1;
+      break;
+   }
+
+   d3d12_descriptor_pool_alloc_handle(ctx->rtv_pool, handle);
+   screen->dev->CreateRenderTargetView(d3d12_resource_resource(res), &desc,
+                                       handle->cpu_handle);
+}
+
+static struct pipe_surface *
+d3d12_create_surface(struct pipe_context *pctx,
+                     struct pipe_resource *pres,
+                     const struct pipe_surface *tpl)
+{
+   struct d3d12_resource *res = d3d12_resource(pres);
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
+
+   bool is_depth_or_stencil = util_format_is_depth_or_stencil(tpl->format);
+   unsigned bind = is_depth_or_stencil ? PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET;
+
+   /* Don't bother if we don't support the requested format as RT or DS */
+   if (!pctx->screen->is_format_supported(pctx->screen, tpl->format, PIPE_TEXTURE_2D,
+                                          tpl->nr_samples, tpl->nr_samples,bind))
+      return NULL;
+
+   struct d3d12_surface *surface = CALLOC_STRUCT(d3d12_surface);
+   if (!surface)
+      return NULL;
+
+   pipe_resource_reference(&surface->base.texture, pres);
+   pipe_reference_init(&surface->base.reference, 1);
+   surface->base.context = pctx;
+   surface->base.format = tpl->format;
+   surface->base.width = u_minify(pres->width0, tpl->u.tex.level);
+   surface->base.height = u_minify(pres->height0, tpl->u.tex.level);
+   surface->base.u.tex.level = tpl->u.tex.level;
+   surface->base.u.tex.first_layer = tpl->u.tex.first_layer;
+   surface->base.u.tex.last_layer = tpl->u.tex.last_layer;
+
+   DXGI_FORMAT dxgi_format = d3d12_get_resource_rt_format(tpl->format);
+   if (is_depth_or_stencil)
+      initialize_dsv(pctx, pres, tpl, &surface->desc_handle, dxgi_format);
+   else
+      initialize_rtv(pctx, pres, tpl, &surface->desc_handle, dxgi_format);
+
+   return &surface->base;
+}
+
+/**
+ * pipe_context::surface_destroy hook.
+ *
+ * Frees the surface's descriptor, the UINT RTV descriptor if one was
+ * allocated, drops the references on the texture and on the RGBA shadow
+ * texture, and frees the surface.  pctx is not used.
+ */
+static void
+d3d12_surface_destroy(struct pipe_context *pctx,
+                      struct pipe_surface *psurf)
+{
+   struct d3d12_surface *surface = (struct d3d12_surface*) psurf;
+
+   d3d12_descriptor_handle_free(&surface->desc_handle);
+   /* The UINT view is only created on demand by
+    * d3d12_surface_update_pre_draw(). */
+   if (d3d12_descriptor_handle_is_allocated(&surface->uint_rtv_handle))
+      d3d12_descriptor_handle_free(&surface->uint_rtv_handle);
+   pipe_resource_reference(&psurf->texture, NULL);
+   pipe_resource_reference(&surface->rgba_texture, NULL);
+   FREE(surface);
+}
+
+static void
+blit_surface(struct d3d12_surface *surface, bool pre)
+{
+   struct pipe_blit_info info = {0};
+
+   info.src.resource = pre ? surface->base.texture : surface->rgba_texture;
+   info.dst.resource = pre ? surface->rgba_texture : surface->base.texture;
+   info.src.format = pre ? surface->base.texture->format : PIPE_FORMAT_R8G8B8A8_UNORM;
+   info.dst.format = pre ? PIPE_FORMAT_R8G8B8A8_UNORM : surface->base.texture->format;
+   info.src.level = info.dst.level = 0;
+   info.src.box.x = info.dst.box.x = 0;
+   info.src.box.y = info.dst.box.y = 0;
+   info.src.box.z = info.dst.box.z = 0;
+   info.src.box.width = info.dst.box.width = surface->base.width;
+   info.src.box.height = info.dst.box.height = surface->base.height;
+   info.src.box.depth = info.dst.box.depth = 0;
+   info.mask = PIPE_MASK_RGBA;
+
+   d3d12_blit(surface->base.context, &info);
+}
+
+enum d3d12_surface_conversion_mode
+d3d12_surface_update_pre_draw(struct d3d12_surface *surface,
+                              DXGI_FORMAT format)
+{
+   struct d3d12_screen *screen = d3d12_screen(surface->base.context->screen);
+   struct d3d12_resource *res = d3d12_resource(surface->base.texture);
+   DXGI_FORMAT dxgi_format = d3d12_get_resource_rt_format(surface->base.format);
+   enum d3d12_surface_conversion_mode mode;
+
+   if (dxgi_format == format)
+      return D3D12_SURFACE_CONVERSION_NONE;
+
+   if (dxgi_format == DXGI_FORMAT_B8G8R8A8_UNORM ||
+       dxgi_format == DXGI_FORMAT_B8G8R8X8_UNORM)
+      mode = D3D12_SURFACE_CONVERSION_BGRA_UINT;
+   else
+      mode = D3D12_SURFACE_CONVERSION_RGBA_UINT;
+
+   if (mode == D3D12_SURFACE_CONVERSION_BGRA_UINT) {
+      if (!surface->rgba_texture) {
+         struct pipe_resource templ = {{0}};
+         struct pipe_resource *src = surface->base.texture;
+
+         templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+         templ.width0 = src->width0;
+         templ.height0 = src->height0;
+         templ.depth0 = src->depth0;
+         templ.array_size = src->array_size;
+         templ.nr_samples = src->nr_samples;
+         templ.nr_storage_samples = src->nr_storage_samples;
+         templ.usage = PIPE_USAGE_DEFAULT | PIPE_USAGE_STAGING;
+         templ.bind = src->bind;
+         templ.target = src->target;
+
+         surface->rgba_texture = screen->base.resource_create(&screen->base, &templ);
+      }
+
+      blit_surface(surface, true);
+      res = d3d12_resource(surface->rgba_texture);
+   }
+
+   if (!d3d12_descriptor_handle_is_allocated(&surface->uint_rtv_handle)) {
+      initialize_rtv(surface->base.context, &res->base, &surface->base,
+                     &surface->uint_rtv_handle, DXGI_FORMAT_R8G8B8A8_UINT);
+   }
+
+   return mode;
+}
+
+/**
+ * Counterpart of d3d12_surface_update_pre_draw(): for the BGRA_UINT mode the
+ * RGBA shadow texture is blitted back into the surface's texture.  The other
+ * modes need no work.
+ */
+void
+d3d12_surface_update_post_draw(struct d3d12_surface *surface,
+                               enum d3d12_surface_conversion_mode mode)
+{
+   if (mode != D3D12_SURFACE_CONVERSION_BGRA_UINT)
+      return;
+
+   blit_surface(surface, false);
+}
+
+/**
+ * Returns the CPU descriptor handle to bind for the given conversion mode:
+ * the surface's regular descriptor for D3D12_SURFACE_CONVERSION_NONE, the
+ * UINT render-target view for every other mode.
+ */
+D3D12_CPU_DESCRIPTOR_HANDLE
+d3d12_surface_get_handle(struct d3d12_surface *surface,
+                         enum d3d12_surface_conversion_mode mode)
+{
+   const struct d3d12_descriptor_handle *selected =
+      (mode == D3D12_SURFACE_CONVERSION_NONE) ? &surface->desc_handle
+                                              : &surface->uint_rtv_handle;
+
+   return selected->cpu_handle;
+}
+
+/* Installs this file's surface create/destroy hooks on the context. */
+void
+d3d12_context_surface_init(struct pipe_context *context)
+{
+   context->surface_destroy = d3d12_surface_destroy;
+   context->create_surface = d3d12_create_surface;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_surface.h b/src/gallium/drivers/d3d12/d3d12_surface.h
new file mode 100644 (file)
index 0000000..c21933a
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_SURFACE_H
+#define D3D12_SURFACE_H
+
+#include "pipe/p_state.h"
+
+#include <d3d12.h>
+
+struct d3d12_descriptor_handle;
+struct pipe_context;
+
+struct d3d12_surface { /* NOTE(review): embeds d3d12_descriptor_handle by value, so its full definition must be visible before this point; the forward declaration above is not enough on its own */
+   struct pipe_surface base; /* d3d12_surface() casts a pipe_surface pointer straight to this struct */
+   struct d3d12_descriptor_handle uint_rtv_handle; /* R8G8B8A8_UINT RTV, created on demand by d3d12_surface_update_pre_draw() */
+   struct pipe_resource *rgba_texture; /* R8G8B8A8_UNORM shadow texture, only created for the BGRA_UINT conversion mode */
+
+   struct d3d12_descriptor_handle desc_handle; /* RTV or DSV in the surface's own format, created together with the surface */
+};
+
+enum d3d12_surface_conversion_mode { /* result of d3d12_surface_update_pre_draw() */
+   D3D12_SURFACE_CONVERSION_NONE, /* requested format equals the surface's RT format; desc_handle is used */
+   D3D12_SURFACE_CONVERSION_RGBA_UINT, /* R8G8B8A8_UINT view of the surface's own texture is used */
+   D3D12_SURFACE_CONVERSION_BGRA_UINT, /* B8G8R8A8/B8G8R8X8 surface: UINT view of rgba_texture, with a blit before and after the draw */
+};
+
+/* Downcast a pipe_surface pointer to the d3d12_surface it is embedded in. */
+static inline struct d3d12_surface *
+d3d12_surface(struct pipe_surface *psurf)
+{
+   struct d3d12_surface *surface = (struct d3d12_surface *)psurf;
+   return surface;
+}
+
+enum d3d12_surface_conversion_mode
+d3d12_surface_update_pre_draw(struct d3d12_surface *surface,
+                              DXGI_FORMAT format); /* returns NONE when format matches the surface; otherwise sets up the UINT view and reports which conversion applies */
+
+void
+d3d12_surface_update_post_draw(struct d3d12_surface *surface,
+                               enum d3d12_surface_conversion_mode mode); /* BGRA_UINT: blits the shadow texture back into the surface; other modes: no-op */
+
+D3D12_CPU_DESCRIPTOR_HANDLE
+d3d12_surface_get_handle(struct d3d12_surface *surface,
+                         enum d3d12_surface_conversion_mode mode); /* regular descriptor for NONE, UINT RTV descriptor otherwise */
+
+void
+d3d12_context_surface_init(struct pipe_context *context); /* installs the create_surface / surface_destroy hooks */
+
+#endif
diff --git a/src/gallium/drivers/d3d12/meson.build b/src/gallium/drivers/d3d12/meson.build
new file mode 100644 (file)
index 0000000..dc2ae22
--- /dev/null
@@ -0,0 +1,57 @@
+# Copyright © Microsoft Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+files_libd3d12 = files( # sources compiled into the 'd3d12' static library; mixes C (.c) and C++ (.cpp) translation units
+  'd3d12_batch.cpp',
+  'd3d12_blit.cpp',
+  'd3d12_bufmgr.cpp',
+  'd3d12_compiler.cpp',
+  'd3d12_context.cpp',
+  'd3d12_descriptor_pool.cpp',
+  'd3d12_draw.cpp',
+  'd3d12_fence.cpp',
+  'd3d12_format.c',
+  'd3d12_gs_variant.cpp',
+  'd3d12_lower_int_cubemap_to_array.c',
+  'd3d12_lower_point_sprite.c',
+  'd3d12_nir_lower_texcmp.c',
+  'd3d12_nir_lower_vs_vertex_conversion.c',
+  'd3d12_nir_passes.c',
+  'd3d12_pipeline_state.cpp',
+  'd3d12_query.cpp',
+  'd3d12_resource.cpp',
+  'd3d12_root_signature.cpp',
+  'd3d12_screen.cpp',
+  'd3d12_surface.cpp',
+)
+
+libd3d12 = static_library( # the driver itself, built as a static library from files_libd3d12
+  'd3d12',
+  files_libd3d12,
+  gnu_symbol_visibility : 'hidden',
+  include_directories : [inc_include, inc_src, inc_mesa, inc_gallium, inc_gallium_aux],
+  dependencies: [idep_nir_headers, idep_libdxil_compiler, idep_libd3d12_resource_state], # these idep_* objects are defined outside this file
+)
+
+driver_d3d12 = declare_dependency( # what targets consume; the parent meson.build declares an empty driver_d3d12 instead when with_gallium_d3d12 is off
+  compile_args : '-DGALLIUM_D3D12', # the macro tested by the '#ifdef GALLIUM_D3D12' sections added elsewhere in this commit
+  link_with : [libd3d12],
+)
index 7d3b6c3..ae0e7fc 100644 (file)
@@ -156,13 +156,16 @@ if with_gallium_lima
 else
   driver_lima = declare_dependency()
 endif
-
 if with_gallium_zink
   subdir('drivers/zink')
 else
   driver_zink = declare_dependency()
 endif
-
+if with_gallium_d3d12
+  subdir('drivers/d3d12')
+else
+  driver_d3d12 = declare_dependency()
+endif
 if with_gallium_opencl
   # TODO: this isn't really clover specific, but ATM clover is the only
   # consumer
index 782ffea..5ef0c9f 100644 (file)
@@ -31,7 +31,7 @@ libgraw_gdi = shared_library(
     libgraw_util, libgallium, libwsgdi,
   ],
   dependencies : [
-    dep_ws2_32, idep_mesautil, driver_swrast,
+    dep_ws2_32, idep_mesautil, driver_swrast, driver_d3d12
   ],
   name_prefix : host_machine.system() == 'windows' ? '' : 'lib',  # otherwise mingw will create libgraw.dll
 )
index 5f4fbac..00b9de0 100644 (file)
@@ -56,6 +56,9 @@
 #ifdef GALLIUM_SWR
 #include "swr/swr_public.h"
 #endif
+#ifdef GALLIUM_D3D12
+#include "d3d12/d3d12_public.h"
+#endif
 
 #ifdef GALLIUM_LLVMPIPE
 static boolean use_llvmpipe = FALSE;
@@ -63,6 +66,9 @@ static boolean use_llvmpipe = FALSE;
 #ifdef GALLIUM_SWR
 static boolean use_swr = FALSE;
 #endif
+#ifdef GALLIUM_D3D12
+static boolean use_d3d12 = FALSE;
+#endif
 
 static struct pipe_screen *
 gdi_screen_create(void)
@@ -102,6 +108,13 @@ gdi_screen_create(void)
          use_swr = TRUE;
    }
 #endif
+#ifdef GALLIUM_D3D12
+   if (strcmp(driver, "d3d12") == 0) {
+      screen = d3d12_create_screen( winsys, NULL );
+      if (screen)
+         use_d3d12 = TRUE;
+   }
+#endif
    (void) driver;
 
 #ifdef GALLIUM_SOFTPIPE
@@ -154,6 +167,13 @@ gdi_present(struct pipe_screen *screen,
    }
 #endif
 
+#ifdef GALLIUM_D3D12
+   if (use_d3d12) {
+      screen->flush_frontbuffer(screen, res, 0, 0, hDC, NULL);
+      return;
+   }
+#endif
+
 #ifdef GALLIUM_SOFTPIPE
    winsys = softpipe_screen(screen)->winsys,
    dt = softpipe_resource(res)->dt,
index d753944..50a308b 100644 (file)
@@ -39,6 +39,7 @@ libopengl32 = shared_library(
   ],
   dependencies : [
     dep_ws2_32, idep_nir, idep_mesautil, driver_swrast, driver_swr,
+    driver_d3d12
   ],
   name_prefix : '',  # otherwise mingw will create libopengl32.dll
   install : true,
index 895d627..b5f4933 100644 (file)
@@ -91,6 +91,9 @@ endif
 if with_any_intel
   subdir('intel')
 endif
+if with_gallium_d3d12
+  subdir('microsoft')
+endif
 subdir('mesa')
 subdir('loader')
 if with_platform_haiku