Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / gallium / state_trackers / d3d1x / gd3d11 / d3d11_context.h
1 /**************************************************************************
2  *
3  * Copyright 2010 Luca Barbieri
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial
15  * portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26
/*
 * Array of null pointers used to unbind things; it needs 128 entries
 * because of resources.
 */
static const void *zero_data[128];

/*
 * Layout of the update_flags bitmask:
 *   bit (UPDATE_VIEWS_SHIFT + s)    - sampler views of stage s are dirty
 *   bit (UPDATE_SAMPLERS_SHIFT + s) - sampler states of stage s are dirty
 *   UPDATE_VERTEX_BUFFERS           - vertex buffers are dirty
 */
#define UPDATE_VIEWS_SHIFT    (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34 #if API >= 11
35 template<typename PtrTraits>
36 struct GalliumD3D11DeviceContext :
37         public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38 {
39 #else
40 template<bool threadsafe>
41 struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42 {
43         typedef simple_ptr_traits PtrTraits;
44         typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45 #endif
46
47         refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48         refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49         refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50         refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51         refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52         refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53         refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54         refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
56         refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57         refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58         refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59         refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60         refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61         refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
62
63 #if API >= 11
64         refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65         refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66 #endif
67
68         D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69         D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70         unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
71         D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
72         DXGI_FORMAT index_format;
73         unsigned index_offset;
74         BOOL render_predicate_value;
75         float blend_color[4];
76         unsigned sample_mask;
77         unsigned stencil_ref;
78         bool depth_clamp;
79
80         void* default_input_layout;
81         void* default_rasterizer;
82         void* default_depth_stencil;
83         void* default_blend;
84         void* default_sampler;
85         void* ld_sampler;
86         void * default_shaders[D3D11_STAGES];
87
88         // derived state
89         int primitive_mode;
90         struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
91         struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
92         struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
93         struct
94         {
95                 void* ld; // accessed with a -1 index from v
96                 void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
97         } sampler_csos[D3D11_STAGES];
98         struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
99         unsigned num_shader_resource_views[D3D11_STAGES];
100         unsigned num_samplers[D3D11_STAGES];
101         unsigned num_vertex_buffers;
102         unsigned num_render_target_views;
103         unsigned num_viewports;
104         unsigned num_scissor_rects;
105         unsigned num_so_targets;
106
107         struct pipe_context* pipe;
108         unsigned update_flags;
109
110         bool owns_pipe;
111         unsigned context_flags;
112
113         GalliumD3D11Caps caps;
114
115         cso_context* cso_ctx;
116         gen_mipmap_state* gen_mipmap;
117
118 #if API >= 11
119 #define SYNCHRONIZED do {} while(0)
120
121         GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
122         : GalliumD3D11DeviceChild<ID3D11DeviceContext>(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
123         {
124                 caps = device->screen_caps;
125                 init_context();
126         }
127
128         ~GalliumD3D11DeviceContext()
129         {
130                 destroy_context();
131         }
132 #else
133 #define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
134
135         GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
136         : GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
137         {
138                 caps = this->screen_caps;
139                 init_context();
140         }
141
142         ~GalliumD3D10Device()
143         {
144                 destroy_context();
145         }
146 #endif
147
148         void init_context()
149         {
150                 if(!pipe->begin_query)
151                         caps.queries = false;
152                 if(!pipe->render_condition)
153                         caps.render_condition = false;
154                 if(!pipe->bind_gs_state)
155                 {
156                         caps.gs = false;
157                         caps.stages = 2;
158                 }
159                 if(!pipe->set_stream_output_buffers)
160                         caps.so = false;
161                 if(!pipe->set_geometry_sampler_views)
162                         caps.stages_with_sampling &=~ (1 << PIPE_SHADER_GEOMETRY);
163                 if(!pipe->set_fragment_sampler_views)
164                         caps.stages_with_sampling &=~ (1 << PIPE_SHADER_FRAGMENT);
165                 if(!pipe->set_vertex_sampler_views)
166                         caps.stages_with_sampling &=~ (1 << PIPE_SHADER_VERTEX);
167
168                 update_flags = 0;
169
170                 // pipeline state
171                 memset(viewports, 0, sizeof(viewports));
172                 memset(scissor_rects, 0, sizeof(scissor_rects));
173                 memset(so_offsets, 0, sizeof(so_offsets));
174                 primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
175                 index_format = DXGI_FORMAT_UNKNOWN;
176                 index_offset = 0;
177                 render_predicate_value = 0;
178                 memset(blend_color, 0, sizeof(blend_color));
179                 sample_mask = ~0;
180                 stencil_ref = 0;
181                 depth_clamp = 0;
182
183                 // derived state
184                 primitive_mode = 0;
185                 memset(vertex_buffers, 0, sizeof(vertex_buffers));
186                 memset(so_buffers, 0, sizeof(so_buffers));
187                 memset(sampler_views, 0, sizeof(sampler_views));
188                 memset(sampler_csos, 0, sizeof(sampler_csos));
189                 memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
190                 memset(num_samplers, 0, sizeof(num_samplers));
191                 num_vertex_buffers = 0;
192                 num_render_target_views = 0;
193                 num_viewports = 0;
194                 num_scissor_rects = 0;
195                 num_so_targets = 0;
196
197                 default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
198
199                 struct pipe_rasterizer_state rasterizerd;
200                 memset(&rasterizerd, 0, sizeof(rasterizerd));
201                 rasterizerd.gl_rasterization_rules = 1;
202                 rasterizerd.cull_face = PIPE_FACE_BACK;
203                 default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
204
205                 struct pipe_depth_stencil_alpha_state depth_stencild;
206                 memset(&depth_stencild, 0, sizeof(depth_stencild));
207                 depth_stencild.depth.enabled = TRUE;
208                 depth_stencild.depth.writemask = 1;
209                 depth_stencild.depth.func = PIPE_FUNC_LESS;
210                 default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
211
212                 struct pipe_blend_state blendd;
213                 memset(&blendd, 0, sizeof(blendd));
214                 blendd.rt[0].colormask = 0xf;
215                 default_blend = pipe->create_blend_state(pipe, &blendd);
216
217                 struct pipe_sampler_state samplerd;
218                 memset(&samplerd, 0, sizeof(samplerd));
219                 samplerd.normalized_coords = 1;
220                 samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
221                 samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
222                 samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
223                 samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
224                 samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
225                 samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
226                 samplerd.border_color[0] = 1.0f;
227                 samplerd.border_color[1] = 1.0f;
228                 samplerd.border_color[2] = 1.0f;
229                 samplerd.border_color[3] = 1.0f;
230                 samplerd.min_lod = -FLT_MAX;
231                 samplerd.max_lod = FLT_MAX;
232                 samplerd.max_anisotropy = 1;
233                 default_sampler = pipe->create_sampler_state(pipe, &samplerd);
234
235                 memset(&samplerd, 0, sizeof(samplerd));
236                 samplerd.normalized_coords = 0;
237                 samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
238                 samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
239                 samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
240                 samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
241                 samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
242                 samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
243                 samplerd.min_lod = -FLT_MAX;
244                 samplerd.max_lod = FLT_MAX;
245                 samplerd.max_anisotropy = 1;
246                 ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
247
248                 for(unsigned s = 0; s < D3D11_STAGES; ++s)
249                 {
250                         sampler_csos[s].ld = ld_sampler;
251                         for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
252                                 sampler_csos[s].v[i] = default_sampler;
253                 }
254
255                 // TODO: should this really be empty shaders, or should they be all-passthrough?
256                 memset(default_shaders, 0, sizeof(default_shaders));
257                 struct ureg_program *ureg;
258                 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
259                 ureg_END(ureg);
260                 default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
261
262                 ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
263                 ureg_END(ureg);
264                 default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
265
266                 cso_ctx = cso_create_context(pipe);
267                 gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
268
269                 RestoreGalliumState();
270         }
271
272         void destroy_context()
273         {
274                 util_destroy_gen_mipmap(gen_mipmap);
275                 cso_destroy_context(cso_ctx);
276
277                 pipe->bind_vertex_elements_state(pipe, 0);
278                 pipe->delete_vertex_elements_state(pipe, default_input_layout);
279
280                 pipe->bind_rasterizer_state(pipe, 0);
281                 pipe->delete_rasterizer_state(pipe, default_rasterizer);
282
283                 pipe->bind_depth_stencil_alpha_state(pipe, 0);
284                 pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
285
286                 pipe->bind_blend_state(pipe, 0);
287                 pipe->delete_blend_state(pipe, default_blend);
288
289                 pipe->bind_fragment_sampler_states(pipe, 0, 0);
290                 pipe->bind_vertex_sampler_states(pipe, 0, 0);
291                 if(pipe->bind_geometry_sampler_states)
292                         pipe->bind_geometry_sampler_states(pipe, 0, 0);
293                 pipe->delete_sampler_state(pipe, default_sampler);
294                 pipe->delete_sampler_state(pipe, ld_sampler);
295
296                 pipe->bind_fs_state(pipe, 0);
297                 pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
298
299                 pipe->bind_vs_state(pipe, 0);
300                 pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
301
302                 if(owns_pipe)
303                         pipe->destroy(pipe);
304         }
305
306         virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
307         {
308                 return context_flags;
309         }
/*
 * Extra trailing parameters of XSSetShader / XSGetShader. The D3D11 build
 * appends the class-instance array and its count; the D3D10 build appends
 * nothing.
 */
#if API >= 11
#define SET_SHADER_EXTRA_ARGS \
        , ID3D11ClassInstance *const *ppClassInstances \
        , unsigned count
#define GET_SHADER_EXTRA_ARGS \
        , ID3D11ClassInstance **ppClassInstances \
        , unsigned *out_count
#else
#define SET_SHADER_EXTRA_ARGS
#define GET_SHADER_EXTRA_ARGS
#endif
321
322 /* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
323  * Instead, you have to pass a pointer to nulls to unbind things.
324  * We do the same.
325  * TODO: is D3D10 the same?
326  */
327         template<unsigned s>
328         void xs_set_shader(GalliumD3D11Shader<>* shader)
329         {
330                 if(shader != shaders[s].p)
331                 {
332                         shaders[s] = shader;
333                         void* shader_cso = shader ? shader->object : default_shaders[s];
334                         switch(s)
335                         {
336                         case PIPE_SHADER_VERTEX:
337                                 pipe->bind_vs_state(pipe, shader_cso);
338                                 break;
339                         case PIPE_SHADER_FRAGMENT:
340                                 pipe->bind_fs_state(pipe, shader_cso);
341                                 break;
342                         case PIPE_SHADER_GEOMETRY:
343                                 pipe->bind_gs_state(pipe, shader_cso);
344                                 break;
345                         }
346                         update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
347                 }
348         }
349
350         template<unsigned s>
351         void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
352         {
353                 for(unsigned i = 0; i < count; ++i)
354                 {
355                         if(constbufs[i] != constant_buffers[s][i].p)
356                         {
357                                 constant_buffers[s][i] = constbufs[i];
358                                 if(s < caps.stages && start + i < caps.constant_buffers[s])
359                                         pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
360                         }
361                 }
362         }
363
364         template<unsigned s>
365         void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
366         {
367                 int last_different = -1;
368                 for(unsigned i = 0; i < count; ++i)
369                 {
370                         if(shader_resource_views[s][start + i].p != srvs[i])
371                         {
372                                 shader_resource_views[s][start + i] = srvs[i];
373                                 sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
374                                 last_different = i;
375                         }
376                 }
377                 if(last_different >= 0)
378                 {
379                         num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
380                         update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
381                 }
382         }
383
384         template<unsigned s>
385         void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
386         {
387                 int last_different = -1;
388                 for(unsigned i = 0; i < count; ++i)
389                 {
390                         if(samplers[s][start + i].p != samps[i])
391                         {
392                                 samplers[s][start + i] = samps[i];
393                                 sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
394                         }
395                         if(last_different >= 0)
396                         {
397                                 num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
398                                 update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
399                         }
400                 }
401         }
402
/*
 * Generates the eight per-stage COM entry points for one shader stage:
 * XS{Set,Get}Shader, XS{Set,Get}ConstantBuffers, XS{Set,Get}ShaderResources
 * and XS{Set,Get}Samplers.
 *
 *   XS    - stage prefix (VS, GS, PS, ...); also selects D3D11_STAGE_##XS
 *   Stage - stage name used in the interface type ID3D11<Stage>Shader
 *
 * Setters forward to the xs_set_* templates; getters return a new reference
 * (via .ref()) for each requested slot. The extra class-instance arguments
 * of the D3D11 build are accepted but not used.
 */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
        virtual void STDMETHODCALLTYPE XS##SetShader( \
                ID3D11##Stage##Shader *pShader \
                SET_SHADER_EXTRA_ARGS) \
        { \
                SYNCHRONIZED; \
                xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##GetShader( \
                ID3D11##Stage##Shader **ppShader \
                GET_SHADER_EXTRA_ARGS) \
        { \
                SYNCHRONIZED; \
                *ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##SetConstantBuffers( \
                unsigned start, \
                unsigned count, \
                ID3D11Buffer *const* new_constant_buffers) \
        { \
                SYNCHRONIZED; \
                xs_set_constant_buffers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11Buffer *const *)new_constant_buffers); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##GetConstantBuffers( \
                unsigned start, \
                unsigned count, \
                ID3D11Buffer **out_constant_buffers) \
        { \
                SYNCHRONIZED; \
                for(unsigned n = 0; n < count; ++n) \
                        out_constant_buffers[n] = constant_buffers[D3D11_STAGE_##XS][start + n].ref(); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##SetShaderResources( \
                unsigned start, \
                unsigned count, \
                ID3D11ShaderResourceView *const *new_shader_resource_views) \
        { \
                SYNCHRONIZED; \
                xs_set_shader_resources<D3D11_STAGE_##XS>(start, count, (GalliumD3D11ShaderResourceView *const *)new_shader_resource_views); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##GetShaderResources( \
                unsigned start, \
                unsigned count, \
                ID3D11ShaderResourceView **out_shader_resource_views) \
        { \
                SYNCHRONIZED; \
                for(unsigned n = 0; n < count; ++n) \
                        out_shader_resource_views[n] = shader_resource_views[D3D11_STAGE_##XS][start + n].ref(); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##SetSamplers( \
                unsigned start, \
                unsigned count, \
                ID3D11SamplerState *const *new_samplers) \
        { \
                SYNCHRONIZED; \
                xs_set_samplers<D3D11_STAGE_##XS>(start, count, (GalliumD3D11SamplerState *const *)new_samplers); \
        } \
        \
        virtual void STDMETHODCALLTYPE XS##GetSamplers( \
                unsigned start, \
                unsigned count, \
                ID3D11SamplerState **out_samplers) \
        { \
                SYNCHRONIZED; \
                for(unsigned n = 0; n < count; ++n) \
                        out_samplers[n] = samplers[D3D11_STAGE_##XS][start + n].ref(); \
        }
469
/*
 * Per-stage statement wrappers: VS and PS expand to the statement
 * unchanged, GS runs it only when caps.gs is set, and HS/DS/CS expand to
 * nothing.
 */
#define DO_VS(x) x
#define DO_GS(x) do {if(caps.gs) {x;}} while(0)
#define DO_PS(x) x
#define DO_HS(x)
#define DO_DS(x)
#define DO_CS(x)

        /* Entry points for the stages present in every API version. */
        IMPLEMENT_SHADER_STAGE(VS, Vertex)
        IMPLEMENT_SHADER_STAGE(GS, Geometry)
        IMPLEMENT_SHADER_STAGE(PS, Pixel)
479
#if API >= 11
        /* Entry points for the stages that only exist in the D3D11 build. */
        IMPLEMENT_SHADER_STAGE(HS, Hull)
        IMPLEMENT_SHADER_STAGE(DS, Domain)
        IMPLEMENT_SHADER_STAGE(CS, Compute)

        /*
         * Store `count` unordered access views into slots
         * [start, start + count) of cs_unordered_access_views.
         * new_uav_initial_counts is accepted but not used.
         */
        virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
                unsigned start,
                unsigned count,
                ID3D11UnorderedAccessView *const *new_unordered_access_views,
                const unsigned *new_uav_initial_counts)
        {
                SYNCHRONIZED;
                for(unsigned n = 0; n != count; ++n)
                {
                        ID3D11UnorderedAccessView* view = new_unordered_access_views[n];
                        cs_unordered_access_views[start + n] = view;
                }
        }

        /*
         * Return a new reference (via .ref()) to each view stored in slots
         * [start, start + count) of cs_unordered_access_views.
         */
        virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
                unsigned start,
                unsigned count,
                ID3D11UnorderedAccessView **out_unordered_access_views)
        {
                SYNCHRONIZED;
                for(unsigned n = 0; n != count; ++n)
                        out_unordered_access_views[n] = cs_unordered_access_views[start + n].ref();
        }
#endif
506
507         template<unsigned s>
508         void update_stage()
509         {
510                 if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
511                 {
512                         while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
513                                 --num_shader_resource_views[s];
514                         if((1 << s) & caps.stages_with_sampling)
515                         {
516                                 struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
517                                 unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
518                                 for(unsigned i = 0; i < num_views_to_bind; ++i)
519                                 {
520                                         views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
521                                 }
522                                 switch(s)
523                                 {
524                                 case PIPE_SHADER_VERTEX:
525                                         pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
526                                         break;
527                                 case PIPE_SHADER_FRAGMENT:
528                                         pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
529                                         break;
530                                 case PIPE_SHADER_GEOMETRY:
531                                         pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
532                                         break;
533                                 }
534                         }
535                 }
536
537                 if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
538                 {
539                         while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
540                                 --num_samplers[s];
541                         if((1 << s) & caps.stages_with_sampling)
542                         {
543                                 void* samplers_to_bind[PIPE_MAX_SAMPLERS];
544                                 unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
545                                 for(unsigned i = 0; i < num_samplers_to_bind; ++i)
546                                 {
547                                         // index can be -1 to access sampler_csos[s].ld
548                                         samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
549                                 }
550                                 switch(s)
551                                 {
552                                 case PIPE_SHADER_VERTEX:
553                                         pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
554                                         break;
555                                 case PIPE_SHADER_FRAGMENT:
556                                         pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
557                                         break;
558                                 case PIPE_SHADER_GEOMETRY:
559                                         pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
560                                         break;
561                                 }
562                         }
563                 }
564         }
565
566         void update_state()
567         {
568                 update_stage<D3D11_STAGE_PS>();
569                 update_stage<D3D11_STAGE_VS>();
570                 update_stage<D3D11_STAGE_GS>();
571 #if API >= 11
572                 update_stage<D3D11_STAGE_HS>();
573                 update_stage<D3D11_STAGE_DS>();
574                 update_stage<D3D11_STAGE_CS>();
575 #endif
576
577                 if(update_flags & UPDATE_VERTEX_BUFFERS)
578                 {
579                         while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
580                                 --num_vertex_buffers;
581                         pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
582                 }
583
584                 update_flags = 0;
585         }
586
587         virtual void STDMETHODCALLTYPE IASetInputLayout(
588                 ID3D11InputLayout *new_input_layout)
589         {
590                 SYNCHRONIZED;
591                 if(new_input_layout != input_layout.p)
592                 {
593                         input_layout = new_input_layout;
594                         pipe->bind_vertex_elements_state(pipe, new_input_layout ? ((GalliumD3D11InputLayout*)new_input_layout)->object : default_input_layout);
595                 }
596         }
597
598         virtual void STDMETHODCALLTYPE IAGetInputLayout(
599                 ID3D11InputLayout **out_input_layout)
600         {
601                 SYNCHRONIZED;
602                 *out_input_layout = input_layout.ref();
603         }
604
605         virtual void STDMETHODCALLTYPE IASetVertexBuffers(
606                 unsigned start,
607                 unsigned count,
608                 ID3D11Buffer *const *new_vertex_buffers,
609                 const unsigned *new_strides,
610                 const unsigned *new_offsets)
611         {
612                 SYNCHRONIZED;
613                 int last_different = -1;
614                 for(unsigned i = 0; i < count; ++i)
615                 {
616                         ID3D11Buffer* buffer = new_vertex_buffers[i];
617                         if(buffer != input_buffers[start + i].p
618                                 || vertex_buffers[start + i].buffer_offset != new_offsets[i]
619                                 || vertex_buffers[start + i].stride != new_offsets[i]
620                         )
621                         {
622                                 input_buffers[start + i] = buffer;
623                                 vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
624                                 vertex_buffers[start + i].buffer_offset = new_offsets[i];
625                                 vertex_buffers[start + i].stride = new_strides[i];
626                                 last_different = i;
627                         }
628                 }
629                 if(last_different >= 0)
630                 {
631                         num_vertex_buffers = std::max(num_vertex_buffers, start + count);
632                         update_flags |= UPDATE_VERTEX_BUFFERS;
633                 }
634         }
635
636         virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
637                 unsigned start,
638                 unsigned count,
639                 ID3D11Buffer **out_vertex_buffers,
640                 unsigned *out_strides,
641                 unsigned *out_offsets)
642         {
643                 SYNCHRONIZED;
644                 if(out_vertex_buffers)
645                 {
646                         for(unsigned i = 0; i < count; ++i)
647                                 out_vertex_buffers[i] = input_buffers[start + i].ref();
648                 }
649
650                 if(out_offsets)
651                 {
652                         for(unsigned i = 0; i < count; ++i)
653                                 out_offsets[i] = vertex_buffers[start + i].buffer_offset;
654                 }
655
656                 if(out_strides)
657                 {
658                         for(unsigned i = 0; i < count; ++i)
659                                 out_strides[i] = vertex_buffers[start + i].stride;
660                 }
661         }
662
663         void set_index_buffer()
664         {
665                 pipe_index_buffer ib;
666                 if(!index_buffer)
667                 {
668                         memset(&ib, 0, sizeof(ib));
669                 }
670                 else
671                 {
672                         if(index_format == DXGI_FORMAT_R32_UINT)
673                                 ib.index_size = 4;
674                         else if(index_format == DXGI_FORMAT_R16_UINT)
675                                 ib.index_size = 2;
676                         else
677                                 ib.index_size = 1;
678                         ib.offset = index_offset;
679                         ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
680                 }
681                 pipe->set_index_buffer(pipe, &ib);
682         }
683
684         virtual void STDMETHODCALLTYPE IASetIndexBuffer(
685                 ID3D11Buffer *new_index_buffer,
686                 DXGI_FORMAT new_index_format,
687                 unsigned new_index_offset)
688         {
689                 SYNCHRONIZED;
690                 if(index_buffer.p != new_index_buffer || index_format != new_index_format || index_offset != new_index_offset)
691                 {
692                         index_buffer = new_index_buffer;
693                         index_format = new_index_format;
694                         index_offset = new_index_offset;
695
696                         set_index_buffer();
697                 }
698         }
699
700         virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
701                 ID3D11Buffer **out_index_buffer,
702                 DXGI_FORMAT *out_index_format,
703                 unsigned *out_index_offset)
704         {
705                 SYNCHRONIZED;
706                 if(out_index_buffer)
707                         *out_index_buffer = index_buffer.ref();
708                 if(out_index_format)
709                         *out_index_format = index_format;
710                 if(out_index_offset)
711                         *out_index_offset = index_offset;
712         }
713
714         virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
715                 D3D11_PRIMITIVE_TOPOLOGY new_primitive_topology)
716         {
717                 SYNCHRONIZED;
718                 if(primitive_topology != new_primitive_topology)
719                 {
720                         if(new_primitive_topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
721                                 primitive_mode = d3d_to_pipe_prim[new_primitive_topology];
722                         else
723                                 primitive_mode = 0;
724                         primitive_topology = new_primitive_topology;
725                 }
726         }
727
728         virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
729                 D3D11_PRIMITIVE_TOPOLOGY *out_primitive_topology)
730         {
731                 SYNCHRONIZED;
732                 *out_primitive_topology = primitive_topology;
733         }
734
735         virtual void STDMETHODCALLTYPE DrawIndexed(
736                 unsigned index_count,
737                 unsigned start_index_location,
738                 int base_vertex_location)
739         {
740                 SYNCHRONIZED;
741                 if(update_flags)
742                         update_state();
743
744                 pipe_draw_info info;
745                 info.mode = primitive_mode;
746                 info.indexed = TRUE;
747                 info.count = index_count;
748                 info.start = start_index_location;
749                 info.index_bias = base_vertex_location;
750                 info.min_index = 0;
751                 info.max_index = ~0;
752                 info.start_instance = 0;
753                 info.instance_count = 1;
754
755                 pipe->draw_vbo(pipe, &info);
756         }
757
758         virtual void STDMETHODCALLTYPE Draw(
759                 unsigned vertex_count,
760                 unsigned start_vertex_location)
761         {
762                 SYNCHRONIZED;
763                 if(update_flags)
764                         update_state();
765
766                 pipe_draw_info info;
767                 info.mode = primitive_mode;
768                 info.indexed = FALSE;
769                 info.count = vertex_count;
770                 info.start = start_vertex_location;
771                 info.index_bias = 0;
772                 info.min_index = 0;
773                 info.max_index = ~0;
774                 info.start_instance = 0;
775                 info.instance_count = 1;
776
777                 pipe->draw_vbo(pipe, &info);
778         }
779
780         virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
781                 unsigned index_countPerInstance,
782                 unsigned instance_count,
783                 unsigned start_index_location,
784                 int base_vertex_location,
785                 unsigned start_instance_location)
786         {
787                 SYNCHRONIZED;
788                 if(update_flags)
789                         update_state();
790
791                 pipe_draw_info info;
792                 info.mode = primitive_mode;
793                 info.indexed = TRUE;
794                 info.count = index_countPerInstance;
795                 info.start = start_index_location;
796                 info.index_bias = base_vertex_location;
797                 info.min_index = 0;
798                 info.max_index = ~0;
799                 info.start_instance = start_instance_location;
800                 info.instance_count = instance_count;
801
802                 pipe->draw_vbo(pipe, &info);
803         }
804
805         virtual void STDMETHODCALLTYPE DrawInstanced(
806                 unsigned vertex_countPerInstance,
807                 unsigned instance_count,
808                 unsigned start_vertex_location,
809                 unsigned start_instance_location)
810         {
811                 SYNCHRONIZED;
812                 if(update_flags)
813                         update_state();
814
815                 pipe_draw_info info;
816                 info.mode = primitive_mode;
817                 info.indexed = FALSE;
818                 info.count = vertex_countPerInstance;
819                 info.start = start_vertex_location;
820                 info.index_bias = 0;
821                 info.min_index = 0;
822                 info.max_index = ~0;
823                 info.start_instance = start_instance_location;
824                 info.instance_count = instance_count;
825
826                 pipe->draw_vbo(pipe, &info);
827         }
828
829         virtual void STDMETHODCALLTYPE DrawAuto(void)
830         {
831                 if(!caps.so)
832                         return;
833
834                 SYNCHRONIZED;
835                 if(update_flags)
836                         update_state();
837
838                 pipe->draw_stream_output(pipe, primitive_mode);
839         }
840
841         virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
842                 ID3D11Buffer *buffer,
843                 unsigned aligned_byte_offset)
844         {
845                 SYNCHRONIZED;
846                 if(update_flags)
847                         update_state();
848
849                 struct {
850                         unsigned count;
851                         unsigned instance_count;
852                         unsigned start;
853                         unsigned index_bias;
854                 } data;
855
856                 pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
857
858                 pipe_draw_info info;
859                 info.mode = primitive_mode;
860                 info.indexed = TRUE;
861                 info.start = data.start;
862                 info.count = data.count;
863                 info.index_bias = data.index_bias;
864                 info.min_index = 0;
865                 info.max_index = ~0;
866                 info.start_instance = 0;
867                 info.instance_count = data.instance_count;
868
869                 pipe->draw_vbo(pipe, &info);
870         }
871
872         virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
873                 ID3D11Buffer *buffer,
874                 unsigned aligned_byte_offset)
875         {
876                 SYNCHRONIZED;
877                 if(update_flags)
878                         update_state();
879
880                 struct {
881                         unsigned count;
882                         unsigned instance_count;
883                         unsigned start;
884                 } data;
885
886                 pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)buffer)->resource, aligned_byte_offset, sizeof(data), &data);
887
888                 pipe_draw_info info;
889                 info.mode = primitive_mode;
890                 info.indexed = FALSE;
891                 info.start = data.start;
892                 info.count = data.count;
893                 info.index_bias = 0;
894                 info.min_index = 0;
895                 info.max_index = ~0;
896                 info.start_instance = 0;
897                 info.instance_count = data.instance_count;
898
899                 pipe->draw_vbo(pipe, &info);
900         }
901
902 #if API >= 11
903         virtual void STDMETHODCALLTYPE Dispatch(
904                 unsigned thread_group_count_x,
905                 unsigned thread_group_count_y,
906                 unsigned thread_group_count_z)
907         {
908 // uncomment this when this is implemented
909 //              SYNCHRONIZED;
910 //              if(update_flags)
911 //                      update_state();
912         }
913
914         virtual void STDMETHODCALLTYPE DispatchIndirect(
915                 ID3D11Buffer *buffer,
916                 unsigned aligned_byte_offset)
917         {
918 // uncomment this when this is implemented
919 //              SYNCHRONIZED;
920 //              if(update_flags)
921 //                      update_state();
922         }
923 #endif
924
925         void set_clip()
926         {
927                 pipe_clip_state clip;
928                 clip.nr = 0;
929                 clip.depth_clamp = depth_clamp;
930                 pipe->set_clip_state(pipe, &clip);
931         }
932
933         virtual void STDMETHODCALLTYPE RSSetState(
934                 ID3D11RasterizerState *new_rasterizer_state)
935         {
936                 SYNCHRONIZED;
937                 if(new_rasterizer_state != rasterizer_state.p)
938                 {
939                         rasterizer_state = new_rasterizer_state;
940                         pipe->bind_rasterizer_state(pipe, new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->object : default_rasterizer);
941                         bool new_depth_clamp = new_rasterizer_state ? ((GalliumD3D11RasterizerState*)new_rasterizer_state)->depth_clamp : false;
942                         if(depth_clamp != new_depth_clamp)
943                         {
944                                 depth_clamp = new_depth_clamp;
945                                 set_clip();
946                         }
947                 }
948         }
949
950         virtual void STDMETHODCALLTYPE RSGetState(
951                 ID3D11RasterizerState **out_rasterizer_state)
952         {
953                 SYNCHRONIZED;
954                 *out_rasterizer_state = rasterizer_state.ref();
955         }
956
957         void set_viewport()
958         {
959                 // TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
960                 pipe_viewport_state viewport;
961                 float half_width = viewports[0].Width * 0.5f;
962                 float half_height = viewports[0].Height * 0.5f;
963
964                 viewport.scale[0] = half_width;
965                 viewport.scale[1] = -half_height;
966                 viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
967                 viewport.scale[3] = 1.0f;
968                 viewport.translate[0] = half_width + viewports[0].TopLeftX;
969                 viewport.translate[1] = half_height + viewports[0].TopLeftY;
970                 viewport.translate[2] = viewports[0].MinDepth;
971                 viewport.translate[3] = 1.0f;
972                 pipe->set_viewport_state(pipe, &viewport);
973         }
974
975         virtual void STDMETHODCALLTYPE RSSetViewports(
976                 unsigned count,
977                 const D3D11_VIEWPORT *new_viewports)
978         {
979                 SYNCHRONIZED;
980                 if(count)
981                 {
982                         if(memcmp(&viewports[0], &new_viewports[0], sizeof(viewports[0])))
983                         {
984                                 viewports[0] = new_viewports[0];
985                                 set_viewport();
986                         }
987                         for(unsigned i = 1; i < count; ++i)
988                                 viewports[i] = new_viewports[i];
989                 }
990                 else if(num_viewports)
991                 {
992                         // TODO: what should we do here?
993                         memset(&viewports[0], 0, sizeof(viewports[0]));
994                         set_viewport();
995                 }
996                 num_viewports = count;
997         }
998
999         virtual void STDMETHODCALLTYPE RSGetViewports(
1000                 unsigned *out_count,
1001                 D3D11_VIEWPORT *out_viewports)
1002         {
1003                 SYNCHRONIZED;
1004                 if(out_viewports)
1005                 {
1006                         unsigned i;
1007                         for(i = 0; i < std::min(*out_count, num_viewports); ++i)
1008                                 out_viewports[i] = viewports[i];
1009
1010                         memset(out_viewports + i, 0, (*out_count - i) * sizeof(D3D11_VIEWPORT));
1011                 }
1012
1013                 *out_count = num_viewports;
1014         }
1015
1016         void set_scissor()
1017         {
1018                 pipe_scissor_state scissor;
1019                 scissor.minx = scissor_rects[0].left;
1020                 scissor.miny = scissor_rects[0].top;
1021                 scissor.maxx = scissor_rects[0].right;
1022                 scissor.maxy = scissor_rects[0].bottom;
1023                 pipe->set_scissor_state(pipe, &scissor);
1024         }
1025
1026         virtual void STDMETHODCALLTYPE RSSetScissorRects(
1027                 unsigned count,
1028                 const D3D11_RECT *new_rects)
1029         {
1030                 SYNCHRONIZED;
1031                 if(count)
1032                 {
1033                         if(memcmp(&scissor_rects[0], &new_rects[0], sizeof(scissor_rects[0])))
1034                         {
1035                                 scissor_rects[0] = new_rects[0];
1036                                 set_scissor();
1037                         }
1038                         for(unsigned i = 1; i < count; ++i)
1039                                 scissor_rects[i] = new_rects[i];
1040                 }
1041                 else if(num_scissor_rects)
1042                 {
1043                         // TODO: what should we do here?
1044                         memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1045                         set_scissor();
1046                 }
1047
1048                 num_scissor_rects = count;
1049         }
1050
1051         virtual void STDMETHODCALLTYPE RSGetScissorRects(
1052                 unsigned *out_count,
1053                 D3D11_RECT *out_rects)
1054         {
1055                 SYNCHRONIZED;
1056                 if(out_rects)
1057                 {
1058                         unsigned i;
1059                         for(i = 0; i < std::min(*out_count, num_scissor_rects); ++i)
1060                                 out_rects[i] = scissor_rects[i];
1061
1062                         memset(out_rects + i, 0, (*out_count - i) * sizeof(D3D11_RECT));
1063                 }
1064
1065                 *out_count = num_scissor_rects;
1066         }
1067
1068         virtual void STDMETHODCALLTYPE OMSetBlendState(
1069                 ID3D11BlendState *new_blend_state,
1070                 const float new_blend_factor[4],
1071                 unsigned new_sample_mask)
1072         {
1073                 SYNCHRONIZED;
1074                 float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1075
1076                 if(blend_state.p != new_blend_state)
1077                 {
1078                         pipe->bind_blend_state(pipe, new_blend_state ? ((GalliumD3D11BlendState*)new_blend_state)->object : default_blend);
1079                         blend_state = new_blend_state;
1080                 }
1081
1082                 // Windows D3D11 does this, even though it's apparently undocumented
1083                 if(!new_blend_factor)
1084                         new_blend_factor = white;
1085
1086                 if(memcmp(blend_color, new_blend_factor, sizeof(blend_color)))
1087                 {
1088                         pipe->set_blend_color(pipe, (struct pipe_blend_color*)new_blend_factor);
1089                         memcpy(blend_color, new_blend_factor, sizeof(blend_color));
1090                 }
1091
1092                 if(sample_mask != new_sample_mask)
1093                 {
1094                         pipe->set_sample_mask(pipe, new_sample_mask);
1095                         sample_mask = new_sample_mask;
1096                 }
1097         }
1098
1099         virtual void STDMETHODCALLTYPE OMGetBlendState(
1100                 ID3D11BlendState **out_blend_state,
1101                 float out_blend_factor[4],
1102                 unsigned *out_sample_mask)
1103         {
1104                 SYNCHRONIZED;
1105                 if(out_blend_state)
1106                         *out_blend_state = blend_state.ref();
1107                 if(out_blend_factor)
1108                         memcpy(out_blend_factor, blend_color, sizeof(blend_color));
1109                 if(out_sample_mask)
1110                         *out_sample_mask = sample_mask;
1111         }
1112
1113         void set_stencil_ref()
1114         {
1115                 struct pipe_stencil_ref sref;
1116                 sref.ref_value[0] = stencil_ref;
1117                 sref.ref_value[1] = stencil_ref;
1118                 pipe->set_stencil_ref(pipe, &sref);
1119         }
1120
1121         virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1122                 ID3D11DepthStencilState *new_depth_stencil_state,
1123                 unsigned new_stencil_ref)
1124         {
1125                 SYNCHRONIZED;
1126                 if(new_depth_stencil_state != depth_stencil_state.p)
1127                 {
1128                         pipe->bind_depth_stencil_alpha_state(pipe, new_depth_stencil_state ? ((GalliumD3D11DepthStencilState*)new_depth_stencil_state)->object : default_depth_stencil);
1129                         depth_stencil_state = new_depth_stencil_state;
1130                 }
1131
1132                 if(new_stencil_ref != stencil_ref)
1133                 {
1134                         stencil_ref = new_stencil_ref;
1135                         set_stencil_ref();
1136                 }
1137         }
1138
1139         virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1140                 ID3D11DepthStencilState **out_depth_stencil_state,
1141                 unsigned *out_stencil_ref)
1142         {
1143                 SYNCHRONIZED;
1144                 if(*out_depth_stencil_state)
1145                         *out_depth_stencil_state = depth_stencil_state.ref();
1146                 if(out_stencil_ref)
1147                         *out_stencil_ref = stencil_ref;
1148         }
1149
1150         void set_framebuffer()
1151         {
1152                 struct pipe_framebuffer_state fb;
1153                 memset(&fb, 0, sizeof(fb));
1154                 if(depth_stencil_view)
1155                 {
1156                         struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1157                         fb.zsbuf = surf;
1158                         if(surf->width > fb.width)
1159                                 fb.width = surf->width;
1160                         if(surf->height > fb.height)
1161                                 fb.height = surf->height;
1162                 }
1163                 fb.nr_cbufs = num_render_target_views;
1164                 unsigned i;
1165                 for(i = 0; i < num_render_target_views; ++i)
1166                 {
1167                         if(render_target_views[i])
1168                         {
1169                                 struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1170                                 fb.cbufs[i] = surf;
1171                                 if(surf->width > fb.width)
1172                                         fb.width = surf->width;
1173                                 if(surf->height > fb.height)
1174                                         fb.height = surf->height;
1175                         }
1176                 }
1177
1178                 pipe->set_framebuffer_state(pipe, &fb);
1179         }
1180
1181         /* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1182          * Hopefully nobody relies on this happening
1183          */
1184
1185         virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1186                 unsigned count,
1187                 ID3D11RenderTargetView *const *new_render_target_views,
1188                 ID3D11DepthStencilView  *new_depth_stencil_view)
1189         {
1190                 SYNCHRONIZED;
1191                 if(!new_render_target_views)
1192                         count = 0;
1193                 if(count == num_render_target_views)
1194                 {
1195                         for(unsigned i = 0; i < count; ++i)
1196                         {
1197                                 if(new_render_target_views[i] != render_target_views[i].p)
1198                                         goto changed;
1199                         }
1200                         return;
1201                 }
1202 changed:
1203                 depth_stencil_view = new_depth_stencil_view;
1204                 unsigned i;
1205                 for(i = 0; i < count; ++i)
1206                 {
1207                         render_target_views[i] = new_render_target_views[i];
1208 #if API >= 11
1209                         om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1210 #endif
1211                 }
1212                 for(; i < num_render_target_views; ++i)
1213                         render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1214                 num_render_target_views = count;
1215                 set_framebuffer();
1216         }
1217
1218         virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1219                 unsigned count,
1220                 ID3D11RenderTargetView **out_render_target_views,
1221                 ID3D11DepthStencilView  **out_depth_stencil_view)
1222         {
1223                 SYNCHRONIZED;
1224                 if(out_render_target_views)
1225                 {
1226                         unsigned i;
1227                         for(i = 0; i < std::min(num_render_target_views, count); ++i)
1228                                 out_render_target_views[i] = render_target_views[i].ref();
1229
1230                         for(; i < count; ++i)
1231                                 out_render_target_views[i] = 0;
1232                 }
1233
1234                 if(out_depth_stencil_view)
1235                         *out_depth_stencil_view = depth_stencil_view.ref();
1236         }
1237
1238 #if API >= 11
1239         /* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1240         virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1241                 unsigned rtv_count,
1242                 ID3D11RenderTargetView *const *new_render_target_views,
1243                 ID3D11DepthStencilView  *new_depth_stencil_view,
1244                 unsigned uav_start,
1245                 unsigned uav_count,
1246                 ID3D11UnorderedAccessView *const *new_unordered_access_views,
1247                 const unsigned *new_uav_initial_counts)
1248         {
1249                 SYNCHRONIZED;
1250                 if(rtv_count != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1251                         OMSetRenderTargets(rtv_count, new_render_target_views, new_depth_stencil_view);
1252
1253                 if(uav_count != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1254                 {
1255                         for(unsigned i = 0; i < uav_count; ++i)
1256                         {
1257                                 om_unordered_access_views[uav_start + i] = new_unordered_access_views[i];
1258                                 render_target_views[uav_start + i] = (ID3D11RenderTargetView*)0;
1259                         }
1260                 }
1261         }
1262
1263         virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1264                 unsigned rtv_count,
1265                 ID3D11RenderTargetView **out_render_target_views,
1266                 ID3D11DepthStencilView  **out_depth_stencil_view,
1267                 unsigned uav_start,
1268                 unsigned uav_count,
1269                 ID3D11UnorderedAccessView **out_unordered_access_views)
1270         {
1271                 SYNCHRONIZED;
1272                 if(out_render_target_views)
1273                         OMGetRenderTargets(rtv_count, out_render_target_views, out_depth_stencil_view);
1274
1275                 if(out_unordered_access_views)
1276                 {
1277                         for(unsigned i = 0; i < uav_count; ++i)
1278                                 out_unordered_access_views[i] = om_unordered_access_views[uav_start + i].ref();
1279                 }
1280         }
1281 #endif
1282
1283         virtual void STDMETHODCALLTYPE SOSetTargets(
1284                 unsigned count,
1285                 ID3D11Buffer *const *new_so_targets,
1286                 const unsigned *new_offsets)
1287         {
1288                 SYNCHRONIZED;
1289                 unsigned i;
1290                 if(!new_so_targets)
1291                         count = 0;
1292                 bool changed = false;
1293                 for(i = 0; i < count; ++i)
1294                 {
1295                         ID3D11Buffer* buffer = new_so_targets[i];
1296                         if(buffer != so_targets[i].p || new_offsets[i] != so_offsets[i])
1297                         {
1298                                 so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1299                                 so_targets[i] = buffer;
1300                                 so_offsets[i] = new_offsets[i];
1301                                 changed = true;
1302                         }
1303                 }
1304                 for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1305                 {
1306                         if(so_targets[i].p || so_offsets[i])
1307                         {
1308                                 changed = true;
1309                                 so_targets[i] = (ID3D11Buffer*)0;
1310                                 so_offsets[i] = 0;
1311                         }
1312                 }
1313                 num_so_targets = count;
1314
1315                 if(changed && caps.so)
1316                         pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1317         }
1318
1319         virtual void STDMETHODCALLTYPE SOGetTargets(
1320                 unsigned count,
1321                 ID3D11Buffer **out_so_targets
1322 #if API < 11
1323                 , UINT *out_offsets
1324 #endif
1325                 )
1326         {
1327                 SYNCHRONIZED;
1328                 for(unsigned i = 0; i < count; ++i)
1329                 {
1330                         out_so_targets[i] = so_targets[i].ref();
1331 #if API < 11
1332                         out_offsets[i] = so_offsets[i];
1333 #endif
1334                 }
1335         }
1336
1337         virtual void STDMETHODCALLTYPE Begin(
1338                 ID3D11Asynchronous *async)
1339         {
1340                 SYNCHRONIZED;
1341                 if(caps.queries)
1342                         pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1343         }
1344
1345         virtual void STDMETHODCALLTYPE End(
1346                 ID3D11Asynchronous *async)
1347         {
1348                 SYNCHRONIZED;
1349                 if(caps.queries)
1350                         pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)async)->query);
1351         }
1352
1353         virtual HRESULT STDMETHODCALLTYPE GetData(
1354                 ID3D11Asynchronous *iasync,
1355                 void *out_data,
1356                 unsigned data_size,
1357                 unsigned get_data_flags)
1358         {
1359                 SYNCHRONIZED;
1360                 if(!caps.queries)
1361                         return E_NOTIMPL;
1362
1363                 GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)iasync;
1364                 void* tmp_data = alloca(async->data_size);
1365                 boolean ret = pipe->get_query_result(pipe, async->query, !(get_data_flags & D3D11_ASYNC_GETDATA_DONOTFLUSH), tmp_data);
1366                 if(out_data)
1367                         memcpy(out_data, tmp_data, std::min(async->data_size, data_size));
1368                 return ret ? S_OK : S_FALSE;
1369         }
1370
1371         void set_render_condition()
1372         {
1373                 if(caps.render_condition)
1374                 {
1375                         if(!render_predicate)
1376                                 pipe->render_condition(pipe, 0, 0);
1377                         else
1378                         {
1379                                 GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1380                                 if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1381                                 {
1382                                         unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1383                                         pipe->render_condition(pipe, predicate->query, mode);
1384                                 }
1385                                 else
1386                                 {
1387                                         /* TODO: add inverted predication to Gallium*/
1388                                         pipe->render_condition(pipe, 0, 0);
1389                                 }
1390                         }
1391                 }
1392         }
1393
1394         virtual void STDMETHODCALLTYPE SetPredication(
1395                 ID3D11Predicate *new_predicate,
1396                 BOOL new_predicate_value)
1397         {
1398                 SYNCHRONIZED;
1399                 if(render_predicate.p != new_predicate || render_predicate_value != new_predicate_value)
1400                 {
1401                         render_predicate = new_predicate;
1402                         render_predicate_value = new_predicate_value;
1403                         set_render_condition();
1404                 }
1405         }
1406
1407         virtual void STDMETHODCALLTYPE GetPredication(
1408                 ID3D11Predicate **out_predicate,
1409                 BOOL *out_predicate_value)
1410         {
1411                 SYNCHRONIZED;
1412                 if(out_predicate)
1413                         *out_predicate = render_predicate.ref();
1414                 if(out_predicate_value)
1415                         *out_predicate_value = render_predicate_value;
1416         }
1417
1418         static unsigned d3d11_subresource_to_level(struct pipe_resource* resource, unsigned subresource)
1419         {
1420                 if(subresource <= resource->last_level)
1421                 {
1422                         return subresource;
1423                 }
1424                 else
1425                 {
1426                         unsigned levels = resource->last_level + 1;
1427                         return subresource % levels;
1428                 }
1429         }
1430
1431         static unsigned d3d11_subresource_to_face(struct pipe_resource* resource, unsigned subresource)
1432         {
1433                 if(subresource <= resource->last_level)
1434                 {
1435                         return 0;
1436                 }
1437                 else
1438                 {
1439                         unsigned levels = resource->last_level + 1;
1440                         return subresource / levels;
1441                 }
1442         }
1443                 
1444         
1445         /* TODO: deferred contexts will need a different implementation of this,
1446          * because we can't put the transfer info into the resource itself.
1447          * Also, there are very different restrictions, for obvious reasons.
1448          */
1449         virtual HRESULT STDMETHODCALLTYPE Map(
1450                 ID3D11Resource *iresource,
1451                 unsigned subresource,
1452                 D3D11_MAP map_type,
1453                 unsigned map_flags,
1454                 D3D11_MAPPED_SUBRESOURCE *mapped_resource)
1455         {
1456                 SYNCHRONIZED;
1457                 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1458                 if(resource->transfers.count(subresource))
1459                         return E_FAIL;
1460                 unsigned level = d3d11_subresource_to_level(resource->resource, subresource);
1461                 unsigned face = d3d11_subresource_to_face(resource->resource, subresource);
1462                 pipe_box box = d3d11_to_pipe_box(resource->resource, level, 0);
1463                 /* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1464                 unsigned usage = 0;
1465                 if(map_type == D3D11_MAP_READ)
1466                         usage = PIPE_TRANSFER_READ;
1467                 else if(map_type == D3D11_MAP_WRITE)
1468                         usage = PIPE_TRANSFER_WRITE;
1469                 else if(map_type == D3D11_MAP_READ_WRITE)
1470                         usage = PIPE_TRANSFER_READ_WRITE;
1471                 else if(map_type == D3D11_MAP_WRITE_DISCARD)
1472                         usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1473                 else if(map_type == D3D11_MAP_WRITE_NO_OVERWRITE)
1474                         usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1475                 else
1476                         return E_INVALIDARG;
1477                 if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1478                         usage |= PIPE_TRANSFER_DONTBLOCK;
1479                 struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, level, usage, &box);
1480                 if(!transfer) {
1481                         if(map_type & D3D10_MAP_FLAG_DO_NOT_WAIT)
1482                                 return DXGI_ERROR_WAS_STILL_DRAWING;
1483                         else
1484                                 return E_FAIL;
1485                 }
1486                 resource->transfers[subresource] = transfer;
1487                 mapped_resource->pData = pipe->transfer_map(pipe, transfer);
1488                 mapped_resource->RowPitch = transfer->stride;
1489                 mapped_resource->DepthPitch = transfer->layer_stride;
1490                 return S_OK;
1491         }
1492
1493         virtual void STDMETHODCALLTYPE Unmap(
1494                 ID3D11Resource *iresource,
1495                 unsigned subresource)
1496         {
1497                 SYNCHRONIZED;
1498                 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1499                 std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(subresource);
1500                 if(i != resource->transfers.end())
1501                 {
1502                         pipe->transfer_unmap(pipe, i->second);
1503                         pipe->transfer_destroy(pipe, i->second);
1504                         resource->transfers.erase(i);
1505                 }
1506         }
1507
1508         virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1509                 ID3D11Resource *dst_resource,
1510                 unsigned dst_subresource,
1511                 unsigned dst_x,
1512                 unsigned dst_y,
1513                 unsigned dst_z,
1514                 ID3D11Resource *src_resource,
1515                 unsigned src_subresource,
1516                 const D3D11_BOX *src_box)
1517         {
1518                 SYNCHRONIZED;
1519                 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1520                 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1521                 unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1522                 unsigned dst_face = d3d11_subresource_to_face(dst->resource, dst_subresource);
1523                 unsigned src_level = d3d11_subresource_to_level(src->resource, src_subresource);
1524                 unsigned src_face = d3d11_subresource_to_face(src->resource, src_subresource);
1525                 /* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1526                 pipe_box box = d3d11_to_pipe_box(src->resource, src_level, src_box);
1527                 {
1528                         pipe->resource_copy_region(pipe,
1529                                 dst->resource, dst_level, dst_x, dst_y, dst_z,
1530                                 src->resource, src_level, &box);
1531                 }
1532         }
1533
1534         virtual void STDMETHODCALLTYPE CopyResource(
1535                 ID3D11Resource *dst_resource,
1536                 ID3D11Resource *src_resource)
1537         {
1538                 SYNCHRONIZED;
1539                 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1540                 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1541                 unsigned level;
1542                 for(level = 0; level <= dst->resource->last_level; ++level)
1543                 {
1544                         unsigned layers = 1;
1545                         pipe_box box;
1546                         if (dst->resource->target == PIPE_TEXTURE_CUBE)
1547                                 layers = 6;
1548                         else if (dst->resource->target == PIPE_TEXTURE_3D)
1549                                 layers = u_minify(dst->resource->depth0, level);
1550                         /* else layers = dst->resource->array_size; */
1551                         box.x = box.y = box.z = 0;
1552                         box.width = u_minify(dst->resource->width0, level);
1553                         box.height = u_minify(dst->resource->height0, level);
1554                         box.depth = layers;
1555                         pipe->resource_copy_region(pipe,
1556                                                    dst->resource, level, 0, 0, 0,
1557                                                    src->resource, level, &box);
1558                 }
1559         }
1560
1561         virtual void STDMETHODCALLTYPE UpdateSubresource(
1562                 ID3D11Resource *dst_resource,
1563                 unsigned dst_subresource,
1564                 const D3D11_BOX *pDstBox,
1565                 const void *pSrcData,
1566                 unsigned src_row_pitch,
1567                 unsigned src_depth_pitch)
1568         {
1569                 SYNCHRONIZED;
1570                 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1571                 unsigned dst_level = d3d11_subresource_to_level(dst->resource, dst_subresource);
1572                 /* XXX the translation from subresource to level/face(zslice/array layer) isn't quite right */
1573                 pipe_box box = d3d11_to_pipe_box(dst->resource, dst_level, pDstBox);
1574                 pipe->transfer_inline_write(pipe, dst->resource, dst_level, PIPE_TRANSFER_WRITE, &box, pSrcData, src_row_pitch, src_depth_pitch);
1575         }
1576
1577 #if API >= 11
1578         virtual void STDMETHODCALLTYPE CopyStructureCount(
1579                 ID3D11Buffer *dst_buffer,
1580                 unsigned dst_aligned_byte_offset,
1581                 ID3D11UnorderedAccessView *src_view)
1582         {
1583                 SYNCHRONIZED;
1584         }
1585 #endif
1586
1587         virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1588                 ID3D11RenderTargetView *render_target_view,
1589                 const float color[4])
1590         {
1591                 SYNCHRONIZED;
1592                 GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)render_target_view);
1593                 pipe->clear_render_target(pipe, view->object, color, 0, 0, view->object->width, view->object->height);
1594         }
1595
1596         virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1597                 ID3D11DepthStencilView  *depth_stencil_view,
1598                 unsigned clear_flags,
1599                 float depth,
1600                 UINT8 stencil)
1601         {
1602                 SYNCHRONIZED;
1603                 GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)depth_stencil_view);
1604                 unsigned flags = 0;
1605                 if(clear_flags & D3D11_CLEAR_DEPTH)
1606                         flags |= PIPE_CLEAR_DEPTH;
1607                 if(clear_flags & D3D11_CLEAR_STENCIL)
1608                         flags |= PIPE_CLEAR_STENCIL;
1609                 pipe->clear_depth_stencil(pipe, view->object, flags, depth, stencil, 0, 0, view->object->width, view->object->height);
1610         }
1611
1612 #if API >= 11
1613         virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1614                 ID3D11UnorderedAccessView *unordered_access_view,
1615                 const unsigned values[4])
1616         {
1617                 SYNCHRONIZED;
1618         }
1619
1620         virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1621                         ID3D11UnorderedAccessView *unordered_access_view,
1622                         const float values[4])
1623         {
1624                 SYNCHRONIZED;
1625         }
1626 #endif
1627
1628         void restore_gallium_state_blit_only()
1629         {
1630                 pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1631                 pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1632                 pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1633                 pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1634                 pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1635                 pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1636                 if(caps.gs)
1637                         pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1638                 set_framebuffer();
1639                 set_viewport();
1640                 set_clip();
1641                 set_render_condition();
1642                 // TODO: restore stream output
1643
1644                 update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1645         }
1646
1647         virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1648         {
1649                 SYNCHRONIZED;
1650                 restore_gallium_state_blit_only();
1651         }
1652
1653         virtual void STDMETHODCALLTYPE GenerateMips(
1654                 ID3D11ShaderResourceView *shader_resource_view)
1655         {
1656                 SYNCHRONIZED;
1657
1658                 GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)shader_resource_view;
1659                 if(caps.gs)
1660                         pipe->bind_gs_state(pipe, 0);
1661                 if(caps.so)
1662                         pipe->bind_stream_output_state(pipe, 0);
1663                 if(pipe->render_condition)
1664                         pipe->render_condition(pipe, 0, 0);
1665                 util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1666                 restore_gallium_state_blit_only();
1667         }
1668
1669         virtual void STDMETHODCALLTYPE RestoreGalliumState()
1670         {
1671                 SYNCHRONIZED;
1672                 restore_gallium_state_blit_only();
1673
1674                 set_index_buffer();
1675                 set_stencil_ref();
1676                 pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1677                 pipe->set_sample_mask(pipe, sample_mask);
1678
1679                 for(unsigned s = 0; s < 3; ++s)
1680                 {
1681                         unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1682                         for(unsigned i = 0; i < num; ++i)
1683                                 pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1684                 }
1685
1686                 if(caps.so)
1687                         pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1688
1689                 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1690                 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1691
1692                 set_scissor();
1693         }
1694
1695 #if API >= 11
1696         /* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1697         virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1698                 ID3D11Resource *iresource,
1699                 float min_lod)
1700         {
1701                 SYNCHRONIZED;
1702                 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1703                 if(resource->min_lod != min_lod)
1704                 {
1705                         // TODO: actually do anything?
1706                         resource->min_lod = min_lod;
1707                 }
1708         }
1709
1710         virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1711                 ID3D11Resource *iresource)
1712         {
1713                 SYNCHRONIZED;
1714                 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)iresource;
1715                 return resource->min_lod;
1716         }
1717 #endif
1718
1719         virtual void STDMETHODCALLTYPE ResolveSubresource(
1720                 ID3D11Resource *dst_resource,
1721                 unsigned dst_subresource,
1722                 ID3D11Resource *src_resource,
1723                 unsigned src_subresource,
1724                 DXGI_FORMAT format)
1725         {
1726                 SYNCHRONIZED;
1727                 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
1728                 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
1729                 unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
1730                 unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource);
1731                 pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer);
1732         }
1733
1734 #if API >= 11
1735         virtual void STDMETHODCALLTYPE ExecuteCommandList(
1736                 ID3D11CommandList *command_list,
1737                 BOOL restore_context_state)
1738         {
1739                 SYNCHRONIZED;
1740         }
1741
1742         virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1743                 BOOL restore_deferred_context_state,
1744                 ID3D11CommandList **out_command_list)
1745         {
1746                 SYNCHRONIZED;
1747                 return E_NOTIMPL;
1748         }
1749 #endif
1750
1751         virtual void STDMETHODCALLTYPE ClearState(void)
1752         {
1753                 /* we don't take a lock here because we would deadlock otherwise
1754                  * TODO: this is probably incorrect, because ClearState should likely be atomic.
1755                  * However, I can't think of any correct usage that would be affected by this
1756                  * being non-atomic, and making this atomic is quite expensive and complicates
1757                  * the code
1758                  */
1759
1760                 // we qualify all calls so that we avoid virtual dispatch and might get them inlined
1761                 // TODO: make sure all this gets inlined, which might require more compiler flags
1762                 // TODO: optimize this
1763 #if API >= 11
1764                 GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1765                 GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1766                 GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1767                 GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1768                 GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1769                 GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1770 #else
1771                 GalliumD3D11DeviceContext::PSSetShader(0);
1772                 GalliumD3D11DeviceContext::GSSetShader(0);
1773                 GalliumD3D11DeviceContext::VSSetShader(0);
1774 #endif
1775
1776                 GalliumD3D11DeviceContext::IASetInputLayout(0);
1777                 GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1778                 GalliumD3D11DeviceContext::RSSetState(0);
1779                 GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1780                 GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1781                 GalliumD3D11DeviceContext::SetPredication(0, 0);
1782                 GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1783
1784                 GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1785                 GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1786                 GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1787 #if API >= 11
1788                 GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1789                 GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1790                 GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1791 #endif
1792
1793                 GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1794 #if API >= 11
1795                 GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1796 #else
1797                 GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1798 #endif
1799                 GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1800
1801                 GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1802                 GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1803                 GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1804 #if API >= 11
1805                 GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1806                 GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1807                 GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1808 #endif
1809
1810                 GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1811                 GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1812                 GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1813 #if API >= 11
1814                 GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1815                 GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1816                 GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1817 #endif
1818
1819                 GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1820                 GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1821         }
1822
1823         virtual void STDMETHODCALLTYPE Flush(void)
1824         {
1825                 SYNCHRONIZED;
1826                 pipe->flush(pipe, 0);
1827         }
1828
1829         /* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1830          * cleanly unbound from the pipeline.
1831          * In Direct3D 11, the pipeline holds a reference.
1832          *
1833          * Note that instead of always scanning the pipeline on destruction, we could
1834          * maintain the internal reference count on DirectX 10 and use it to check if an
1835          * object is still bound.
1836          * Presumably, on average, scanning is faster if the application is well written.
1837          */
1838 #if API < 11
1839 #define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1840         void Unbind##name(ID3D11##name* state) \
1841         { \
1842                 SYNCHRONIZED; \
1843                 if((void*)state == (void*)member.p) \
1844                 { \
1845                         member.p = 0; \
1846                         pipe->bind_##gallium##_state(pipe, default_##def); \
1847                 } \
1848         }
1849         IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1850         IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1851         IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1852         IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1853         IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1854         IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1855         IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1856
1857         void UnbindPredicate(ID3D11Predicate* predicate)
1858         {
1859                 SYNCHRONIZED;
1860                 if(predicate == render_predicate)
1861                 {
1862                         render_predicate.p = NULL;
1863                         render_predicate_value = 0;
1864                         pipe->render_condition(pipe, 0, 0);
1865                 }
1866         }
1867
1868         void UnbindSamplerState(ID3D11SamplerState* state)
1869         {
1870                 SYNCHRONIZED;
1871                 for(unsigned s = 0; s < D3D11_STAGES; ++s)
1872                 {
1873                         for(unsigned i = 0; i < num_samplers[s]; ++i)
1874                         {
1875                                 if(samplers[s][i] == state)
1876                                 {
1877                                         samplers[s][i].p = NULL;
1878                                         sampler_csos[s].v[i] = NULL;
1879                                         update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1880                                 }
1881                         }
1882                 }
1883         }
1884
1885         void UnbindBuffer(ID3D11Buffer* buffer)
1886         {
1887                 SYNCHRONIZED;
1888                 if(buffer == index_buffer)
1889                 {
1890                         index_buffer.p = 0;
1891                         index_format = DXGI_FORMAT_UNKNOWN;
1892                         index_offset = 0;
1893                         struct pipe_index_buffer ib;
1894                         memset(&ib, 0, sizeof(ib));
1895                         pipe->set_index_buffer(pipe, &ib);
1896                 }
1897
1898                 for(unsigned i = 0; i < num_vertex_buffers; ++i)
1899                 {
1900                         if(buffer == input_buffers[i])
1901                         {
1902                                 input_buffers[i].p = 0;
1903                                 memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1904                                 update_flags |= UPDATE_VERTEX_BUFFERS;
1905                         }
1906                 }
1907
1908                 for(unsigned s = 0; s < D3D11_STAGES; ++s)
1909                 {
1910                         for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1911                         {
1912                                 if(constant_buffers[s][i] == buffer)
1913                                 {
1914                                         constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1915                                         pipe->set_constant_buffer(pipe, s, i, NULL);
1916                                 }
1917                         }
1918                 }
1919         }
1920
1921         void UnbindDepthStencilView(ID3D11DepthStencilView * view)
1922         {
1923                 SYNCHRONIZED;
1924                 if(view == depth_stencil_view)
1925                 {
1926                         depth_stencil_view.p = NULL;
1927                         set_framebuffer();
1928                 }
1929         }
1930
1931         void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1932         {
1933                 SYNCHRONIZED;
1934                 bool any_bound = false;
1935                 for(unsigned i = 0; i < num_render_target_views; ++i)
1936                 {
1937                         if(render_target_views[i] == view)
1938                         {
1939                                 render_target_views[i].p = NULL;
1940                                 any_bound = true;
1941                         }
1942                 }
1943                 if(any_bound)
1944                         set_framebuffer();
1945         }
1946
1947         void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1948         {
1949                 SYNCHRONIZED;
1950                 for(unsigned s = 0; s < D3D11_STAGES; ++s)
1951                 {
1952                         for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1953                         {
1954                                 if(shader_resource_views[s][i] == view)
1955                                 {
1956                                         shader_resource_views[s][i].p = NULL;
1957                                         sampler_views[s][i] = NULL;
1958                                         update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1959                                 }
1960                         }
1961                 }
1962         }
1963 #endif
1964
1965 #undef SYNCHRONIZED
1966 };
1967
1968 #if API >= 11
1969 /* This approach serves two purposes.
1970  * First, we don't want to do an atomic operation to manipulate the reference
1971  * count every time something is bound/unbound to the pipeline, since they are
1972  * expensive.
1973  * Fortunately, the immediate context can only be used by a single thread, so
1974  * we don't have to use them, as long as a separate reference count is used
1975  * (see dual_refcnt_t).
1976  *
1977  * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1978  * garbage cycle.
1979  * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
1980  * one for each external reference count, while internal nonatomic_add_ref doesn't
1981  * add any.
1982  *
1983  * Note that ideally we would like to eliminate the non-atomic op too, but this is more
1984  * complicated, since we would either need to use garbage collection and give up
1985  * deterministic destruction (especially bad for large textures), or scan the whole
1986  * pipeline state every time the reference count of object drops to 0, which risks
1987  * pathological slowdowns.
1988  *
1989  * Since this microoptimization should matter relatively little, let's avoid it for now.
1990  *
1991  * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
1992  * Eliminating the atomic ops for deferred contexts seems substantially harder.
1993  * This might be a problem if they are used in a one-shot multithreaded rendering
1994  * fashion, where SMP cacheline bouncing on the reference count may be visible.
1995  *
1996  * The idea would be to attach a structure of reference counts indexed by deferred
1997  * context id to each object. Ideally, this should be organized like ext2 block pointers.
1998  *
1999  * Every deferred context would get a reference count in its own cacheline.
2000  * The external count is protected by a lock bit, and there is also a "lock bit" in each
2001  * internal count.
2002  *
2003  * When the external count has to be dropped to 0, the lock bit is taken and all internal
2004  * reference counts are scanned, taking a count of them. A flag would also be set on them.
2005  * Deferred context manipulation would notice the flag, and update the count.
2006  * Once the count goes to zero, the object is freed.
2007  *
2008  * The problem of this is that if the external reference count ping-pongs between
2009  * zero and non-zero, the scans will take a lot of time.
2010  *
2011  * The idea to solve this is to compute the scans in a binary-tree like fashion, where
2012  * each binary tree node would have a "determined bit", which would be invalidated
2013  * by manipulations.
2014  *
2015  * However, all this complexity might actually be a loss in most cases, so let's just
2016  * stick to a single atomic refcnt for now.
2017  *
2018  * Also, we don't even support deferred contexts yet, so this can wait.
2019  */
2020 struct nonatomic_device_child_ptr_traits
2021 {
2022         static void add_ref(void* p)
2023         {
2024                 if(p)
2025                         ((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
2026         }
2027
2028         static void release(void* p)
2029         {
2030                 if(p)
2031                         ((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
2032         }
2033 };
2034
2035 struct GalliumD3D11ImmediateDeviceContext
2036         : public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
2037 {
2038         GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
2039         : GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>(device, pipe, context_flags)
2040         {
2041                 // not necessary, but tests that the API at least basically works
2042                 ClearState();
2043         }
2044
2045         /* we do this since otherwise we would have a garbage cycle between this and the device */
2046         virtual ULONG STDMETHODCALLTYPE AddRef()
2047         {
2048                 return this->device->AddRef();
2049         }
2050
2051         virtual ULONG STDMETHODCALLTYPE Release()
2052         {
2053                 return this->device->Release();
2054         }
2055
2056         virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2057         {
2058                 return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2059         }
2060 };
2061
2062 static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2063 {
2064         return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2065 }
2066
2067 static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2068 {
2069         ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2070 }
2071
2072 static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2073 {
2074         ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2075 }
2076
2077 static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2078 {
2079         delete (GalliumD3D11ImmediateDeviceContext*)context;
2080 }
2081 #endif