From 65303c49d1efc996f5cc9dafa2768d0225b76f1c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 20 Oct 2011 18:32:23 +0200 Subject: [PATCH] d3d1x: switch to TGSI SAMPLE opcodes We don't want to clutter the code or handicap new hardware for the sake of ancient GPUs on which d3d1x won't ever be used, much less be fully compliant, anyway. --- .../state_trackers/d3d1x/d3d1xshader/include/sm4.h | 11 -- .../d3d1x/d3d1xshader/src/sm4_analyze.cpp | 64 ---------- .../state_trackers/d3d1x/gd3d11/d3d11_context.h | 61 +++------ .../state_trackers/d3d1x/gd3d11/d3d11_objects.h | 3 - .../state_trackers/d3d1x/gd3d11/d3d11_screen.h | 6 - .../state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp | 138 ++++++--------------- 6 files changed, 61 insertions(+), 222 deletions(-) diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h index d3ca274..1db6bab 100644 --- a/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/include/sm4.h @@ -366,15 +366,6 @@ struct sm4_program */ std::vector cf_insn_linked; - /* NOTE: sampler 0 is the unnormalized nearest sampler for LD/LD_MS, while - * sampler 1 is user-specified sampler 0 - */ - bool resource_sampler_slots_assigned; - std::vector slot_to_resource; - std::vector slot_to_sampler; - std::map, int> resource_sampler_to_slot; - std::map resource_to_slot; - bool labels_found; std::vector label_to_insn_num; @@ -382,7 +373,6 @@ struct sm4_program { memset(&version, 0, sizeof(version)); labels_found = false; - resource_sampler_slots_assigned = false; } ~sm4_program() @@ -404,7 +394,6 @@ sm4_program* sm4_parse(void* tokens, int size); bool sm4_link_cf_insns(sm4_program& program); bool sm4_find_labels(sm4_program& program); -bool sm4_allocate_resource_sampler_pairs(sm4_program& program); #endif /* SM4_H_ */ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp index 7903d54..ac7bfdc 100644 --- a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_analyze.cpp @@ -120,67 +120,3 @@ bool sm4_find_labels(sm4_program& program) program.labels_found = true; return true; } - -bool sm4_allocate_resource_sampler_pairs(sm4_program& program) -{ - if(program.resource_sampler_slots_assigned) - return true; - - std::set > pairs; - std::set resinfos; - - for(unsigned insn_num = 0; insn_num < program.insns.size(); ++insn_num) - { - int resource = -1; - int sampler = -2; - for(unsigned i = 0; i < program.insns[insn_num]->num_ops; ++i) - { - sm4_op* op = program.insns[insn_num]->ops[i].get(); - if(op) - { - if(op->file == SM4_FILE_RESOURCE) - { - if(!op->has_simple_index() || resource >= 0) - return false; - resource = (int)op->indices[0].disp; - } - if(op->file == SM4_FILE_SAMPLER) - { - if(!op->has_simple_index() || sampler >= 0) - return false; - sampler = (int)op->indices[0].disp; - } - } - } - - unsigned opcode = program.insns[insn_num]->opcode; - if(opcode == SM4_OPCODE_LD || opcode == SM4_OPCODE_LD_MS) - sampler = -1; - if(sampler >= -1 && resource >= 0) - pairs.insert(std::make_pair(resource, sampler)); - if(opcode == SM4_OPCODE_RESINFO) - resinfos.insert(resource); - } - - for(std::set >::iterator i = pairs.begin(); i != pairs.end(); ++i) - { - program.resource_sampler_to_slot[*i] = program.slot_to_resource.size(); - if(!program.resource_to_slot.count(i->first)) - { - program.resource_to_slot[i->first] = program.slot_to_resource.size(); - resinfos.erase(i->first); - } - program.slot_to_resource.push_back(i->first); - program.slot_to_sampler.push_back(i->second); - } - - for(std::set::iterator i = resinfos.begin(); i != resinfos.end(); ++i) - { - program.resource_sampler_to_slot[std::make_pair(*i, -1)] = program.slot_to_resource.size(); - program.resource_to_slot[*i] = program.slot_to_resource.size(); - program.slot_to_resource.push_back(*i); - program.slot_to_sampler.push_back(-1); - } - program.resource_sampler_slots_assigned = true; - return true; -} diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index afa4773..2cda1ad 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -83,20 +83,15 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl void* default_depth_stencil; void* default_blend; void* default_sampler; - void* ld_sampler; - void * default_shaders[D3D11_STAGES]; + void* default_shaders[D3D11_STAGES]; // derived state int primitive_mode; struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT]; struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT]; - struct - { - void* ld; // accessed with a -1 index from v - void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; - } sampler_csos[D3D11_STAGES]; - struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT]; + void* sampler_csos[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT]; + struct pipe_resource* buffers[D3D11_SO_BUFFER_SLOT_COUNT]; unsigned num_shader_resource_views[D3D11_STAGES]; unsigned num_samplers[D3D11_STAGES]; unsigned num_vertex_buffers; @@ -246,14 +241,10 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl samplerd.min_lod = -FLT_MAX; samplerd.max_lod = FLT_MAX; samplerd.max_anisotropy = 1; - ld_sampler = pipe->create_sampler_state(pipe, &samplerd); for(unsigned s = 0; s < D3D11_STAGES; ++s) - { - sampler_csos[s].ld = ld_sampler; for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i) - sampler_csos[s].v[i] = default_sampler; - } + sampler_csos[s][i] = default_sampler; // TODO: should this really be empty shaders, or should they be all-passthrough? memset(default_shaders, 0, sizeof(default_shaders)); @@ -294,7 +285,6 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl if(pipe->bind_geometry_sampler_states) pipe->bind_geometry_sampler_states(pipe, 0, 0); pipe->delete_sampler_state(pipe, default_sampler); - pipe->delete_sampler_state(pipe, ld_sampler); pipe->bind_fs_state(pipe, 0); pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]); @@ -393,14 +383,14 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl if(samplers[s][start + i].p != samps[i]) { samplers[s][start + i] = samps[i]; - sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler; + sampler_csos[s][start + i] = samps[i] ? samps[i]->object : default_sampler; last_different = i; } - if(last_different >= 0) - { - num_samplers[s] = std::max(num_samplers[s], start + last_different + 1); - update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s); - } + } + if(last_different >= 0) + { + num_samplers[s] = std::max(num_samplers[s], start + last_different + 1); + update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s); } } @@ -517,22 +507,17 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl --num_shader_resource_views[s]; if((1 << s) & caps.stages_with_sampling) { - struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS]; - unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0; - for(unsigned i = 0; i < num_views_to_bind; ++i) - { - views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]]; - } + const unsigned num_views_to_bind = num_shader_resource_views[s]; switch(s) { case PIPE_SHADER_VERTEX: - pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind); + pipe->set_vertex_sampler_views(pipe, num_views_to_bind, sampler_views[s]); break; case PIPE_SHADER_FRAGMENT: - pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind); + pipe->set_fragment_sampler_views(pipe, num_views_to_bind, sampler_views[s]); break; case PIPE_SHADER_GEOMETRY: - pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind); + pipe->set_geometry_sampler_views(pipe, num_views_to_bind, sampler_views[s]); break; } } @@ -540,27 +525,21 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s))) { - while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1]) + while(num_samplers[s] && !sampler_csos[s][num_samplers[s] - 1]) --num_samplers[s]; if((1 << s) & caps.stages_with_sampling) { - void* samplers_to_bind[PIPE_MAX_SAMPLERS]; - unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0; - for(unsigned i = 0; i < num_samplers_to_bind; ++i) - { - // index can be -1 to access sampler_csos[s].ld - samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]); - } + const unsigned num_samplers_to_bind = num_samplers[s]; switch(s) { case PIPE_SHADER_VERTEX: - pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); + pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]); break; case PIPE_SHADER_FRAGMENT: - pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); + pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]); break; case PIPE_SHADER_GEOMETRY: - pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind); + pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, sampler_csos[s]); break; } } @@ -1918,7 +1897,7 @@ changed: if(samplers[s][i] == state) { samplers[s][i].p = NULL; - sampler_csos[s].v[i] = NULL; + sampler_csos[s][i] = NULL; update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)); } } diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h index 836603e..488539f 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_objects.h @@ -216,9 +216,6 @@ struct GalliumD3D11RasterizerState : public GalliumD3D11RasterizerStateBase template struct GalliumD3D11Shader : public GalliumD3D11Object { - std::vector slot_to_resource; - std::vector slot_to_sampler; - GalliumD3D11Shader(GalliumD3D11Screen* device, void* object) : GalliumD3D11Object(device, object) {} diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h index f16492f..c6e9633 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_screen.h @@ -1225,12 +1225,6 @@ struct GalliumD3D11ScreenImpl : public GalliumD3D11Screen break; } - if(shader) - { - shader->slot_to_resource = sm4->slot_to_resource; - shader->slot_to_sampler = sm4->slot_to_sampler; - } - free((void*)tgsi_shader.tokens); return shader; } diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp index 615ce8c..fd54a67 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp +++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp @@ -78,6 +78,7 @@ struct sm4_to_tgsi_converter std::vector temps; std::vector outputs; std::vector inputs; + std::vector resources; std::vector samplers; std::vector > targets; // first is normal, second shadow/comparison std::vector sampler_modes; // 0 = normal, 1 = shadow/comparison @@ -191,24 +192,27 @@ struct sm4_to_tgsi_converter return (int)op.indices[0].disp; } - int _texslot(bool have_sampler = true) + unsigned tex_target(unsigned resource, unsigned sampler) { - std::map, int>::iterator i; - i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1)); - check(i != program.resource_sampler_to_slot.end()); - return i->second; + unsigned shadow = sampler_modes[sampler]; + unsigned target = shadow ? targets[resource].second : targets[resource].first; + check(target); + return target; } - unsigned tex_target(unsigned texslot) + enum pipe_type res_return_type(unsigned type) { - unsigned mode = sampler_modes[program.slot_to_sampler[texslot]]; - unsigned target; - if(mode) - target = targets[program.slot_to_resource[texslot]].second; - else - target = targets[program.slot_to_resource[texslot]].first; - check(target); - return target; + switch(type) + { + case D3D_RETURN_TYPE_UNORM: return PIPE_TYPE_UNORM; + case D3D_RETURN_TYPE_SNORM: return PIPE_TYPE_SNORM; + case D3D_RETURN_TYPE_SINT: return PIPE_TYPE_SINT; + case D3D_RETURN_TYPE_UINT: return PIPE_TYPE_UINT; + case D3D_RETURN_TYPE_FLOAT: return PIPE_TYPE_FLOAT; + default: + fail("invalid resource return type"); + return PIPE_TYPE_FLOAT; + } } std::vector insn_tmps; @@ -440,102 +444,37 @@ struct sm4_to_tgsi_converter } break; case SM4_OPCODE_RESINFO: - { - std::map::iterator i; - i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2)); - check(i != program.resource_to_slot.end()); - unsigned texslot = i->second; - - // no driver actually provides this, unfortunately - ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); + // TODO: return type + ureg_RESINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); + break; + // TODO: sample index, texture offset + case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg + ureg_LOAD(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); break; - }; - // TODO: sample offset, sample index - case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch) case SM4_OPCODE_LD_MS: - { - unsigned texslot = _texslot(false); - unsigned dim = 0; - switch(targets[texslot].first) - { - case TGSI_TEXTURE_1D: - dim = 1; - break; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - dim = 2; - break; - case TGSI_TEXTURE_3D: - dim = 3; - break; - default: - check(0); - } - struct ureg_dst tmp = _tmp(); - if(avoid_txf) - { - struct ureg_src texcoord; - if(!avoid_int) - { - ureg_I2F(ureg, tmp, _src(1)); - texcoord = ureg_src(tmp); - } - else - texcoord = _src(1); - - ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]); - } - else - ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]); + ureg_LOAD_MS(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); break; - } case SM4_OPCODE_SAMPLE: // dst, coord, res, samp - { - unsigned texslot = _texslot(); - ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]); + ureg_SAMPLE(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]); break; - } case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x - { - unsigned texslot = _texslot(); - struct ureg_dst tmp = _tmp(); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); - ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + ureg_SAMPLE_B(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4)); break; - } case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x - { - unsigned texslot = _texslot(); - struct ureg_dst tmp = _tmp(); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); - ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + ureg_SAMPLE_C(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4)); break; - } case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x - { - unsigned texslot = _texslot(); - struct ureg_dst tmp = _tmp(); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1)); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0)); - ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0)); - ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + ureg_SAMPLE_C_LZ(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4)); break; - } case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy - { - unsigned texslot = _texslot(); - ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5)); + ureg_SAMPLE_D(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4), _src(5)); break; - } case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x { - unsigned texslot = _texslot(); struct ureg_dst tmp = _tmp(); ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); - ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]); + ureg_SAMPLE_L(ureg, _dst(), ureg_src(tmp), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]); break; } default: @@ -699,16 +638,11 @@ next:; fail("Malformed control flow"); if(!sm4_find_labels(program)) fail("Failed to locate labels"); - if(!sm4_allocate_resource_sampler_pairs(program)) - fail("Unsupported (indirect?) accesses to resources and/or samplers"); ureg = ureg_create(processor); in_sub = false; - for(unsigned i = 0; i < program.slot_to_resource.size(); ++i) - samplers.push_back(ureg_DECL_sampler(ureg, i)); - sm4_to_tgsi_insn_num.resize(program.insns.size()); for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num) { @@ -805,6 +739,13 @@ next:; targets[idx].second = TGSI_TEXTURE_SHADOW2D; break; } + if(resources.size() <= (unsigned)idx) + resources.resize(idx + 1); + resources[idx] = ureg_DECL_resource(ureg, idx, targets[idx].first, + res_return_type(dcl.rrt.x), + res_return_type(dcl.rrt.y), + res_return_type(dcl.rrt.z), + res_return_type(dcl.rrt.w)); break; case SM4_OPCODE_DCL_SAMPLER: check(idx >= 0); @@ -812,6 +753,9 @@ next:; sampler_modes.resize(idx + 1); check(!dcl.dcl_sampler.mono); sampler_modes[idx] = dcl.dcl_sampler.shadow; + if(samplers.size() <= (unsigned)idx) + samplers.resize(idx + 1); + samplers[idx] = ureg_DECL_sampler(ureg, idx); break; case SM4_OPCODE_DCL_CONSTANT_BUFFER: check(dcl.op->num_indices == 2); -- 2.7.4