1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * Position and shader input interpolation.
33 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
49 * The shader JIT function operates on blocks of quads.
50 * Each block has 2x2 quads and each quad has 2x2 pixels.
52 * We iterate over the quads in order 0, 1, 2, 3:
64 * Within each quad, we have four pixels which are represented in SOA
73 * So the green channel (for example) of the four pixels is stored in
74 * a single vector register: {g0, g1, g2, g3}.
79 * Do one perspective divide per quad.
81 * For perspective interpolation, the final attribute value is given
87 * a = a0 + dadx*x + dady*y
88 * w = w0 + dwdx*x + dwdy*y
89 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
91 * Instead of computing the division per pixel, with this macro we compute the
92 * division on the upper left pixel of each quad, and use a linear
93 * approximation in the remaining pixels, given by:
95 * da'dx = (dadx - dwdx*a)*oow
96 * da'dy = (dady - dwdy*a)*oow
98 * Ironically, this actually makes things slower -- probably because the
99 * divide hardware unit is rarely used, whereas the multiply unit is typically
100 * already saturated.
/*
 * Compile-time switch.  The "#if PERSPECTIVE_DIVIDE_PER_QUAD" sections in
 * coeffs_init() and attribs_update() are selected when this is non-zero;
 * the "#if !PERSPECTIVE_DIVIDE_PER_QUAD" section in attribs_update() is
 * selected when it is zero (the current setting).
 */
102 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
/*
 * Per-index offset tables: index i yields the pair
 * (quad_offset_x[i], quad_offset_y[i]) = (0,0), (1,0), (0,1), (1,1).
 *
 * NOTE(review): neither table is referenced in the visible part of this
 * file; presumably they are the pixel offsets inside a 2x2 quad -- confirm
 * whether they are still needed.
 */
105 static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
106 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
/*
 * Give an LLVM value a readable name derived from the attribute slot and
 * channel it belongs to.
 *
 * val     value to be named (passed straight to lp_build_name()).
 * attrib  attribute slot index.
 * chan    channel index, used to pick one character out of "xyzw".
 * suffix  text appended after the channel letter.
 *
 * Two naming forms are visible: "pos.<c><suffix>" and
 * "input<attrib - 1>.<c><suffix>".
 *
 * NOTE(review): the condition that chooses between the two calls is not
 * present in this listing; presumably slot 0 is the position and the other
 * slots are shader inputs -- confirm against the full source.
 */
110 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
113 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
115 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
120 * Initialize the bld->a0, dadx, dady fields. This involves fetching
121 * those values from the arrays which are passed into the JIT function.
/*
 * For every attribute channel enabled in bld->mask, emit the IR that loads
 * the interpolation coefficients and stores two vectors in the context:
 *
 *   bld->a   [attrib][chan]   the broadcast start value plus 2 * dadq
 *   bld->dadq[attrib][chan]   {0, dadx, dady, dadx + dady}
 *
 * bld       interpolation context; reads num_attribs, mask, interp, x, y.
 * dadx_ptr  array the d/dx coefficients are loaded from.
 * dady_ptr  array the d/dy coefficients are loaded from.
 *
 * NOTE(review): a0_ptr is used below but its parameter line is missing from
 * this listing, as are the switch header, several branch bodies, the
 * closing braces and the declarations of attrib, chan, dadq, dadq2 and a.
 * The comments here describe only the statements that are visible.
 */
124 coeffs_init(struct lp_build_interp_soa_context *bld,
126 LLVMValueRef dadx_ptr,
127 LLVMValueRef dady_ptr)
129 struct lp_build_context *coeff_bld = &bld->coeff_bld;
130 struct gallivm_state *gallivm = coeff_bld->gallivm;
131 LLVMBuilderRef builder = gallivm->builder;
/* Scalar constants of the coefficient element type. */
132 LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
133 LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
/* 32-bit integer constants 0..3, used as vector lane indices further down. */
134 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
135 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
136 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
137 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
141 /* TODO: Use more vector operations */
/* Visit every (attrib, chan) pair and skip channels not set in the mask. */
143 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
144 const unsigned mask = bld->mask[attrib];
145 const unsigned interp = bld->interp[attrib];
146 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
147 if (mask & (1 << chan)) {
/* Flat index of this channel inside the coefficient arrays. */
148 LLVMValueRef index = lp_build_const_int32(gallivm,
149 attrib * NUM_CHANNELS + chan);
/* All coefficients start out as zero; the cases below overwrite them. */
150 LLVMValueRef a0 = zero;
151 LLVMValueRef dadx = zero;
152 LLVMValueRef dady = zero;
153 LLVMValueRef dadxy = zero;
/*
 * Perspective and linear attributes: load dadx and dady from the arrays
 * and form dadxy = dadx + dady.
 * NOTE(review): the bodies of the two "attrib == 0" branches (position x
 * and y) are missing from this listing.
 */
159 case LP_INTERP_PERSPECTIVE:
162 case LP_INTERP_LINEAR:
163 if (attrib == 0 && chan == 0) {
166 else if (attrib == 0 && chan == 1) {
170 dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
171 dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
172 dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
173 attrib_name(dadx, attrib, chan, ".dadx");
174 attrib_name(dady, attrib, chan, ".dady");
175 attrib_name(dadxy, attrib, chan, ".dadxy");
/*
 * Constant and facing attributes: load a0 from the a0 array.
 * NOTE(review): whether the cases above fall through to this load cannot
 * be seen here (break / fall-through lines are missing) -- confirm.
 */
179 case LP_INTERP_CONSTANT:
180 case LP_INTERP_FACING:
181 a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
182 attrib_name(a0, attrib, chan, ".a0");
185 case LP_INTERP_POSITION:
186 /* Nothing to do as the position coeffs are already setup in slot 0 */
195 * dadq = {0, dadx, dady, dadx + dady}
/* Assemble the four lanes one element at a time, starting from undef. */
198 dadq = coeff_bld->undef;
199 dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
200 dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
201 dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
202 dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
/* dadq2 is dadq added to itself, i.e. twice the per-lane step. */
208 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
211 * a = a0 + (x * dadx + y * dady)
214 if (attrib == 0 && chan == 0) {
217 else if (attrib == 0 && chan == 1) {
/*
 * Constant and facing attributes skip the x/y terms; everything else adds
 * x * dadx + y * dady using the scalars bld->x and bld->y.
 */
222 if (interp != LP_INTERP_CONSTANT &&
223 interp != LP_INTERP_FACING) {
224 LLVMValueRef ax, ay, axy;
225 ax = LLVMBuildFMul(builder, bld->x, dadx, "");
226 ay = LLVMBuildFMul(builder, bld->y, dady, "");
227 axy = LLVMBuildFAdd(builder, ax, ay, "");
228 a = LLVMBuildFAdd(builder, a, axy, "");
/* Broadcast the scalar into a vector of coeff_bld->vec_type. */
236 a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a);
239 * Compute the attrib values on the upper-left corner of each quad.
242 a = LLVMBuildFAdd(builder, a, dadq2, "");
/*
 * Per-quad perspective divide (only compiled when the macro is non-zero):
 * take w from bld->a[0][3], keep its reciprocal in bld->oow and scale a.
 * NOTE(review): the matching #endif line is missing from this listing.
 */
244 #if PERSPECTIVE_DIVIDE_PER_QUAD
249 if (interp == LP_INTERP_PERSPECTIVE) {
250 LLVMValueRef w = bld->a[0][3];
252 assert(bld->mask[0] & TGSI_WRITEMASK_W);
254 bld->oow = lp_build_rcp(coeff_bld, w);
255 lp_build_name(bld->oow, "oow");
257 a = lp_build_mul(coeff_bld, a, bld->oow);
261 attrib_name(a, attrib, chan, ".a");
262 attrib_name(dadq, attrib, chan, ".dadq");
/* Publish the results for attribs_update(). */
264 bld->a [attrib][chan] = a;
265 bld->dadq[attrib][chan] = dadq;
273 * Increment the shader input attribute values.
274 * This is called when we move from one quad to the next.
/*
 * Emit the IR that produces bld->attribs[attrib][chan] for one quad, for
 * every enabled channel of the attribute slots in [start, end).
 *
 * Visible per-channel behaviour:
 *   - constant / facing: the value is taken from bld->a unchanged;
 *   - position:          the value is copied from bld->attribs[0][chan];
 *   - other modes:       bld->a is shuffled with a mask built from
 *                        quad_index, bld->dadq is added, and perspective
 *                        attributes are multiplied by the reciprocal of w;
 *   - attrib 0, chan 2:  the result is clamped with lp_build_min() against
 *                        coeff_bld->one (see the FIXME at the end).
 *
 * NOTE(review): the parameter lines for quad_index, start and end, several
 * local declarations, the else / closing-brace lines and the #endif lines
 * are missing from this listing, so the exact nesting of the statements
 * below cannot be confirmed from here.
 */
277 attribs_update(struct lp_build_interp_soa_context *bld,
278 struct gallivm_state *gallivm,
283 LLVMBuilderRef builder = gallivm->builder;
284 struct lp_build_context *coeff_bld = &bld->coeff_bld;
/* Constant integer vector built from quad_index; used as the shuffle mask. */
285 LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index);
/* Reciprocal of w; starts as NULL and is assigned in the perspective paths. */
286 LLVMValueRef oow = NULL;
290 assert(quad_index < 4);
292 for(attrib = start; attrib < end; ++attrib) {
293 const unsigned mask = bld->mask[attrib];
294 const unsigned interp = bld->interp[attrib];
295 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
296 if(mask & (1 << chan)) {
/* Constant and facing values are used as stored, with no per-quad step. */
298 if (interp == LP_INTERP_CONSTANT ||
299 interp == LP_INTERP_FACING) {
300 a = bld->a[attrib][chan];
/* Position-interpolated inputs reuse the value held in slot 0. */
302 else if (interp == LP_INTERP_POSITION) {
304 a = bld->attribs[0][chan];
309 a = bld->a[attrib][chan];
312 * Broadcast the attribute value for this quad into all elements
315 a = LLVMBuildShuffleVector(builder,
316 a, coeff_bld->undef, shuffle, "");
319 * Get the derivatives.
322 dadq = bld->dadq[attrib][chan];
/*
 * Per-quad perspective path: oow is shuffled out of bld->oow and dadq is
 * rewritten as a difference involving a * dwdq, scaled by oow.
 * NOTE(review): the first operand of lp_build_sub() and the last argument
 * of the shuffle are on lines missing from this listing.
 */
324 #if PERSPECTIVE_DIVIDE_PER_QUAD
325 if (interp == LP_INTERP_PERSPECTIVE) {
326 LLVMValueRef dwdq = bld->dadq[0][3];
330 oow = LLVMBuildShuffleVector(coeff_bld->builder,
331 bld->oow, coeff_bld->undef,
335 dadq = lp_build_sub(coeff_bld,
337 lp_build_mul(coeff_bld, a, dwdq));
338 dadq = lp_build_mul(coeff_bld, dadq, oow);
343 * Add the derivatives
346 a = lp_build_add(coeff_bld, a, dadq);
/*
 * Per-pixel perspective path: w is read from bld->attribs[0][3], so the
 * position must already have been produced; a is multiplied by 1 / w.
 */
348 #if !PERSPECTIVE_DIVIDE_PER_QUAD
349 if (interp == LP_INTERP_PERSPECTIVE) {
351 LLVMValueRef w = bld->attribs[0][3];
353 assert(bld->mask[0] & TGSI_WRITEMASK_W);
354 oow = lp_build_rcp(coeff_bld, w);
356 a = lp_build_mul(coeff_bld, a, oow);
/*
 * Slot 0, channel 2 is clamped to coeff_bld->one below; the result is then
 * named and stored into bld->attribs.
 * NOTE(review): the closing line of the FIXME comment that follows is
 * missing from this listing, so no further comments are added after it.
 */
360 if (attrib == 0 && chan == 2) {
361 /* FIXME: Depth values can exceed 1.0, due to the fact that
362 * setup interpolation coefficients refer to (0,0) which causes
363 * precision loss. So we must clamp to 1.0 here to avoid artifacts
365 a = lp_build_min(coeff_bld, a, coeff_bld->one);
368 attrib_name(a, attrib, chan, "");
370 bld->attribs[attrib][chan] = a;
378 * Generate the position vectors.
380 * Parameter x0, y0 are the integer values with upper left coordinates.
/*
 * Convert the integer start coordinates to floating point and keep them in
 * the context: bld->x and bld->y receive x0 and y0 converted with a
 * signed-int-to-float instruction to coeff_bld->elem_type.
 *
 * NOTE(review): the parameter lines declaring x0 and y0 are missing from
 * this listing.
 */
383 pos_init(struct lp_build_interp_soa_context *bld,
387 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
388 struct lp_build_context *coeff_bld = &bld->coeff_bld;
390 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
391 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
396 * Initialize fragment shader input attribute info.
/*
 * Set up an interpolation context from scratch.
 *
 * The context is zeroed, slot 0 is configured as the position (all four
 * channels, linear), the shader inputs are copied into slots 1..num_inputs,
 * every attribute channel is preset to undef, and finally pos_init() and
 * coeffs_init() are called.
 *
 * bld       context to initialise; any previous contents are overwritten.
 * gallivm   state handed to lp_build_context_init().
 * inputs    per-input usage_mask and interp mode, num_inputs entries.
 * dadx_ptr, dady_ptr  coefficient arrays forwarded to coeffs_init().
 *
 * NOTE(review): the parameter lines for num_inputs, type, a0_ptr, x0 and y0
 * and the declarations of attrib and chan are missing from this listing.
 */
399 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
400 struct gallivm_state *gallivm,
402 const struct lp_shader_input *inputs,
403 LLVMBuilderRef builder,
406 LLVMValueRef dadx_ptr,
407 LLVMValueRef dady_ptr,
411 struct lp_type coeff_type;
415 memset(bld, 0, sizeof *bld);
/* Coefficient type: signed 32-bit float vector with QUAD_SIZE elements. */
417 memset(&coeff_type, 0, sizeof coeff_type);
418 coeff_type.floating = TRUE;
419 coeff_type.sign = TRUE;
420 coeff_type.width = 32;
421 coeff_type.length = QUAD_SIZE;
423 /* XXX: we don't support interpolating into any other types */
424 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
426 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
428 /* For convenience */
429 bld->pos = bld->attribs[0];
430 bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
/* Slot 0: all four channels enabled, linear interpolation. */
433 bld->num_attribs = 1;
434 bld->mask[0] = TGSI_WRITEMASK_XYZW;
435 bld->interp[0] = LP_INTERP_LINEAR;
/* Shader inputs are stored one slot higher than their own index. */
438 for (attrib = 0; attrib < num_inputs; ++attrib) {
439 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
440 bld->interp[1 + attrib] = inputs[attrib].interp;
442 bld->num_attribs = 1 + num_inputs;
444 /* Ensure all masked out input channels have a valid value */
445 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
446 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
447 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
/* pos_init() sets bld->x and bld->y, which coeffs_init() reads. */
451 pos_init(bld, x0, y0);
453 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
458 * Advance the position and inputs to the given quad within the block.
/*
 * Update the shader input slots (1 up to, but not including,
 * bld->num_attribs) for the given quad by calling attribs_update().
 * Slot 0 is not touched here.
 *
 * quad_index is asserted to be below 4.
 *
 * NOTE(review): the parameter line declaring quad_index is missing from
 * this listing.
 */
461 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
462 struct gallivm_state *gallivm,
465 assert(quad_index < 4);
467 attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs);
/*
 * Update slot 0 only (the range [0, 1)) for the given quad by calling
 * attribs_update().
 *
 * quad_index is asserted to be below 4.
 *
 * NOTE(review): the parameter line declaring quad_index is missing from
 * this listing.
 */
471 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
472 struct gallivm_state *gallivm,
475 assert(quad_index < 4);
477 attribs_update(bld, gallivm, quad_index, 0, 1);