src/gallium/drivers/llvmpipe/lp_bld_interp.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * @file
  31  * Position and shader input interpolation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36 #include "pipe/p_shader_tokens.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_math.h"
  40 #include "tgsi/tgsi_scan.h"
  41 #include "gallivm/lp_bld_debug.h"
  42 #include "gallivm/lp_bld_const.h"
  43 #include "gallivm/lp_bld_arit.h"
  44 #include "gallivm/lp_bld_swizzle.h"
  45 #include "lp_bld_interp.h"
  46
  47
  48 /*
  49  * The shader JIT function operates on blocks of quads.
  50  * Each block has 2x2 quads and each quad has 2x2 pixels.
  51  *
  52  * We iterate over the quads in order 0, 1, 2, 3:
  53  *
  54  * #################
  55  * #   |   #   |   #
  56  * #---0---#---1---#
  57  * #   |   #   |   #
  58  * #################
  59  * #   |   #   |   #
  60  * #---2---#---3---#
  61  * #   |   #   |   #
  62  * #################
  63  *
  64  * Within each quad, we have four pixels which are represented in SOA
  65  * order:
  66  *
  67  * #########
  68  * # 0 | 1 #
  69  * #---+---#
  70  * # 2 | 3 #
  71  * #########
  72  *
  73  * So the green channel (for example) of the four pixels is stored in
  74  * a single vector register: {g0, g1, g2, g3}.
  75  */
  76
  77
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *  a' = a/w = a * oow
 *
 * where
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, when this macro is non-zero
 * we compute the division on the upper left pixel of each quad, and use a
 * linear approximation in the remaining pixels, given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 *
 * With the value 0 the reciprocal of w is computed on the whole pixel vector
 * of each quad when the attributes are updated.
 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0
 103
 104
 105 static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
 106 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 107
 108
 109 static void
 110 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
 111 {
 112    if(attrib == 0)
 113       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
 114    else
 115       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
 116 }
 117
 118
 119 /**
 120  * Initialize the bld->a0, dadx, dady fields.  This involves fetching
 121  * those values from the arrays which are passed into the JIT function.
 122  */
 123 static void
 124 coeffs_init(struct lp_build_interp_soa_context *bld,
 125             LLVMValueRef a0_ptr,
 126             LLVMValueRef dadx_ptr,
 127             LLVMValueRef dady_ptr)
 128 {
 129    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 130    struct gallivm_state *gallivm = coeff_bld->gallivm;
 131    LLVMBuilderRef builder = gallivm->builder;
 132    LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
 133    LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
 134    LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
 135    LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
 136    LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
 137    LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
 138    unsigned attrib;
 139    unsigned chan;
 140
 141    /* TODO: Use more vector operations */
 142
 143    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 144       const unsigned mask = bld->mask[attrib];
 145       const unsigned interp = bld->interp[attrib];
 146       for (chan = 0; chan < NUM_CHANNELS; ++chan) {
 147          if (mask & (1 << chan)) {
 148             LLVMValueRef index = lp_build_const_int32(gallivm,
 149                                       attrib * NUM_CHANNELS + chan);
 150             LLVMValueRef a0 = zero;
 151             LLVMValueRef dadx = zero;
 152             LLVMValueRef dady = zero;
 153             LLVMValueRef dadxy = zero;
 154             LLVMValueRef dadq;
 155             LLVMValueRef dadq2;
 156             LLVMValueRef a;
 157
 158             switch (interp) {
 159             case LP_INTERP_PERSPECTIVE:
 160                /* fall-through */
 161
 162             case LP_INTERP_LINEAR:
 163                if (attrib == 0 && chan == 0) {
 164                   dadxy = dadx = one;
 165                }
 166                else if (attrib == 0 && chan == 1) {
 167                   dadxy = dady = one;
 168                }
 169                else {
 170                   dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
 171                   dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
 172                   dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
 173                   attrib_name(dadx, attrib, chan, ".dadx");
 174                   attrib_name(dady, attrib, chan, ".dady");
 175                   attrib_name(dadxy, attrib, chan, ".dadxy");
 176                }
 177                /* fall-through */
 178
 179             case LP_INTERP_CONSTANT:
 180             case LP_INTERP_FACING:
 181                a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
 182                attrib_name(a0, attrib, chan, ".a0");
 183                break;
 184
 185             case LP_INTERP_POSITION:
 186                /* Nothing to do as the position coeffs are already setup in slot 0 */
 187                continue;
 188
 189             default:
 190                assert(0);
 191                break;
 192             }
 193
 194             /*
 195              * dadq = {0, dadx, dady, dadx + dady}
 196              */
 197
 198             dadq = coeff_bld->undef;
 199             dadq = LLVMBuildInsertElement(builder, dadq, zero,  i0, "");
 200             dadq = LLVMBuildInsertElement(builder, dadq, dadx,  i1, "");
 201             dadq = LLVMBuildInsertElement(builder, dadq, dady,  i2, "");
 202             dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
 203
 204             /*
 205              * dadq2 = 2 * dq
 206              */
 207
 208             dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
 209
 210             /*
 211              * a = a0 + (x * dadx + y * dady)
 212              */
 213
 214             if (attrib == 0 && chan == 0) {
 215                a = bld->x;
 216             }
 217             else if (attrib == 0 && chan == 1) {
 218                a = bld->y;
 219             }
 220             else {
 221                a = a0;
 222                if (interp != LP_INTERP_CONSTANT &&
 223                    interp != LP_INTERP_FACING) {
 224                   LLVMValueRef ax, ay, axy;
 225                   ax = LLVMBuildFMul(builder, bld->x, dadx, "");
 226                   ay = LLVMBuildFMul(builder, bld->y, dady, "");
 227                   axy = LLVMBuildFAdd(builder, ax, ay, "");
 228                   a = LLVMBuildFAdd(builder, a, axy, "");
 229                }
 230             }
 231
 232             /*
 233              * a = {a, a, a, a}
 234              */
 235
 236             a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a);
 237
 238             /*
 239              * Compute the attrib values on the upper-left corner of each quad.
 240              */
 241
 242             a = LLVMBuildFAdd(builder, a, dadq2, "");
 243
 244 #if PERSPECTIVE_DIVIDE_PER_QUAD
 245             /*
 246              * a *= 1 / w
 247              */
 248
 249             if (interp == LP_INTERP_PERSPECTIVE) {
 250                LLVMValueRef w = bld->a[0][3];
 251                assert(attrib != 0);
 252                assert(bld->mask[0] & TGSI_WRITEMASK_W);
 253                if (!bld->oow) {
 254                   bld->oow = lp_build_rcp(coeff_bld, w);
 255                   lp_build_name(bld->oow, "oow");
 256                }
 257                a = lp_build_mul(coeff_bld, a, bld->oow);
 258             }
 259 #endif
 260
 261             attrib_name(a, attrib, chan, ".a");
 262             attrib_name(dadq, attrib, chan, ".dadq");
 263
 264             bld->a   [attrib][chan] = a;
 265             bld->dadq[attrib][chan] = dadq;
 266          }
 267       }
 268    }
 269 }
 270
 271
 272 /**
 273  * Increment the shader input attribute values.
 274  * This is called when we move from one quad to the next.
 275  */
 276 static void
 277 attribs_update(struct lp_build_interp_soa_context *bld,
 278                struct gallivm_state *gallivm,
 279                int quad_index,
 280                int start,
 281                int end)
 282 {
 283    LLVMBuilderRef builder = gallivm->builder;
 284    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 285    LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index);
 286    LLVMValueRef oow = NULL;
 287    unsigned attrib;
 288    unsigned chan;
 289
 290    assert(quad_index < 4);
 291
 292    for(attrib = start; attrib < end; ++attrib) {
 293       const unsigned mask = bld->mask[attrib];
 294       const unsigned interp = bld->interp[attrib];
 295       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
 296          if(mask & (1 << chan)) {
 297             LLVMValueRef a;
 298             if (interp == LP_INTERP_CONSTANT ||
 299                 interp == LP_INTERP_FACING) {
 300                a = bld->a[attrib][chan];
 301             }
 302             else if (interp == LP_INTERP_POSITION) {
 303                assert(attrib > 0);
 304                a = bld->attribs[0][chan];
 305             }
 306             else {
 307                LLVMValueRef dadq;
 308
 309                a = bld->a[attrib][chan];
 310
 311                /*
 312                 * Broadcast the attribute value for this quad into all elements
 313                 */
 314
 315                a = LLVMBuildShuffleVector(builder,
 316                                           a, coeff_bld->undef, shuffle, "");
 317
 318                /*
 319                 * Get the derivatives.
 320                 */
 321
 322                dadq = bld->dadq[attrib][chan];
 323
 324 #if PERSPECTIVE_DIVIDE_PER_QUAD
 325                if (interp == LP_INTERP_PERSPECTIVE) {
 326                   LLVMValueRef dwdq = bld->dadq[0][3];
 327
 328                   if (oow == NULL) {
 329                      assert(bld->oow);
 330                      oow = LLVMBuildShuffleVector(coeff_bld->builder,
 331                                                   bld->oow, coeff_bld->undef,
 332                                                   shuffle, "");
 333                   }
 334
 335                   dadq = lp_build_sub(coeff_bld,
 336                                       dadq,
 337                                       lp_build_mul(coeff_bld, a, dwdq));
 338                   dadq = lp_build_mul(coeff_bld, dadq, oow);
 339                }
 340 #endif
 341
 342                /*
 343                 * Add the derivatives
 344                 */
 345
 346                a = lp_build_add(coeff_bld, a, dadq);
 347
 348 #if !PERSPECTIVE_DIVIDE_PER_QUAD
 349                if (interp == LP_INTERP_PERSPECTIVE) {
 350                   if (oow == NULL) {
 351                      LLVMValueRef w = bld->attribs[0][3];
 352                      assert(attrib != 0);
 353                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
 354                      oow = lp_build_rcp(coeff_bld, w);
 355                   }
 356                   a = lp_build_mul(coeff_bld, a, oow);
 357                }
 358 #endif
 359
 360                if (attrib == 0 && chan == 2) {
 361                   /* FIXME: Depth values can exceed 1.0, due to the fact that
 362                    * setup interpolation coefficients refer to (0,0) which causes
 363                    * precision loss. So we must clamp to 1.0 here to avoid artifacts
 364                    */
 365                   a = lp_build_min(coeff_bld, a, coeff_bld->one);
 366                }
 367
 368                attrib_name(a, attrib, chan, "");
 369             }
 370             bld->attribs[attrib][chan] = a;
 371          }
 372       }
 373    }
 374 }
 375
 376
 377 /**
 378  * Generate the position vectors.
 379  *
 380  * Parameter x0, y0 are the integer values with upper left coordinates.
 381  */
 382 static void
 383 pos_init(struct lp_build_interp_soa_context *bld,
 384          LLVMValueRef x0,
 385          LLVMValueRef y0)
 386 {
 387    LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
 388    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 389
 390    bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
 391    bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
 392 }
 393
 394
 395 /**
 396  * Initialize fragment shader input attribute info.
 397  */
 398 void
 399 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 400                          struct gallivm_state *gallivm,
 401                          unsigned num_inputs,
 402                          const struct lp_shader_input *inputs,
 403                          LLVMBuilderRef builder,
 404                          struct lp_type type,
 405                          LLVMValueRef a0_ptr,
 406                          LLVMValueRef dadx_ptr,
 407                          LLVMValueRef dady_ptr,
 408                          LLVMValueRef x0,
 409                          LLVMValueRef y0)
 410 {
 411    struct lp_type coeff_type;
 412    unsigned attrib;
 413    unsigned chan;
 414
 415    memset(bld, 0, sizeof *bld);
 416
 417    memset(&coeff_type, 0, sizeof coeff_type);
 418    coeff_type.floating = TRUE;
 419    coeff_type.sign = TRUE;
 420    coeff_type.width = 32;
 421    coeff_type.length = QUAD_SIZE;
 422
 423    /* XXX: we don't support interpolating into any other types */
 424    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
 425
 426    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
 427
 428    /* For convenience */
 429    bld->pos = bld->attribs[0];
 430    bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
 431
 432    /* Position */
 433    bld->num_attribs = 1;
 434    bld->mask[0] = TGSI_WRITEMASK_XYZW;
 435    bld->interp[0] = LP_INTERP_LINEAR;
 436
 437    /* Inputs */
 438    for (attrib = 0; attrib < num_inputs; ++attrib) {
 439       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
 440       bld->interp[1 + attrib] = inputs[attrib].interp;
 441    }
 442    bld->num_attribs = 1 + num_inputs;
 443
 444    /* Ensure all masked out input channels have a valid value */
 445    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 446       for (chan = 0; chan < NUM_CHANNELS; ++chan) {
 447          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
 448       }
 449    }
 450
 451    pos_init(bld, x0, y0);
 452
 453    coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
 454 }
 455
 456
 457 /**
 458  * Advance the position and inputs to the given quad within the block.
 459  */
 460 void
 461 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
 462                                   struct gallivm_state *gallivm,
 463                                   int quad_index)
 464 {
 465    assert(quad_index < 4);
 466
 467    attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs);
 468 }
 469
 470 void
 471 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
 472                                   struct gallivm_state *gallivm,
 473                                   int quad_index)
 474 {
 475    assert(quad_index < 4);
 476
 477    attribs_update(bld, gallivm, quad_index, 0, 1);
 478 }
 479