src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42
  43
  44 void
  45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  46                             struct lp_build_context *bld,
  47                             const LLVMValueRef *unswizzled,
  48                             LLVMValueRef swizzled_out[4])
  49 {
  50    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  51    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  52
  53    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  54       /*
  55        * Return zzz1 for depth-stencil formats.
  56        *
  57        * XXX: Allow to control the depth swizzle with an additional parameter,
  58        * as the caller may wish another depth swizzle, or retain the stencil
  59        * value.
  60        */
  61       enum util_format_swizzle swizzle = format_desc->swizzle[0];
  62       LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  63       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
  64       swizzled_out[3] = bld->one;
  65    }
  66    else {
  67       unsigned chan;
  68       for (chan = 0; chan < 4; ++chan) {
  69          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  70          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  71       }
  72    }
  73 }
  74
  75
  76 /**
  77  * Unpack several pixels in SoA.
  78  *
  79  * It takes a vector of packed pixels:
  80  *
  81  *   packed = {P0, P1, P2, P3, ..., Pn}
  82  *
  83  * And will produce four vectors:
  84  *
  85  *   red    = {R0, R1, R2, R3, ..., Rn}
  86  *   green  = {G0, G1, G2, G3, ..., Gn}
  87  *   blue   = {B0, B1, B2, B3, ..., Bn}
  88  *   alpha  = {A0, A1, A2, A3, ..., An}
  89  *
  90  * It requires that a packed pixel fits into an element of the output
  91  * channels. The common case is when converting pixel with a depth of 32 bit or
  92  * less into floats.
  93  *
  94  * \param format_desc  the format of the 'packed' incoming pixel vector
  95  * \param type  the desired type for rgba_out (type.length = n, above)
  96  * \param packed  the incoming vector of packed pixels
  97  * \param rgba_out  returns the SoA R,G,B,A vectors
  98  */
  99 void
 100 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 101                          const struct util_format_description *format_desc,
 102                          struct lp_type type,
 103                          LLVMValueRef packed,
 104                          LLVMValueRef rgba_out[4])
 105 {
 106    LLVMBuilderRef builder = gallivm->builder;
 107    struct lp_build_context bld;
 108    LLVMValueRef inputs[4];
 109    unsigned start;
 110    unsigned chan;
 111
 112    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 113    assert(format_desc->block.width == 1);
 114    assert(format_desc->block.height == 1);
 115    assert(format_desc->block.bits <= type.width);
 116    /* FIXME: Support more output types */
 117    assert(type.floating);
 118    assert(type.width == 32);
 119
 120    lp_build_context_init(&bld, gallivm, type);
 121
 122    /* Decode the input vector components */
 123    start = 0;
 124    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 125       const unsigned width = format_desc->channel[chan].size;
 126       const unsigned stop = start + width;
 127       LLVMValueRef input;
 128
 129       input = packed;
 130
 131       switch(format_desc->channel[chan].type) {
 132       case UTIL_FORMAT_TYPE_VOID:
 133          input = lp_build_undef(gallivm, type);
 134          break;
 135
 136       case UTIL_FORMAT_TYPE_UNSIGNED:
 137          /*
 138           * Align the LSB
 139           */
 140
 141          if (start) {
 142             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 143          }
 144
 145          /*
 146           * Zero the MSBs
 147           */
 148
 149          if (stop < format_desc->block.bits) {
 150             unsigned mask = ((unsigned long long)1 << width) - 1;
 151             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 152          }
 153
 154          /*
 155           * Type conversion
 156           */
 157
 158          if (type.floating) {
 159             if(format_desc->channel[chan].normalized)
 160                input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 161             else
 162                input = LLVMBuildSIToFP(builder, input,
 163                                        lp_build_vec_type(gallivm, type), "");
 164          }
 165          else {
 166             /* FIXME */
 167             assert(0);
 168             input = lp_build_undef(gallivm, type);
 169          }
 170
 171          break;
 172
 173       case UTIL_FORMAT_TYPE_SIGNED:
 174          /*
 175           * Align the sign bit first.
 176           */
 177
 178          if (stop < type.width) {
 179             unsigned bits = type.width - stop;
 180             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 181             input = LLVMBuildShl(builder, input, bits_val, "");
 182          }
 183
 184          /*
 185           * Align the LSB (with an arithmetic shift to preserve the sign)
 186           */
 187
 188          if (format_desc->channel[chan].size < type.width) {
 189             unsigned bits = type.width - format_desc->channel[chan].size;
 190             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 191             input = LLVMBuildAShr(builder, input, bits_val, "");
 192          }
 193
 194          /*
 195           * Type conversion
 196           */
 197
 198          if (type.floating) {
 199             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 200             if (format_desc->channel[chan].normalized) {
 201                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 202                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 203                input = LLVMBuildFMul(builder, input, scale_val, "");
 204             }
 205          }
 206          else {
 207             /* FIXME */
 208             assert(0);
 209             input = lp_build_undef(gallivm, type);
 210          }
 211
 212          break;
 213
 214       case UTIL_FORMAT_TYPE_FLOAT:
 215          if (type.floating) {
 216             assert(start == 0);
 217             assert(stop == 32);
 218             assert(type.width == 32);
 219             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 220          }
 221          else {
 222             /* FIXME */
 223             assert(0);
 224             input = lp_build_undef(gallivm, type);
 225          }
 226          break;
 227
 228       case UTIL_FORMAT_TYPE_FIXED:
 229          if (type.floating) {
 230             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 231             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 232             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 233             input = LLVMBuildFMul(builder, input, scale_val, "");
 234          }
 235          else {
 236             /* FIXME */
 237             assert(0);
 238             input = lp_build_undef(gallivm, type);
 239          }
 240          break;
 241
 242       default:
 243          assert(0);
 244          input = lp_build_undef(gallivm, type);
 245          break;
 246       }
 247
 248       inputs[chan] = input;
 249
 250       start = stop;
 251    }
 252
 253    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 254 }
 255
 256
 257 void
 258 lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm,
 259                           struct lp_type dst_type,
 260                           LLVMValueRef packed,
 261                           LLVMValueRef *rgba)
 262 {
 263    LLVMBuilderRef builder = gallivm->builder;
 264    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 265    unsigned chan;
 266
 267    packed = LLVMBuildBitCast(builder, packed,
 268                              lp_build_int_vec_type(gallivm, dst_type), "");
 269
 270    /* Decode the input vector components */
 271    for (chan = 0; chan < 4; ++chan) {
 272       unsigned start = chan*8;
 273       unsigned stop = start + 8;
 274       LLVMValueRef input;
 275
 276       input = packed;
 277
 278       if (start)
 279          input = LLVMBuildLShr(builder, input,
 280                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 281
 282       if (stop < 32)
 283          input = LLVMBuildAnd(builder, input, mask, "");
 284
 285       input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 286
 287       rgba[chan] = input;
 288    }
 289 }
 290
 291
 292
 293 /**
 294  * Fetch a texels from a texture, returning them in SoA layout.
 295  *
 296  * \param type  the desired return type for 'rgba'.  The vector length
 297  *              is the number of texels to fetch
 298  *
 299  * \param base_ptr  points to start of the texture image block.  For non-
 300  *                  compressed formats, this simply points to the texel.
 301  *                  For compressed formats, it points to the start of the
 302  *                  compressed data block.
 303  *
 304  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 305  *              these will always be (0,0).  For compressed formats, i will
 306  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 307  */
 308 void
 309 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 310                         const struct util_format_description *format_desc,
 311                         struct lp_type type,
 312                         LLVMValueRef base_ptr,
 313                         LLVMValueRef offset,
 314                         LLVMValueRef i,
 315                         LLVMValueRef j,
 316                         LLVMValueRef rgba_out[4])
 317 {
 318    LLVMBuilderRef builder = gallivm->builder;
 319
 320    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 321        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 322         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 323        format_desc->block.width == 1 &&
 324        format_desc->block.height == 1 &&
 325        format_desc->block.bits <= type.width &&
 326        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 327         format_desc->channel[0].size == 32))
 328    {
 329       /*
 330        * The packed pixel fits into an element of the destination format. Put
 331        * the packed pixels into a vector and extract each component for all
 332        * vector elements in parallel.
 333        */
 334
 335       LLVMValueRef packed;
 336
 337       /*
 338        * gather the texels from the texture
 339        * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
 340        */
 341       packed = lp_build_gather(gallivm,
 342                                type.length,
 343                                format_desc->block.bits,
 344                                type.width,
 345                                base_ptr, offset);
 346
 347       /*
 348        * convert texels to float rgba
 349        */
 350       lp_build_unpack_rgba_soa(gallivm,
 351                                format_desc,
 352                                type,
 353                                packed, rgba_out);
 354       return;
 355    }
 356
 357    /*
 358     * Try calling lp_build_fetch_rgba_aos for all pixels.
 359     */
 360
 361    if (util_format_fits_8unorm(format_desc) &&
 362        type.floating && type.width == 32 && type.length == 4) {
 363       struct lp_type tmp_type;
 364       LLVMValueRef tmp;
 365
 366       memset(&tmp_type, 0, sizeof tmp_type);
 367       tmp_type.width = 8;
 368       tmp_type.length = type.length * 4;
 369       tmp_type.norm = TRUE;
 370
 371       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 372                                     base_ptr, offset, i, j);
 373
 374       lp_build_rgba8_to_f32_soa(gallivm,
 375                                 type,
 376                                 tmp,
 377                                 rgba_out);
 378
 379       return;
 380    }
 381
 382    /*
 383     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 384     *
 385     * This is not the most efficient way of fetching pixels, as we
 386     * miss some opportunities to do vectorization, but this is
 387     * convenient for formats or scenarios for which there was no
 388     * opportunity or incentive to optimize.
 389     */
 390
 391    {
 392       unsigned k, chan;
 393       struct lp_type tmp_type;
 394
 395       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 396          debug_printf("%s: scalar unpacking of %s\n",
 397                       __FUNCTION__, format_desc->short_name);
 398       }
 399
 400       tmp_type = type;
 401       tmp_type.length = 4;
 402
 403       for (chan = 0; chan < 4; ++chan) {
 404          rgba_out[chan] = lp_build_undef(gallivm, type);
 405       }
 406
 407       /* loop over number of pixels */
 408       for(k = 0; k < type.length; ++k) {
 409          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 410          LLVMValueRef offset_elem;
 411          LLVMValueRef i_elem, j_elem;
 412          LLVMValueRef tmp;
 413
 414          offset_elem = LLVMBuildExtractElement(builder, offset,
 415                                                index, "");
 416
 417          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 418          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 419
 420          /* Get a single float[4]={R,G,B,A} pixel */
 421          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 422                                        base_ptr, offset_elem,
 423                                        i_elem, j_elem);
 424
 425          /*
 426           * Insert the AoS tmp value channels into the SoA result vectors at
 427           * position = 'index'.
 428           */
 429          for (chan = 0; chan < 4; ++chan) {
 430             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
 431             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 432             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 433                                                     tmp_chan, index, "");
 434          }
 435       }
 436    }
 437 }