1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
/*
 * Fill swizzled_out[0..3] by picking values out of the unswizzled SoA
 * channel array according to format_desc->swizzle[], using
 * lp_build_swizzle_soa_channel() for every lookup.
 *
 * NOTE(review): this listing omits some lines of the function (return type,
 * braces, the declaration of `chan`, and whatever joins the two cases
 * below), so the comments here describe only the statements that are
 * visible.
 */
45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
46 struct lp_build_context *bld,
47 const LLVMValueRef *unswizzled,
48 LLVMValueRef swizzled_out[4])
/*
 * The util_format swizzle codes for constant 0 and constant 1 must have the
 * same numeric values as the PIPE_SWIZZLE ones.
 */
50 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
51 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
/*
 * Depth-stencil colorspace: the value selected by swizzle[0] is replicated
 * into outputs 0, 1 and 2, and output 3 is set to bld->one.
 */
53 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
55 * Return zzz1 for depth-stencil formats.
57 * XXX: Allow to control the depth swizzle with an additional parameter,
58 * as the caller may wish another depth swizzle, or retain the stencil
61 enum util_format_swizzle swizzle = format_desc->swizzle[0];
62 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
63 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
64 swizzled_out[3] = bld->one;
/*
 * Generic case: each of the four outputs is looked up with its own
 * swizzle[chan] entry.
 * NOTE(review): presumably this loop is the else arm of the depth-stencil
 * test above; the connecting line is not visible here -- confirm.
 */
68 for (chan = 0; chan < 4; ++chan) {
69 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
70 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
77 * Unpack several pixels in SoA.
79 * It takes a vector of packed pixels:
81 * packed = {P0, P1, P2, P3, ..., Pn}
83 * And will produce four vectors:
85 * red = {R0, R1, R2, R3, ..., Rn}
86 * green = {G0, G1, G2, G3, ..., Gn}
87 * blue = {B0, B1, B2, B3, ..., Bn}
88 * alpha = {A0, A1, A2, A3, ..., An}
90 * It requires that a packed pixel fits into an element of the output
91 * channels. The common case is when converting pixels with a depth of 32 bits or less into floats.
94 * \param format_desc the format of the 'packed' incoming pixel vector
95 * \param type the desired type for rgba_out (type.length = n, above)
96 * \param packed the incoming vector of packed pixels
97 * \param rgba_out returns the SoA R,G,B,A vectors
/*
 * Emit LLVM IR that decodes every channel of format_desc into a vector of
 * `type`, collects the results in inputs[], and finally applies the format
 * swizzle through lp_build_format_swizzle_soa() to produce rgba_out[0..3].
 *
 * NOTE(review): this listing omits some lines of the function (the
 * declaration of the `type` parameter and presumably of the packed input,
 * the declarations and updates of `start`, `chan` and `input`, the `break`
 * statements, braces and several branch conditions).  The comments below
 * describe only the statements that are visible; confirm the rest against
 * the full source.
 */
100 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
101 const struct util_format_description *format_desc,
104 LLVMValueRef rgba_out[4])
106 LLVMBuilderRef builder = gallivm->builder;
107 struct lp_build_context bld;
108 LLVMValueRef inputs[4];
/*
 * Preconditions: plain layout, 1x1 pixel blocks, and a block that is no
 * wider (in bits) than one element of `type`.
 */
112 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
113 assert(format_desc->block.width == 1);
114 assert(format_desc->block.height == 1);
115 assert(format_desc->block.bits <= type.width);
116 /* FIXME: Support more output types */
117 assert(type.floating);
118 assert(type.width == 32);
/* Build context for `type`; it is handed to the swizzle helper at the end. */
120 lp_build_context_init(&bld, gallivm, type);
122 /* Decode the input vector components */
/*
 * Per channel: `width` is the channel size in bits and `stop` is
 * `start + width`.
 * NOTE(review): `start` is assigned on lines that are not visible here;
 * presumably it is the position of the channel's first bit inside the
 * packed pixel -- confirm.
 */
124 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
125 const unsigned width = format_desc->channel[chan].size;
126 const unsigned stop = start + width;
131 switch(format_desc->channel[chan].type) {
132 case UTIL_FORMAT_TYPE_VOID:
/* Void channel: the value is left undefined. */
133 input = lp_build_undef(gallivm, type);
136 case UTIL_FORMAT_TYPE_UNSIGNED:
/* Logical shift right by `start` bits (any guard on this is not visible). */
142 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
/*
 * When the channel ends below the top bit of the pixel block, keep only the
 * low `width` bits.
 */
149 if (stop < format_desc->block.bits) {
150 unsigned mask = ((unsigned long long)1 << width) - 1;
151 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
/*
 * Normalized channels are converted with lp_build_unsigned_norm_to_float();
 * the SIToFP that follows is the other visible conversion.
 * NOTE(review): presumably the SIToFP is the else arm -- the `else` line is
 * not visible in this listing.
 */
159 if(format_desc->channel[chan].normalized)
160 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
162 input = LLVMBuildSIToFP(builder, input,
163 lp_build_vec_type(gallivm, type), "");
/*
 * NOTE(review): the condition guarding this undef assignment is not visible
 * in this listing; the same applies to the later undef assignments in this
 * function.
 */
168 input = lp_build_undef(gallivm, type);
173 case UTIL_FORMAT_TYPE_SIGNED:
/*
 * Signed channel: when the channel ends below the element's top bit, shift
 * left by (type.width - stop); then, when the channel is narrower than the
 * element, arithmetic-shift right by (type.width - channel size).
 */
175 * Align the sign bit first.
178 if (stop < type.width) {
179 unsigned bits = type.width - stop;
180 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
181 input = LLVMBuildShl(builder, input, bits_val, "");
185 * Align the LSB (with an arithmetic shift to preserve the sign)
188 if (format_desc->channel[chan].size < type.width) {
189 unsigned bits = type.width - format_desc->channel[chan].size;
190 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
191 input = LLVMBuildAShr(builder, input, bits_val, "");
/*
 * Convert to float; normalized channels are then multiplied by
 * 1 / (2^(size - 1) - 1).
 * NOTE(review): `1 << (size - 1)` is evaluated as an int shift, so a 32-bit
 * normalized signed channel would shift into the sign bit -- confirm that
 * such formats cannot reach this path.
 */
199 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
200 if (format_desc->channel[chan].normalized) {
201 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
202 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
203 input = LLVMBuildFMul(builder, input, scale_val, "");
209 input = lp_build_undef(gallivm, type);
214 case UTIL_FORMAT_TYPE_FLOAT:
/*
 * Float channel: with a 32-bit output asserted, the value is bitcast to the
 * output vector type.
 */
218 assert(type.width == 32);
219 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
224 input = lp_build_undef(gallivm, type);
228 case UTIL_FORMAT_TYPE_FIXED:
/*
 * Fixed-point channel: convert to float and multiply by
 * 1 / (2^(size / 2) - 1).
 * NOTE(review): confirm that the divisor 2^(size / 2) - 1, rather than
 * 2^(size / 2), is the intended fixed-point scale.
 */
230 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
231 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
232 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
233 input = LLVMBuildFMul(builder, input, scale_val, "");
238 input = lp_build_undef(gallivm, type);
244 input = lp_build_undef(gallivm, type);
/* Record the decoded channel, indexed by its position in the format. */
248 inputs[chan] = input;
/* Map the decoded channels to rgba_out[] with the format's swizzle. */
253 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
/*
 * Emit LLVM IR that takes `packed`, reinterprets it as an integer vector of
 * dst_type, and for each of four channels shifts, masks with 0xff and
 * converts the 8-bit value with lp_build_unsigned_norm_to_float().
 *
 * NOTE(review): this listing omits some lines of the function (return type,
 * the remaining parameter declarations, the declarations of `chan` and
 * `input`, any guards on the shift/mask, and the statement that stores each
 * converted channel), so only the visible statements are described.
 */
258 lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm,
259 struct lp_type dst_type,
263 LLVMBuilderRef builder = gallivm->builder;
/* Constant vector with 0xff in every element, used to isolate one byte. */
264 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
/* View the packed pixels as the integer vector type matching dst_type. */
267 packed = LLVMBuildBitCast(builder, packed,
268 lp_build_int_vec_type(gallivm, dst_type), "");
270 /* Decode the input vector components */
/* Channel `chan` starts at bit chan * 8 and is 8 bits wide. */
271 for (chan = 0; chan < 4; ++chan) {
272 unsigned start = chan*8;
273 unsigned stop = start + 8;
/* Logical shift right by `start` bits. */
279 input = LLVMBuildLShr(builder, input,
280 lp_build_const_int_vec(gallivm, dst_type, start), "");
/* Keep only the low 8 bits. */
283 input = LLVMBuildAnd(builder, input, mask, "");
/* Convert the 8-bit unsigned normalized value to float of dst_type. */
285 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
294 * Fetch texels from a texture, returning them in SoA layout.
296 * \param type the desired return type for 'rgba'. The vector length
297 * is the number of texels to fetch
299 * \param base_ptr points to start of the texture image block. For non-
300 * compressed formats, this simply points to the texel.
301 * For compressed formats, it points to the start of the
302 * compressed data block.
304 * \param i, j the sub-block pixel coordinates. For non-compressed formats
305 * these will always be (0,0). For compressed formats, i will
306 * be in [0, block_width-1] and j will be in [0, block_height-1].
/*
 * Emit LLVM IR that fetches texels and returns them as rgba_out[0..3].
 * Three strategies are visible, tried in this order:
 *   1. gather the packed pixels and unpack them with
 *      lp_build_unpack_rgba_soa();
 *   2. one lp_build_fetch_rgba_aos() call followed by
 *      lp_build_rgba8_to_f32_soa();
 *   3. one lp_build_fetch_rgba_aos() call per pixel, scattering the result
 *      into the SoA vectors.
 *
 * NOTE(review): this listing omits some lines of the function (return type,
 * several parameter declarations, local declarations, call arguments,
 * braces and returns, and the end of the function), so the comments below
 * describe only the statements that are visible.
 */
309 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
310 const struct util_format_description *format_desc,
312 LLVMValueRef base_ptr,
316 LLVMValueRef rgba_out[4])
318 LLVMBuilderRef builder = gallivm->builder;
/*
 * Strategy 1 applies to plain-layout formats in the RGB or ZS colorspace
 * with 1x1 blocks whose bit size does not exceed type.width, provided
 * channel 0 is either not a float channel or is a 32-bit one.
 */
320 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
321 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
322 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
323 format_desc->block.width == 1 &&
324 format_desc->block.height == 1 &&
325 format_desc->block.bits <= type.width &&
326 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
327 format_desc->channel[0].size == 32))
330 * The packed pixel fits into an element of the destination format. Put
331 * the packed pixels into a vector and extract each component for all
332 * vector elements in parallel.
338 * gather the texels from the texture
339 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
341 packed = lp_build_gather(gallivm,
343 format_desc->block.bits,
348 * convert texels to float rgba
350 lp_build_unpack_rgba_soa(gallivm,
358 * Try calling lp_build_fetch_rgba_aos for all pixels.
/*
 * Strategy 2 applies when util_format_fits_8unorm() accepts the format and
 * the requested type is a vector of four 32-bit floats.
 */
361 if (util_format_fits_8unorm(format_desc) &&
362 type.floating && type.width == 32 && type.length == 4) {
363 struct lp_type tmp_type;
/*
 * The temporary AoS type is zero-initialized, has four times as many
 * elements as `type`, and is flagged as normalized.
 * NOTE(review): any other tmp_type fields are set on lines not visible
 * here.
 */
366 memset(&tmp_type, 0, sizeof tmp_type);
368 tmp_type.length = type.length * 4;
369 tmp_type.norm = TRUE;
371 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
372 base_ptr, offset, i, j);
374 lp_build_rgba8_to_f32_soa(gallivm,
383 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
385 * This is not the most efficient way of fetching pixels, as we
386 * miss some opportunities to do vectorization, but this is
387 * convenient for formats or scenarios for which there was no
388 * opportunity or incentive to optimize.
/*
 * NOTE(review): the initialization of this tmp_type is not visible in this
 * listing.
 */
393 struct lp_type tmp_type;
/* Report the slow path when the GALLIVM_DEBUG_PERF bit is set. */
395 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
396 debug_printf("%s: scalar unpacking of %s\n",
397 __FUNCTION__, format_desc->short_name);
/* Start every output vector as undef; elements are inserted one by one. */
403 for (chan = 0; chan < 4; ++chan) {
404 rgba_out[chan] = lp_build_undef(gallivm, type);
407 /* loop over number of pixels */
408 for(k = 0; k < type.length; ++k) {
409 LLVMValueRef index = lp_build_const_int32(gallivm, k);
410 LLVMValueRef offset_elem;
411 LLVMValueRef i_elem, j_elem;
/* Pull out the offset and the i/j values for this pixel. */
414 offset_elem = LLVMBuildExtractElement(builder, offset,
417 i_elem = LLVMBuildExtractElement(builder, i, index, "");
418 j_elem = LLVMBuildExtractElement(builder, j, index, "");
420 /* Get a single float[4]={R,G,B,A} pixel */
421 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
422 base_ptr, offset_elem,
426 * Insert the AoS tmp value channels into the SoA result vectors at
427 * position = 'index'.
429 for (chan = 0; chan < 4; ++chan) {
430 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
431 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
432 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
433 tmp_chan, index, "");