Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27
28 /**
29  * @file
30  * Texture sampling -- AoS.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  * @author Brian Paul <brianp@vmware.com>
34  */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_bitarit.h"
49 #include "lp_bld_logic.h"
50 #include "lp_bld_swizzle.h"
51 #include "lp_bld_pack.h"
52 #include "lp_bld_flow.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_format.h"
55 #include "lp_bld_init.h"
56 #include "lp_bld_sample.h"
57 #include "lp_bld_sample_aos.h"
58 #include "lp_bld_quad.h"
59
60
61 /**
62  * Build LLVM code for texture coord wrapping, for nearest filtering,
63  * for scaled integer texcoords.
64  * \param block_length  is the length of the pixel block along the
65  *                      coordinate axis
66  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
67  * \param length  the texture size along one dimension
68  * \param stride  pixel stride along the coordinate axis (in bytes)
69  * \param is_pot  if TRUE, length is a power of two
70  * \param wrap_mode  one of PIPE_TEX_WRAP_x
71  * \param out_offset  byte offset for the wrapped coordinate
72  * \param out_i  resulting sub-block pixel coordinate for coord0
73  */
74 static void
75 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
76                                  unsigned block_length,
77                                  LLVMValueRef coord,
78                                  LLVMValueRef length,
79                                  LLVMValueRef stride,
80                                  boolean is_pot,
81                                  unsigned wrap_mode,
82                                  LLVMValueRef *out_offset,
83                                  LLVMValueRef *out_i)
84 {
85    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
86    LLVMBuilderRef builder = bld->gallivm->builder;
87    LLVMValueRef length_minus_one;
88
89    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
90
91    switch(wrap_mode) {
92    case PIPE_TEX_WRAP_REPEAT:
93       if(is_pot)
94          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
95       else {
96          /* Add a bias to the texcoord to handle negative coords */
97          LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
98          coord = LLVMBuildAdd(builder, coord, bias, "");
99          coord = LLVMBuildURem(builder, coord, length, "");
100       }
101       break;
102
103    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
104       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
105       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
106       break;
107
108    case PIPE_TEX_WRAP_CLAMP:
109    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
110    case PIPE_TEX_WRAP_MIRROR_REPEAT:
111    case PIPE_TEX_WRAP_MIRROR_CLAMP:
112    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
113    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
114    default:
115       assert(0);
116    }
117
118    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
119                                   out_offset, out_i);
120 }
121
122
123 /**
124  * Build LLVM code for texture coord wrapping, for linear filtering,
125  * for scaled integer texcoords.
126  * \param block_length  is the length of the pixel block along the
127  *                      coordinate axis
128  * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
129  * \param length  the texture size along one dimension
130  * \param stride  pixel stride along the coordinate axis (in bytes)
131  * \param is_pot  if TRUE, length is a power of two
132  * \param wrap_mode  one of PIPE_TEX_WRAP_x
133  * \param offset0  resulting relative offset for coord0
134  * \param offset1  resulting relative offset for coord0 + 1
135  * \param i0  resulting sub-block pixel coordinate for coord0
136  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
137  */
138 static void
139 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
140                                 unsigned block_length,
141                                 LLVMValueRef coord0,
142                                 LLVMValueRef length,
143                                 LLVMValueRef stride,
144                                 boolean is_pot,
145                                 unsigned wrap_mode,
146                                 LLVMValueRef *offset0,
147                                 LLVMValueRef *offset1,
148                                 LLVMValueRef *i0,
149                                 LLVMValueRef *i1)
150 {
151    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
152    LLVMBuilderRef builder = bld->gallivm->builder;
153    LLVMValueRef length_minus_one;
154    LLVMValueRef lmask, umask, mask;
155
156    if (block_length != 1) {
157       /*
158        * If the pixel block covers more than one pixel then there is no easy
159        * way to calculate offset1 relative to offset0. Instead, compute them
160        * independently.
161        */
162
163       LLVMValueRef coord1;
164
165       lp_build_sample_wrap_nearest_int(bld,
166                                        block_length,
167                                        coord0,
168                                        length,
169                                        stride,
170                                        is_pot,
171                                        wrap_mode,
172                                        offset0, i0);
173
174       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
175
176       lp_build_sample_wrap_nearest_int(bld,
177                                        block_length,
178                                        coord1,
179                                        length,
180                                        stride,
181                                        is_pot,
182                                        wrap_mode,
183                                        offset1, i1);
184
185       return;
186    }
187
188    /*
189     * Scalar pixels -- try to compute offset0 and offset1 with a single stride
190     * multiplication.
191     */
192
193    *i0 = int_coord_bld->zero;
194    *i1 = int_coord_bld->zero;
195
196    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
197
198    switch(wrap_mode) {
199    case PIPE_TEX_WRAP_REPEAT:
200       if (is_pot) {
201          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
202       }
203       else {
204          /* Add a bias to the texcoord to handle negative coords */
205          LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
206          coord0 = LLVMBuildAdd(builder, coord0, bias, "");
207          coord0 = LLVMBuildURem(builder, coord0, length, "");
208       }
209
210       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
211                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
212
213       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
214       *offset1 = LLVMBuildAnd(builder,
215                               lp_build_add(int_coord_bld, *offset0, stride),
216                               mask, "");
217       break;
218
219    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
220       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
221                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
222       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
223                                PIPE_FUNC_LESS, coord0, length_minus_one);
224
225       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
226       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
227
228       mask = LLVMBuildAnd(builder, lmask, umask, "");
229
230       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
231       *offset1 = lp_build_add(int_coord_bld,
232                               *offset0,
233                               LLVMBuildAnd(builder, stride, mask, ""));
234       break;
235
236    case PIPE_TEX_WRAP_CLAMP:
237    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
238    case PIPE_TEX_WRAP_MIRROR_REPEAT:
239    case PIPE_TEX_WRAP_MIRROR_CLAMP:
240    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
241    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
242    default:
243       assert(0);
244       *offset0 = int_coord_bld->zero;
245       *offset1 = int_coord_bld->zero;
246       break;
247    }
248 }
249
250
251 /**
252  * Sample a single texture image with nearest sampling.
253  * If sampling a cube texture, r = cube face in [0,5].
254  * Return filtered color as two vectors of 16-bit fixed point values.
255  */
256 static void
257 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
258                               LLVMValueRef int_size,
259                               LLVMValueRef row_stride_vec,
260                               LLVMValueRef img_stride_vec,
261                               LLVMValueRef data_ptr,
262                               LLVMValueRef s,
263                               LLVMValueRef t,
264                               LLVMValueRef r,
265                               LLVMValueRef *colors_lo,
266                               LLVMValueRef *colors_hi)
267 {
268    const unsigned dims = bld->dims;
269    LLVMBuilderRef builder = bld->gallivm->builder;
270    struct lp_build_context i32, h16, u8n;
271    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
272    LLVMValueRef i32_c8;
273    LLVMValueRef width_vec, height_vec, depth_vec;
274    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
275    LLVMValueRef x_stride;
276    LLVMValueRef x_offset, offset;
277    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
278
279    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
280    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
281    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
282
283    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
284    h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
285    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
286
287    lp_build_extract_image_sizes(bld,
288                                 bld->int_size_type,
289                                 bld->int_coord_type,
290                                 int_size,
291                                 &width_vec,
292                                 &height_vec,
293                                 &depth_vec);
294
295    if (bld->static_state->normalized_coords) {
296       LLVMValueRef scaled_size;
297       LLVMValueRef flt_size;
298
299       /* scale size by 256 (8 fractional bits) */
300       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
301
302       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
303
304       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
305    }
306    else {
307       /* scale coords by 256 (8 fractional bits) */
308       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
309       if (dims >= 2)
310          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
311       if (dims >= 3)
312          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
313    }
314
315    /* convert float to int */
316    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
317    if (dims >= 2)
318       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
319    if (dims >= 3)
320       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
321
322    /* compute floor (shift right 8) */
323    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
324    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
325    if (dims >= 2)
326       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
327    if (dims >= 3)
328       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
329
330    /* get pixel, row, image strides */
331    x_stride = lp_build_const_vec(bld->gallivm,
332                                  bld->int_coord_bld.type,
333                                  bld->format_desc->block.bits/8);
334
335    /* Do texcoord wrapping, compute texel offset */
336    lp_build_sample_wrap_nearest_int(bld,
337                                     bld->format_desc->block.width,
338                                     s_ipart, width_vec, x_stride,
339                                     bld->static_state->pot_width,
340                                     bld->static_state->wrap_s,
341                                     &x_offset, &x_subcoord);
342    offset = x_offset;
343    if (dims >= 2) {
344       LLVMValueRef y_offset;
345       lp_build_sample_wrap_nearest_int(bld,
346                                        bld->format_desc->block.height,
347                                        t_ipart, height_vec, row_stride_vec,
348                                        bld->static_state->pot_height,
349                                        bld->static_state->wrap_t,
350                                        &y_offset, &y_subcoord);
351       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
352       if (dims >= 3) {
353          LLVMValueRef z_offset;
354          lp_build_sample_wrap_nearest_int(bld,
355                                           1, /* block length (depth) */
356                                           r_ipart, depth_vec, img_stride_vec,
357                                           bld->static_state->pot_height,
358                                           bld->static_state->wrap_r,
359                                           &z_offset, &z_subcoord);
360          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
361       }
362       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
363          LLVMValueRef z_offset;
364          /* The r coord is the cube face in [0,5] */
365          z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
366          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
367       }
368    }
369
370    /*
371     * Fetch the pixels as 4 x 32bit (rgba order might differ):
372     *
373     *   rgba0 rgba1 rgba2 rgba3
374     *
375     * bit cast them into 16 x u8
376     *
377     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
378     *
379     * unpack them into two 8 x i16:
380     *
381     *   r0 g0 b0 a0 r1 g1 b1 a1
382     *   r2 g2 b2 a2 r3 g3 b3 a3
383     *
384     * The higher 8 bits of the resulting elements will be zero.
385     */
386    {
387       LLVMValueRef rgba8;
388
389       if (util_format_is_rgba8_variant(bld->format_desc)) {
390          /*
391           * Given the format is a rgba8, just read the pixels as is,
392           * without any swizzling. Swizzling will be done later.
393           */
394          rgba8 = lp_build_gather(bld->gallivm,
395                                  bld->texel_type.length,
396                                  bld->format_desc->block.bits,
397                                  bld->texel_type.width,
398                                  data_ptr, offset);
399
400          rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
401       }
402       else {
403          rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
404                                          bld->format_desc,
405                                          u8n.type,
406                                          data_ptr, offset,
407                                          x_subcoord,
408                                          y_subcoord);
409       }
410
411       /* Expand one 4*rgba8 to two 2*rgba16 */
412       lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
413                        rgba8,
414                        colors_lo, colors_hi);
415    }
416 }
417
418
419 /**
420  * Sample a single texture image with (bi-)(tri-)linear sampling.
421  * Return filtered color as two vectors of 16-bit fixed point values.
422  */
423 static void
424 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
425                              LLVMValueRef int_size,
426                              LLVMValueRef row_stride_vec,
427                              LLVMValueRef img_stride_vec,
428                              LLVMValueRef data_ptr,
429                              LLVMValueRef s,
430                              LLVMValueRef t,
431                              LLVMValueRef r,
432                              LLVMValueRef *colors_lo,
433                              LLVMValueRef *colors_hi)
434 {
435    const unsigned dims = bld->dims;
436    LLVMBuilderRef builder = bld->gallivm->builder;
437    struct lp_build_context i32, h16, u8n;
438    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
439    LLVMValueRef i32_c8, i32_c128, i32_c255;
440    LLVMValueRef width_vec, height_vec, depth_vec;
441    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
442    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL;
443    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL;
444    LLVMValueRef x_stride, y_stride, z_stride;
445    LLVMValueRef x_offset0, x_offset1;
446    LLVMValueRef y_offset0, y_offset1;
447    LLVMValueRef z_offset0, z_offset1;
448    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
449    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
450    LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
451    LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
452    LLVMValueRef packed_lo, packed_hi;
453    unsigned x, y, z;
454    unsigned i, j, k;
455    unsigned numj, numk;
456
457    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
458    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
459    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
460
461    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
462    h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
463    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
464
465    lp_build_extract_image_sizes(bld,
466                                 bld->int_size_type,
467                                 bld->int_coord_type,
468                                 int_size,
469                                 &width_vec,
470                                 &height_vec,
471                                 &depth_vec);
472
473    if (bld->static_state->normalized_coords) {
474       LLVMValueRef scaled_size;
475       LLVMValueRef flt_size;
476
477       /* scale size by 256 (8 fractional bits) */
478       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
479
480       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
481
482       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
483    }
484    else {
485       /* scale coords by 256 (8 fractional bits) */
486       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
487       if (dims >= 2)
488          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
489       if (dims >= 3)
490          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
491    }
492
493    /* convert float to int */
494    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
495    if (dims >= 2)
496       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
497    if (dims >= 3)
498       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
499
500    /* subtract 0.5 (add -128) */
501    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
502    s = LLVMBuildAdd(builder, s, i32_c128, "");
503    if (dims >= 2) {
504       t = LLVMBuildAdd(builder, t, i32_c128, "");
505    }
506    if (dims >= 3) {
507       r = LLVMBuildAdd(builder, r, i32_c128, "");
508    }
509
510    /* compute floor (shift right 8) */
511    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
512    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
513    if (dims >= 2)
514       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
515    if (dims >= 3)
516       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
517
518    /* compute fractional part (AND with 0xff) */
519    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
520    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
521    if (dims >= 2)
522       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
523    if (dims >= 3)
524       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
525
526    /* get pixel, row and image strides */
527    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
528                                  bld->format_desc->block.bits/8);
529    y_stride = row_stride_vec;
530    z_stride = img_stride_vec;
531
532    /* do texcoord wrapping and compute texel offsets */
533    lp_build_sample_wrap_linear_int(bld,
534                                    bld->format_desc->block.width,
535                                    s_ipart, width_vec, x_stride,
536                                    bld->static_state->pot_width,
537                                    bld->static_state->wrap_s,
538                                    &x_offset0, &x_offset1,
539                                    &x_subcoord[0], &x_subcoord[1]);
540    for (z = 0; z < 2; z++) {
541       for (y = 0; y < 2; y++) {
542          offset[z][y][0] = x_offset0;
543          offset[z][y][1] = x_offset1;
544       }
545    }
546
547    if (dims >= 2) {
548       lp_build_sample_wrap_linear_int(bld,
549                                       bld->format_desc->block.height,
550                                       t_ipart, height_vec, y_stride,
551                                       bld->static_state->pot_height,
552                                       bld->static_state->wrap_t,
553                                       &y_offset0, &y_offset1,
554                                       &y_subcoord[0], &y_subcoord[1]);
555
556       for (z = 0; z < 2; z++) {
557          for (x = 0; x < 2; x++) {
558             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
559                                            offset[z][0][x], y_offset0);
560             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
561                                            offset[z][1][x], y_offset1);
562          }
563       }
564    }
565
566    if (dims >= 3) {
567       lp_build_sample_wrap_linear_int(bld,
568                                       bld->format_desc->block.height,
569                                       r_ipart, depth_vec, z_stride,
570                                       bld->static_state->pot_depth,
571                                       bld->static_state->wrap_r,
572                                       &z_offset0, &z_offset1,
573                                       &z_subcoord[0], &z_subcoord[1]);
574       for (y = 0; y < 2; y++) {
575          for (x = 0; x < 2; x++) {
576             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
577                                            offset[0][y][x], z_offset0);
578             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
579                                            offset[1][y][x], z_offset1);
580          }
581       }
582    }
583    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
584       LLVMValueRef z_offset;
585       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
586       for (y = 0; y < 2; y++) {
587          for (x = 0; x < 2; x++) {
588             /* The r coord is the cube face in [0,5] */
589             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
590                                            offset[0][y][x], z_offset);
591          }
592       }
593    }
594
595    /*
596     * Transform 4 x i32 in
597     *
598     *   s_fpart = {s0, s1, s2, s3}
599     *
600     * into 8 x i16
601     *
602     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
603     *
604     * into two 8 x i16
605     *
606     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
607     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
608     *
609     * and likewise for t_fpart. There is no risk of loosing precision here
610     * since the fractional parts only use the lower 8bits.
611     */
612    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
613    if (dims >= 2)
614       t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
615    if (dims >= 3)
616       r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
617
618    {
619       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
620       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
621       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
622       LLVMValueRef shuffle_lo;
623       LLVMValueRef shuffle_hi;
624
625       for (j = 0; j < h16.type.length; j += 4) {
626 #ifdef PIPE_ARCH_LITTLE_ENDIAN
627          unsigned subindex = 0;
628 #else
629          unsigned subindex = 1;
630 #endif
631          LLVMValueRef index;
632
633          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
634          for (i = 0; i < 4; ++i)
635             shuffles_lo[j + i] = index;
636
637          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
638          for (i = 0; i < 4; ++i)
639             shuffles_hi[j + i] = index;
640       }
641
642       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
643       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
644
645       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
646                                           shuffle_lo, "");
647       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
648                                           shuffle_hi, "");
649       if (dims >= 2) {
650          t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
651                                              shuffle_lo, "");
652          t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
653                                              shuffle_hi, "");
654       }
655       if (dims >= 3) {
656          r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
657                                              shuffle_lo, "");
658          r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
659                                              shuffle_hi, "");
660       }
661    }
662
663    /*
664     * Fetch the pixels as 4 x 32bit (rgba order might differ):
665     *
666     *   rgba0 rgba1 rgba2 rgba3
667     *
668     * bit cast them into 16 x u8
669     *
670     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
671     *
672     * unpack them into two 8 x i16:
673     *
674     *   r0 g0 b0 a0 r1 g1 b1 a1
675     *   r2 g2 b2 a2 r3 g3 b3 a3
676     *
677     * The higher 8 bits of the resulting elements will be zero.
678     */
679    numj = 1 + (dims >= 2);
680    numk = 1 + (dims >= 3);
681
682    for (k = 0; k < numk; k++) {
683       for (j = 0; j < numj; j++) {
684          for (i = 0; i < 2; i++) {
685             LLVMValueRef rgba8;
686
687             if (util_format_is_rgba8_variant(bld->format_desc)) {
688                /*
689                 * Given the format is a rgba8, just read the pixels as is,
690                 * without any swizzling. Swizzling will be done later.
691                 */
692                rgba8 = lp_build_gather(bld->gallivm,
693                                        bld->texel_type.length,
694                                        bld->format_desc->block.bits,
695                                        bld->texel_type.width,
696                                        data_ptr, offset[k][j][i]);
697
698                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
699             }
700             else {
701                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
702                                                bld->format_desc,
703                                                u8n.type,
704                                                data_ptr, offset[k][j][i],
705                                                x_subcoord[i],
706                                                y_subcoord[j]);
707             }
708
709             /* Expand one 4*rgba8 to two 2*rgba16 */
710             lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
711                              rgba8,
712                              &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
713          }
714       }
715    }
716
717    /*
718     * Linear interpolation with 8.8 fixed point.
719     */
720    if (dims == 1) {
721       /* 1-D lerp */
722       packed_lo = lp_build_lerp(&h16,
723                                 s_fpart_lo,
724                                 neighbors_lo[0][0][0],
725                                 neighbors_lo[0][0][1]);
726
727       packed_hi = lp_build_lerp(&h16,
728                                 s_fpart_hi,
729                                 neighbors_hi[0][0][0],
730                                 neighbors_hi[0][0][1]);
731    }
732    else {
733       /* 2-D lerp */
734       packed_lo = lp_build_lerp_2d(&h16,
735                                    s_fpart_lo, t_fpart_lo,
736                                    neighbors_lo[0][0][0],
737                                    neighbors_lo[0][0][1],
738                                    neighbors_lo[0][1][0],
739                                    neighbors_lo[0][1][1]);
740
741       packed_hi = lp_build_lerp_2d(&h16,
742                                    s_fpart_hi, t_fpart_hi,
743                                    neighbors_hi[0][0][0],
744                                    neighbors_hi[0][0][1],
745                                    neighbors_hi[0][1][0],
746                                    neighbors_hi[0][1][1]);
747
748       if (dims >= 3) {
749          LLVMValueRef packed_lo2, packed_hi2;
750
751          /* lerp in the second z slice */
752          packed_lo2 = lp_build_lerp_2d(&h16,
753                                        s_fpart_lo, t_fpart_lo,
754                                        neighbors_lo[1][0][0],
755                                        neighbors_lo[1][0][1],
756                                        neighbors_lo[1][1][0],
757                                        neighbors_lo[1][1][1]);
758
759          packed_hi2 = lp_build_lerp_2d(&h16,
760                                        s_fpart_hi, t_fpart_hi,
761                                        neighbors_hi[1][0][0],
762                                        neighbors_hi[1][0][1],
763                                        neighbors_hi[1][1][0],
764                                        neighbors_hi[1][1][1]);
765          /* interp between two z slices */
766          packed_lo = lp_build_lerp(&h16, r_fpart_lo,
767                                    packed_lo, packed_lo2);
768          packed_hi = lp_build_lerp(&h16, r_fpart_hi,
769                                    packed_hi, packed_hi2);
770       }
771    }
772
773    *colors_lo = packed_lo;
774    *colors_hi = packed_hi;
775 }
776
777
778 /**
779  * Sample the texture/mipmap using given image filter and mip filter.
780  * data0_ptr and data1_ptr point to the two mipmap levels to sample
781  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
782  * If we're using nearest miplevel sampling the '1' values will be null/unused.
783  */
784 static void
785 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
786                        unsigned img_filter,
787                        unsigned mip_filter,
788                        LLVMValueRef s,
789                        LLVMValueRef t,
790                        LLVMValueRef r,
791                        LLVMValueRef ilevel0,
792                        LLVMValueRef ilevel1,
793                        LLVMValueRef lod_fpart,
794                        LLVMValueRef colors_lo_var,
795                        LLVMValueRef colors_hi_var)
796 {
797    LLVMBuilderRef builder = bld->gallivm->builder;
798    LLVMValueRef size0;
799    LLVMValueRef size1;
800    LLVMValueRef row_stride0_vec;
801    LLVMValueRef row_stride1_vec;
802    LLVMValueRef img_stride0_vec;
803    LLVMValueRef img_stride1_vec;
804    LLVMValueRef data_ptr0;
805    LLVMValueRef data_ptr1;
806    LLVMValueRef colors0_lo, colors0_hi;
807    LLVMValueRef colors1_lo, colors1_hi;
808
809    /* sample the first mipmap level */
810    lp_build_mipmap_level_sizes(bld, ilevel0,
811                                &size0,
812                                &row_stride0_vec, &img_stride0_vec);
813    data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
814    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
815       lp_build_sample_image_nearest(bld,
816                                     size0,
817                                     row_stride0_vec, img_stride0_vec,
818                                     data_ptr0, s, t, r,
819                                     &colors0_lo, &colors0_hi);
820    }
821    else {
822       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
823       lp_build_sample_image_linear(bld,
824                                    size0,
825                                    row_stride0_vec, img_stride0_vec,
826                                    data_ptr0, s, t, r,
827                                    &colors0_lo, &colors0_hi);
828    }
829
830    /* Store the first level's colors in the output variables */
831    LLVMBuildStore(builder, colors0_lo, colors_lo_var);
832    LLVMBuildStore(builder, colors0_hi, colors_hi_var);
833
834    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
835       LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
836       LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
837       struct lp_build_if_state if_ctx;
838       LLVMValueRef need_lerp;
839
840       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
841       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
842
843       /* need_lerp = lod_fpart > 0 */
844       need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
845                                 lod_fpart, LLVMConstNull(i32_type),
846                                 "need_lerp");
847
848       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
849       {
850          struct lp_build_context h16_bld;
851
852          lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
853
854          /* sample the second mipmap level */
855          lp_build_mipmap_level_sizes(bld, ilevel1,
856                                      &size1,
857                                      &row_stride1_vec, &img_stride1_vec);
858          data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
859          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
860             lp_build_sample_image_nearest(bld,
861                                           size1,
862                                           row_stride1_vec, img_stride1_vec,
863                                           data_ptr1, s, t, r,
864                                           &colors1_lo, &colors1_hi);
865          }
866          else {
867             lp_build_sample_image_linear(bld,
868                                          size1,
869                                          row_stride1_vec, img_stride1_vec,
870                                          data_ptr1, s, t, r,
871                                          &colors1_lo, &colors1_hi);
872          }
873
874          /* interpolate samples from the two mipmap levels */
875
876          lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
877          lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
878
879 #if HAVE_LLVM == 0x208
880          /* This is a work-around for a bug in LLVM 2.8.
881           * Evidently, something goes wrong in the construction of the
882           * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
883           * to force the vector to be properly constructed.
884           * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
885           */
886          {
887             LLVMValueRef shuffles[8], shuffle;
888             int i;
889             assert(h16_bld.type.length <= Elements(shuffles));
890             for (i = 0; i < h16_bld.type.length; i++)
891                shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
892             shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
893             lod_fpart = LLVMBuildShuffleVector(builder,
894                                                lod_fpart, lod_fpart,
895                                                shuffle, "");
896          }
897 #endif
898
899          colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
900                                     colors0_lo, colors1_lo);
901          colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
902                                     colors0_hi, colors1_hi);
903
904          LLVMBuildStore(builder, colors0_lo, colors_lo_var);
905          LLVMBuildStore(builder, colors0_hi, colors_hi_var);
906       }
907       lp_build_endif(&if_ctx);
908    }
909 }
910
911
912
913 /**
914  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
915  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
916  * but only limited texture coord wrap modes.
917  */
918 void
919 lp_build_sample_aos(struct lp_build_sample_context *bld,
920                     unsigned unit,
921                     LLVMValueRef s,
922                     LLVMValueRef t,
923                     LLVMValueRef r,
924                     const LLVMValueRef *ddx,
925                     const LLVMValueRef *ddy,
926                     LLVMValueRef lod_bias, /* optional */
927                     LLVMValueRef explicit_lod, /* optional */
928                     LLVMValueRef texel_out[4])
929 {
930    struct lp_build_context *int_bld = &bld->int_bld;
931    LLVMBuilderRef builder = bld->gallivm->builder;
932    const unsigned mip_filter = bld->static_state->min_mip_filter;
933    const unsigned min_filter = bld->static_state->min_img_filter;
934    const unsigned mag_filter = bld->static_state->mag_img_filter;
935    const unsigned dims = bld->dims;
936    LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
937    LLVMValueRef ilevel0, ilevel1 = NULL;
938    LLVMValueRef packed, packed_lo, packed_hi;
939    LLVMValueRef unswizzled[4];
940    LLVMValueRef face_ddx[4], face_ddy[4];
941    struct lp_build_context h16_bld;
942    LLVMValueRef first_level;
943    LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);
944
945    /* we only support the common/simple wrap modes at this time */
946    assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
947    if (dims >= 2)
948       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
949    if (dims >= 3)
950       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
951
952
953    /* make 16-bit fixed-pt builder context */
954    lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
955
956    /* cube face selection, compute pre-face coords, etc. */
957    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
958       LLVMValueRef face, face_s, face_t;
959       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
960       s = face_s; /* vec */
961       t = face_t; /* vec */
962       /* use 'r' to indicate cube face */
963       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
964
965       /* recompute ddx, ddy using the new (s,t) face texcoords */
966       face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
967       face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
968       face_ddx[2] = NULL;
969       face_ddx[3] = NULL;
970       face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
971       face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
972       face_ddy[2] = NULL;
973       face_ddy[3] = NULL;
974       ddx = face_ddx;
975       ddy = face_ddy;
976    }
977
978    /*
979     * Compute the level of detail (float).
980     */
981    if (min_filter != mag_filter ||
982        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
983       /* Need to compute lod either to choose mipmap levels or to
984        * distinguish between minification/magnification with one mipmap level.
985        */
986       lp_build_lod_selector(bld, unit, ddx, ddy,
987                             lod_bias, explicit_lod,
988                             mip_filter,
989                             &lod_ipart, &lod_fpart);
990    } else {
991       lod_ipart = i32t_zero;
992    }
993
994    /*
995     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
996     */
997    switch (mip_filter) {
998    default:
999       assert(0 && "bad mip_filter value in lp_build_sample_aos()");
1000       /* fall-through */
1001    case PIPE_TEX_MIPFILTER_NONE:
1002       /* always use mip level 0 */
1003       if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1004          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1005           * We should be able to set ilevel0 = const(0) but that causes
1006           * bad x86 code to be emitted.
1007           */
1008          assert(lod_ipart);
1009          lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1010       }
1011       else {
1012          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1013                                                        bld->gallivm, unit);
1014          ilevel0 = first_level;
1015       }
1016       break;
1017    case PIPE_TEX_MIPFILTER_NEAREST:
1018       assert(lod_ipart);
1019       lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1020       break;
1021    case PIPE_TEX_MIPFILTER_LINEAR:
1022       assert(lod_ipart);
1023       assert(lod_fpart);
1024       lp_build_linear_mip_levels(bld, unit,
1025                                  lod_ipart, &lod_fpart,
1026                                  &ilevel0, &ilevel1);
1027       break;
1028    }
1029
1030    /*
1031     * Get/interpolate texture colors.
1032     */
1033
1034    packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
1035    packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
1036
1037    if (min_filter == mag_filter) {
1038       /* no need to distinquish between minification and magnification */
1039       lp_build_sample_mipmap(bld,
1040                              min_filter, mip_filter,
1041                              s, t, r,
1042                              ilevel0, ilevel1, lod_fpart,
1043                              packed_lo, packed_hi);
1044    }
1045    else {
1046       /* Emit conditional to choose min image filter or mag image filter
1047        * depending on the lod being > 0 or <= 0, respectively.
1048        */
1049       struct lp_build_if_state if_ctx;
1050       LLVMValueRef minify;
1051
1052       /* minify = lod >= 0.0 */
1053       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1054                              lod_ipart, int_bld->zero, "");
1055
1056       lp_build_if(&if_ctx, bld->gallivm, minify);
1057       {
1058          /* Use the minification filter */
1059          lp_build_sample_mipmap(bld,
1060                                 min_filter, mip_filter,
1061                                 s, t, r,
1062                                 ilevel0, ilevel1, lod_fpart,
1063                                 packed_lo, packed_hi);
1064       }
1065       lp_build_else(&if_ctx);
1066       {
1067          /* Use the magnification filter */
1068          lp_build_sample_mipmap(bld, 
1069                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1070                                 s, t, r,
1071                                 ilevel0, NULL, NULL,
1072                                 packed_lo, packed_hi);
1073       }
1074       lp_build_endif(&if_ctx);
1075    }
1076
1077    /*
1078     * combine the values stored in 'packed_lo' and 'packed_hi' variables
1079     * into 'packed'
1080     */
1081    packed = lp_build_pack2(bld->gallivm,
1082                            h16_bld.type, lp_type_unorm(8),
1083                            LLVMBuildLoad(builder, packed_lo, ""),
1084                            LLVMBuildLoad(builder, packed_hi, ""));
1085
1086    /*
1087     * Convert to SoA and swizzle.
1088     */
1089    lp_build_rgba8_to_f32_soa(bld->gallivm,
1090                              bld->texel_type,
1091                              packed, unswizzled);
1092
1093    if (util_format_is_rgba8_variant(bld->format_desc)) {
1094       lp_build_format_swizzle_soa(bld->format_desc,
1095                                   &bld->texel_bld,
1096                                   unswizzled, texel_out);
1097    }
1098    else {
1099       texel_out[0] = unswizzled[0];
1100       texel_out[1] = unswizzled[1];
1101       texel_out[2] = unswizzled[2];
1102       texel_out[3] = unswizzled[3];
1103    }
1104 }