From 0a7824862eb753878fa79b153b2a111884ff1197 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Sep 2010 17:04:26 -0600 Subject: [PATCH] gallivm: expand AoS sampling to cover all filtering modes ...and all texture targets (1D/2D/3D/CUBE). --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/gallivm/lp_bld_sample.c | 513 ++++++++- src/gallium/auxiliary/gallivm/lp_bld_sample.h | 164 ++- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 1145 +++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h | 65 ++ src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 1059 +------------------ 7 files changed, 1919 insertions(+), 1029 deletions(-) create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 5388f4e..2a69294 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -168,6 +168,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_printf.c \ gallivm/lp_bld_quad.c \ gallivm/lp_bld_sample.c \ + gallivm/lp_bld_sample_aos.c \ gallivm/lp_bld_sample_soa.c \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index ba8be2e..cea2d7d 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -219,6 +219,7 @@ if env['llvm']: 'gallivm/lp_bld_printf.c', 'gallivm/lp_bld_quad.c', 'gallivm/lp_bld_sample.c', + 'gallivm/lp_bld_sample_aos.c', 'gallivm/lp_bld_sample_soa.c', 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 259b114..e89ee7c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -36,11 +36,13 @@ #include "pipe/p_state.h" #include "util/u_format.h" 
#include "util/u_math.h" -#include "lp_bld_debug.h" -#include "lp_bld_const.h" #include "lp_bld_arit.h" -#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_debug.h" +#include "lp_bld_flow.h" #include "lp_bld_sample.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_type.h" /** @@ -124,6 +126,511 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** + * Generate code to compute texture level of detail (lambda). + * \param ddx partial derivatives of (s, t, r, q) with respect to X + * \param ddy partial derivatives of (s, t, r, q) with respect to Y + * \param lod_bias optional float vector with the shader lod bias + * \param explicit_lod optional float vector with the explicit lod + * \param width scalar int texture width + * \param height scalar int texture height + * \param depth scalar int texture depth + * + * XXX: The resulting lod is scalar, so ignore all but the first element of + * derivatives, lod_bias, etc that are passed by the shader. + */ +LLVMValueRef +lp_build_lod_selector(struct lp_build_sample_context *bld, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth) + +{ + if (bld->static_state->min_lod == bld->static_state->max_lod) { + /* User is forcing sampling from a particular mipmap level. + * This is hit during mipmap generation. 
+ */ + return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); + } + else { + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); + LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->min_lod); + LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->max_lod); + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef lod; + + if (explicit_lod) { + lod = LLVMBuildExtractElement(bld->builder, explicit_lod, + index0, ""); + } + else { + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef dsdx, dsdy; + LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; + LLVMValueRef rho; + + dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); + drdy = lp_build_abs(float_bld, drdy); + } + } + + /* Compute rho = max of all partial derivatives scaled by texture size. 
+ * XXX this could be vectorized somewhat + */ + rho = LLVMBuildFMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; + max = LLVMBuildFMul(bld->builder, + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); + rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildFMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } + } + + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); + + /* add shader lod bias */ + if (lod_bias) { + lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, + index0, ""); + lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + } + } + + /* add sampler lod bias */ + lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + + /* clamp lod */ + lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + + return lod; + } +} + + +/** + * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer + * mipmap level index. + * Note: this is all scalar code. 
+ * \param lod scalar float texture level of detail + * \param level_out returns integer + */ +void +lp_build_nearest_mip_level(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; + + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); + + /* convert float lod to integer */ + level = lp_build_iround(float_bld, lod); + + /* clamp level to legal range of levels */ + *level_out = lp_build_clamp(int_bld, level, zero, last_level); +} + + +/** + * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to + * two (adjacent) integer mipmap level indexes. Later, we'll sample from those + * two mipmap levels and interpolate between them. + */ +void +lp_build_linear_mip_levels(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level0_out, + LLVMValueRef *level1_out, + LLVMValueRef *weight_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; + + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); + + /* convert float lod to integer */ + level = lp_build_ifloor(float_bld, lod); + + /* compute level 0 and clamp to legal range of levels */ + *level0_out = lp_build_clamp(int_bld, level, + int_bld->zero, + last_level); + /* compute level 1 and clamp to legal range of levels */ + level = lp_build_add(int_bld, level, int_bld->one); + *level1_out = lp_build_clamp(int_bld, level, + int_bld->zero, + last_level); + + *weight_out = lp_build_fract(float_bld, lod); +} + + +LLVMValueRef +lp_build_get_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], data_ptr; + 
indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); + data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); + return data_ptr; +} + + +LLVMValueRef +lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_mipmap_level(bld, data_array, lvl); +} + + +/** + * Codegen equivalent for u_minify(). + * Return max(1, base_size >> level); + */ +static LLVMValueRef +lp_build_minify(struct lp_build_sample_context *bld, + LLVMValueRef base_size, + LLVMValueRef level) +{ + LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); + size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); + return size; +} + + +/** + * Dereference stride_array[mipmap_level] array to get a stride. + * Return stride as a vector. + */ +static LLVMValueRef +lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], stride; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); + stride = LLVMBuildLoad(bld->builder, stride, ""); + stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); + return stride; +} + + +/** + * When sampling a mipmap, we need to compute the width, height, depth + * of the source levels from the level indexes. This helper function + * does that. 
+ */ +void +lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, + unsigned dims, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_array, + LLVMValueRef *width0_vec, + LLVMValueRef *width1_vec, + LLVMValueRef *height0_vec, + LLVMValueRef *height1_vec, + LLVMValueRef *depth0_vec, + LLVMValueRef *depth1_vec, + LLVMValueRef *row_stride0_vec, + LLVMValueRef *row_stride1_vec, + LLVMValueRef *img_stride0_vec, + LLVMValueRef *img_stride1_vec) +{ + const unsigned mip_filter = bld->static_state->min_mip_filter; + LLVMValueRef ilevel0_vec, ilevel1_vec; + + ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + + /* + * Compute width, height, depth at mipmap level 'ilevel0' + */ + *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + if (dims >= 2) { + *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); + *row_stride0_vec = lp_build_get_level_stride_vec(bld, + row_stride_array, + ilevel0); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + *img_stride0_vec = lp_build_get_level_stride_vec(bld, + img_stride_array, + ilevel0); + if (dims == 3) { + *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + } + } + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* compute width, height, depth for second mipmap level at 'ilevel1' */ + *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); + if (dims >= 2) { + *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); + *row_stride1_vec = lp_build_get_level_stride_vec(bld, + row_stride_array, + ilevel1); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + *img_stride1_vec = lp_build_get_level_stride_vec(bld, + img_stride_array, + ilevel1); + if (dims == 3) { + *depth1_vec = 
lp_build_minify(bld, depth_vec, ilevel1_vec); + } + } + } + } +} + + + +/** Helper used by lp_build_cube_lookup() */ +static LLVMValueRef +lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) +{ + /* ima = -0.5 / abs(coord); */ + LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); + LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); + LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); + return ima; +} + + +/** + * Helper used by lp_build_cube_lookup() + * \param sign scalar +1 or -1 + * \param coord float vector + * \param ima float vector + */ +static LLVMValueRef +lp_build_cube_coord(struct lp_build_context *coord_bld, + LLVMValueRef sign, int negate_coord, + LLVMValueRef coord, LLVMValueRef ima) +{ + /* return negate(coord) * ima * sign + 0.5; */ + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); + LLVMValueRef res; + + assert(negate_coord == +1 || negate_coord == -1); + + if (negate_coord == -1) { + coord = lp_build_negate(coord_bld, coord); + } + + res = lp_build_mul(coord_bld, coord, ima); + if (sign) { + sign = lp_build_broadcast_scalar(coord_bld, sign); + res = lp_build_mul(coord_bld, res, sign); + } + res = lp_build_add(coord_bld, res, half); + + return res; +} + + +/** Helper used by lp_build_cube_lookup() + * Return (major_coord >= 0) ? pos_face : neg_face; + */ +static LLVMValueRef +lp_build_cube_face(struct lp_build_sample_context *bld, + LLVMValueRef major_coord, + unsigned pos_face, unsigned neg_face) +{ + LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + major_coord, + bld->float_bld.zero, ""); + LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); + LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); + LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); + return res; +} + + + +/** + * Generate code to do cube face selection and compute per-face texcoords. 
+ */ +void +lp_build_cube_lookup(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *face, + LLVMValueRef *face_s, + LLVMValueRef *face_t) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *coord_bld = &bld->coord_bld; + LLVMValueRef rx, ry, rz; + LLVMValueRef arx, ary, arz; + LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); + LLVMValueRef arx_ge_ary, arx_ge_arz; + LLVMValueRef ary_ge_arx, ary_ge_arz; + LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; + LLVMValueRef rx_pos, ry_pos, rz_pos; + + assert(bld->coord_bld.type.length == 4); + + /* + * Use the average of the four pixel's texcoords to choose the face. + */ + rx = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, s)); + ry = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, t)); + rz = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, r)); + + arx = lp_build_abs(float_bld, rx); + ary = lp_build_abs(float_bld, ry); + arz = lp_build_abs(float_bld, rz); + + /* + * Compare sign/magnitude of rx,ry,rz to determine face + */ + arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); + arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); + ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); + ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); + + arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); + ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); + rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); + + { + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + *face_s = 
bld->coord_bld.undef; + *face_t = bld->coord_bld.undef; + *face = bld->int_bld.undef; + + lp_build_name(*face_s, "face_s"); + lp_build_name(*face_t, "face_t"); + lp_build_name(*face, "face"); + + lp_build_flow_scope_declare(flow_ctx, face_s); + lp_build_flow_scope_declare(flow_ctx, face_t); + lp_build_flow_scope_declare(flow_ctx, face); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + { + /* +/- X face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rx); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); + *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rx, + PIPE_TEX_FACE_POS_X, + PIPE_TEX_FACE_NEG_X); + } + lp_build_else(&if_ctx); + { + struct lp_build_flow_context *flow_ctx2; + struct lp_build_if_state if_ctx2; + + LLVMValueRef face_s2 = bld->coord_bld.undef; + LLVMValueRef face_t2 = bld->coord_bld.undef; + LLVMValueRef face2 = bld->int_bld.undef; + + flow_ctx2 = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx2); + lp_build_flow_scope_declare(flow_ctx2, &face_s2); + lp_build_flow_scope_declare(flow_ctx2, &face_t2); + lp_build_flow_scope_declare(flow_ctx2, &face2); + + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + { + /* +/- Y face */ + LLVMValueRef sign = lp_build_sgn(float_bld, ry); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); + face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + face2 = lp_build_cube_face(bld, ry, + PIPE_TEX_FACE_POS_Y, + PIPE_TEX_FACE_NEG_Y); + } + lp_build_else(&if_ctx2); + { + /* +/- Z face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rz); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); + face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, 
ima); + face2 = lp_build_cube_face(bld, rz, + PIPE_TEX_FACE_POS_Z, + PIPE_TEX_FACE_NEG_Z); + } + lp_build_endif(&if_ctx2); + lp_build_flow_scope_end(flow_ctx2); + lp_build_flow_destroy(flow_ctx2); + *face_s = face_s2; + *face_t = face_t2; + *face = face2; + } + + lp_build_endif(&if_ctx); + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + } +} + + +/** * Compute the partial offset of a pixel block along an arbitrary axis. * * @param coord coordinate in pixels diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index caafc4e..ff72b8e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -37,8 +37,11 @@ #include "pipe/p_format.h" - +#include "util/u_debug.h" #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_swizzle.h" + struct pipe_resource; struct pipe_sampler_view; @@ -81,6 +84,10 @@ struct lp_sampler_static_state unsigned normalized_coords:1; float lod_bias, min_lod, max_lod; float border_color[4]; + + /* Aero hacks */ + unsigned force_nearest_s:1; + unsigned force_nearest_t:1; }; @@ -140,6 +147,96 @@ struct lp_sampler_dynamic_state /** + * Keep all information for sampling code generation in a single place. 
+ */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** regular scalar float type */ + struct lp_type float_type; + struct lp_build_context float_bld; + + /** regular scalar int type */ + struct lp_type int_type; + struct lp_build_context int_bld; + + /** Incoming coordinates type and build context */ + struct lp_type coord_type; + struct lp_build_context coord_bld; + + /** Unsigned integer coordinates */ + struct lp_type uint_coord_type; + struct lp_build_context uint_coord_bld; + + /** Signed integer coordinates */ + struct lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + struct lp_type texel_type; + struct lp_build_context texel_bld; +}; + + + +/** + * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at + * this time. Return whether the given mode is supported by that function. 
+ */ +static INLINE boolean +lp_is_simple_wrap_mode(unsigned mode) +{ + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TRUE; + default: + return FALSE; + } +} + + +static INLINE void +apply_sampler_swizzle(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + unsigned char swizzles[4]; + + swizzles[0] = bld->static_state->swizzle_r; + swizzles[1] = bld->static_state->swizzle_g; + swizzles[2] = bld->static_state->swizzle_b; + swizzles[3] = bld->static_state->swizzle_a; + + lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles); +} + + +static INLINE int +texture_dims(enum pipe_texture_target tex) +{ + switch (tex) { + case PIPE_TEXTURE_1D: + return 1; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + return 2; + case PIPE_TEXTURE_3D: + return 3; + default: + assert(0 && "bad texture target in texture_dims()"); + return 2; + } +} + + +/** * Derive the sampler static state. */ void @@ -148,6 +245,71 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); +LLVMValueRef +lp_build_lod_selector(struct lp_build_sample_context *bld, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth); + +void +lp_build_nearest_mip_level(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level_out); + +void +lp_build_linear_mip_levels(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level0_out, + LLVMValueRef *level1_out, + LLVMValueRef *weight_out); + +LLVMValueRef +lp_build_get_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, LLVMValueRef level); + +LLVMValueRef +lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, int level); + + +void +lp_build_mipmap_level_sizes(struct 
lp_build_sample_context *bld, + unsigned dims, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_array, + LLVMValueRef *width0_vec, + LLVMValueRef *width1_vec, + LLVMValueRef *height0_vec, + LLVMValueRef *height1_vec, + LLVMValueRef *depth0_vec, + LLVMValueRef *depth1_vec, + LLVMValueRef *row_stride0_vec, + LLVMValueRef *row_stride1_vec, + LLVMValueRef *img_stride0_vec, + LLVMValueRef *img_stride1_vec); + + +void +lp_build_cube_lookup(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *face, + LLVMValueRef *face_s, + LLVMValueRef *face_t); + + void lp_build_sample_partial_offset(struct lp_build_context *bld, unsigned block_length, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c new file mode 100644 index 0000000..a9a4e7b --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -0,0 +1,1145 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- AoS. + * + * @author Jose Fonseca + * @author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_dump.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "util/u_cpu_detect.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" +#include "lp_bld_flow.h" +#include "lp_bld_gather.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" +#include "lp_bld_sample_aos.h" +#include "lp_bld_quad.h" + + +/** + * Build LLVM code for texture coord wrapping, for nearest filtering, + * for scaled integer texcoords. 
+ * \param block_length is the length of the pixel block along the + * coordinate axis + * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size + * \param length the texture size along one dimension + * \param stride pixel stride along the coordinate axis (in bytes) + * \param is_pot if TRUE, length is a power of two + * \param wrap_mode one of PIPE_TEX_WRAP_x + * \param out_offset byte offset for the wrapped coordinate + * \param out_i resulting sub-block pixel coordinate for coord + */ +static void +lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef length, + LLVMValueRef stride, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *out_offset, + LLVMValueRef *out_i) +{ + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + } + + lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + out_offset, out_i); +} + + +/** + * Build LLVM code for texture coord wrapping, for linear filtering, + * for scaled integer texcoords. 
+ * \param block_length is the length of the pixel block along the + * coordinate axis + * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size + * \param length the texture size along one dimension + * \param stride pixel stride along the coordinate axis (in bytes) + * \param is_pot if TRUE, length is a power of two + * \param wrap_mode one of PIPE_TEX_WRAP_x + * \param offset0 resulting relative offset for coord0 + * \param offset1 resulting relative offset for coord0 + 1 + * \param i0 resulting sub-block pixel coordinate for coord0 + * \param i1 resulting sub-block pixel coordinate for coord0 + 1 + */ +static void +lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, + unsigned block_length, + LLVMValueRef coord0, + LLVMValueRef length, + LLVMValueRef stride, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *offset0, + LLVMValueRef *offset1, + LLVMValueRef *i0, + LLVMValueRef *i1) +{ + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + LLVMValueRef lmask, umask, mask; + + if (block_length != 1) { + /* + * If the pixel block covers more than one pixel then there is no easy + * way to calculate offset1 relative to offset0. Instead, compute them + * independently. + */ + + LLVMValueRef coord1; + + lp_build_sample_wrap_nearest_int(bld, + block_length, + coord0, + length, + stride, + is_pot, + wrap_mode, + offset0, i0); + + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + + lp_build_sample_wrap_nearest_int(bld, + block_length, + coord1, + length, + stride, + is_pot, + wrap_mode, + offset1, i1); + + return; + } + + /* + * Scalar pixels -- try to compute offset0 and offset1 with a single stride + * multiplication. 
+ */ + + *i0 = uint_coord_bld->zero; + *i1 = uint_coord_bld->zero; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if (is_pot) { + coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); + } + else { + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); + } + + mask = lp_build_compare(bld->builder, int_coord_bld->type, + PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); + + *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset1 = LLVMBuildAnd(bld->builder, + lp_build_add(uint_coord_bld, *offset0, stride), + mask, ""); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); + umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + PIPE_FUNC_LESS, coord0, length_minus_one); + + coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); + coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); + + mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); + + *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset1 = lp_build_add(uint_coord_bld, + *offset0, + LLVMBuildAnd(bld->builder, stride, mask, "")); + break; + + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + *offset0 = uint_coord_bld->zero; + *offset1 = uint_coord_bld->zero; + break; + } +} + + +/** + * Sample a single texture image with nearest sampling. + * If sampling a cube texture, r = cube face in [0,5]. + * Return filtered color as two vectors of 16-bit fixed point values. 
+ */ +static void +lp_build_sample_image_nearest(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *colors_lo, + LLVMValueRef *colors_hi) +{ + const int dims = texture_dims(bld->static_state->target); + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8; + LLVMValueRef s_ipart, t_ipart, r_ipart; + LLVMValueRef x_stride; + LLVMValueRef x_offset, offset; + LLVMValueRef x_subcoord, y_subcoord, z_subcoord; + + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + if (bld->static_state->normalized_coords) { + /* s = s * width, t = t * height */ + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, + coord_vec_type, ""); + s = lp_build_mul(&bld->coord_bld, s, fp_width); + if (dims >= 2) { + LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, + coord_vec_type, ""); + t = lp_build_mul(&bld->coord_bld, t, fp_height); + if (dims >= 3) { + LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, + coord_vec_type, ""); + r = lp_build_mul(&bld->coord_bld, r, fp_depth); + } + } + } + + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + + /* convert float to int */ + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + 
if (dims >= 2) + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + if (dims >= 3) + r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); + + /* compute floor (shift right 8) */ + i32_c8 = lp_build_const_int_vec(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + if (dims >= 2) + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + if (dims >= 3) + r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); + + /* get pixel, row, image strides */ + x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + bld->format_desc->block.bits/8); + + /* Do texcoord wrapping, compute texel offset */ + lp_build_sample_wrap_nearest_int(bld, + bld->format_desc->block.width, + s_ipart, width_vec, x_stride, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x_offset, &x_subcoord); + offset = x_offset; + if (dims >= 2) { + LLVMValueRef y_offset; + lp_build_sample_wrap_nearest_int(bld, + bld->format_desc->block.height, + t_ipart, height_vec, row_stride_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y_offset, &y_subcoord); + offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); + if (dims >= 3) { + LLVMValueRef z_offset; + lp_build_sample_wrap_nearest_int(bld, + 1, /* block length (depth) */ + r_ipart, depth_vec, img_stride_vec, + bld->static_state->pot_depth, /* r wraps against depth_vec, so use the depth POT flag */ + bld->static_state->wrap_r, + &z_offset, &z_subcoord); + offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef z_offset; + /* The r coord is the cube face in [0,5] */ + z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + } + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 +
* + * The higher 8 bits of the resulting elements will be zero. + */ + { + LLVMValueRef rgba8; + + if (util_format_is_rgba8_variant(bld->format_desc)) { + /* + * Given the format is a rgba8, just read the pixels as is, + * without any swizzling. Swizzling will be done later. + */ + rgba8 = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); + } + else { + rgba8 = lp_build_fetch_rgba_aos(bld->builder, + bld->format_desc, + u8n.type, + data_ptr, offset, + x_subcoord, + y_subcoord); + } + + /* Expand one 4*rgba8 to two 2*rgba16 */ + lp_build_unpack2(builder, u8n.type, h16.type, + rgba8, + colors_lo, colors_hi); + } +} + + +/** + * Sample a single texture image with (bi-)(tri-)linear sampling. + * Return filtered color as two vectors of 16-bit fixed point values. + */ +static void +lp_build_sample_image_linear(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *colors_lo, + LLVMValueRef *colors_hi) +{ + const int dims = texture_dims(bld->static_state->target); + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; + LLVMValueRef x_stride, y_stride, z_stride; + LLVMValueRef x_offset0, x_offset1; + LLVMValueRef y_offset0, y_offset1; + LLVMValueRef z_offset0, z_offset1; + LLVMValueRef offset[2][2][2]; /* [z][y][x] */ + LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2]; + LLVMValueRef neighbors_lo[2][2][2]; /* 
[z][y][x] */ + LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */ + LLVMValueRef packed_lo, packed_hi; + unsigned x, y, z; + unsigned i, j, k; + unsigned numj, numk; + + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + if (bld->static_state->normalized_coords) { + /* s = s * width, t = t * height */ + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, + coord_vec_type, ""); + s = lp_build_mul(&bld->coord_bld, s, fp_width); + if (dims >= 2) { + LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, + coord_vec_type, ""); + t = lp_build_mul(&bld->coord_bld, t, fp_height); + } + if (dims >= 3) { + LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, + coord_vec_type, ""); + r = lp_build_mul(&bld->coord_bld, r, fp_depth); + } + } + + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + + /* convert float to int */ + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + if (dims >= 2) + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + if (dims >= 3) + r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); + + /* subtract 0.5 (add -128) */ + i32_c128 = lp_build_const_int_vec(i32.type, -128); + if (!bld->static_state->force_nearest_s) { + s = LLVMBuildAdd(builder, s, i32_c128, ""); + } + if (dims >= 2 && !bld->static_state->force_nearest_t) { + t = LLVMBuildAdd(builder, t, i32_c128, ""); + } + if (dims >= 3) { + r = LLVMBuildAdd(builder, r, i32_c128, ""); + } + + /* compute floor (shift right 8) */ + i32_c8 = 
lp_build_const_int_vec(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + if (dims >= 2) + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + if (dims >= 3) + r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); + + /* compute fractional part (AND with 0xff) */ + i32_c255 = lp_build_const_int_vec(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + if (dims >= 2) + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + if (dims >= 3) + r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); + + /* get pixel, row and image strides */ + x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + bld->format_desc->block.bits/8); + y_stride = row_stride_vec; + z_stride = img_stride_vec; + + /* do texcoord wrapping and compute texel offsets */ + lp_build_sample_wrap_linear_int(bld, + bld->format_desc->block.width, + s_ipart, width_vec, x_stride, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x_offset0, &x_offset1, + &x_subcoord[0], &x_subcoord[1]); + for (z = 0; z < 2; z++) { + for (y = 0; y < 2; y++) { + offset[z][y][0] = x_offset0; + offset[z][y][1] = x_offset1; + } + } + + if (dims >= 2) { + lp_build_sample_wrap_linear_int(bld, + bld->format_desc->block.height, + t_ipart, height_vec, y_stride, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y_offset0, &y_offset1, + &y_subcoord[0], &y_subcoord[1]); + + for (z = 0; z < 2; z++) { + for (x = 0; x < 2; x++) { + offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][0][x], y_offset0); + offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][1][x], y_offset1); + } + } + } + + if (dims >= 3) { + lp_build_sample_wrap_linear_int(bld, + 1, /* block length (depth): same value the nearest path passes for the r axis */ + r_ipart, depth_vec, z_stride, + bld->static_state->pot_depth, + bld->static_state->wrap_r, + &z_offset0, &z_offset1, + &z_subcoord[0], &z_subcoord[1]); + for (y = 0; y < 2; y++) { + for (x = 0; x < 2; x++) { + offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x],
z_offset0); + offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[1][y][x], z_offset1); + } + } + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef z_offset; + z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + for (y = 0; y < 2; y++) { + for (x = 0; x < 2; x++) { + /* The r coord is the cube face in [0,5] */ + offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x], z_offset); + } + } + } + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart and r_fpart. There is no risk of losing precision here + * since the fractional parts only use the lower 8 bits. + */ + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + if (dims >= 2) + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + if (dims >= 3) + r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + + for (j = 0; j < h16.type.length; j += 4) { +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned subindex = 0; +#else + unsigned subindex = 1; +#endif + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for (i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for (i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, + shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
+ shuffle_hi, ""); + if (dims >= 2) { + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, + shuffle_lo, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, + shuffle_hi, ""); + } + if (dims >= 3) { + r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, + shuffle_lo, ""); + r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, + shuffle_hi, ""); + } + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. + */ + numj = 1 + (dims >= 2); + numk = 1 + (dims >= 3); + + for (k = 0; k < numk; k++) { + for (j = 0; j < numj; j++) { + for (i = 0; i < 2; i++) { + LLVMValueRef rgba8; + + if (util_format_is_rgba8_variant(bld->format_desc)) { + /* + * Given the format is a rgba8, just read the pixels as is, + * without any swizzling. Swizzling will be done later. + */ + rgba8 = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset[k][j][i]); + + rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); + } + else { + rgba8 = lp_build_fetch_rgba_aos(bld->builder, + bld->format_desc, + u8n.type, + data_ptr, offset[k][j][i], + x_subcoord[i], + y_subcoord[j]); + } + + /* Expand one 4*rgba8 to two 2*rgba16 */ + lp_build_unpack2(builder, u8n.type, h16.type, + rgba8, + &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]); + } + } + } + + /* + * Linear interpolation with 8.8 fixed point. 
+ */ + if (bld->static_state->force_nearest_s) { + /* special case 1-D lerp along t: s is nearest, so only the x=0 texels of rows y=0 and y=1 are blended */ + packed_lo = lp_build_lerp(&h16, + t_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][1][0]); + + packed_hi = lp_build_lerp(&h16, + t_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][1][0]); + } + else if (bld->static_state->force_nearest_t) { + /* special case 1-D lerp along s: t is nearest, so only row y=0 is blended */ + packed_lo = lp_build_lerp(&h16, + s_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1]); + + packed_hi = lp_build_lerp(&h16, + s_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1]); + } + else { + /* general 1/2/3-D lerping */ + if (dims == 1) { + packed_lo = lp_build_lerp(&h16, + s_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1]); + + packed_hi = lp_build_lerp(&h16, + s_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1]); + } + else { + /* 2-D lerp */ + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1], + neighbors_lo[0][1][0], + neighbors_lo[0][1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1], + neighbors_hi[0][1][0], + neighbors_hi[0][1][1]); + + if (dims >= 3) { + LLVMValueRef packed_lo2, packed_hi2; + + /* lerp in the second z slice */ + packed_lo2 = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[1][0][0], + neighbors_lo[1][0][1], + neighbors_lo[1][1][0], + neighbors_lo[1][1][1]); + + packed_hi2 = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[1][0][0], + neighbors_hi[1][0][1], + neighbors_hi[1][1][0], + neighbors_hi[1][1][1]); + /* interp between two z slices */ + packed_lo = lp_build_lerp(&h16, r_fpart_lo, + packed_lo, packed_lo2); + packed_hi = lp_build_lerp(&h16, r_fpart_hi, + packed_hi, packed_hi2); + } + } + } + + *colors_lo = packed_lo; + *colors_hi = packed_hi; +} + + +/** + * Sample the texture/mipmap using given image filter and mip filter.
+ * data0_ptr and data1_ptr point to the two mipmap levels to sample + * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. + * If we're using nearest miplevel sampling the '1' values will be null/unused. + */ +static void +lp_build_sample_mipmap(struct lp_build_sample_context *bld, + unsigned img_filter, + unsigned mip_filter, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef lod_fpart, + LLVMValueRef width0_vec, + LLVMValueRef width1_vec, + LLVMValueRef height0_vec, + LLVMValueRef height1_vec, + LLVMValueRef depth0_vec, + LLVMValueRef depth1_vec, + LLVMValueRef row_stride0_vec, + LLVMValueRef row_stride1_vec, + LLVMValueRef img_stride0_vec, + LLVMValueRef img_stride1_vec, + LLVMValueRef data_ptr0, + LLVMValueRef data_ptr1, + LLVMValueRef *colors_lo, + LLVMValueRef *colors_hi) +{ + LLVMValueRef colors0_lo, colors0_hi; + LLVMValueRef colors1_lo, colors1_hi; + + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + /* sample the first mipmap level */ + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, + &colors0_lo, &colors0_hi); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + } + else { + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + + /* sample the first mipmap level */ + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, + &colors0_lo, &colors0_hi); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* 
interpolate samples from the two mipmap levels */ + struct lp_build_context h16; + lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16)); + + *colors_lo = lp_build_lerp(&h16, lod_fpart, + colors0_lo, colors1_lo); + *colors_hi = lp_build_lerp(&h16, lod_fpart, + colors0_hi, colors1_hi); + } + else { + /* use first/only level's colors */ + *colors_lo = colors0_lo; + *colors_hi = colors0_hi; + } +} + + + +/** + * Texture sampling in AoS format. Used when sampling common 32-bit/texel + * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes + * but only limited texture coord wrap modes. + */ +void +lp_build_sample_aos(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_array, + LLVMValueRef data_array, + LLVMValueRef texel_out[4]) +{ + struct lp_build_context *float_bld = &bld->float_bld; + LLVMBuilderRef builder = bld->builder; + const unsigned mip_filter = bld->static_state->min_mip_filter; + const unsigned min_filter = bld->static_state->min_img_filter; + const unsigned mag_filter = bld->static_state->mag_img_filter; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef lod = NULL, lod_fpart = NULL; + LLVMValueRef ilevel0, ilevel1 = NULL; + LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; + LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; + LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; + LLVMValueRef data_ptr0, data_ptr1 = NULL; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + 
LLVMValueRef face_ddx[4], face_ddy[4]; + struct lp_build_context h16; + LLVMTypeRef h16_vec_type; + + /* we only support the common/simple wrap modes at this time */ + assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); + if (dims >= 2) + assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t)); + if (dims >= 3) + assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r)); + + + /* make 16-bit fixed-pt builder context */ + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + h16_vec_type = lp_build_vec_type(h16.type); + + + /* cube face selection, compute per-face coords, etc. */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ + + /* recompute ddx, ddy using the new (s,t) face texcoords */ + face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[2] = NULL; + face_ddx[3] = NULL; + face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[2] = NULL; + face_ddy[3] = NULL; + ddx = face_ddx; + ddy = face_ddy; + } + + + /* + * Compute the level of detail (float). + */ + if (min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* Need to compute lod either to choose mipmap levels or to + * distinguish between minification/magnification with one mipmap level.
+ */ + lod = lp_build_lod_selector(bld, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth); + } + + /* + * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 + * If mipfilter=linear, also compute the weight between the two + * mipmap levels: lod_fpart + */ + switch (mip_filter) { + default: + assert(0 && "bad mip_filter value in lp_build_sample_aos()"); + /* fall-through */ + case PIPE_TEX_MIPFILTER_NONE: + /* always use mip level 0 */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + /* XXX this is a work-around for an apparent bug in LLVM 2.7. + * We should be able to set ilevel0 = const(0) but that causes + * bad x86 code to be emitted. Use a separate constant so 'lod' stays valid for the min/mag test below. + */ + LLVMValueRef lod0 = lp_build_const_elem(bld->coord_bld.type, 0.0); + lp_build_nearest_mip_level(bld, unit, lod0, &ilevel0); + } + else { + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } + break; + case PIPE_TEX_MIPFILTER_NEAREST: + assert(lod); + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + break; + case PIPE_TEX_MIPFILTER_LINEAR: + { + LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); + LLVMValueRef i255 = lp_build_const_int32(255); + LLVMTypeRef i16_type = LLVMIntType(16); + + assert(lod); + + lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, + &lod_fpart); + lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); + lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); + lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); + lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); + + /* the lod_fpart values will be fixed pt values in [0,1) */ + } + break; + } + + /* compute image size(s) of source mipmap level(s) */ + lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, + ilevel0, ilevel1, + row_stride_array, img_stride_array, + &width0_vec, &width1_vec, + &height0_vec, &height1_vec, + &depth0_vec, &depth1_vec, + &row_stride0_vec, &row_stride1_vec, + &img_stride0_vec,
&img_stride1_vec); + + /* + * Get pointer(s) to image data for mipmap level(s). + */ + data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + } + + + /* + * Get/interpolate texture colors. + */ + if (min_filter == mag_filter) { + /* no need to distinguish between minification and magnification */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + &packed_lo, &packed_hi); + } + else { + /* Emit conditional to choose min image filter or mag image filter + * depending on the lod being >= 0 or < 0, respectively. + */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef minify; + + flow_ctx = lp_build_flow_create(builder); + lp_build_flow_scope_begin(flow_ctx); + + packed_lo = LLVMGetUndef(h16_vec_type); + packed_hi = LLVMGetUndef(h16_vec_type); + + lp_build_flow_scope_declare(flow_ctx, &packed_lo); + lp_build_flow_scope_declare(flow_ctx, &packed_hi); + + /* minify = lod >= 0.0 (unordered compare, so a NaN lod also takes the minify path) */ + minify = LLVMBuildFCmp(builder, LLVMRealUGE, + lod, float_bld->zero, ""); + + lp_build_if(&if_ctx, flow_ctx, builder, minify); + { + /* Use the minification filter */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + &packed_lo, &packed_hi); + } + lp_build_else(&if_ctx); + { + /* Use the magnification filter */ + lp_build_sample_mipmap(bld, mag_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec,
img_stride1_vec, + data_ptr0, data_ptr1, + &packed_lo, &packed_hi); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + } + + /* combine 'packed_lo', 'packed_hi' into 'packed' */ + { + struct lp_build_context h16, u8n; + + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + packed = lp_build_pack2(builder, h16.type, u8n.type, + packed_lo, packed_hi); + } + + /* + * Convert to SoA and swizzle. + */ + lp_build_rgba8_to_f32_soa(builder, + bld->texel_type, + packed, unswizzled); + + if (util_format_is_rgba8_variant(bld->format_desc)) { + lp_build_format_swizzle_soa(bld->format_desc, + &bld->texel_bld, + unswizzled, texel_out); + } + else { + texel_out[0] = unswizzled[0]; + texel_out[1] = unswizzled[1]; + texel_out[2] = unswizzled[2]; + texel_out[3] = unswizzled[3]; + } + + apply_sampler_swizzle(bld, texel_out); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h new file mode 100644 index 0000000..e1045bb --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- AoS. + * + * @author Jose Fonseca + * @author Brian Paul + */ + +#ifndef LP_BLD_SAMPLE_AOS_H +#define LP_BLD_SAMPLE_AOS_H + + +#include "lp_bld_sample.h" + + +void +lp_build_sample_aos(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_array, + LLVMValueRef data_array, + LLVMValueRef texel_out[4]); + + +#endif /* LP_BLD_SAMPLE_AOS_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index baf0402..f61f23e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -40,6 +40,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -52,49 +53,11 @@ #include "lp_bld_gather.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" +#include "lp_bld_sample_aos.h" #include "lp_bld_quad.h" /** - * Keep all information
for sampling code generation in a single place. - */ -struct lp_build_sample_context -{ - LLVMBuilderRef builder; - - const struct lp_sampler_static_state *static_state; - - struct lp_sampler_dynamic_state *dynamic_state; - - const struct util_format_description *format_desc; - - /** regular scalar float type */ - struct lp_type float_type; - struct lp_build_context float_bld; - - /** regular scalar float type */ - struct lp_type int_type; - struct lp_build_context int_bld; - - /** Incoming coordinates type and build context */ - struct lp_type coord_type; - struct lp_build_context coord_bld; - - /** Unsigned integer coordinates */ - struct lp_type uint_coord_type; - struct lp_build_context uint_coord_bld; - - /** Signed integer coordinates */ - struct lp_type int_coord_type; - struct lp_build_context int_coord_bld; - - /** Output texels type and build context */ - struct lp_type texel_type; - struct lp_build_context texel_bld; -}; - - -/** * Does the given texture wrap mode allow sampling the texture border color? * XXX maybe move this into gallium util code. */ @@ -119,95 +82,10 @@ wrap_mode_uses_border_color(unsigned mode) } -static LLVMValueRef -lp_build_get_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, LLVMValueRef level) -{ - LLVMValueRef indexes[2], data_ptr; - indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indexes[1] = level; - data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); - data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); - return data_ptr; -} - - -static LLVMValueRef -lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, int level) -{ - LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); - return lp_build_get_mipmap_level(bld, data_array, lvl); -} - - -/** - * Dereference stride_array[mipmap_level] array to get a stride. - * Return stride as a vector. 
- */ -static LLVMValueRef -lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, - LLVMValueRef stride_array, LLVMValueRef level) -{ - LLVMValueRef indexes[2], stride; - indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indexes[1] = level; - stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); - stride = LLVMBuildLoad(bld->builder, stride, ""); - stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); - return stride; -} - - -/** Dereference stride_array[0] array to get a stride (as vector). */ -static LLVMValueRef -lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, - LLVMValueRef stride_array, int level) -{ - LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); - return lp_build_get_level_stride_vec(bld, stride_array, lvl); -} - - -static int -texture_dims(enum pipe_texture_target tex) -{ - switch (tex) { - case PIPE_TEXTURE_1D: - return 1; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_CUBE: - return 2; - case PIPE_TEXTURE_3D: - return 3; - default: - assert(0 && "bad texture target in texture_dims()"); - return 2; - } -} - - -static void -apply_sampler_swizzle(struct lp_build_sample_context *bld, - LLVMValueRef *texel) -{ - unsigned char swizzles[4]; - - swizzles[0] = bld->static_state->swizzle_r; - swizzles[1] = bld->static_state->swizzle_g; - swizzles[2] = bld->static_state->swizzle_b; - swizzles[3] = bld->static_state->swizzle_a; - - lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles); -} - - - /** * Generate code to fetch a texel from a texture at int coords (x, y, z). * The computation depends on whether the texture is 1D, 2D or 3D. - * The result, texel, will be: + * The result, texel, will be float vectors: * texel[0] = red values * texel[1] = green values * texel[2] = blue values @@ -356,204 +234,6 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, /** - * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time. 
- * Return whether the given mode is supported by that function. - */ -static boolean -is_simple_wrap_mode(unsigned mode) -{ - switch (mode) { - case PIPE_TEX_WRAP_REPEAT: - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return TRUE; - default: - return FALSE; - } -} - - -/** - * Build LLVM code for texture wrap mode, for scaled integer texcoords. - * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size - * \param length the texture size along one dimension - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param i0 resulting sub-block pixel coordinate for coord0 - */ -static void -lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef stride, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *out_offset, - LLVMValueRef *out_i) -{ - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef length_minus_one; - - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if(is_pot) - coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord = LLVMBuildURem(bld->builder, coord, length, ""); - break; - - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); - coord = lp_build_min(int_coord_bld, coord, length_minus_one); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - } - - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, - out_offset, 
out_i); -} - - -/** - * Build LLVM code for texture wrap mode, for scaled integer texcoords. - * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size - * \param length the texture size along one dimension - * \param stride pixel stride along the coordinate axis - * \param block_length is the length of the pixel block along the - * coordinate axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param offset0 resulting relative offset for coord0 - * \param offset1 resulting relative offset for coord0 + 1 - * \param i0 resulting sub-block pixel coordinate for coord0 - * \param i1 resulting sub-block pixel coordinate for coord0 + 1 - */ -static void -lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord0, - LLVMValueRef length, - LLVMValueRef stride, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *offset0, - LLVMValueRef *offset1, - LLVMValueRef *i0, - LLVMValueRef *i1) -{ - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef length_minus_one; - LLVMValueRef lmask, umask, mask; - - if (block_length != 1) { - /* - * If the pixel block covers more than one pixel then there is no easy - * way to calculate offset1 relative to offset0. Instead, compute them - * independently. - */ - - LLVMValueRef coord1; - - lp_build_sample_wrap_nearest_int(bld, - block_length, - coord0, - length, - stride, - is_pot, - wrap_mode, - offset0, i0); - - coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - - lp_build_sample_wrap_nearest_int(bld, - block_length, - coord1, - length, - stride, - is_pot, - wrap_mode, - offset1, i1); - - return; - } - - /* - * Scalar pixels -- try to compute offset0 and offset1 with a single stride - * multiplication. 
- */ - - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; - - length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (is_pot) { - coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); - } - else { - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); - } - - mask = lp_build_compare(bld->builder, int_coord_bld->type, - PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), - mask, ""); - break; - - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, - PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); - umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, - PIPE_FUNC_LESS, coord0, length_minus_one); - - coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); - coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); - - mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); - - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, - *offset0, - LLVMBuildAnd(bld->builder, stride, mask, "")); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; - break; - } -} - - -/** * Build LLVM code for texture wrap mode for linear filtering. 
* \param x0_out returns first integer texcoord * \param x1_out returns second integer texcoord @@ -765,7 +445,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /** * Build LLVM code for texture wrap mode for nearest filtering. * \param coord the incoming texcoord (nominally in [0,1]) - * \param length the texture size along one dimension, as int + * \param length the texture size along one dimension, as int vector * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x */ @@ -882,198 +562,6 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, /** - * Codegen equivalent for u_minify(). - * Return max(1, base_size >> level); - */ -static LLVMValueRef -lp_build_minify(struct lp_build_sample_context *bld, - LLVMValueRef base_size, - LLVMValueRef level) -{ - LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); - size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); - return size; -} - - -/** - * Generate code to compute texture level of detail (lambda). - * \param ddx partial derivatives of (s, t, r, q) with respect to X - * \param ddy partial derivatives of (s, t, r, q) with respect to Y - * \param lod_bias optional float vector with the shader lod bias - * \param explicit_lod optional float vector with the explicit lod - * \param width scalar int texture width - * \param height scalar int texture height - * \param depth scalar int texture depth - * - * XXX: The resulting lod is scalar, so ignore all but the first element of - * derivatives, lod_bias, etc that are passed by the shader. 
- */ -static LLVMValueRef -lp_build_lod_selector(struct lp_build_sample_context *bld, - const LLVMValueRef ddx[4], - const LLVMValueRef ddy[4], - LLVMValueRef lod_bias, /* optional */ - LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth) - -{ - if (bld->static_state->min_lod == bld->static_state->max_lod) { - /* User is forcing sampling from a particular mipmap level. - * This is hit during mipmap generation. - */ - return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); - } - else { - struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), - bld->static_state->lod_bias); - LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), - bld->static_state->min_lod); - LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), - bld->static_state->max_lod); - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef lod; - - if (explicit_lod) { - lod = LLVMBuildExtractElement(bld->builder, explicit_lod, - index0, ""); - } - else { - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef dsdx, dsdy; - LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; - LLVMValueRef rho; - - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); - drdy = lp_build_abs(float_bld, drdy); - } - } - - /* Compute rho = max of 
all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); - rho = lp_build_max(float_bld, rho, max); - } - } - - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); - - /* add shader lod bias */ - if (lod_bias) { - lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, - index0, ""); - lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); - } - } - - /* add sampler lod bias */ - lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); - - /* clamp lod */ - lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); - - return lod; - } -} - - -/** - * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer - * mipmap level index. - * Note: this is all scalar code. 
- * \param lod scalar float texture level of detail - * \param level_out returns integer - */ -static void -lp_build_nearest_mip_level(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level_out) -{ - struct lp_build_context *float_bld = &bld->float_bld; - struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; - - LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_iround(float_bld, lod); - - /* clamp level to legal range of levels */ - *level_out = lp_build_clamp(int_bld, level, zero, last_level); -} - - -/** - * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to - * two (adjacent) mipmap level indexes. Later, we'll sample from those - * two mipmap levels and interpolate between them. - */ -static void -lp_build_linear_mip_levels(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) -{ - struct lp_build_context *float_bld = &bld->float_bld; - struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_ifloor(float_bld, lod); - - /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - /* compute level 1 and clamp to legal range of levels */ - level = lp_build_add(int_bld, level, int_bld->one); - *level1_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - - *weight_out = lp_build_fract(float_bld, lod); -} - - -/** * Generate code to sample a mipmap level with nearest filtering. * If sampling a cube texture, r = cube face in [0,5]. 
*/ @@ -1291,207 +779,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } -/** Helper used by lp_build_cube_lookup() */ -static LLVMValueRef -lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) -{ - /* ima = -0.5 / abs(coord); */ - LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); - LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); - LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); - return ima; -} - - -/** - * Helper used by lp_build_cube_lookup() - * \param sign scalar +1 or -1 - * \param coord float vector - * \param ima float vector - */ -static LLVMValueRef -lp_build_cube_coord(struct lp_build_context *coord_bld, - LLVMValueRef sign, int negate_coord, - LLVMValueRef coord, LLVMValueRef ima) -{ - /* return negate(coord) * ima * sign + 0.5; */ - LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); - LLVMValueRef res; - - assert(negate_coord == +1 || negate_coord == -1); - - if (negate_coord == -1) { - coord = lp_build_negate(coord_bld, coord); - } - - res = lp_build_mul(coord_bld, coord, ima); - if (sign) { - sign = lp_build_broadcast_scalar(coord_bld, sign); - res = lp_build_mul(coord_bld, res, sign); - } - res = lp_build_add(coord_bld, res, half); - - return res; -} - - -/** Helper used by lp_build_cube_lookup() - * Return (major_coord >= 0) ? pos_face : neg_face; - */ -static LLVMValueRef -lp_build_cube_face(struct lp_build_sample_context *bld, - LLVMValueRef major_coord, - unsigned pos_face, unsigned neg_face) -{ - LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, - major_coord, - bld->float_bld.zero, ""); - LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); - LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); - LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); - return res; -} - - - -/** - * Generate code to do cube face selection and compute per-face texcoords. 
- */ -static void -lp_build_cube_lookup(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - LLVMValueRef *face, - LLVMValueRef *face_s, - LLVMValueRef *face_t) -{ - struct lp_build_context *float_bld = &bld->float_bld; - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMValueRef rx, ry, rz; - LLVMValueRef arx, ary, arz; - LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); - LLVMValueRef arx_ge_ary, arx_ge_arz; - LLVMValueRef ary_ge_arx, ary_ge_arz; - LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; - LLVMValueRef rx_pos, ry_pos, rz_pos; - - assert(bld->coord_bld.type.length == 4); - - /* - * Use the average of the four pixel's texcoords to choose the face. - */ - rx = lp_build_mul(float_bld, c25, - lp_build_sum_vector(&bld->coord_bld, s)); - ry = lp_build_mul(float_bld, c25, - lp_build_sum_vector(&bld->coord_bld, t)); - rz = lp_build_mul(float_bld, c25, - lp_build_sum_vector(&bld->coord_bld, r)); - - arx = lp_build_abs(float_bld, rx); - ary = lp_build_abs(float_bld, ry); - arz = lp_build_abs(float_bld, rz); - - /* - * Compare sign/magnitude of rx,ry,rz to determine face - */ - arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); - arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); - ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); - ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); - - arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - - rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); - ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); - rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); - - { - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - 
*face_s = bld->coord_bld.undef; - *face_t = bld->coord_bld.undef; - *face = bld->int_bld.undef; - - lp_build_name(*face_s, "face_s"); - lp_build_name(*face_t, "face_t"); - lp_build_name(*face, "face"); - - lp_build_flow_scope_declare(flow_ctx, face_s); - lp_build_flow_scope_declare(flow_ctx, face_t); - lp_build_flow_scope_declare(flow_ctx, face); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); - { - /* +/- X face */ - LLVMValueRef sign = lp_build_sgn(float_bld, rx); - LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); - *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); - *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - *face = lp_build_cube_face(bld, rx, - PIPE_TEX_FACE_POS_X, - PIPE_TEX_FACE_NEG_X); - } - lp_build_else(&if_ctx); - { - struct lp_build_flow_context *flow_ctx2; - struct lp_build_if_state if_ctx2; - - LLVMValueRef face_s2 = bld->coord_bld.undef; - LLVMValueRef face_t2 = bld->coord_bld.undef; - LLVMValueRef face2 = bld->int_bld.undef; - - flow_ctx2 = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx2); - lp_build_flow_scope_declare(flow_ctx2, &face_s2); - lp_build_flow_scope_declare(flow_ctx2, &face_t2); - lp_build_flow_scope_declare(flow_ctx2, &face2); - - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - - lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); - { - /* +/- Y face */ - LLVMValueRef sign = lp_build_sgn(float_bld, ry); - LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); - face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); - face2 = lp_build_cube_face(bld, ry, - PIPE_TEX_FACE_POS_Y, - PIPE_TEX_FACE_NEG_Y); - } - lp_build_else(&if_ctx2); - { - /* +/- Z face */ - LLVMValueRef sign = lp_build_sgn(float_bld, rz); - LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); - face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, 
NULL, +1, t, ima); - face2 = lp_build_cube_face(bld, rz, - PIPE_TEX_FACE_POS_Z, - PIPE_TEX_FACE_NEG_Z); - } - lp_build_endif(&if_ctx2); - lp_build_flow_scope_end(flow_ctx2); - lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; - *face_t = face_t2; - *face = face2; - } - - lp_build_endif(&if_ctx); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - } -} - - - /** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample @@ -1605,7 +892,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, const unsigned mag_filter = bld->static_state->mag_img_filter; const int dims = texture_dims(bld->static_state->target); LLVMValueRef lod = NULL, lod_fpart = NULL; - LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL; + LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; @@ -1685,47 +972,15 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } } - /* - * Convert scalar integer mipmap levels into vectors. 
- */ - ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) - ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); - - /* - * Compute width, height at mipmap level 'ilevel0' - */ - width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); - if (dims >= 2) { - height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); - row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, - ilevel0); - if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - img_stride0_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel0); - if (dims == 3) { - depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); - } - } - } - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* compute width, height, depth for second mipmap level at 'ilevel1' */ - width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); - if (dims >= 2) { - height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); - row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, - ilevel1); - if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - img_stride1_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel1); - if (dims ==3) { - depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); - } - } - } - } + /* compute image size(s) of source mipmap level(s) */ + lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, + ilevel0, ilevel1, + row_stride_array, img_stride_array, + &width0_vec, &width1_vec, + &height0_vec, &height1_vec, + &depth0_vec, &depth1_vec, + &row_stride0_vec, &row_stride1_vec, + &img_stride0_vec, &img_stride1_vec); /* * Get pointer(s) to image data for mipmap level(s). 
@@ -1803,258 +1058,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } - -static void -lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride_array, - LLVMValueRef data_array, - LLVMValueRef texel_out[4]) -{ - LLVMBuilderRef builder = bld->builder; - struct lp_build_context i32, h16, u8n; - LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; - LLVMValueRef i32_c8, i32_c128, i32_c255; - LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; - LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef data_ptr; - LLVMValueRef x_stride, y_stride; - LLVMValueRef x_offset0, x_offset1; - LLVMValueRef y_offset0, y_offset1; - LLVMValueRef offset[2][2]; - LLVMValueRef x_subcoord[2], y_subcoord[2]; - LLVMValueRef neighbors_lo[2][2]; - LLVMValueRef neighbors_hi[2][2]; - LLVMValueRef packed, packed_lo, packed_hi; - LLVMValueRef unswizzled[4]; - const unsigned level = 0; - unsigned i, j; - - assert(bld->static_state->target == PIPE_TEXTURE_2D - || bld->static_state->target == PIPE_TEXTURE_RECT); - assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); - assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); - assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); - - lp_build_context_init(&i32, builder, lp_type_int_vec(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); - - if (bld->static_state->normalized_coords) { - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, ""); - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, 
s, fp_width); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - - /* convert float to int */ - s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); - t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); - - /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_const_int_vec(i32.type, -128); - s = LLVMBuildAdd(builder, s, i32_c128, ""); - t = LLVMBuildAdd(builder, t, i32_c128, ""); - - /* compute floor (shift right 8) */ - i32_c8 = lp_build_const_int_vec(i32.type, 8); - s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); - t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); - - /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_const_int_vec(i32.type, 255); - s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); - t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); - - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, - bld->format_desc->block.bits/8); - - y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level); - - lp_build_sample_wrap_linear_int(bld, - bld->format_desc->block.width, - s_ipart, width, x_stride, - bld->static_state->pot_width, - bld->static_state->wrap_s, - &x_offset0, &x_offset1, - &x_subcoord[0], &x_subcoord[1]); - lp_build_sample_wrap_linear_int(bld, - bld->format_desc->block.height, - t_ipart, height, y_stride, - bld->static_state->pot_height, - bld->static_state->wrap_t, - &y_offset0, &y_offset1, - &y_subcoord[0], &y_subcoord[1]); - - offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0); - offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0); - offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1); - offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1); - - /* - * Transform 4 x i32 in - * - * s_fpart = {s0, s1, s2, s3} - * - * into 8 x i16 - * - * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} - * - * 
into two 8 x i16 - * - * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} - * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} - * - * and likewise for t_fpart. There is no risk of loosing precision here - * since the fractional parts only use the lower 8bits. - */ - - s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); - t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); - - { - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffle_lo; - LLVMValueRef shuffle_hi; - - for(j = 0; j < h16.type.length; j += 4) { -#ifdef PIPE_ARCH_LITTLE_ENDIAN - unsigned subindex = 0; -#else - unsigned subindex = 1; -#endif - LLVMValueRef index; - - index = LLVMConstInt(elem_type, j/2 + subindex, 0); - for(i = 0; i < 4; ++i) - shuffles_lo[j + i] = index; - - index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); - for(i = 0; i < 4; ++i) - shuffles_hi[j + i] = index; - } - - shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); - shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); - - s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); - t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); - s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); - t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); - } - - /* - * get pointer to mipmap level 0 data - */ - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level); - - /* - * Fetch the pixels as 4 x 32bit (rgba order might differ): - * - * rgba0 rgba1 rgba2 rgba3 - * - * bit cast them into 16 x u8 - * - * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 - * - * unpack them into two 8 x i16: - * - * r0 g0 b0 a0 r1 g1 b1 a1 - * r2 g2 b2 a2 r3 g3 b3 a3 - * - * The higher 8 bits of the resulting elements will be zero. 
- */ - - for (j = 0; j < 2; ++j) { - for (i = 0; i < 2; ++i) { - LLVMValueRef rgba8; - - if (util_format_is_rgba8_variant(bld->format_desc)) { - /* - * Given the format is a rgba8, just read the pixels as is, - * without any swizzling. Swizzling will be done later. - */ - rgba8 = lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset[j][i]); - - rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); - - } - else { - rgba8 = lp_build_fetch_rgba_aos(bld->builder, - bld->format_desc, - u8n.type, - data_ptr, offset[j][i], - x_subcoord[i], - y_subcoord[j]); - } - - lp_build_unpack2(builder, u8n.type, h16.type, - rgba8, - &neighbors_lo[j][i], &neighbors_hi[j][i]); - } - } - - /* - * Linear interpolate with 8.8 fixed point. - */ - - packed_lo = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[0][0], - neighbors_lo[0][1], - neighbors_lo[1][0], - neighbors_lo[1][1]); - - packed_hi = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[0][0], - neighbors_hi[0][1], - neighbors_hi[1][0], - neighbors_hi[1][1]); - - packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); - - /* - * Convert to SoA and swizzle. 
- */ - - lp_build_rgba8_to_f32_soa(bld->builder, - bld->texel_type, - packed, unswizzled); - - if (util_format_is_rgba8_variant(bld->format_desc)) { - lp_build_format_swizzle_soa(bld->format_desc, - &bld->texel_bld, - unswizzled, texel_out); - } else { - texel_out[0] = unswizzled[0]; - texel_out[1] = unswizzled[1]; - texel_out[2] = unswizzled[2]; - texel_out[3] = unswizzled[3]; - } - - apply_sampler_swizzle(bld, texel_out); -} - - static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, @@ -2181,6 +1184,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, t = coords[1]; r = coords[2]; + /* width, height, depth as uint vectors */ width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); @@ -2190,27 +1194,32 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_nop(&bld, texel_out); } else if (util_format_fits_8unorm(bld.format_desc) && - (static_state->target == PIPE_TEXTURE_2D || - static_state->target == PIPE_TEXTURE_RECT) && - static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) { - /* special case */ - lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, - row_stride_array, data_array, texel_out); + lp_is_simple_wrap_mode(static_state->wrap_s) && + lp_is_simple_wrap_mode(static_state->wrap_t)) { + /* do sampling/filtering with fixed pt arithmetic */ + printf("new sample\n"); + lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth, + width_vec, height_vec, depth_vec, + row_stride_array, img_stride_array, + data_array, texel_out); } + else { - if (gallivm_debug & GALLIVM_DEBUG_PERF && - 
(static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST || - static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST || - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) && + if ((gallivm_debug & GALLIVM_DEBUG_PERF) && util_format_fits_8unorm(bld.format_desc)) { debug_printf("%s: using floating point linear filtering for %s\n", __FUNCTION__, bld.format_desc->short_name); + debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n", + static_state->min_img_filter, + static_state->mag_img_filter, + static_state->min_mip_filter, + static_state->wrap_s, + static_state->wrap_t); } + printf("old sample\n"); lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, width, height, depth, -- 2.7.4