From 34c11c87e4e3b5639764abee413c45e918749477 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Sat, 9 Oct 2010 09:34:31 +0100 Subject: [PATCH] gallivm: Do size computations simultanously for all dimensions (AoS). Operate simultanouesly on vector as much as possible, instead of doing the operations on vectors with broadcasted scalars. Also do the 24.8 fixed point scalar with integer shift of the texture size, for unnormalized coordinates. AoS path only for now -- the same thing can be done for SoA. --- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 106 +++++++++++++----- src/gallium/auxiliary/gallivm/lp_bld_sample.h | 22 +++- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 125 +++++++++++----------- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 16 ++- 4 files changed, 177 insertions(+), 92 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 7a64392..5bc3c26 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -630,37 +630,21 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, LLVMValueRef ilevel, - LLVMValueRef *out_width_vec, - LLVMValueRef *out_height_vec, - LLVMValueRef *out_depth_vec, + LLVMValueRef *out_size, LLVMValueRef *row_stride_vec, LLVMValueRef *img_stride_vec) { const unsigned dims = bld->dims; LLVMValueRef ilevel_vec; - LLVMValueRef size_vec; - LLVMTypeRef i32t = LLVMInt32Type(); ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); /* * Compute width, height, depth at mipmap level 'ilevel' */ - size_vec = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); - *out_width_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 0, 0)); if (dims >= 2) { - - *out_height_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 1, 0)); - *row_stride_vec = lp_build_get_level_stride_vec(bld, bld->row_stride_array, ilevel); @@ -668,18 +652,90 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, *img_stride_vec = lp_build_get_level_stride_vec(bld, bld->img_stride_array, ilevel); - if (dims == 3) { - *out_depth_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 2, 0)); - } } } } +/** + * Extract and broadcast texture size. + * + * @param size_type type of the texture size vector (either + * bld->int_size_type or bld->float_size_type) + * @param coord_type type of the texture size vector (either + * bld->int_coord_type or bld->coord_type) + * @param int_size vector with the integer texture size (width, height, + * depth) + */ +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth) +{ + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + + *out_width = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 0, 0)); + if (dims >= 2) { + *out_height = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 1, 0)); + if (dims == 3) { + *out_depth = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 2, 0)); + } + } +} + + +/** + * Unnormalize coords. + * + * @param int_size vector with the integer texture size (width, height, depth) + */ +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r) +{ + const unsigned dims = bld->dims; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &width, + &height, + &depth); + + /* s = s * width, t = t * height */ + *s = lp_build_mul(&bld->coord_bld, *s, width); + if (dims >= 2) { + *t = lp_build_mul(&bld->coord_bld, *t, height); + if (dims >= 3) { + *r = lp_build_mul(&bld->coord_bld, *r, depth); + } + } +} + /** Helper used by lp_build_cube_lookup() */ static LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index d1a1aa1..ce22854 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -333,14 +333,30 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, LLVMValueRef ilevel, - LLVMValueRef *out_width_vec, - LLVMValueRef *out_height_vec, - LLVMValueRef *out_depth_vec, + LLVMValueRef *out_size_vec, LLVMValueRef *row_stride_vec, LLVMValueRef *img_stride_vec); void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth); + + +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r); + + +void lp_build_cube_lookup(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index e741044..1e1e359 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -45,6 +45,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" @@ -253,9 +254,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -270,6 +269,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, t_ipart, r_ipart; LLVMValueRef x_stride; LLVMValueRef x_offset, offset; @@ -283,30 +283,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -417,9 +420,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -434,6 +435,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; @@ -458,30 +460,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -788,12 +793,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef colors_hi_var) { LLVMBuilderRef builder = bld->builder; - LLVMValueRef width0_vec; - LLVMValueRef width1_vec; - LLVMValueRef height0_vec; - LLVMValueRef height1_vec; - LLVMValueRef depth0_vec; - LLVMValueRef depth1_vec; + LLVMValueRef size0; + LLVMValueRef size1; LLVMValueRef row_stride0_vec; LLVMValueRef row_stride1_vec; LLVMValueRef img_stride0_vec; @@ -806,12 +807,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, - &width0_vec, &height0_vec, &depth0_vec, + &size0, &row_stride0_vec, &img_stride0_vec); data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); @@ -819,7 +820,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); @@ -854,19 +855,19 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, - &width1_vec, &height1_vec, &depth1_vec, + &size1, &row_stride1_vec, &img_stride1_vec); data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, + size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); } else { lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, + size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index f3c4b6a..1af0318 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -805,6 +805,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef *colors_out) { LLVMBuilderRef builder = bld->builder; + LLVMValueRef size0; + LLVMValueRef size1; LLVMValueRef width0_vec; LLVMValueRef width1_vec; LLVMValueRef height0_vec; @@ -822,8 +824,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, - &width0_vec, &height0_vec, &depth0_vec, + &size0, &row_stride0_vec, &img_stride0_vec); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size0, + &width0_vec, &height0_vec, &depth0_vec); data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, unit, @@ -863,8 +870,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, { /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, - &width1_vec, &height1_vec, &depth1_vec, + &size1, &row_stride1_vec, &img_stride1_vec); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size1, + &width1_vec, &height1_vec, &depth1_vec); data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, unit, -- 2.7.4