Follow-up of !4319 using the same clang-format config.
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5310>
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_build.h"
-#include <llvm-c/Core.h>
-#include <llvm/Config/llvm-config.h>
-
-#include "c11/threads.h"
-
-#include <assert.h>
-#include <stdio.h>
-
+#include "ac_exp_param.h"
#include "ac_llvm_util.h"
#include "ac_shader_util.h"
-#include "ac_exp_param.h"
+#include "c11/threads.h"
+#include "shader_enums.h"
+#include "sid.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
-#include "sid.h"
+#include <llvm-c/Core.h>
+#include <llvm/Config/llvm-config.h>
-#include "shader_enums.h"
+#include <assert.h>
+#include <stdio.h>
#define AC_LLVM_INITIAL_CF_DEPTH 4
/* Data for if/else/endif and bgnloop/endloop control flow structures.
*/
struct ac_llvm_flow {
- /* Loop exit or next part of if/else/endif. */
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef loop_entry_block;
+ /* Loop exit or next part of if/else/endif. */
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef loop_entry_block;
};
/* Initialize module-independent parts of the context.
*
* This creates ctx::context and also sets up ctx::module and ctx::builder,
* so the caller does not have to initialize them separately.
*/
-void
-ac_llvm_context_init(struct ac_llvm_context *ctx,
- struct ac_llvm_compiler *compiler,
- enum chip_class chip_class, enum radeon_family family,
- enum ac_float_mode float_mode, unsigned wave_size,
- unsigned ballot_mask_bits)
-{
- ctx->context = LLVMContextCreate();
-
- ctx->chip_class = chip_class;
- ctx->family = family;
- ctx->wave_size = wave_size;
- ctx->ballot_mask_bits = ballot_mask_bits;
- ctx->float_mode = float_mode;
- ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
- : compiler->tm,
- ctx->context);
- ctx->builder = ac_create_builder(ctx->context, float_mode);
-
- ctx->voidt = LLVMVoidTypeInContext(ctx->context);
- ctx->i1 = LLVMInt1TypeInContext(ctx->context);
- ctx->i8 = LLVMInt8TypeInContext(ctx->context);
- ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
- ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
- ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
- ctx->i128 = LLVMIntTypeInContext(ctx->context, 128);
- ctx->intptr = ctx->i32;
- ctx->f16 = LLVMHalfTypeInContext(ctx->context);
- ctx->f32 = LLVMFloatTypeInContext(ctx->context);
- ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
- ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
- ctx->v4i16 = LLVMVectorType(ctx->i16, 4);
- ctx->v2f16 = LLVMVectorType(ctx->f16, 2);
- ctx->v4f16 = LLVMVectorType(ctx->f16, 4);
- ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
- ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
- ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
- ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
- ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
- ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
- ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
- ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
- ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
-
- ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
- ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
- ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
- ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
- ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
- ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
- ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
- ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
- ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false);
- ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false);
- ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
- ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
- ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
- ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
- ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
- ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
-
- ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
- ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
-
- ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "range", 5);
-
- ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "invariant.load", 14);
-
- ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "amdgpu.uniform", 14);
-
- ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
- ctx->flow = calloc(1, sizeof(*ctx->flow));
-}
-
-void
-ac_llvm_context_dispose(struct ac_llvm_context *ctx)
-{
- free(ctx->flow->stack);
- free(ctx->flow);
- ctx->flow = NULL;
-}
-
-int
-ac_get_llvm_num_components(LLVMValueRef value)
-{
- LLVMTypeRef type = LLVMTypeOf(value);
- unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
- ? LLVMGetVectorSize(type)
- : 1;
- return num_components;
-}
-
-LLVMValueRef
-ac_llvm_extract_elem(struct ac_llvm_context *ac,
- LLVMValueRef value,
- int index)
-{
- if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
- assert(index == 0);
- return value;
- }
-
- return LLVMBuildExtractElement(ac->builder, value,
- LLVMConstInt(ac->i32, index, false), "");
-}
-
-int
-ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
-{
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
- type = LLVMGetElementType(type);
-
- if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
- return LLVMGetIntTypeWidth(type);
-
- if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
- if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_LDS)
- return 32;
- }
-
- if (type == ctx->f16)
- return 16;
- if (type == ctx->f32)
- return 32;
- if (type == ctx->f64)
- return 64;
-
- unreachable("Unhandled type kind in get_elem_bits");
-}
-
-unsigned
-ac_get_type_size(LLVMTypeRef type)
-{
- LLVMTypeKind kind = LLVMGetTypeKind(type);
-
- switch (kind) {
- case LLVMIntegerTypeKind:
- return LLVMGetIntTypeWidth(type) / 8;
- case LLVMHalfTypeKind:
- return 2;
- case LLVMFloatTypeKind:
- return 4;
- case LLVMDoubleTypeKind:
- return 8;
- case LLVMPointerTypeKind:
- if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
- return 4;
- return 8;
- case LLVMVectorTypeKind:
- return LLVMGetVectorSize(type) *
- ac_get_type_size(LLVMGetElementType(type));
- case LLVMArrayTypeKind:
- return LLVMGetArrayLength(type) *
- ac_get_type_size(LLVMGetElementType(type));
- default:
- assert(0);
- return 0;
- }
+void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
+ enum chip_class chip_class, enum radeon_family family,
+ enum ac_float_mode float_mode, unsigned wave_size,
+ unsigned ballot_mask_bits)
+{
+ ctx->context = LLVMContextCreate();
+
+ ctx->chip_class = chip_class;
+ ctx->family = family;
+ ctx->wave_size = wave_size;
+ ctx->ballot_mask_bits = ballot_mask_bits;
+ ctx->float_mode = float_mode;
+ ctx->module =
+ ac_create_module(wave_size == 32 ? compiler->tm_wave32 : compiler->tm, ctx->context);
+ ctx->builder = ac_create_builder(ctx->context, float_mode);
+
+ ctx->voidt = LLVMVoidTypeInContext(ctx->context);
+ ctx->i1 = LLVMInt1TypeInContext(ctx->context);
+ ctx->i8 = LLVMInt8TypeInContext(ctx->context);
+ ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
+ ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
+ ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+ ctx->i128 = LLVMIntTypeInContext(ctx->context, 128);
+ ctx->intptr = ctx->i32;
+ ctx->f16 = LLVMHalfTypeInContext(ctx->context);
+ ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+ ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+ ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
+ ctx->v4i16 = LLVMVectorType(ctx->i16, 4);
+ ctx->v2f16 = LLVMVectorType(ctx->f16, 2);
+ ctx->v4f16 = LLVMVectorType(ctx->f16, 4);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+ ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
+ ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+ ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
+ ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
+ ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+ ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+ ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
+ ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
+
+ ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
+ ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
+ ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
+ ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
+ ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
+ ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
+ ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
+ ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
+ ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false);
+ ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false);
+ ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
+ ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
+ ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
+ ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
+ ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
+ ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
+
+ ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
+ ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
+
+ ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, "range", 5);
+
+ ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, "invariant.load", 14);
+
+ ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
+
+ ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
+ ctx->flow = calloc(1, sizeof(*ctx->flow));
+}
+
+void ac_llvm_context_dispose(struct ac_llvm_context *ctx)
+{
+ free(ctx->flow->stack);
+ free(ctx->flow);
+ ctx->flow = NULL;
+}
+
+int ac_get_llvm_num_components(LLVMValueRef value)
+{
+ LLVMTypeRef type = LLVMTypeOf(value);
+ unsigned num_components =
+ LLVMGetTypeKind(type) == LLVMVectorTypeKind ? LLVMGetVectorSize(type) : 1;
+ return num_components;
+}
+
+LLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index)
+{
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
+ assert(index == 0);
+ return value;
+ }
+
+ return LLVMBuildExtractElement(ac->builder, value, LLVMConstInt(ac->i32, index, false), "");
+}
+
+int ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
+{
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+ type = LLVMGetElementType(type);
+
+ if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
+ return LLVMGetIntTypeWidth(type);
+
+ if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
+ if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_LDS)
+ return 32;
+ }
+
+ if (type == ctx->f16)
+ return 16;
+ if (type == ctx->f32)
+ return 32;
+ if (type == ctx->f64)
+ return 64;
+
+ unreachable("Unhandled type kind in get_elem_bits");
+}
+
+unsigned ac_get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8;
+ case LLVMHalfTypeKind:
+ return 2;
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMDoubleTypeKind:
+ return 8;
+ case LLVMPointerTypeKind:
+ if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
+ return 4;
+ return 8;
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) * ac_get_type_size(LLVMGetElementType(type));
+ case LLVMArrayTypeKind:
+ return LLVMGetArrayLength(type) * ac_get_type_size(LLVMGetElementType(type));
+ default:
+ assert(0);
+ return 0;
+ }
}
static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- if (t == ctx->i8)
- return ctx->i8;
- else if (t == ctx->f16 || t == ctx->i16)
- return ctx->i16;
- else if (t == ctx->f32 || t == ctx->i32)
- return ctx->i32;
- else if (t == ctx->f64 || t == ctx->i64)
- return ctx->i64;
- else
- unreachable("Unhandled integer size");
-}
-
-LLVMTypeRef
-ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
- LLVMTypeRef elem_type = LLVMGetElementType(t);
- return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
- LLVMGetVectorSize(t));
- }
- if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
- switch (LLVMGetPointerAddressSpace(t)) {
- case AC_ADDR_SPACE_GLOBAL:
- return ctx->i64;
- case AC_ADDR_SPACE_CONST_32BIT:
- case AC_ADDR_SPACE_LDS:
- return ctx->i32;
- default:
- unreachable("unhandled address space");
- }
- }
- return to_integer_type_scalar(ctx, t);
-}
-
-LLVMValueRef
-ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
- LLVMTypeRef type = LLVMTypeOf(v);
- if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
- return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
- }
- return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
-}
-
-LLVMValueRef
-ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
- LLVMTypeRef type = LLVMTypeOf(v);
- if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
- return v;
- return ac_to_integer(ctx, v);
+ if (t == ctx->i8)
+ return ctx->i8;
+ else if (t == ctx->f16 || t == ctx->i16)
+ return ctx->i16;
+ else if (t == ctx->f32 || t == ctx->i32)
+ return ctx->i32;
+ else if (t == ctx->f64 || t == ctx->i64)
+ return ctx->i64;
+ else
+ unreachable("Unhandled integer size");
}
-static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), LLVMGetVectorSize(t));
+ }
+ if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
+ switch (LLVMGetPointerAddressSpace(t)) {
+ case AC_ADDR_SPACE_GLOBAL:
+ return ctx->i64;
+ case AC_ADDR_SPACE_CONST_32BIT:
+ case AC_ADDR_SPACE_LDS:
+ return ctx->i32;
+ default:
+ unreachable("unhandled address space");
+ }
+ }
+ return to_integer_type_scalar(ctx, t);
+}
+
+LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
{
- if (t == ctx->i8)
- return ctx->i8;
- else if (t == ctx->i16 || t == ctx->f16)
- return ctx->f16;
- else if (t == ctx->i32 || t == ctx->f32)
- return ctx->f32;
- else if (t == ctx->i64 || t == ctx->f64)
- return ctx->f64;
- else
- unreachable("Unhandled float size");
+ LLVMTypeRef type = LLVMTypeOf(v);
+ if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
+ return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
+ }
+ return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
}
-LLVMTypeRef
-ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
- LLVMTypeRef elem_type = LLVMGetElementType(t);
- return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
- LLVMGetVectorSize(t));
- }
- return to_float_type_scalar(ctx, t);
+ LLVMTypeRef type = LLVMTypeOf(v);
+ if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
+ return v;
+ return ac_to_integer(ctx, v);
}
-LLVMValueRef
-ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
+static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- LLVMTypeRef type = LLVMTypeOf(v);
- return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
+ if (t == ctx->i8)
+ return ctx->i8;
+ else if (t == ctx->i16 || t == ctx->f16)
+ return ctx->f16;
+ else if (t == ctx->i32 || t == ctx->f32)
+ return ctx->f32;
+ else if (t == ctx->i64 || t == ctx->f64)
+ return ctx->f64;
+ else
+ unreachable("Unhandled float size");
}
+LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_float_type_scalar(ctx, elem_type), LLVMGetVectorSize(t));
+ }
+ return to_float_type_scalar(ctx, t);
+}
-LLVMValueRef
-ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
- LLVMTypeRef return_type, LLVMValueRef *params,
- unsigned param_count, unsigned attrib_mask)
+LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
{
- LLVMValueRef function, call;
- bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
+ LLVMTypeRef type = LLVMTypeOf(v);
+ return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
+}
- function = LLVMGetNamedFunction(ctx->module, name);
- if (!function) {
- LLVMTypeRef param_types[32], function_type;
- unsigned i;
+LLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
+ LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count,
+ unsigned attrib_mask)
+{
+ LLVMValueRef function, call;
+ bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
- assert(param_count <= 32);
+ function = LLVMGetNamedFunction(ctx->module, name);
+ if (!function) {
+ LLVMTypeRef param_types[32], function_type;
+ unsigned i;
- for (i = 0; i < param_count; ++i) {
- assert(params[i]);
- param_types[i] = LLVMTypeOf(params[i]);
- }
- function_type =
- LLVMFunctionType(return_type, param_types, param_count, 0);
- function = LLVMAddFunction(ctx->module, name, function_type);
+ assert(param_count <= 32);
- LLVMSetFunctionCallConv(function, LLVMCCallConv);
- LLVMSetLinkage(function, LLVMExternalLinkage);
+ for (i = 0; i < param_count; ++i) {
+ assert(params[i]);
+ param_types[i] = LLVMTypeOf(params[i]);
+ }
+ function_type = LLVMFunctionType(return_type, param_types, param_count, 0);
+ function = LLVMAddFunction(ctx->module, name, function_type);
- if (!set_callsite_attrs)
- ac_add_func_attributes(ctx->context, function, attrib_mask);
- }
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
- call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
- if (set_callsite_attrs)
- ac_add_func_attributes(ctx->context, call, attrib_mask);
- return call;
+ if (!set_callsite_attrs)
+ ac_add_func_attributes(ctx->context, function, attrib_mask);
+ }
+
+ call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
+ if (set_callsite_attrs)
+ ac_add_func_attributes(ctx->context, call, attrib_mask);
+ return call;
}
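/**
* Write the textual name of the given type into buf, in the form used in
* intrinsic names: "i<bits>", "f16", "f32" or "f64", prefixed with
* "v<count>" when the type is a vector (e.g. "v4f32").
*/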
void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
{
- LLVMTypeRef elem_type = type;
-
- assert(bufsize >= 8);
-
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
- int ret = snprintf(buf, bufsize, "v%u",
- LLVMGetVectorSize(type));
- if (ret < 0) {
- char *type_name = LLVMPrintTypeToString(type);
- fprintf(stderr, "Error building type name for: %s\n",
- type_name);
- LLVMDisposeMessage(type_name);
- return;
- }
- elem_type = LLVMGetElementType(type);
- buf += ret;
- bufsize -= ret;
- }
- switch (LLVMGetTypeKind(elem_type)) {
- default: break;
- case LLVMIntegerTypeKind:
- snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
- break;
- case LLVMHalfTypeKind:
- snprintf(buf, bufsize, "f16");
- break;
- case LLVMFloatTypeKind:
- snprintf(buf, bufsize, "f32");
- break;
- case LLVMDoubleTypeKind:
- snprintf(buf, bufsize, "f64");
- break;
- }
+ LLVMTypeRef elem_type = type;
+
+ assert(bufsize >= 8);
+
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+ int ret = snprintf(buf, bufsize, "v%u", LLVMGetVectorSize(type));
+ if (ret < 0) {
+ char *type_name = LLVMPrintTypeToString(type);
+ fprintf(stderr, "Error building type name for: %s\n", type_name);
+ LLVMDisposeMessage(type_name);
+ return;
+ }
+ elem_type = LLVMGetElementType(type);
+ buf += ret;
+ bufsize -= ret;
+ }
+ switch (LLVMGetTypeKind(elem_type)) {
+ default:
+ break;
+ case LLVMIntegerTypeKind:
+ snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
+ break;
+ case LLVMHalfTypeKind:
+ snprintf(buf, bufsize, "f16");
+ break;
+ case LLVMFloatTypeKind:
+ snprintf(buf, bufsize, "f32");
+ break;
+ case LLVMDoubleTypeKind:
+ snprintf(buf, bufsize, "f64");
+ break;
+ }
}
/**
* Helper function that builds an LLVM IR PHI node and immediately adds
* incoming edges.
*/
-LLVMValueRef
-ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
- unsigned count_incoming, LLVMValueRef *values,
- LLVMBasicBlockRef *blocks)
+LLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming,
+ LLVMValueRef *values, LLVMBasicBlockRef *blocks)
{
- LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
- LLVMAddIncoming(phi, values, blocks, count_incoming);
- return phi;
+ LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+ LLVMAddIncoming(phi, values, blocks, count_incoming);
+ return phi;
}
void ac_build_s_barrier(struct ac_llvm_context *ctx)
{
- ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL,
- 0, AC_FUNC_ATTR_CONVERGENT);
+ ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
}
/* Prevent optimizations (at least of memory accesses) across the current
* point in the program by emitting inline assembly that is marked as
* having side effects.
*
* Optionally, a value can be passed through the inline assembly to prevent
* LLVM from hoisting calls to ReadNone functions.
*/
-void
-ac_build_optimization_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef *pvgpr)
-{
- static int counter = 0;
-
- LLVMBuilderRef builder = ctx->builder;
- char code[16];
-
- snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
-
- if (!pvgpr) {
- LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
- LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
- LLVMBuildCall(builder, inlineasm, NULL, 0, "");
- } else {
- LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
- LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
- LLVMTypeRef type = LLVMTypeOf(*pvgpr);
- unsigned bitsize = ac_get_elem_bits(ctx, type);
- LLVMValueRef vgpr = *pvgpr;
- LLVMTypeRef vgpr_type;
- unsigned vgpr_size;
- LLVMValueRef vgpr0;
+void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pvgpr)
+{
+ static int counter = 0;
+
+ LLVMBuilderRef builder = ctx->builder;
+ char code[16];
+
+ snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
- if (bitsize < 32)
- vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, "");
+ if (!pvgpr) {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+ LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+ } else {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+ LLVMTypeRef type = LLVMTypeOf(*pvgpr);
+ unsigned bitsize = ac_get_elem_bits(ctx, type);
+ LLVMValueRef vgpr = *pvgpr;
+ LLVMTypeRef vgpr_type;
+ unsigned vgpr_size;
+ LLVMValueRef vgpr0;
- vgpr_type = LLVMTypeOf(vgpr);
- vgpr_size = ac_get_type_size(vgpr_type);
+ if (bitsize < 32)
+ vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, "");
- assert(vgpr_size % 4 == 0);
+ vgpr_type = LLVMTypeOf(vgpr);
+ vgpr_size = ac_get_type_size(vgpr_type);
- vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
- vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
- vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
- vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
- vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+ assert(vgpr_size % 4 == 0);
- if (bitsize < 32)
- vgpr = LLVMBuildTrunc(builder, vgpr, type, "");
+ vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+ vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+ vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+ vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+ vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
- *pvgpr = vgpr;
- }
+ if (bitsize < 32)
+ vgpr = LLVMBuildTrunc(builder, vgpr, type, "");
+
+ *pvgpr = vgpr;
+ }
}
-LLVMValueRef
-ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope)
+LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope)
{
- const char *name = scope == NIR_SCOPE_DEVICE ? "llvm.amdgcn.s.memrealtime" : "llvm.amdgcn.s.memtime";
- LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0);
- return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
+ const char *name =
+ scope == NIR_SCOPE_DEVICE ? "llvm.amdgcn.s.memrealtime" : "llvm.amdgcn.s.memtime";
+ LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0);
+ return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
}
-LLVMValueRef
-ac_build_ballot(struct ac_llvm_context *ctx,
- LLVMValueRef value)
+LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- const char *name;
+ const char *name;
- if (LLVM_VERSION_MAJOR >= 9) {
- if (ctx->wave_size == 64)
- name = "llvm.amdgcn.icmp.i64.i32";
- else
- name = "llvm.amdgcn.icmp.i32.i32";
- } else {
- name = "llvm.amdgcn.icmp.i32";
- }
- LLVMValueRef args[3] = {
- value,
- ctx->i32_0,
- LLVMConstInt(ctx->i32, LLVMIntNE, 0)
- };
+ if (LLVM_VERSION_MAJOR >= 9) {
+ if (ctx->wave_size == 64)
+ name = "llvm.amdgcn.icmp.i64.i32";
+ else
+ name = "llvm.amdgcn.icmp.i32.i32";
+ } else {
+ name = "llvm.amdgcn.icmp.i32";
+ }
+ LLVMValueRef args[3] = {value, ctx->i32_0, LLVMConstInt(ctx->i32, LLVMIntNE, 0)};
- /* We currently have no other way to prevent LLVM from lifting the icmp
- * calls to a dominating basic block.
- */
- ac_build_optimization_barrier(ctx, &args[0]);
+ /* We currently have no other way to prevent LLVM from lifting the icmp
+ * calls to a dominating basic block.
+ */
+ ac_build_optimization_barrier(ctx, &args[0]);
- args[0] = ac_to_integer(ctx, args[0]);
+ args[0] = ac_to_integer(ctx, args[0]);
- return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
- AC_FUNC_ATTR_NOUNWIND |
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
+ return ac_build_intrinsic(
+ ctx, name, ctx->iN_wavemask, args, 3,
+ AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
}
-LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
- LLVMValueRef value)
+LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- const char *name;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- if (ctx->wave_size == 64)
- name = "llvm.amdgcn.icmp.i64.i1";
- else
- name = "llvm.amdgcn.icmp.i32.i1";
- } else {
- name = "llvm.amdgcn.icmp.i1";
- }
- LLVMValueRef args[3] = {
- value,
- ctx->i1false,
- LLVMConstInt(ctx->i32, LLVMIntNE, 0),
- };
+ const char *name;
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ if (ctx->wave_size == 64)
+ name = "llvm.amdgcn.icmp.i64.i1";
+ else
+ name = "llvm.amdgcn.icmp.i32.i1";
+ } else {
+ name = "llvm.amdgcn.icmp.i1";
+ }
+ LLVMValueRef args[3] = {
+ value,
+ ctx->i1false,
+ LLVMConstInt(ctx->i32, LLVMIntNE, 0),
+ };
- return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
- AC_FUNC_ATTR_NOUNWIND |
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
+ return ac_build_intrinsic(
+ ctx, name, ctx->iN_wavemask, args, 3,
+ AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
}
-LLVMValueRef
-ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
+LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
- LLVMValueRef vote_set = ac_build_ballot(ctx, value);
- return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
+ LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
}
-LLVMValueRef
-ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
+LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- LLVMValueRef vote_set = ac_build_ballot(ctx, value);
- return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
- LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0),
+ "");
}
-LLVMValueRef
-ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
+LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
- LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
- LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- vote_set, active_set, "");
- LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- vote_set,
- LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
- return LLVMBuildOr(ctx->builder, all, none, "");
+ LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
+ LLVMValueRef none =
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
+ return LLVMBuildOr(ctx->builder, all, none, "");
}
-LLVMValueRef
-ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
- unsigned value_count, unsigned component)
+LLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned component)
{
- LLVMValueRef vec = NULL;
+ LLVMValueRef vec = NULL;
- if (value_count == 1) {
- return values[component];
- } else if (!value_count)
- unreachable("value_count is 0");
+ if (value_count == 1) {
+ return values[component];
+ } else if (!value_count)
+ unreachable("value_count is 0");
- for (unsigned i = component; i < value_count + component; i++) {
- LLVMValueRef value = values[i];
+ for (unsigned i = component; i < value_count + component; i++) {
+ LLVMValueRef value = values[i];
- if (i == component)
- vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
- LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
- vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
- }
- return vec;
+ if (i == component)
+ vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
+ LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
+ vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
+ }
+ return vec;
}
-LLVMValueRef
-ac_build_gather_values_extended(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count,
- unsigned value_stride,
- bool load,
- bool always_vector)
+LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned value_stride, bool load,
+ bool always_vector)
{
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef vec = NULL;
- unsigned i;
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef vec = NULL;
+ unsigned i;
- if (value_count == 1 && !always_vector) {
- if (load)
- return LLVMBuildLoad(builder, values[0], "");
- return values[0];
- } else if (!value_count)
- unreachable("value_count is 0");
+ if (value_count == 1 && !always_vector) {
+ if (load)
+ return LLVMBuildLoad(builder, values[0], "");
+ return values[0];
+ } else if (!value_count)
+ unreachable("value_count is 0");
- for (i = 0; i < value_count; i++) {
- LLVMValueRef value = values[i * value_stride];
- if (load)
- value = LLVMBuildLoad(builder, value, "");
+ for (i = 0; i < value_count; i++) {
+ LLVMValueRef value = values[i * value_stride];
+ if (load)
+ value = LLVMBuildLoad(builder, value, "");
- if (!i)
- vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
- LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
- vec = LLVMBuildInsertElement(builder, vec, value, index, "");
- }
- return vec;
+ if (!i)
+ vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
+ LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
+ vec = LLVMBuildInsertElement(builder, vec, value, index, "");
+ }
+ return vec;
}
-LLVMValueRef
-ac_build_gather_values(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count)
+LLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count)
{
- return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
+ return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
}
/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
* channels with undef. Extract at most src_channels components from the input.
*/
-static LLVMValueRef
-ac_build_expand(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned src_channels,
- unsigned dst_channels)
+static LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, LLVMValueRef value,
+ unsigned src_channels, unsigned dst_channels)
{
- LLVMTypeRef elemtype;
- LLVMValueRef chan[dst_channels];
+ LLVMTypeRef elemtype;
+ LLVMValueRef chan[dst_channels];
- if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
- unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+ unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
- if (src_channels == dst_channels && vec_size == dst_channels)
- return value;
+ if (src_channels == dst_channels && vec_size == dst_channels)
+ return value;
- src_channels = MIN2(src_channels, vec_size);
+ src_channels = MIN2(src_channels, vec_size);
- for (unsigned i = 0; i < src_channels; i++)
- chan[i] = ac_llvm_extract_elem(ctx, value, i);
+ for (unsigned i = 0; i < src_channels; i++)
+ chan[i] = ac_llvm_extract_elem(ctx, value, i);
- elemtype = LLVMGetElementType(LLVMTypeOf(value));
- } else {
- if (src_channels) {
- assert(src_channels == 1);
- chan[0] = value;
- }
- elemtype = LLVMTypeOf(value);
- }
+ elemtype = LLVMGetElementType(LLVMTypeOf(value));
+ } else {
+ if (src_channels) {
+ assert(src_channels == 1);
+ chan[0] = value;
+ }
+ elemtype = LLVMTypeOf(value);
+ }
- for (unsigned i = src_channels; i < dst_channels; i++)
- chan[i] = LLVMGetUndef(elemtype);
+ for (unsigned i = src_channels; i < dst_channels; i++)
+ chan[i] = LLVMGetUndef(elemtype);
- return ac_build_gather_values(ctx, chan, dst_channels);
+ return ac_build_gather_values(ctx, chan, dst_channels);
}
/* Extract components [start, start + channels) from a vector.
*/
-LLVMValueRef
-ac_extract_components(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned start,
- unsigned channels)
+LLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start,
+ unsigned channels)
{
- LLVMValueRef chan[channels];
+ LLVMValueRef chan[channels];
- for (unsigned i = 0; i < channels; i++)
- chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
+ for (unsigned i = 0; i < channels; i++)
+ chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
- return ac_build_gather_values(ctx, chan, channels);
+ return ac_build_gather_values(ctx, chan, channels);
}
/* Expand a scalar or vector to <4 x type> by filling the remaining channels
* with undef. Extract at most num_channels components from the input.
*/
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned num_channels)
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value,
+ unsigned num_channels)
{
- return ac_build_expand(ctx, value, num_channels, 4);
+ return ac_build_expand(ctx, value, num_channels, 4);
}
LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- unsigned type_size = ac_get_type_size(LLVMTypeOf(value));
- const char *name;
+ unsigned type_size = ac_get_type_size(LLVMTypeOf(value));
+ const char *name;
- if (type_size == 2)
- name = "llvm.rint.f16";
- else if (type_size == 4)
- name = "llvm.rint.f32";
- else
- name = "llvm.rint.f64";
+ if (type_size == 2)
+ name = "llvm.rint.f16";
+ else if (type_size == 4)
+ name = "llvm.rint.f32";
+ else
+ name = "llvm.rint.f64";
- return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1,
- AC_FUNC_ATTR_READNONE);
+ return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef
-ac_build_fdiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef den)
+LLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den)
{
- unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
- const char *name;
+ unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+ const char *name;
- /* For doubles, we need precise division to pass GLCTS. */
- if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
- type_size == 8)
- return LLVMBuildFDiv(ctx->builder, num, den, "");
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && type_size == 8)
+ return LLVMBuildFDiv(ctx->builder, num, den, "");
- if (type_size == 2)
- name = "llvm.amdgcn.rcp.f16";
- else if (type_size == 4)
- name = "llvm.amdgcn.rcp.f32";
- else
- name = "llvm.amdgcn.rcp.f64";
+ if (type_size == 2)
+ name = "llvm.amdgcn.rcp.f16";
+ else if (type_size == 4)
+ name = "llvm.amdgcn.rcp.f32";
+ else
+ name = "llvm.amdgcn.rcp.f64";
- LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
- &den, 1, AC_FUNC_ATTR_READNONE);
+ LLVMValueRef rcp =
+ ac_build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, AC_FUNC_ATTR_READNONE);
- return LLVMBuildFMul(ctx->builder, num, rcp, "");
+ return LLVMBuildFMul(ctx->builder, num, rcp, "");
}
/* See fast_idiv_by_const.h. */
/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
-LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment)
-{
- LLVMBuilderRef builder = ctx->builder;
-
- num = LLVMBuildLShr(builder, num, pre_shift, "");
- num = LLVMBuildMul(builder,
- LLVMBuildZExt(builder, num, ctx->i64, ""),
- LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
- num = LLVMBuildAdd(builder, num,
- LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
- num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
- num = LLVMBuildTrunc(builder, num, ctx->i32, "");
- return LLVMBuildLShr(builder, num, post_shift, "");
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num,
+ LLVMValueRef multiplier, LLVMValueRef pre_shift,
+ LLVMValueRef post_shift, LLVMValueRef increment)
+{
+ LLVMBuilderRef builder = ctx->builder;
+
+ num = LLVMBuildLShr(builder, num, pre_shift, "");
+ num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
+ LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
+ num = LLVMBuildAdd(builder, num, LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
+ num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
+ num = LLVMBuildTrunc(builder, num, ctx->i32, "");
+ return LLVMBuildLShr(builder, num, post_shift, "");
}
/* See fast_idiv_by_const.h. */
/* If num != UINT_MAX, this more efficient version can be used. */
/* Set: increment = util_fast_udiv_info::increment; */
-LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment)
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num,
+ LLVMValueRef multiplier, LLVMValueRef pre_shift,
+ LLVMValueRef post_shift, LLVMValueRef increment)
{
- LLVMBuilderRef builder = ctx->builder;
+ LLVMBuilderRef builder = ctx->builder;
- num = LLVMBuildLShr(builder, num, pre_shift, "");
- num = LLVMBuildNUWAdd(builder, num, increment, "");
- num = LLVMBuildMul(builder,
- LLVMBuildZExt(builder, num, ctx->i64, ""),
- LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
- num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
- num = LLVMBuildTrunc(builder, num, ctx->i32, "");
- return LLVMBuildLShr(builder, num, post_shift, "");
+ num = LLVMBuildLShr(builder, num, pre_shift, "");
+ num = LLVMBuildNUWAdd(builder, num, increment, "");
+ num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
+ LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
+ num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
+ num = LLVMBuildTrunc(builder, num, ctx->i32, "");
+ return LLVMBuildLShr(builder, num, post_shift, "");
}
/* See fast_idiv_by_const.h. */
/* Both operands must fit in 31 bits and the divisor must not be 1. */
-LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef post_shift)
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num,
+ LLVMValueRef multiplier, LLVMValueRef post_shift)
{
- LLVMBuilderRef builder = ctx->builder;
+ LLVMBuilderRef builder = ctx->builder;
- num = LLVMBuildMul(builder,
- LLVMBuildZExt(builder, num, ctx->i64, ""),
- LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
- num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
- num = LLVMBuildTrunc(builder, num, ctx->i32, "");
- return LLVMBuildLShr(builder, num, post_shift, "");
+ num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
+ LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
+ num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
+ num = LLVMBuildTrunc(builder, num, ctx->i32, "");
+ return LLVMBuildLShr(builder, num, post_shift, "");
}
/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
* already multiplied by two. id is the cube face number.
*/
struct cube_selection_coords {
- LLVMValueRef stc[2];
- LLVMValueRef ma;
- LLVMValueRef id;
+ LLVMValueRef stc[2];
+ LLVMValueRef ma;
+ LLVMValueRef id;
};
-static void
-build_cube_intrinsic(struct ac_llvm_context *ctx,
- LLVMValueRef in[3],
- struct cube_selection_coords *out)
+static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3],
+ struct cube_selection_coords *out)
{
- LLVMTypeRef f32 = ctx->f32;
+ LLVMTypeRef f32 = ctx->f32;
- out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, AC_FUNC_ATTR_READNONE);
}
/**
* the selcoords major axis.
*/
static void build_cube_select(struct ac_llvm_context *ctx,
- const struct cube_selection_coords *selcoords,
- const LLVMValueRef *coords,
- LLVMValueRef *out_st,
- LLVMValueRef *out_ma)
-{
- LLVMBuilderRef builder = ctx->builder;
- LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
- LLVMValueRef is_ma_positive;
- LLVMValueRef sgn_ma;
- LLVMValueRef is_ma_z, is_not_ma_z;
- LLVMValueRef is_ma_y;
- LLVMValueRef is_ma_x;
- LLVMValueRef sgn;
- LLVMValueRef tmp;
-
- is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
- selcoords->ma, LLVMConstReal(f32, 0.0), "");
- sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
- LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
-
- is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
- is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
- is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
- LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
- is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
-
- /* Select sc */
- tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
- LLVMBuildSelect(builder, is_ma_z, sgn_ma,
- LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
- out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
-
- /* Select tc */
- tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
- LLVMConstReal(f32, -1.0), "");
- out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
-
- /* Select ma */
- tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
- LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
- ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
- *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
-}
-
-void
-ac_prepare_cube_coords(struct ac_llvm_context *ctx,
- bool is_deriv, bool is_array, bool is_lod,
- LLVMValueRef *coords_arg,
- LLVMValueRef *derivs_arg)
-{
-
- LLVMBuilderRef builder = ctx->builder;
- struct cube_selection_coords selcoords;
- LLVMValueRef coords[3];
- LLVMValueRef invma;
-
- if (is_array && !is_lod) {
- LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
-
- /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
- *
- * "For Array forms, the array layer used will be
- *
- * max(0, min(d−1, floor(layer+0.5)))
- *
- * where d is the depth of the texture array and layer
- * comes from the component indicated in the tables below.
- * Workaroudn for an issue where the layer is taken from a
- * helper invocation which happens to fall on a different
- * layer due to extrapolation."
- *
- * GFX8 and earlier attempt to implement this in hardware by
- * clamping the value of coords[2] = (8 * layer) + face.
- * Unfortunately, this means that the we end up with the wrong
- * face when clamping occurs.
- *
- * Clamp the layer earlier to work around the issue.
- */
- if (ctx->chip_class <= GFX8) {
- LLVMValueRef ge0;
- ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
- tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
- }
-
- coords_arg[3] = tmp;
- }
-
- build_cube_intrinsic(ctx, coords_arg, &selcoords);
-
- invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
- ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
- invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
-
- for (int i = 0; i < 2; ++i)
- coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
-
- coords[2] = selcoords.id;
-
- if (is_deriv && derivs_arg) {
- LLVMValueRef derivs[4];
- int axis;
-
- /* Convert cube derivatives to 2D derivatives. */
- for (axis = 0; axis < 2; axis++) {
- LLVMValueRef deriv_st[2];
- LLVMValueRef deriv_ma;
-
- /* Transform the derivative alongside the texture
- * coordinate. Mathematically, the correct formula is
- * as follows. Assume we're projecting onto the +Z face
- * and denote by dx/dh the derivative of the (original)
- * X texture coordinate with respect to horizontal
- * window coordinates. The projection onto the +Z face
- * plane is:
- *
- * f(x,z) = x/z
- *
- * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
- * = 1/z * dx/dh - x/z * 1/z * dz/dh.
- *
- * This motivatives the implementation below.
- *
- * Whether this actually gives the expected results for
- * apps that might feed in derivatives obtained via
- * finite differences is anyone's guess. The OpenGL spec
- * seems awfully quiet about how textureGrad for cube
- * maps should be handled.
- */
- build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
- deriv_st, &deriv_ma);
-
- deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
-
- for (int i = 0; i < 2; ++i)
- derivs[axis * 2 + i] =
- LLVMBuildFSub(builder,
- LLVMBuildFMul(builder, deriv_st[i], invma, ""),
- LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
- }
-
- memcpy(derivs_arg, derivs, sizeof(derivs));
- }
-
- /* Shift the texture coordinate. This must be applied after the
- * derivative calculation.
- */
- for (int i = 0; i < 2; ++i)
- coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
-
- if (is_array) {
- /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
- /* coords_arg.w component - array_index for cube arrays */
- coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
- }
-
- memcpy(coords_arg, coords, sizeof(coords));
-}
-
-
-LLVMValueRef
-ac_build_fs_interp(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j)
-{
- LLVMValueRef args[5];
- LLVMValueRef p1;
-
- args[0] = i;
- args[1] = llvm_chan;
- args[2] = attr_number;
- args[3] = params;
-
- p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
- ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
-
- args[0] = p1;
- args[1] = j;
- args[2] = llvm_chan;
- args[3] = attr_number;
- args[4] = params;
+ const struct cube_selection_coords *selcoords,
+ const LLVMValueRef *coords, LLVMValueRef *out_st,
+ LLVMValueRef *out_ma)
+{
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
+ LLVMValueRef is_ma_positive;
+ LLVMValueRef sgn_ma;
+ LLVMValueRef is_ma_z, is_not_ma_z;
+ LLVMValueRef is_ma_y;
+ LLVMValueRef is_ma_x;
+ LLVMValueRef sgn;
+ LLVMValueRef tmp;
+
+ is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), "");
+ sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0),
+ LLVMConstReal(f32, -1.0), "");
+
+ is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
+ is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
+ is_ma_y = LLVMBuildAnd(
+ builder, is_not_ma_z,
+ LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
+ is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
+
+ /* Select sc */
+ tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
+ sgn = LLVMBuildSelect(
+ builder, is_ma_y, LLVMConstReal(f32, 1.0),
+ LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
+ out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+ /* Select tc */
+ tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
+ sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, LLVMConstReal(f32, -1.0), "");
+ out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+ /* Select ma */
+ tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
+ LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
+ tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+ *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
+}
+
+void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
+ LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg)
+{
+
+ LLVMBuilderRef builder = ctx->builder;
+ struct cube_selection_coords selcoords;
+ LLVMValueRef coords[3];
+ LLVMValueRef invma;
+
+ if (is_array && !is_lod) {
+ LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
+
+ /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+ *
+ * "For Array forms, the array layer used will be
+ *
+ * max(0, min(d−1, floor(layer+0.5)))
+ *
+ * where d is the depth of the texture array and layer
+ * comes from the component indicated in the tables below.
+ * Workaround for an issue where the layer is taken from a
+ * helper invocation which happens to fall on a different
+ * layer due to extrapolation."
+ *
+ * GFX8 and earlier attempt to implement this in hardware by
+ * clamping the value of coords[2] = (8 * layer) + face.
+ * Unfortunately, this means that we end up with the wrong
+ * face when clamping occurs.
+ *
+ * Clamp the layer earlier to work around the issue.
+ */
+ if (ctx->chip_class <= GFX8) {
+ LLVMValueRef ge0;
+ ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
+ tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
+ }
+
+ coords_arg[3] = tmp;
+ }
+
+ build_cube_intrinsic(ctx, coords_arg, &selcoords);
+
+ invma =
+ ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
+ invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
+
+ for (int i = 0; i < 2; ++i)
+ coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
+
+ coords[2] = selcoords.id;
+
+ if (is_deriv && derivs_arg) {
+ LLVMValueRef derivs[4];
+ int axis;
+
+ /* Convert cube derivatives to 2D derivatives. */
+ for (axis = 0; axis < 2; axis++) {
+ LLVMValueRef deriv_st[2];
+ LLVMValueRef deriv_ma;
+
+ /* Transform the derivative alongside the texture
+ * coordinate. Mathematically, the correct formula is
+ * as follows. Assume we're projecting onto the +Z face
+ * and denote by dx/dh the derivative of the (original)
+ * X texture coordinate with respect to horizontal
+ * window coordinates. The projection onto the +Z face
+ * plane is:
+ *
+ * f(x,z) = x/z
+ *
+ * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
+ * = 1/z * dx/dh - x/z * 1/z * dz/dh.
+ *
+ * This motivates the implementation below.
+ *
+ * Whether this actually gives the expected results for
+ * apps that might feed in derivatives obtained via
+ * finite differences is anyone's guess. The OpenGL spec
+ * seems awfully quiet about how textureGrad for cube
+ * maps should be handled.
+ */
+ build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma);
+
+ deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
+
+ for (int i = 0; i < 2; ++i)
+ derivs[axis * 2 + i] =
+ LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""),
+ LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
+ }
+
+ memcpy(derivs_arg, derivs, sizeof(derivs));
+ }
+
+ /* Shift the texture coordinate. This must be applied after the
+ * derivative calculation.
+ */
+ for (int i = 0; i < 2; ++i)
+ coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
+
+ if (is_array) {
+ /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
+ /* coords_arg.w component - array_index for cube arrays */
+ coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
+ }
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
- ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+ memcpy(coords_arg, coords, sizeof(coords));
}
-LLVMValueRef
-ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j)
+LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
+ LLVMValueRef j)
{
- LLVMValueRef args[6];
- LLVMValueRef p1;
-
- args[0] = i;
- args[1] = llvm_chan;
- args[2] = attr_number;
- args[3] = ctx->i1false;
- args[4] = params;
-
- p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
- ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
-
- args[0] = p1;
- args[1] = j;
- args[2] = llvm_chan;
- args[3] = attr_number;
- args[4] = ctx->i1false;
- args[5] = params;
+ LLVMValueRef args[5];
+ LLVMValueRef p1;
+
+ args[0] = i;
+ args[1] = llvm_chan;
+ args[2] = attr_number;
+ args[3] = params;
+
+ p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
- ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
+ args[0] = p1;
+ args[1] = j;
+ args[2] = llvm_chan;
+ args[3] = attr_number;
+ args[4] = params;
+
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", ctx->f32, args, 5,
+ AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef
-ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
- LLVMValueRef parameter,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params)
+LLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
+ LLVMValueRef j)
{
- LLVMValueRef args[4];
+ LLVMValueRef args[6];
+ LLVMValueRef p1;
+
+ args[0] = i;
+ args[1] = llvm_chan;
+ args[2] = attr_number;
+ args[3] = ctx->i1false;
+ args[4] = params;
+
+ p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", ctx->f32, args, 5,
+ AC_FUNC_ATTR_READNONE);
- args[0] = parameter;
- args[1] = llvm_chan;
- args[2] = attr_number;
- args[3] = params;
+ args[0] = p1;
+ args[1] = j;
+ args[2] = llvm_chan;
+ args[3] = attr_number;
+ args[4] = ctx->i1false;
+ args[5] = params;
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
- ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", ctx->f16, args, 6,
+ AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef
-ac_build_gep_ptr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index)
+LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter,
+ LLVMValueRef llvm_chan, LLVMValueRef attr_number,
+ LLVMValueRef params)
{
- return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
+ LLVMValueRef args[4];
+
+ args[0] = parameter;
+ args[1] = llvm_chan;
+ args[2] = attr_number;
+ args[3] = params;
+
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", ctx->f32, args, 4,
+ AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef
-ac_build_gep0(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index)
+LLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index)
{
- LLVMValueRef indices[2] = {
- ctx->i32_0,
- index,
- };
- return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
+ return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
}
-LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef index)
+LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index)
{
- return LLVMBuildPointerCast(ctx->builder,
- LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""),
- LLVMTypeOf(ptr), "");
+ LLVMValueRef indices[2] = {
+ ctx->i32_0,
+ index,
+ };
+ return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
}
-void
-ac_build_indexed_store(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- LLVMValueRef value)
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMValueRef index)
{
- LLVMBuildStore(ctx->builder, value,
- ac_build_gep0(ctx, base_ptr, index));
+ return LLVMBuildPointerCast(ctx->builder, LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""),
+ LLVMTypeOf(ptr), "");
+}
+
+void ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index,
+ LLVMValueRef value)
+{
+ LLVMBuildStore(ctx->builder, value, ac_build_gep0(ctx, base_ptr, index));
}
/**
* ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
* sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
*/
-static LLVMValueRef
-ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
- LLVMValueRef index, bool uniform, bool invariant,
- bool no_unsigned_wraparound)
+static LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index, bool uniform, bool invariant,
+ bool no_unsigned_wraparound)
{
- LLVMValueRef pointer, result;
+ LLVMValueRef pointer, result;
- if (no_unsigned_wraparound &&
- LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
- pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
- else
- pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
+ if (no_unsigned_wraparound &&
+ LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
+ pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
+ else
+ pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
- if (uniform)
- LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
- result = LLVMBuildLoad(ctx->builder, pointer, "");
- if (invariant)
- LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
- return result;
+ if (uniform)
+ LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
+ result = LLVMBuildLoad(ctx->builder, pointer, "");
+ if (invariant)
+ LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+ return result;
}
-LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
- LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index)
{
- return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
+ return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
}
-LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index)
{
- return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
+ return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
}
/* This assumes that there is no unsigned integer wraparound during the address
* computation, excluding all GEPs within base_ptr. */
-LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index)
{
- return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
+ return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
}
/* See ac_build_load_custom() documentation. */
LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
+ LLVMValueRef base_ptr, LLVMValueRef index)
{
- return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
+ return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
}
-static unsigned get_load_cache_policy(struct ac_llvm_context *ctx,
- unsigned cache_policy)
+static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
{
- return cache_policy |
- (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0);
+ return cache_policy | (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0);
}
-static void
-ac_build_buffer_store_common(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef data,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy,
- bool use_format,
- bool structurized)
/* Emit llvm.amdgcn.{raw,struct}.buffer.store[.format].<type> for data.
 *
 * Operands, in order: data, rsrc bitcast to v4i32, vindex (only when
 * structurized), voffset, soffset, cache_policy as an i32 constant.
 * A NULL vindex, voffset or soffset is replaced by ctx->i32_0.
 * The <type> suffix is derived from LLVMTypeOf(data); use_format selects the
 * .format variant of the intrinsic name.
 */
+static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef data, LLVMValueRef vindex,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned cache_policy, bool use_format, bool structurized)
{
- LLVMValueRef args[6];
- int idx = 0;
- args[idx++] = data;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
/* At most six operands are pushed (five in the raw form); idx counts them. */
+ LLVMValueRef args[6];
+ int idx = 0;
+ args[idx++] = data;
+ args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+ if (structurized)
+ args[idx++] = vindex ? vindex : ctx->i32_0;
+ args[idx++] = voffset ? voffset : ctx->i32_0;
+ args[idx++] = soffset ? soffset : ctx->i32_0;
+ args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
+ const char *indexing_kind = structurized ? "struct" : "raw";
+ char name[256], type_name[8];
- ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name));
+ ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name));
- if (use_format) {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
- indexing_kind, type_name);
- } else {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
- indexing_kind, type_name);
- }
+ if (use_format) {
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s", indexing_kind,
+ type_name);
+ } else {
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s", indexing_kind, type_name);
+ }
- ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
+ ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
}
/* Struct-indexed format store: forwards to ac_build_buffer_store_common with
 * soffset = NULL, use_format = true and structurized = true.
 */
-void
-ac_build_buffer_store_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef data,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned cache_policy)
+void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
+ LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy)
{
- ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL,
- cache_policy, true, true);
+ ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true, true);
}
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
* The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
* or v4i32 (num_channels=3,4).
*/
-void
-ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy)
-{
- /* Split 3 channel stores, because only LLVM 9+ support 3-channel
- * intrinsics. */
- if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
- LLVMValueRef v[3], v01;
-
- for (int i = 0; i < 3; i++) {
- v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
- LLVMConstInt(ctx->i32, i, 0), "");
- }
- v01 = ac_build_gather_values(ctx, v, 2);
-
- ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
- soffset, inst_offset, cache_policy);
- ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
- soffset, inst_offset + 8,
- cache_policy);
- return;
- }
-
- /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
- * (voffset is swizzled, but soffset isn't swizzled).
- * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
- */
- if (!(cache_policy & ac_swizzled)) {
- LLVMValueRef offset = soffset;
-
- if (inst_offset)
- offset = LLVMBuildAdd(ctx->builder, offset,
- LLVMConstInt(ctx->i32, inst_offset, 0), "");
-
- ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata),
- ctx->i32_0, voffset, offset,
- cache_policy, false, false);
- return;
- }
-
- static const unsigned dfmts[] = {
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_DATA_FORMAT_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32_32
- };
- unsigned dfmt = dfmts[num_channels - 1];
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
- LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
-
- ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, cache_policy);
-}
-
-static LLVMValueRef
-ac_build_buffer_load_common(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned num_channels,
- LLVMTypeRef channel_type,
- unsigned cache_policy,
- bool can_speculate,
- bool use_format,
- bool structurized)
-{
- LLVMValueRef args[5];
- int idx = 0;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- /* D16 is only supported on gfx8+ */
- assert(!use_format ||
- (channel_type != ctx->f16 && channel_type != ctx->i16) ||
- ctx->chip_class >= GFX8);
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- if (use_format) {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
- indexing_kind, type_name);
- } else {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
- indexing_kind, type_name);
- }
-
- return ac_build_intrinsic(ctx, name, type, args, idx,
- ac_get_load_intr_attribs(can_speculate));
-}
-
-LLVMValueRef
-ac_build_buffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- int num_channels,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy,
- bool can_speculate,
- bool allow_smem)
-{
- LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
- if (voffset)
- offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
- if (soffset)
- offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
-
- if (allow_smem && !(cache_policy & ac_slc) &&
- (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
- assert(vindex == NULL);
-
- LLVMValueRef result[8];
-
- for (int i = 0; i < num_channels; i++) {
- if (i) {
- offset = LLVMBuildAdd(ctx->builder, offset,
- LLVMConstInt(ctx->i32, 4, 0), "");
- }
- LLVMValueRef args[3] = {
- rsrc,
- offset,
- LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
- };
- result[i] = ac_build_intrinsic(ctx,
- "llvm.amdgcn.s.buffer.load.f32",
- ctx->f32, args, 3,
- AC_FUNC_ATTR_READNONE);
- }
- if (num_channels == 1)
- return result[0];
-
- if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
- result[num_channels++] = LLVMGetUndef(ctx->f32);
- return ac_build_gather_values(ctx, result, num_channels);
- }
-
- return ac_build_buffer_load_common(ctx, rsrc, vindex,
- offset, ctx->i32_0,
- num_channels, ctx->f32,
- cache_policy,
- can_speculate, false, false);
-}
-
-LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- unsigned cache_policy,
- bool can_speculate,
- bool d16)
-{
- return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
- ctx->i32_0, num_channels,
- d16 ? ctx->f16 : ctx->f32,
- cache_policy, can_speculate,
- true, true);
-}
-
-static LLVMValueRef
-ac_build_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate,
- bool structurized)
-{
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
- LLVMValueRef args[6];
- int idx = 0;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
- args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
- indexing_kind, type_name);
-
- return ac_build_intrinsic(ctx, name, type, args, idx,
- ac_get_load_intr_attribs(can_speculate));
-}
-
-LLVMValueRef
-ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate)
-{
- return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt,
- cache_policy, can_speculate, true);
-}
-
-LLVMValueRef
-ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate)
-{
- return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt,
- cache_policy, can_speculate, false);
-}
-
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy)
-{
- LLVMValueRef res;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- res = ac_build_buffer_load_common(ctx, rsrc, NULL,
- voffset, soffset,
- 1, ctx->i16, cache_policy,
- false, false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
- immoffset, 1, dfmt, nfmt, cache_policy,
- false);
-
- res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
- }
-
- return res;
-}
-
-LLVMValueRef
-ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy)
-{
- LLVMValueRef res;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- res = ac_build_buffer_load_common(ctx, rsrc, NULL,
- voffset, soffset,
- 1, ctx->i8, cache_policy,
- false, false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
/* Store num_channels 32-bit channels of vdata to a buffer.
 *
 * Three paths are visible below:
 *  - num_channels == 3 without vec3 support: the store is split into a
 *    2-channel store of elements 0-1 and a 1-channel store of element 2 at
 *    inst_offset + 8, each through a recursive call.
 *  - ac_swizzled not set in cache_policy: inst_offset is added to soffset and a
 *    raw, non-format buffer store of ac_to_float(vdata) is emitted.
 *  - ac_swizzled set: a raw tbuffer store is emitted with a 32-bit-per-channel
 *    data format picked by num_channels and the UINT number format.
 *
 * NOTE(review): when inst_offset != 0 in the non-swizzled path, soffset is
 * passed to LLVMBuildAdd without a NULL check - presumably callers always
 * pass a non-NULL soffset; confirm.
 * NOTE(review): dfmts[num_channels - 1] assumes num_channels is in 1..4.
 */
+void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
+ unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned inst_offset, unsigned cache_policy)
+{
+ /* Split 3 channel stores, because only LLVM 9+ support 3-channel
+ * intrinsics. */
+ if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
+ LLVMValueRef v[3], v01;
+
+ for (int i = 0; i < 3; i++) {
+ v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ v01 = ac_build_gather_values(ctx, v, 2);
+
+ ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, soffset, inst_offset, cache_policy);
+ ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, soffset, inst_offset + 8,
+ cache_policy);
+ return;
+ }
+
+ /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+ * (voffset is swizzled, but soffset isn't swizzled).
+ * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+ */
+ if (!(cache_policy & ac_swizzled)) {
+ LLVMValueRef offset = soffset;
+
+ if (inst_offset)
+ offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, inst_offset, 0), "");
+
+ ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), ctx->i32_0, voffset, offset,
+ cache_policy, false, false);
+ return;
+ }
+
/* Swizzled path: index the data-format table by channel count. */
+ static const unsigned dfmts[] = {V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32,
+ V_008F0C_BUF_DATA_FORMAT_32_32_32,
+ V_008F0C_BUF_DATA_FORMAT_32_32_32_32};
+ unsigned dfmt = dfmts[num_channels - 1];
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+ LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
+
+ ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, immoffset, num_channels, dfmt,
+ nfmt, cache_policy);
+}
+
/* Emit llvm.amdgcn.{raw,struct}.buffer.load[.format].<type> and return its
 * result.
 *
 * Operands, in order: rsrc bitcast to v4i32, vindex (only when structurized),
 * voffset, soffset, and get_load_cache_policy(ctx, cache_policy) as an i32
 * constant. NULL vindex, voffset or soffset become ctx->i32_0.
 *
 * The loaded type is channel_type for one channel, otherwise a vector of
 * channel_type. A 3-channel request is widened to 4 channels when
 * ac_has_vec3_support(chip_class, use_format) is false, so the returned
 * vector can be wider than num_channels.
 */
+static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, unsigned num_channels,
+ LLVMTypeRef channel_type, unsigned cache_policy,
+ bool can_speculate, bool use_format,
+ bool structurized)
+{
+ LLVMValueRef args[5];
+ int idx = 0;
+ args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+ if (structurized)
+ args[idx++] = vindex ? vindex : ctx->i32_0;
+ args[idx++] = voffset ? voffset : ctx->i32_0;
+ args[idx++] = soffset ? soffset : ctx->i32_0;
+ args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
/* func is the channel count actually requested from the intrinsic. */
+ unsigned func =
+ !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
+ const char *indexing_kind = structurized ? "struct" : "raw";
+ char name[256], type_name[8];
+
+ /* D16 is only supported on gfx8+ */
+ assert(!use_format || (channel_type != ctx->f16 && channel_type != ctx->i16) ||
+ ctx->chip_class >= GFX8);
+
+ LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
+ ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+
+ if (use_format) {
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", indexing_kind,
+ type_name);
+ } else {
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", indexing_kind, type_name);
+ }
+
+ return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate));
+}
+
/* Load num_channels f32 channels from a buffer.
 *
 * inst_offset, voffset (if non-NULL) and soffset (if non-NULL) are summed
 * into a single offset first.
 *
 * Scalar path (allow_smem set, ac_slc clear, and either ac_glc clear or
 * chip_class >= GFX8): one llvm.amdgcn.s.buffer.load.f32 per channel, with the
 * offset advanced by 4 between channels; vindex is asserted to be NULL. A
 * single channel is returned as a scalar, otherwise the channels are gathered
 * into a vector; a 3-channel result gets an undef 4th element when vec3 is not
 * supported.
 *
 * Otherwise a raw, non-format load is emitted through
 * ac_build_buffer_load_common with the summed offset as voffset and a zero
 * soffset.
 *
 * NOTE(review): on the non-scalar path vindex is forwarded with
 * structurized = false, so ac_build_buffer_load_common does not use it -
 * confirm that callers do not rely on vindex here.
 * NOTE(review): result has 8 entries and num_channels is not range-checked.
 */
+LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
+ LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned inst_offset, unsigned cache_policy, bool can_speculate,
+ bool allow_smem)
+{
+ LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+ if (voffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+ if (soffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+ if (allow_smem && !(cache_policy & ac_slc) &&
+ (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
+ assert(vindex == NULL);
+
+ LLVMValueRef result[8];
+
+ for (int i = 0; i < num_channels; i++) {
+ if (i) {
+ offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), "");
+ }
+ LLVMValueRef args[3] = {
+ rsrc,
+ offset,
+ LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
+ };
+ result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.f32", ctx->f32, args, 3,
+ AC_FUNC_ATTR_READNONE);
+ }
+ if (num_channels == 1)
+ return result[0];
+
+ if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
+ result[num_channels++] = LLVMGetUndef(ctx->f32);
+ return ac_build_gather_values(ctx, result, num_channels);
+ }
+
+ return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, ctx->i32_0, num_channels, ctx->f32,
+ cache_policy, can_speculate, false, false);
+}
+
/* Struct-indexed format load: forwards to ac_build_buffer_load_common with a
 * zero soffset, use_format = true and structurized = true. The channel type is
 * ctx->f16 when d16 is set and ctx->f32 otherwise.
 */
+LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ unsigned num_channels, unsigned cache_policy,
+ bool can_speculate, bool d16)
+{
+ return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, num_channels,
+ d16 ? ctx->f16 : ctx->f32, cache_policy, can_speculate, true,
+ true);
+}
+
/* Emit llvm.amdgcn.{raw,struct}.tbuffer.load.<type> and return its result.
 *
 * immoffset is added to voffset up front. Operands, in order: rsrc bitcast to
 * v4i32, vindex (only when structurized), voffset, soffset, the format word
 * from ac_get_tbuffer_format(chip_class, dfmt, nfmt), and the load cache
 * policy. The result is i32 for one channel, otherwise a vector of i32; a
 * 3-channel request is widened to 4 when ac_has_vec3_support(chip_class, true)
 * is false.
 *
 * NOTE(review): voffset is passed to LLVMBuildAdd before the NULL fallback
 * below, so that fallback can never supply ctx->i32_0 for a NULL voffset.
 * ac_build_tbuffer_store substitutes ctx->i32_0 before its add - confirm
 * whether a NULL voffset can reach this function.
 */
+static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy, bool can_speculate,
+ bool structurized)
+{
+ voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+ LLVMValueRef args[6];
+ int idx = 0;
+ args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+ if (structurized)
+ args[idx++] = vindex ? vindex : ctx->i32_0;
+ args[idx++] = voffset ? voffset : ctx->i32_0;
+ args[idx++] = soffset ? soffset : ctx->i32_0;
+ args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
+ unsigned func =
+ !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
+ const char *indexing_kind = structurized ? "struct" : "raw";
+ char name[256], type_name[8];
+
+ LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
+ ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s", indexing_kind, type_name);
+
+ return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate));
+}
- res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
- immoffset, 1, dfmt, nfmt, cache_policy,
- false);
/* Struct-indexed typed buffer load: forwards every argument to
 * ac_build_tbuffer_load with structurized = true.
 */
+LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy, bool can_speculate)
+{
+ return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, immoffset, num_channels, dfmt,
+ nfmt, cache_policy, can_speculate, true);
+}
+
/* Raw typed buffer load: forwards to ac_build_tbuffer_load with vindex = NULL
 * and structurized = false.
 */
+LLVMValueRef ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ LLVMValueRef immoffset, unsigned num_channels, unsigned dfmt,
+ unsigned nfmt, unsigned cache_policy, bool can_speculate)
+{
+ return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset, immoffset, num_channels, dfmt,
+ nfmt, cache_policy, can_speculate, false);
+}
+
/* Load one 16-bit value from a buffer and return it as i16.
 *
 * With LLVM_VERSION_MAJOR >= 9, immoffset is folded into voffset and a raw,
 * non-format buffer load of a single ctx->i16 channel is emitted. Otherwise a
 * raw tbuffer load with the 16-bit data format and UINT number format is
 * emitted and its result truncated to i16. Both paths pass
 * can_speculate = false.
 */
+LLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ LLVMValueRef immoffset, unsigned cache_policy)
+{
+ LLVMValueRef res;
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+ /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+ res = ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i16,
+ cache_policy, false, false, false);
+ } else {
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+ res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, immoffset, 1, dfmt, nfmt,
+ cache_policy, false);
+
+ res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
+ }
+
+ return res;
+}
+
/* Load one 8-bit value from a buffer and return it as i8.
 *
 * With LLVM_VERSION_MAJOR >= 9, immoffset is folded into voffset and a raw,
 * non-format buffer load of a single ctx->i8 channel is emitted. Otherwise a
 * raw tbuffer load with the 8-bit data format and UINT number format is
 * emitted and its result truncated to i8. Both paths pass
 * can_speculate = false.
 */
+LLVMValueRef ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ LLVMValueRef immoffset, unsigned cache_policy)
+{
+ LLVMValueRef res;
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+ /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+ res = ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, cache_policy,
+ false, false, false);
+ } else {
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+ res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, immoffset, 1, dfmt, nfmt,
+ cache_policy, false);
- res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, "");
- }
-
- return res;
+ res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, "");
+ }
+
+ return res;
}
/**
 * Convert a small float made of exp_bits exponent bits and mant_bits mantissa
 * bits, held in the i32 \p src, to a 32-bit float. No sign bit is handled
 * here.
 *
 * The input exponent is expected to be biased analogously to IEEE-754, i.e. by
 * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs).
 *
 * Selection order of the result: src == 0 gives 0; src below 1 << mant_bits
 * takes the denormal path; src at or above the all-ones exponent takes the
 * NaN/Inf path; every other value takes the normal path.
 *
 * \return the selected i32 bit pattern passed through ac_to_float
 */
-static LLVMValueRef
-ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned exp_bits, unsigned mant_bits)
+static LLVMValueRef ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src,
+ unsigned exp_bits, unsigned mant_bits)
{
- assert(LLVMTypeOf(src) == ctx->i32);
+ assert(LLVMTypeOf(src) == ctx->i32);
- LLVMValueRef tmp;
- LLVMValueRef mantissa;
- mantissa = LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), "");
+ LLVMValueRef tmp;
+ LLVMValueRef mantissa;
+ mantissa =
+ LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), "");
- /* Converting normal numbers is just a shift + correcting the exponent bias */
- unsigned normal_shift = 23 - mant_bits;
- unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1);
- LLVMValueRef shifted, normal;
+ /* Converting normal numbers is just a shift + correcting the exponent bias */
+ unsigned normal_shift = 23 - mant_bits;
+ unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1);
+ LLVMValueRef shifted, normal;
- shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), "");
- normal = LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), "");
+ shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), "");
+ normal =
+ LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), "");
- /* Converting nan/inf numbers is the same, but with a different exponent update */
- LLVMValueRef naninf;
- naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), "");
+ /* Converting nan/inf numbers is the same, but with a different exponent update */
+ LLVMValueRef naninf;
+ naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), "");
- /* Converting denormals is the complex case: determine the leading zeros of the
- * mantissa to obtain the correct shift for the mantissa and exponent correction.
- */
- LLVMValueRef denormal;
- LLVMValueRef params[2] = {
- mantissa,
- ctx->i1true, /* result can be undef when arg is 0 */
- };
- LLVMValueRef ctlz = ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32,
- params, 2, AC_FUNC_ATTR_READNONE);
+ /* Converting denormals is the complex case: determine the leading zeros of the
+ * mantissa to obtain the correct shift for the mantissa and exponent correction.
+ */
+ LLVMValueRef denormal;
+ LLVMValueRef params[2] = {
+ mantissa, ctx->i1true, /* result can be undef when arg is 0 */
+ };
+ LLVMValueRef ctlz =
+ ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
- /* Shift such that the leading 1 ends up as the LSB of the exponent field. */
- tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), "");
- denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, "");
/* The leading 1 sits at bit 31 - ctlz; shifting left by ctlz - 8 moves it to bit 23. */
+ /* Shift such that the leading 1 ends up as the LSB of the exponent field. */
+ tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), "");
+ denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, "");
- unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1;
- tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, "");
- tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), "");
- denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, "");
+ unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1;
+ tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, "");
+ tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), "");
+ denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, "");
- /* Select the final result. */
- LLVMValueRef result;
+ /* Select the final result. */
+ LLVMValueRef result;
- tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
- LLVMConstInt(ctx->i32, ((1 << exp_bits) - 1) << mant_bits, false), "");
- result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, "");
+ tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
+ LLVMConstInt(ctx->i32, ((1 << exp_bits) - 1) << mant_bits, false), "");
+ result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, "");
- tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
- LLVMConstInt(ctx->i32, 1 << mant_bits, false), "");
- result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, "");
+ tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, LLVMConstInt(ctx->i32, 1 << mant_bits, false),
+ "");
+ result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, "");
- tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, "");
- result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, "");
+ tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, "");
+ result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, "");
- return ac_to_float(ctx, result);
+ return ac_to_float(ctx, result);
}
/**
* \param rsrc buffer resource descriptor
* \return the resulting vector of floats or integers bitcast to <4 x i32>
*/
-LLVMValueRef
-ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
- unsigned log_size,
- unsigned num_channels,
- unsigned format,
- bool reverse,
- bool known_aligned,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy,
- bool can_speculate)
-{
- LLVMValueRef tmp;
- unsigned load_log_size = log_size;
- unsigned load_num_channels = num_channels;
- if (log_size == 3) {
- load_log_size = 2;
- if (format == AC_FETCH_FORMAT_FLOAT) {
- load_num_channels = 2 * num_channels;
- } else {
- load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */
- }
- }
-
- int log_recombine = 0;
- if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) {
- /* Avoid alignment restrictions by loading one byte at a time. */
- load_num_channels <<= load_log_size;
- log_recombine = load_log_size;
- load_log_size = 0;
- } else if (load_num_channels == 2 || load_num_channels == 4) {
- log_recombine = -util_logbase2(load_num_channels);
- load_num_channels = 1;
- load_log_size += -log_recombine;
- }
-
- assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9);
-
- LLVMValueRef loads[32]; /* up to 32 bytes */
- for (unsigned i = 0; i < load_num_channels; ++i) {
- tmp = LLVMBuildAdd(ctx->builder, soffset,
- LLVMConstInt(ctx->i32, i << load_log_size, false), "");
- LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 :
- load_log_size == 1 ? ctx->i16 : ctx->i32;
- unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2);
- loads[i] = ac_build_buffer_load_common(
- ctx, rsrc, vindex, voffset, tmp,
- num_channels, channel_type, cache_policy,
- can_speculate, false, true);
- if (load_log_size >= 2)
- loads[i] = ac_to_integer(ctx, loads[i]);
- }
-
- if (log_recombine > 0) {
- /* Recombine bytes if necessary (GFX6 only) */
- LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
-
- for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
- LLVMValueRef accum = NULL;
- for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) {
- tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
- if (i == 0) {
- accum = tmp;
- } else {
- tmp = LLVMBuildShl(ctx->builder, tmp,
- LLVMConstInt(dst_type, 8 * i, false), "");
- accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
- }
- }
- loads[dst] = accum;
- }
- } else if (log_recombine < 0) {
- /* Split vectors of dwords */
- if (load_log_size > 2) {
- assert(load_num_channels == 1);
- LLVMValueRef loaded = loads[0];
- unsigned log_split = load_log_size - 2;
- log_recombine += log_split;
- load_num_channels = 1 << log_split;
- load_log_size = 2;
- for (unsigned i = 0; i < load_num_channels; ++i) {
- tmp = LLVMConstInt(ctx->i32, i, false);
- loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
- }
- }
-
- /* Further split dwords and shorts if required */
- if (log_recombine < 0) {
- for (unsigned src = load_num_channels,
- dst = load_num_channels << -log_recombine;
- src > 0; --src) {
- unsigned dst_bits = 1 << (3 + load_log_size + log_recombine);
- LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
- LLVMValueRef loaded = loads[src - 1];
- LLVMTypeRef loaded_type = LLVMTypeOf(loaded);
- for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) {
- tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false);
- tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
- loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
- }
- }
- }
- }
-
- if (log_size == 3) {
- if (format == AC_FETCH_FORMAT_FLOAT) {
- for (unsigned i = 0; i < num_channels; ++i) {
- tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
- loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
- }
- } else if (format == AC_FETCH_FORMAT_FIXED) {
- /* 10_11_11_FLOAT */
- LLVMValueRef data = loads[0];
- LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
- LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
- LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
- LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
-
- loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
- loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
- loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
-
- num_channels = 3;
- log_size = 2;
- format = AC_FETCH_FORMAT_FLOAT;
- } else {
- /* 2_10_10_10 data formats */
- LLVMValueRef data = loads[0];
- LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
- LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
- loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
- loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
- loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
- loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
-
- num_channels = 4;
- }
- }
-
- if (format == AC_FETCH_FORMAT_FLOAT) {
- if (log_size != 2) {
- for (unsigned chan = 0; chan < num_channels; ++chan) {
- tmp = ac_to_float(ctx, loads[chan]);
- if (log_size == 3)
- tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
- else if (log_size == 1)
- tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
- loads[chan] = ac_to_integer(ctx, tmp);
- }
- }
- } else if (format == AC_FETCH_FORMAT_UINT) {
- if (log_size != 2) {
- for (unsigned chan = 0; chan < num_channels; ++chan)
- loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
- }
- } else if (format == AC_FETCH_FORMAT_SINT) {
- if (log_size != 2) {
- for (unsigned chan = 0; chan < num_channels; ++chan)
- loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
- }
- } else {
- bool unsign = format == AC_FETCH_FORMAT_UNORM ||
- format == AC_FETCH_FORMAT_USCALED ||
- format == AC_FETCH_FORMAT_UINT;
-
- for (unsigned chan = 0; chan < num_channels; ++chan) {
- if (unsign) {
- tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
- } else {
- tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
- }
-
- LLVMValueRef scale = NULL;
- if (format == AC_FETCH_FORMAT_FIXED) {
- assert(log_size == 2);
- scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
- } else if (format == AC_FETCH_FORMAT_UNORM) {
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
- scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
- } else if (format == AC_FETCH_FORMAT_SNORM) {
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
- scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
- }
- if (scale)
- tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
-
- if (format == AC_FETCH_FORMAT_SNORM) {
- /* Clamp to [-1, 1] */
- LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
- LLVMValueRef clamp =
- LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
- tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
- }
-
- loads[chan] = ac_to_integer(ctx, tmp);
- }
- }
-
- while (num_channels < 4) {
- if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) {
- loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
- } else {
- loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
- }
- num_channels++;
- }
-
- if (reverse) {
- tmp = loads[0];
- loads[0] = loads[2];
- loads[2] = tmp;
- }
-
- return ac_build_gather_values(ctx, loads, 4);
-}
-
-static void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool structurized)
-{
- voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
- immoffset, "");
-
- LLVMValueRef args[7];
- int idx = 0;
- args[idx++] = vdata;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
- args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
- indexing_kind, type_name);
-
- ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
-}
-
-void
-ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy)
-{
- ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, cache_policy,
- true);
-}
-
-void
-ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy)
-{
- ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, cache_policy,
- false);
-}
-
-void
-ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy)
-{
- vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
-
- if (LLVM_VERSION_MAJOR >= 9) {
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
- voffset, soffset, cache_policy,
- false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
-
- ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- ctx->i32_0, 1, dfmt, nfmt, cache_policy);
- }
-}
-
-void
-ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy)
-{
- vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
-
- if (LLVM_VERSION_MAJOR >= 9) {
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
- voffset, soffset, cache_policy,
- false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
-
- ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- ctx->i32_0, 1, dfmt, nfmt, cache_policy);
- }
+LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size,
+ unsigned num_channels, unsigned format, bool reverse,
+ bool known_aligned, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, unsigned cache_policy,
+ bool can_speculate)
+{
+ LLVMValueRef tmp;
+ unsigned load_log_size = log_size;
+ unsigned load_num_channels = num_channels;
+ if (log_size == 3) {
+ load_log_size = 2;
+ if (format == AC_FETCH_FORMAT_FLOAT) {
+ load_num_channels = 2 * num_channels;
+ } else {
+ load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */
+ }
+ }
+
+ int log_recombine = 0;
+ if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) {
+ /* Avoid alignment restrictions by loading one byte at a time. */
+ load_num_channels <<= load_log_size;
+ log_recombine = load_log_size;
+ load_log_size = 0;
+ } else if (load_num_channels == 2 || load_num_channels == 4) {
+ log_recombine = -util_logbase2(load_num_channels);
+ load_num_channels = 1;
+ load_log_size += -log_recombine;
+ }
+
+ assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9);
+
+ LLVMValueRef loads[32]; /* up to 32 bytes */
+ for (unsigned i = 0; i < load_num_channels; ++i) {
+ tmp =
+ LLVMBuildAdd(ctx->builder, soffset, LLVMConstInt(ctx->i32, i << load_log_size, false), "");
+ LLVMTypeRef channel_type =
+ load_log_size == 0 ? ctx->i8 : load_log_size == 1 ? ctx->i16 : ctx->i32;
+ unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2);
+ loads[i] =
+ ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, tmp, num_channels, channel_type,
+ cache_policy, can_speculate, false, true);
+ if (load_log_size >= 2)
+ loads[i] = ac_to_integer(ctx, loads[i]);
+ }
+
+ if (log_recombine > 0) {
+ /* Recombine bytes if necessary (GFX6 and GFX10+, unaligned loads only) */
+ LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
+
+ for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
+ LLVMValueRef accum = NULL;
+ for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) {
+ tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
+ if (i == 0) {
+ accum = tmp;
+ } else {
+ tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(dst_type, 8 * i, false), "");
+ accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
+ }
+ }
+ loads[dst] = accum;
+ }
+ } else if (log_recombine < 0) {
+ /* Split vectors of dwords */
+ if (load_log_size > 2) {
+ assert(load_num_channels == 1);
+ LLVMValueRef loaded = loads[0];
+ unsigned log_split = load_log_size - 2;
+ log_recombine += log_split;
+ load_num_channels = 1 << log_split;
+ load_log_size = 2;
+ for (unsigned i = 0; i < load_num_channels; ++i) {
+ tmp = LLVMConstInt(ctx->i32, i, false);
+ loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
+ }
+ }
+
+ /* Further split dwords and shorts if required */
+ if (log_recombine < 0) {
+ for (unsigned src = load_num_channels, dst = load_num_channels << -log_recombine; src > 0;
+ --src) {
+ unsigned dst_bits = 1 << (3 + load_log_size + log_recombine);
+ LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
+ LLVMValueRef loaded = loads[src - 1];
+ LLVMTypeRef loaded_type = LLVMTypeOf(loaded);
+ for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) {
+ tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false);
+ tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
+ loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
+ }
+ }
+ }
+ }
+
+ if (log_size == 3) {
+ if (format == AC_FETCH_FORMAT_FLOAT) {
+ for (unsigned i = 0; i < num_channels; ++i) {
+ tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
+ loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
+ }
+ } else if (format == AC_FETCH_FORMAT_FIXED) {
+ /* 10_11_11_FLOAT */
+ LLVMValueRef data = loads[0];
+ LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
+ LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
+ tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
+ LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
+ LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
+
+ loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
+ loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
+ loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
+
+ num_channels = 3;
+ log_size = 2;
+ format = AC_FETCH_FORMAT_FLOAT;
+ } else {
+ /* 2_10_10_10 data formats */
+ LLVMValueRef data = loads[0];
+ LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
+ LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
+ loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
+ tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
+ loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
+ tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
+ loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
+ tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
+ loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
+
+ num_channels = 4;
+ }
+ }
+
+ if (format == AC_FETCH_FORMAT_FLOAT) {
+ if (log_size != 2) {
+ for (unsigned chan = 0; chan < num_channels; ++chan) {
+ tmp = ac_to_float(ctx, loads[chan]);
+ if (log_size == 3)
+ tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
+ else if (log_size == 1)
+ tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
+ loads[chan] = ac_to_integer(ctx, tmp);
+ }
+ }
+ } else if (format == AC_FETCH_FORMAT_UINT) {
+ if (log_size != 2) {
+ for (unsigned chan = 0; chan < num_channels; ++chan)
+ loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
+ }
+ } else if (format == AC_FETCH_FORMAT_SINT) {
+ if (log_size != 2) {
+ for (unsigned chan = 0; chan < num_channels; ++chan)
+ loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
+ }
+ } else {
+ bool unsign = format == AC_FETCH_FORMAT_UNORM || format == AC_FETCH_FORMAT_USCALED ||
+ format == AC_FETCH_FORMAT_UINT;
+
+ for (unsigned chan = 0; chan < num_channels; ++chan) {
+ if (unsign) {
+ tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
+ } else {
+ tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
+ }
+
+ LLVMValueRef scale = NULL;
+ if (format == AC_FETCH_FORMAT_FIXED) {
+ assert(log_size == 2);
+ scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
+ } else if (format == AC_FETCH_FORMAT_UNORM) {
+ unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
+ scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
+ } else if (format == AC_FETCH_FORMAT_SNORM) {
+ unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
+ scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
+ }
+ if (scale)
+ tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
+
+ if (format == AC_FETCH_FORMAT_SNORM) {
+ /* Clamp to [-1, 1] */
+ LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
+ LLVMValueRef clamp = LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
+ tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
+ }
+
+ loads[chan] = ac_to_integer(ctx, tmp);
+ }
+ }
+
+ while (num_channels < 4) {
+ if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) {
+ loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
+ } else {
+ loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
+ }
+ num_channels++;
+ }
+
+ if (reverse) {
+ tmp = loads[0];
+ loads[0] = loads[2];
+ loads[2] = tmp;
+ }
+
+ return ac_build_gather_values(ctx, loads, 4);
+}
+
+static void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy, bool structurized)
+{
+ voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, immoffset, "");
+
+ LLVMValueRef args[7];
+ int idx = 0;
+ args[idx++] = vdata;
+ args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+ if (structurized)
+ args[idx++] = vindex ? vindex : ctx->i32_0;
+ args[idx++] = voffset ? voffset : ctx->i32_0;
+ args[idx++] = soffset ? soffset : ctx->i32_0;
+ args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
+ unsigned func =
+ !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
+ const char *indexing_kind = structurized ? "struct" : "raw";
+ char name[256], type_name[8];
+
+ LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
+ ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", indexing_kind, type_name);
+
+ ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
+}
+
+void ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy)
+{
+ ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, immoffset, num_channels, dfmt,
+ nfmt, cache_policy, true);
+}
+
+void ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
+ LLVMValueRef voffset, LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy)
+{
+ ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset, immoffset, num_channels, dfmt,
+ nfmt, cache_policy, false);
+}
+
+void ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned cache_policy)
+{
+ vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+ ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false,
+ false);
+ } else {
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+ vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
+
+ ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, ctx->i32_0, 1, dfmt, nfmt,
+ cache_policy);
+ }
+}
+
+void ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
+ LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy)
+{
+ vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+ ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false,
+ false);
+ } else {
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+ vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
+
+ ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, ctx->i32_0, 1, dfmt, nfmt,
+ cache_policy);
+ }
}
/**
* Set range metadata on an instruction. This can only be used on load and
* \p lo is the minimum value inclusive.
* \p hi is the maximum value exclusive.
*/
-static void set_range_metadata(struct ac_llvm_context *ctx,
- LLVMValueRef value, unsigned lo, unsigned hi)
+static void set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned lo,
+ unsigned hi)
{
- LLVMValueRef range_md, md_args[2];
- LLVMTypeRef type = LLVMTypeOf(value);
- LLVMContextRef context = LLVMGetTypeContext(type);
+ LLVMValueRef range_md, md_args[2];
+ LLVMTypeRef type = LLVMTypeOf(value);
+ LLVMContextRef context = LLVMGetTypeContext(type);
- md_args[0] = LLVMConstInt(type, lo, false);
- md_args[1] = LLVMConstInt(type, hi, false);
- range_md = LLVMMDNodeInContext(context, md_args, 2);
- LLVMSetMetadata(value, ctx->range_md_kind, range_md);
+ md_args[0] = LLVMConstInt(type, lo, false);
+ md_args[1] = LLVMConstInt(type, hi, false);
+ range_md = LLVMMDNodeInContext(context, md_args, 2);
+ LLVMSetMetadata(value, ctx->range_md_kind, range_md);
}
-LLVMValueRef
-ac_get_thread_id(struct ac_llvm_context *ctx)
+LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx)
{
- LLVMValueRef tid;
+ LLVMValueRef tid;
- LLVMValueRef tid_args[2];
- tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
- tid_args[1] = ctx->i32_0;
- tid_args[1] = ac_build_intrinsic(ctx,
- "llvm.amdgcn.mbcnt.lo", ctx->i32,
- tid_args, 2, AC_FUNC_ATTR_READNONE);
+ LLVMValueRef tid_args[2];
+ tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
+ tid_args[1] = ctx->i32_0;
+ tid_args[1] =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, tid_args, 2, AC_FUNC_ATTR_READNONE);
- if (ctx->wave_size == 32) {
- tid = tid_args[1];
- } else {
- tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
- ctx->i32, tid_args,
- 2, AC_FUNC_ATTR_READNONE);
- }
- set_range_metadata(ctx, tid, 0, ctx->wave_size);
- return tid;
+ if (ctx->wave_size == 32) {
+ tid = tid_args[1];
+ } else {
+ tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, tid_args, 2,
+ AC_FUNC_ATTR_READNONE);
+ }
+ set_range_metadata(ctx, tid, 0, ctx->wave_size);
+ return tid;
}
/*
* Adding 1 yields the TID of the pixel to the right of the left pixel, and
* adding 2 yields the TID of the pixel below the top pixel.
*/
-LLVMValueRef
-ac_build_ddxy(struct ac_llvm_context *ctx,
- uint32_t mask,
- int idx,
- LLVMValueRef val)
-{
- unsigned tl_lanes[4], trbl_lanes[4];
- char name[32], type[8];
- LLVMValueRef tl, trbl;
- LLVMTypeRef result_type;
- LLVMValueRef result;
-
- result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
-
- if (result_type == ctx->f16)
- val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
- else if (result_type == ctx->v2f16)
- val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
-
- for (unsigned i = 0; i < 4; ++i) {
- tl_lanes[i] = i & mask;
- trbl_lanes[i] = (i & mask) + idx;
- }
-
- tl = ac_build_quad_swizzle(ctx, val,
- tl_lanes[0], tl_lanes[1],
- tl_lanes[2], tl_lanes[3]);
- trbl = ac_build_quad_swizzle(ctx, val,
- trbl_lanes[0], trbl_lanes[1],
- trbl_lanes[2], trbl_lanes[3]);
-
- if (result_type == ctx->f16) {
- tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
- trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
- }
-
- tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
- trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
- result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
-
- ac_build_type_name_for_intr(result_type, type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
-
- return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
-}
-
-void
-ac_build_sendmsg(struct ac_llvm_context *ctx,
- uint32_t msg,
- LLVMValueRef wave_id)
-{
- LLVMValueRef args[2];
- args[0] = LLVMConstInt(ctx->i32, msg, false);
- args[1] = wave_id;
- ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
-}
-
-LLVMValueRef
-ac_build_imsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type)
-{
- LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
- dst_type, &arg, 1,
- AC_FUNC_ATTR_READNONE);
-
- /* The HW returns the last bit index from MSB, but NIR/TGSI wants
- * the index from LSB. Invert it by doing "31 - msb". */
- msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
- msb, "");
-
- LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
- LLVMValueRef cond = LLVMBuildOr(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- arg, ctx->i32_0, ""),
- LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- arg, all_ones, ""), "");
-
- return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
-}
-
-LLVMValueRef
-ac_build_umsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type)
-{
- const char *intrin_name;
- LLVMTypeRef type;
- LLVMValueRef highest_bit;
- LLVMValueRef zero;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
- switch (bitsize) {
- case 64:
- intrin_name = "llvm.ctlz.i64";
- type = ctx->i64;
- highest_bit = LLVMConstInt(ctx->i64, 63, false);
- zero = ctx->i64_0;
- break;
- case 32:
- intrin_name = "llvm.ctlz.i32";
- type = ctx->i32;
- highest_bit = LLVMConstInt(ctx->i32, 31, false);
- zero = ctx->i32_0;
- break;
- case 16:
- intrin_name = "llvm.ctlz.i16";
- type = ctx->i16;
- highest_bit = LLVMConstInt(ctx->i16, 15, false);
- zero = ctx->i16_0;
- break;
- case 8:
- intrin_name = "llvm.ctlz.i8";
- type = ctx->i8;
- highest_bit = LLVMConstInt(ctx->i8, 7, false);
- zero = ctx->i8_0;
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
+LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val)
+{
+ unsigned tl_lanes[4], trbl_lanes[4];
+ char name[32], type[8];
+ LLVMValueRef tl, trbl;
+ LLVMTypeRef result_type;
+ LLVMValueRef result;
+
+ result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
+
+ if (result_type == ctx->f16)
+ val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+ else if (result_type == ctx->v2f16)
+ val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
+
+ for (unsigned i = 0; i < 4; ++i) {
+ tl_lanes[i] = i & mask;
+ trbl_lanes[i] = (i & mask) + idx;
+ }
+
+ tl = ac_build_quad_swizzle(ctx, val, tl_lanes[0], tl_lanes[1], tl_lanes[2], tl_lanes[3]);
+ trbl =
+ ac_build_quad_swizzle(ctx, val, trbl_lanes[0], trbl_lanes[1], trbl_lanes[2], trbl_lanes[3]);
+
+ if (result_type == ctx->f16) {
+ tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
+ trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
+ }
+
+ tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
+ trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
+ result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
+
+ ac_build_type_name_for_intr(result_type, type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
+
+ return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
+}
- LLVMValueRef params[2] = {
- arg,
- ctx->i1true,
- };
+void ac_build_sendmsg(struct ac_llvm_context *ctx, uint32_t msg, LLVMValueRef wave_id)
+{
+ LLVMValueRef args[2];
+ args[0] = LLVMConstInt(ctx->i32, msg, false);
+ args[1] = wave_id;
+ ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
+}
+
+LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
+{
+ LLVMValueRef msb =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, AC_FUNC_ATTR_READNONE);
+
+ /* The HW returns the last bit index from MSB, but NIR/TGSI wants
+ * the index from LSB. Invert it by doing "31 - msb". */
+ msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, "");
+
+ LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
+ LLVMValueRef cond =
+ LLVMBuildOr(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, ctx->i32_0, ""),
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, all_ones, ""), "");
+
+ return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
+}
+
+LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
+{
+ const char *intrin_name;
+ LLVMTypeRef type;
+ LLVMValueRef highest_bit;
+ LLVMValueRef zero;
+ unsigned bitsize;
+
+ bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
+ switch (bitsize) {
+ case 64:
+ intrin_name = "llvm.ctlz.i64";
+ type = ctx->i64;
+ highest_bit = LLVMConstInt(ctx->i64, 63, false);
+ zero = ctx->i64_0;
+ break;
+ case 32:
+ intrin_name = "llvm.ctlz.i32";
+ type = ctx->i32;
+ highest_bit = LLVMConstInt(ctx->i32, 31, false);
+ zero = ctx->i32_0;
+ break;
+ case 16:
+ intrin_name = "llvm.ctlz.i16";
+ type = ctx->i16;
+ highest_bit = LLVMConstInt(ctx->i16, 15, false);
+ zero = ctx->i16_0;
+ break;
+ case 8:
+ intrin_name = "llvm.ctlz.i8";
+ type = ctx->i8;
+ highest_bit = LLVMConstInt(ctx->i8, 7, false);
+ zero = ctx->i8_0;
+ break;
+ default:
+ unreachable(!"invalid bitsize");
+ break;
+ }
- LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
- params, 2,
- AC_FUNC_ATTR_READNONE);
+ LLVMValueRef params[2] = {
+ arg,
+ ctx->i1true,
+ };
- /* The HW returns the last bit index from MSB, but TGSI/NIR wants
- * the index from LSB. Invert it by doing "31 - msb". */
- msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
+ LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE);
- if (bitsize == 64) {
- msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
- } else if (bitsize < 32) {
- msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
- }
+ /* The HW returns the last bit index from MSB, but TGSI/NIR wants
+ * the index from LSB. Invert it by doing "highest_bit - msb". */
+ msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
- /* check for zero */
- return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
- LLVMConstInt(ctx->i32, -1, true), msb, "");
+ if (bitsize == 64) {
+ msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
+ } else if (bitsize < 32) {
+ msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
+ }
+
+ /* check for zero */
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
+ LLVMConstInt(ctx->i32, -1, true), msb, "");
}
-LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
+LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
{
- char name[64], type[64];
+ char name[64], type[64];
- ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.minnum.%s", type);
- LLVMValueRef args[2] = {a, b};
- return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
- AC_FUNC_ATTR_READNONE);
+ ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.minnum.%s", type);
+ LLVMValueRef args[2] = {a, b};
+ return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
+LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
{
- char name[64], type[64];
+ char name[64], type[64];
- ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.maxnum.%s", type);
- LLVMValueRef args[2] = {a, b};
- return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
- AC_FUNC_ATTR_READNONE);
+ ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.maxnum.%s", type);
+ LLVMValueRef args[2] = {a, b};
+ return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
}
-LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
}
-LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
+LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
}
-LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
+LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
}
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- LLVMTypeRef t = LLVMTypeOf(value);
- return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
- LLVMConstReal(t, 1.0));
+ LLVMTypeRef t = LLVMTypeOf(value);
+ return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
+ LLVMConstReal(t, 1.0));
}
void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
{
- LLVMValueRef args[9];
+ LLVMValueRef args[9];
- args[0] = LLVMConstInt(ctx->i32, a->target, 0);
- args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
+ args[0] = LLVMConstInt(ctx->i32, a->target, 0);
+ args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
- if (a->compr) {
- args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
- ctx->v2i16, "");
- args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
- ctx->v2i16, "");
- args[4] = LLVMConstInt(ctx->i1, a->done, 0);
- args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
+ if (a->compr) {
+ args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->v2i16, "");
+ args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->v2i16, "");
+ args[4] = LLVMConstInt(ctx->i1, a->done, 0);
+ args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
- ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
- ctx->voidt, args, 6, 0);
- } else {
- args[2] = a->out[0];
- args[3] = a->out[1];
- args[4] = a->out[2];
- args[5] = a->out[3];
- args[6] = LLVMConstInt(ctx->i1, a->done, 0);
- args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
+ ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", ctx->voidt, args, 6, 0);
+ } else {
+ args[2] = a->out[0];
+ args[3] = a->out[1];
+ args[4] = a->out[2];
+ args[5] = a->out[3];
+ args[6] = LLVMConstInt(ctx->i1, a->done, 0);
+ args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
- ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32",
- ctx->voidt, args, 8, 0);
- }
+ ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0);
+ }
}
void ac_build_export_null(struct ac_llvm_context *ctx)
{
- struct ac_export_args args;
+ struct ac_export_args args;
- args.enabled_channels = 0x0; /* enabled channels */
- args.valid_mask = 1; /* whether the EXEC mask is valid */
- args.done = 1; /* DONE bit */
- args.target = V_008DFC_SQ_EXP_NULL;
- args.compr = 0; /* COMPR flag (0 = 32-bit export) */
- args.out[0] = LLVMGetUndef(ctx->f32); /* R */
- args.out[1] = LLVMGetUndef(ctx->f32); /* G */
- args.out[2] = LLVMGetUndef(ctx->f32); /* B */
- args.out[3] = LLVMGetUndef(ctx->f32); /* A */
+ args.enabled_channels = 0x0; /* enabled channels */
+ args.valid_mask = 1; /* whether the EXEC mask is valid */
+ args.done = 1; /* DONE bit */
+ args.target = V_008DFC_SQ_EXP_NULL;
+ args.compr = 0; /* COMPR flag (0 = 32-bit export) */
+ args.out[0] = LLVMGetUndef(ctx->f32); /* R */
+ args.out[1] = LLVMGetUndef(ctx->f32); /* G */
+ args.out[2] = LLVMGetUndef(ctx->f32); /* B */
+ args.out[3] = LLVMGetUndef(ctx->f32); /* A */
- ac_build_export(ctx, &args);
+ ac_build_export(ctx, &args);
}
static unsigned ac_num_coords(enum ac_image_dim dim)
{
- switch (dim) {
- case ac_image_1d:
- return 1;
- case ac_image_2d:
- case ac_image_1darray:
- return 2;
- case ac_image_3d:
- case ac_image_cube:
- case ac_image_2darray:
- case ac_image_2dmsaa:
- return 3;
- case ac_image_2darraymsaa:
- return 4;
- default:
- unreachable("ac_num_coords: bad dim");
- }
+ switch (dim) {
+ case ac_image_1d:
+ return 1;
+ case ac_image_2d:
+ case ac_image_1darray:
+ return 2;
+ case ac_image_3d:
+ case ac_image_cube:
+ case ac_image_2darray:
+ case ac_image_2dmsaa:
+ return 3;
+ case ac_image_2darraymsaa:
+ return 4;
+ default:
+ unreachable("ac_num_coords: bad dim");
+ }
}
static unsigned ac_num_derivs(enum ac_image_dim dim)
{
- switch (dim) {
- case ac_image_1d:
- case ac_image_1darray:
- return 2;
- case ac_image_2d:
- case ac_image_2darray:
- case ac_image_cube:
- return 4;
- case ac_image_3d:
- return 6;
- case ac_image_2dmsaa:
- case ac_image_2darraymsaa:
- default:
- unreachable("derivatives not supported");
- }
+ switch (dim) {
+ case ac_image_1d:
+ case ac_image_1darray:
+ return 2;
+ case ac_image_2d:
+ case ac_image_2darray:
+ case ac_image_cube:
+ return 4;
+ case ac_image_3d:
+ return 6;
+ case ac_image_2dmsaa:
+ case ac_image_2darraymsaa:
+ default:
+ unreachable("derivatives not supported");
+ }
}
static const char *get_atomic_name(enum ac_atomic_op op)
{
- switch (op) {
- case ac_atomic_swap: return "swap";
- case ac_atomic_add: return "add";
- case ac_atomic_sub: return "sub";
- case ac_atomic_smin: return "smin";
- case ac_atomic_umin: return "umin";
- case ac_atomic_smax: return "smax";
- case ac_atomic_umax: return "umax";
- case ac_atomic_and: return "and";
- case ac_atomic_or: return "or";
- case ac_atomic_xor: return "xor";
- case ac_atomic_inc_wrap: return "inc";
- case ac_atomic_dec_wrap: return "dec";
- }
- unreachable("bad atomic op");
-}
-
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
- struct ac_image_args *a)
-{
- const char *overload[3] = { "", "", "" };
- unsigned num_overloads = 0;
- LLVMValueRef args[18];
- unsigned num_args = 0;
- enum ac_image_dim dim = a->dim;
-
- assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
- !a->level_zero);
- assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
- a->opcode != ac_image_store_mip) ||
- a->lod);
- assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
- (!a->compare && !a->offset));
- assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_get_lod) ||
- !a->bias);
- assert((a->bias ? 1 : 0) +
- (a->lod ? 1 : 0) +
- (a->level_zero ? 1 : 0) +
- (a->derivs[0] ? 1 : 0) <= 1);
- assert((a->min_lod ? 1 : 0) +
- (a->lod ? 1 : 0) +
- (a->level_zero ? 1 : 0) <= 1);
- assert(!a->d16 || (ctx->chip_class >= GFX8 &&
- a->opcode != ac_image_atomic &&
- a->opcode != ac_image_atomic_cmpswap &&
- a->opcode != ac_image_get_lod &&
- a->opcode != ac_image_get_resinfo));
-
- if (a->opcode == ac_image_get_lod) {
- switch (dim) {
- case ac_image_1darray:
- dim = ac_image_1d;
- break;
- case ac_image_2darray:
- case ac_image_cube:
- dim = ac_image_2d;
- break;
- default:
- break;
- }
- }
-
- bool sample = a->opcode == ac_image_sample ||
- a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_get_lod;
- bool atomic = a->opcode == ac_image_atomic ||
- a->opcode == ac_image_atomic_cmpswap;
- bool load = a->opcode == ac_image_sample ||
- a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_load ||
- a->opcode == ac_image_load_mip;
- LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
-
- if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
- args[num_args++] = a->data[0];
- if (a->opcode == ac_image_atomic_cmpswap)
- args[num_args++] = a->data[1];
- }
-
- if (!atomic)
- args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
-
- if (a->offset)
- args[num_args++] = ac_to_integer(ctx, a->offset);
- if (a->bias) {
- args[num_args++] = ac_to_float(ctx, a->bias);
- overload[num_overloads++] = ".f32";
- }
- if (a->compare)
- args[num_args++] = ac_to_float(ctx, a->compare);
- if (a->derivs[0]) {
- unsigned count = ac_num_derivs(dim);
- for (unsigned i = 0; i < count; ++i)
- args[num_args++] = ac_to_float(ctx, a->derivs[i]);
- overload[num_overloads++] = ".f32";
- }
- unsigned num_coords =
- a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
- for (unsigned i = 0; i < num_coords; ++i)
- args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
- if (a->lod)
- args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
- if (a->min_lod)
- args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
-
- overload[num_overloads++] = sample ? ".f32" : ".i32";
-
- args[num_args++] = a->resource;
- if (sample) {
- args[num_args++] = a->sampler;
- args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
- }
-
- args[num_args++] = ctx->i32_0; /* texfailctrl */
- args[num_args++] = LLVMConstInt(ctx->i32,
- load ? get_load_cache_policy(ctx, a->cache_policy) :
- a->cache_policy, false);
-
- const char *name;
- const char *atomic_subop = "";
- switch (a->opcode) {
- case ac_image_sample: name = "sample"; break;
- case ac_image_gather4: name = "gather4"; break;
- case ac_image_load: name = "load"; break;
- case ac_image_load_mip: name = "load.mip"; break;
- case ac_image_store: name = "store"; break;
- case ac_image_store_mip: name = "store.mip"; break;
- case ac_image_atomic:
- name = "atomic.";
- atomic_subop = get_atomic_name(a->atomic);
- break;
- case ac_image_atomic_cmpswap:
- name = "atomic.";
- atomic_subop = "cmpswap";
- break;
- case ac_image_get_lod: name = "getlod"; break;
- case ac_image_get_resinfo: name = "getresinfo"; break;
- default: unreachable("invalid image opcode");
- }
-
- const char *dimname;
- switch (dim) {
- case ac_image_1d: dimname = "1d"; break;
- case ac_image_2d: dimname = "2d"; break;
- case ac_image_3d: dimname = "3d"; break;
- case ac_image_cube: dimname = "cube"; break;
- case ac_image_1darray: dimname = "1darray"; break;
- case ac_image_2darray: dimname = "2darray"; break;
- case ac_image_2dmsaa: dimname = "2dmsaa"; break;
- case ac_image_2darraymsaa: dimname = "2darraymsaa"; break;
- default: unreachable("invalid dim");
- }
-
- bool lod_suffix =
- a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
- char intr_name[96];
- snprintf(intr_name, sizeof(intr_name),
- "llvm.amdgcn.image.%s%s" /* base name */
- "%s%s%s%s" /* sample/gather modifiers */
- ".%s.%s%s%s%s", /* dimension and type overloads */
- name, atomic_subop,
- a->compare ? ".c" : "",
- a->bias ? ".b" :
- lod_suffix ? ".l" :
- a->derivs[0] ? ".d" :
- a->level_zero ? ".lz" : "",
- a->min_lod ? ".cl" : "",
- a->offset ? ".o" : "",
- dimname,
- atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"),
- overload[0], overload[1], overload[2]);
-
- LLVMTypeRef retty;
- if (atomic)
- retty = ctx->i32;
- else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
- retty = ctx->voidt;
- else
- retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
-
- LLVMValueRef result =
- ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
- a->attributes);
- if (!sample && !atomic && retty != ctx->voidt)
- result = ac_to_integer(ctx, result);
-
- return result;
-}
-
-LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc)
-{
- LLVMValueRef samples;
-
- /* Read the samples from the descriptor directly.
- * Hardware doesn't have any instruction for this.
- */
- samples = LLVMBuildExtractElement(ctx->builder, rsrc,
- LLVMConstInt(ctx->i32, 3, 0), "");
- samples = LLVMBuildLShr(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 16, 0), "");
- samples = LLVMBuildAnd(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 0xf, 0), "");
- samples = LLVMBuildShl(ctx->builder, ctx->i32_1,
- samples, "");
- return samples;
-}
-
-LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2])
-{
- return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16,
- args, 2, AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2])
-{
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-}
-
-LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2])
-{
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+   /* Map the atomic op to the name fragment used in the image intrinsic name.
+    * The switch has no default so that the compiler can still warn about
+    * enum values that are not handled.
+    */
+   const char *name = NULL;
+
+   switch (op) {
+   case ac_atomic_swap:
+      name = "swap";
+      break;
+   case ac_atomic_add:
+      name = "add";
+      break;
+   case ac_atomic_sub:
+      name = "sub";
+      break;
+   case ac_atomic_smin:
+      name = "smin";
+      break;
+   case ac_atomic_umin:
+      name = "umin";
+      break;
+   case ac_atomic_smax:
+      name = "smax";
+      break;
+   case ac_atomic_umax:
+      name = "umax";
+      break;
+   case ac_atomic_and:
+      name = "and";
+      break;
+   case ac_atomic_or:
+      name = "or";
+      break;
+   case ac_atomic_xor:
+      name = "xor";
+      break;
+   case ac_atomic_inc_wrap:
+      name = "inc";
+      break;
+   case ac_atomic_dec_wrap:
+      name = "dec";
+      break;
+   }
+
+   if (!name)
+      unreachable("bad atomic op");
+   return name;
+}
+/* Build a call to the llvm.amdgcn.image.* intrinsic described by "a".
+ *
+ * Both the intrinsic name and the operand list are derived from the opcode,
+ * the optional modifiers (offset, bias, compare, derivs, lod, min_lod,
+ * level_zero) and the image dimension.
+ *
+ * Returns the value produced by ac_build_intrinsic(); for opcodes that are
+ * neither sample-like nor atomic and that do return a value, the result is
+ * passed through ac_to_integer() first.
+ */
+LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a)
+{
+ const char *overload[3] = {"", "", ""};
+ unsigned num_overloads = 0;
+ LLVMValueRef args[18];
+ unsigned num_args = 0;
+ enum ac_image_dim dim = a->dim;
+ /* Sanity checks on the combination of opcode and modifiers. */
+ assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || !a->level_zero);
+ assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
+ a->opcode != ac_image_store_mip) ||
+ a->lod);
+ assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+ (!a->compare && !a->offset));
+ assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_get_lod) ||
+ !a->bias);
+ assert((a->bias ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <=
+ 1);
+ assert((a->min_lod ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) <= 1);
+ assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic &&
+ a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod &&
+ a->opcode != ac_image_get_resinfo));
+ /* get_lod uses the non-array dimension: 1darray -> 1d, 2darray/cube -> 2d. */
+ if (a->opcode == ac_image_get_lod) {
+ switch (dim) {
+ case ac_image_1darray:
+ dim = ac_image_1d;
+ break;
+ case ac_image_2darray:
+ case ac_image_cube:
+ dim = ac_image_2d;
+ break;
+ default:
+ break;
+ }
+ }
+ /* "sample" opcodes take f32 coordinates plus a sampler; all others take i32 coordinates. */
+ bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_get_lod;
+ bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap;
+ bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_load || a->opcode == ac_image_load_mip;
+ LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
+ /* Operands are appended in this order: data, dmask, offset, bias, compare,
+ * derivs, coords, lod, min_lod, resource, sampler + unorm, texfailctrl,
+ * cache policy. Only the ones that apply to this call are emitted.
+ */
+ if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
+ args[num_args++] = a->data[0];
+ if (a->opcode == ac_image_atomic_cmpswap)
+ args[num_args++] = a->data[1];
+ }
+
+ if (!atomic)
+ args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
+
+ if (a->offset)
+ args[num_args++] = ac_to_integer(ctx, a->offset);
+ if (a->bias) {
+ args[num_args++] = ac_to_float(ctx, a->bias);
+ overload[num_overloads++] = ".f32";
+ }
+ if (a->compare)
+ args[num_args++] = ac_to_float(ctx, a->compare);
+ if (a->derivs[0]) {
+ unsigned count = ac_num_derivs(dim);
+ for (unsigned i = 0; i < count; ++i)
+ args[num_args++] = ac_to_float(ctx, a->derivs[i]);
+ overload[num_overloads++] = ".f32";
+ }
+ unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
+ for (unsigned i = 0; i < num_coords; ++i)
+ args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
+ if (a->lod)
+ args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
+ if (a->min_lod)
+ args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
+ /* The coordinate type is appended after the bias/derivs overloads, so it is always last. */
+ overload[num_overloads++] = sample ? ".f32" : ".i32";
+
+ args[num_args++] = a->resource;
+ if (sample) {
+ args[num_args++] = a->sampler;
+ args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
+ }
+
+ args[num_args++] = ctx->i32_0; /* texfailctrl */
+ args[num_args++] = LLVMConstInt(
+ ctx->i32, load ? get_load_cache_policy(ctx, a->cache_policy) : a->cache_policy, false);
+ /* Opcode part of the intrinsic name; atomics additionally get a sub-operation name. */
+ const char *name;
+ const char *atomic_subop = "";
+ switch (a->opcode) {
+ case ac_image_sample:
+ name = "sample";
+ break;
+ case ac_image_gather4:
+ name = "gather4";
+ break;
+ case ac_image_load:
+ name = "load";
+ break;
+ case ac_image_load_mip:
+ name = "load.mip";
+ break;
+ case ac_image_store:
+ name = "store";
+ break;
+ case ac_image_store_mip:
+ name = "store.mip";
+ break;
+ case ac_image_atomic:
+ name = "atomic.";
+ atomic_subop = get_atomic_name(a->atomic);
+ break;
+ case ac_image_atomic_cmpswap:
+ name = "atomic.";
+ atomic_subop = "cmpswap";
+ break;
+ case ac_image_get_lod:
+ name = "getlod";
+ break;
+ case ac_image_get_resinfo:
+ name = "getresinfo";
+ break;
+ default:
+ unreachable("invalid image opcode");
+ }
+ /* Dimension part of the intrinsic name (after the get_lod adjustment above). */
+ const char *dimname;
+ switch (dim) {
+ case ac_image_1d:
+ dimname = "1d";
+ break;
+ case ac_image_2d:
+ dimname = "2d";
+ break;
+ case ac_image_3d:
+ dimname = "3d";
+ break;
+ case ac_image_cube:
+ dimname = "cube";
+ break;
+ case ac_image_1darray:
+ dimname = "1darray";
+ break;
+ case ac_image_2darray:
+ dimname = "2darray";
+ break;
+ case ac_image_2dmsaa:
+ dimname = "2dmsaa";
+ break;
+ case ac_image_2darraymsaa:
+ dimname = "2darraymsaa";
+ break;
+ default:
+ unreachable("invalid dim");
+ }
+ /* ".l" is only added for sample/gather4; other opcodes pass lod without a name suffix. */
+ bool lod_suffix = a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+ char intr_name[96];
+ snprintf(intr_name, sizeof(intr_name),
+ "llvm.amdgcn.image.%s%s" /* base name */
+ "%s%s%s%s" /* sample/gather modifiers */
+ ".%s.%s%s%s%s", /* dimension and type overloads */
+ name, atomic_subop, a->compare ? ".c" : "",
+ a->bias ? ".b" : lod_suffix ? ".l" : a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "",
+ a->min_lod ? ".cl" : "", a->offset ? ".o" : "", dimname,
+ atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"), overload[0], overload[1], overload[2]);
+ /* Atomics return i32, stores return nothing, everything else returns a 4-component vector. */
+ LLVMTypeRef retty;
+ if (atomic)
+ retty = ctx->i32;
+ else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
+ retty = ctx->voidt;
+ else
+ retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
+
+ LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, retty, args, num_args, a->attributes);
+ if (!sample && !atomic && retty != ctx->voidt)
+ result = ac_to_integer(ctx, result);
+
+ return result;
+}
+
+/* Compute the sample count of an image from its resource descriptor.
+ *
+ * Element 3 of "rsrc" is shifted right by 16 and masked with 0xf; the result
+ * is used as a shift amount, i.e. the function returns 1 << field.
+ */
+LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc)
+{
+   /* Read the samples from the descriptor directly.
+    * Hardware doesn't have any instruction for this.
+    */
+   LLVMValueRef dword3 =
+      LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), "");
+   LLVMValueRef shifted = LLVMBuildLShr(ctx->builder, dword3, LLVMConstInt(ctx->i32, 16, 0), "");
+   LLVMValueRef log2_samples =
+      LLVMBuildAnd(ctx->builder, shifted, LLVMConstInt(ctx->i32, 0xf, 0), "");
+
+   return LLVMBuildShl(ctx->builder, ctx->i32_1, log2_samples, "");
+}
+
+/* Emit llvm.amdgcn.cvt.pkrtz on the two values in "args"; the result has type v2f16. */
+LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
+{
+   LLVMValueRef packed =
+      ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2, AC_FUNC_ATTR_READNONE);
+
+   return packed;
+}
+
+/* Emit llvm.amdgcn.cvt.pknorm.i16 on the two values in "args" and return the
+ * v2i16 result bitcast to a single i32.
+ */
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
+{
+   LLVMTypeRef packed_type = ctx->v2i16;
+   LLVMValueRef packed = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", packed_type, args, 2,
+                                            AC_FUNC_ATTR_READNONE);
+
+   return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
+}
+
+/* Emit llvm.amdgcn.cvt.pknorm.u16 on the two values in "args" and return the
+ * v2i16 result bitcast to a single i32.
+ */
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
+{
+   LLVMTypeRef packed_type = ctx->v2i16;
+   LLVMValueRef packed = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", packed_type, args, 2,
+                                            AC_FUNC_ATTR_READNONE);
+
+   return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
}
/* The 8-bit and 10-bit clamping is for HW workarounds. */
-LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi)
-{
- assert(bits == 8 || bits == 10 || bits == 16);
-
- LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
- bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
- LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
- bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
- LLVMValueRef max_alpha =
- bits != 10 ? max_rgb : ctx->i32_1;
- LLVMValueRef min_alpha =
- bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
-
- /* Clamp. */
- if (bits != 16) {
- for (int i = 0; i < 2; i++) {
- bool alpha = hi && i == 1;
- args[i] = ac_build_imin(ctx, args[i],
- alpha ? max_alpha : max_rgb);
- args[i] = ac_build_imax(ctx, args[i],
- alpha ? min_alpha : min_rgb);
- }
- }
-
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
+                                 bool hi)
+{
+   assert(bits == 8 || bits == 10 || bits == 16);
+
+   /* Signed range of the RGB channels for the requested bit width. */
+   int rgb_hi, rgb_lo;
+   if (bits == 8) {
+      rgb_hi = 127;
+      rgb_lo = -128;
+   } else if (bits == 10) {
+      rgb_hi = 511;
+      rgb_lo = -512;
+   } else {
+      rgb_hi = 32767;
+      rgb_lo = -32768;
+   }
+   LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, rgb_hi, 0);
+   LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, rgb_lo, 0);
+
+   /* With 10 bits, alpha is clamped to [-2, 1] instead of the RGB range. */
+   LLVMValueRef max_alpha, min_alpha;
+   if (bits != 10) {
+      max_alpha = max_rgb;
+      min_alpha = min_rgb;
+   } else {
+      max_alpha = ctx->i32_1;
+      min_alpha = LLVMConstInt(ctx->i32, -2, 0);
+   }
+
+   /* Clamp in place (args[] is overwritten); 16-bit inputs are not clamped.
+    * Only the second value of the "hi" pair is treated as alpha.
+    */
+   if (bits != 16) {
+      for (int chan = 0; chan < 2; chan++) {
+         bool is_alpha = hi && chan == 1;
+         LLVMValueRef upper = is_alpha ? max_alpha : max_rgb;
+         LLVMValueRef lower = is_alpha ? min_alpha : min_rgb;
+
+         args[chan] = ac_build_imin(ctx, args[chan], upper);
+         args[chan] = ac_build_imax(ctx, args[chan], lower);
+      }
+   }
+
+   LLVMValueRef packed =
+      ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE);
+
+   return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
}
/* The 8-bit and 10-bit clamping is for HW workarounds. */
-LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi)
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
+                                 bool hi)
{
- assert(bits == 8 || bits == 10 || bits == 16);
+   assert(bits == 8 || bits == 10 || bits == 16);
- LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
- bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
- LLVMValueRef max_alpha =
- bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
+
+   /* Largest unsigned value of the RGB channels for the requested bit width. */
+   unsigned rgb_limit;
+   if (bits == 8)
+      rgb_limit = 255;
+   else if (bits == 10)
+      rgb_limit = 1023;
+   else
+      rgb_limit = 65535;
+   LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, rgb_limit, 0);
+
+   /* With 10 bits, alpha is limited to 3 instead of the RGB maximum. */
+   LLVMValueRef max_alpha = max_rgb;
+   if (bits == 10)
+      max_alpha = LLVMConstInt(ctx->i32, 3, 0);
- /* Clamp. */
- if (bits != 16) {
- for (int i = 0; i < 2; i++) {
- bool alpha = hi && i == 1;
- args[i] = ac_build_umin(ctx, args[i],
- alpha ? max_alpha : max_rgb);
- }
- }
+
+   /* Clamp in place (args[] is overwritten); 16-bit inputs are not clamped.
+    * Only the second value of the "hi" pair is treated as alpha.
+    */
+   if (bits != 16) {
+      for (int chan = 0; chan < 2; chan++) {
+         bool is_alpha = hi && chan == 1;
+         LLVMValueRef upper = is_alpha ? max_alpha : max_rgb;
+
+         args[chan] = ac_build_umin(ctx, args[chan], upper);
+      }
+   }
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+
+   LLVMValueRef packed =
+      ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE);
+
+   return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
}
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
- return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
- &i1, 1, AC_FUNC_ATTR_READNONE);
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, AC_FUNC_ATTR_READNONE);
}
void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
- ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
- &i1, 1, 0);
+ ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0);
}
-LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
- LLVMValueRef offset, LLVMValueRef width,
- bool is_signed)
+/* Emit a 32-bit bitfield extract of "input" with the given offset and width:
+ * llvm.amdgcn.sbfe.i32 when is_signed is set, llvm.amdgcn.ubfe.i32 otherwise.
+ */
+LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset,
+                          LLVMValueRef width, bool is_signed)
{
- LLVMValueRef args[] = {
- input,
- offset,
- width,
- };
-
- return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" :
- "llvm.amdgcn.ubfe.i32",
- ctx->i32, args, 3, AC_FUNC_ATTR_READNONE);
+   const char *intr_name;
+   LLVMValueRef operands[3] = {input, offset, width};
+
+   if (is_signed)
+      intr_name = "llvm.amdgcn.sbfe.i32";
+   else
+      intr_name = "llvm.amdgcn.ubfe.i32";
+
+   return ac_build_intrinsic(ctx, intr_name, ctx->i32, operands, 3, AC_FUNC_ATTR_READNONE);
}
-LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2)
+/* Integer multiply-add: build s0 * s1 + s2 as a Mul followed by an Add. */
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
+                           LLVMValueRef s2)
{
- return LLVMBuildAdd(ctx->builder,
- LLVMBuildMul(ctx->builder, s0, s1, ""), s2, "");
+   LLVMValueRef product = LLVMBuildMul(ctx->builder, s0, s1, "");
+
+   return LLVMBuildAdd(ctx->builder, product, s2, "");
}
-LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2)
+/* Float multiply-add: s0 * s1 + s2, emitted as llvm.fma.f32 on GFX10 and
+ * newer and as separate FMul + FAdd instructions on older chips.
+ */
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
+                           LLVMValueRef s2)
{
- /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
- if (ctx->chip_class >= GFX10) {
- return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32,
- (LLVMValueRef []) {s0, s1, s2}, 3,
- AC_FUNC_ATTR_READNONE);
- }
+   /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
+   if (ctx->chip_class >= GFX10) {
+      LLVMValueRef operands[3] = {s0, s1, s2};
+
+      return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, operands, 3,
+                                AC_FUNC_ATTR_READNONE);
+   }
- return LLVMBuildFAdd(ctx->builder,
- LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
+
+   LLVMValueRef product = LLVMBuildFMul(ctx->builder, s0, s1, "");
+
+   return LLVMBuildFAdd(ctx->builder, product, s2, "");
}
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
{
- if (!wait_flags)
- return;
-
- unsigned lgkmcnt = 63;
- unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
- unsigned vscnt = 63;
-
- if (wait_flags & AC_WAIT_LGKM)
- lgkmcnt = 0;
- if (wait_flags & AC_WAIT_VLOAD)
- vmcnt = 0;
-
- if (wait_flags & AC_WAIT_VSTORE) {
- if (ctx->chip_class >= GFX10)
- vscnt = 0;
- else
- vmcnt = 0;
- }
-
- /* There is no intrinsic for vscnt(0), so use a fence. */
- if ((wait_flags & AC_WAIT_LGKM &&
- wait_flags & AC_WAIT_VLOAD &&
- wait_flags & AC_WAIT_VSTORE) ||
- vscnt == 0) {
- LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
- return;
- }
-
- unsigned simm16 = (lgkmcnt << 8) |
- (7 << 4) | /* expcnt */
- (vmcnt & 0xf) |
- ((vmcnt >> 4) << 14);
-
- LLVMValueRef args[1] = {
- LLVMConstInt(ctx->i32, simm16, false),
- };
- ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
- ctx->voidt, args, 1, 0);
-}
-
-LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.fract.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.fract.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.amdgcn.fract.f64";
- type = ctx->f64;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
+   /* Nothing requested, nothing to emit. */
+   if (wait_flags == 0)
+      return;
+
+   const bool wait_lgkm = (wait_flags & AC_WAIT_LGKM) != 0;
+   const bool wait_vload = (wait_flags & AC_WAIT_VLOAD) != 0;
+   const bool wait_vstore = (wait_flags & AC_WAIT_VSTORE) != 0;
+
+   /* Every counter keeps its maximum value unless the matching flag
+    * requests a wait, in which case it is set to 0.
+    */
+   unsigned lgkmcnt = wait_lgkm ? 0 : 63;
+   unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
+   unsigned vscnt = 63;
+
+   if (wait_vload)
+      vmcnt = 0;
+
+   /* Stores are waited for through vscnt on GFX10+ and through vmcnt before. */
+   if (wait_vstore) {
+      if (ctx->chip_class >= GFX10)
+         vscnt = 0;
+      else
+         vmcnt = 0;
+   }
+
+   /* There is no intrinsic for vscnt(0), so use a fence. */
+   if ((wait_lgkm && wait_vload && wait_vstore) || vscnt == 0) {
+      LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
+      return;
+   }
+
+   /* vmcnt is split: its low 4 bits go to [3:0], the remaining bits to [15:14]. */
+   unsigned simm16 = (vmcnt & 0xf) | (7 << 4) /* expcnt */ | (lgkmcnt << 8) | ((vmcnt >> 4) << 14);
+   LLVMValueRef operands[1] = {LLVMConstInt(ctx->i32, simm16, false)};
+
+   ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, operands, 1, 0);
+}
+
+/* Build a call to llvm.amdgcn.fract.{f16,f32,f64} on src0.
+ *
+ * "bitsize" selects the variant: 16 maps to f16, 32 to f32, and any other
+ * value falls through to f64.
+ */
+LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
+{
+   LLVMTypeRef type;
+   const char *intr; /* only ever points at string literals */
+
+   if (bitsize == 16) {
+      intr = "llvm.amdgcn.fract.f16";
+      type = ctx->f16;
+   } else if (bitsize == 32) {
+      intr = "llvm.amdgcn.fract.f32";
+      type = ctx->f32;
+   } else {
+      intr = "llvm.amdgcn.fract.f64";
+      type = ctx->f64;
+   }
+
+   LLVMValueRef params[] = {
+      src0,
+   };
+   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
}
LLVMValueRef ac_const_uint_vec(struct ac_llvm_context *ctx, LLVMTypeRef type, uint64_t value)
{
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
- LLVMValueRef scalar = LLVMConstInt(LLVMGetElementType(type), value, 0);
- unsigned vec_size = LLVMGetVectorSize(type);
- LLVMValueRef *scalars = alloca(vec_size * sizeof(LLVMValueRef*));
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+ LLVMValueRef scalar = LLVMConstInt(LLVMGetElementType(type), value, 0);
+ unsigned vec_size = LLVMGetVectorSize(type);
+ LLVMValueRef *scalars = alloca(vec_size * sizeof(LLVMValueRef *));
- for (unsigned i = 0; i < vec_size; i++)
- scalars[i] = scalar;
- return LLVMConstVector(scalars, vec_size);
- }
- return LLVMConstInt(type, value, 0);
+ for (unsigned i = 0; i < vec_size; i++)
+ scalars[i] = scalar;
+ return LLVMConstVector(scalars, vec_size);
+ }
+ return LLVMConstInt(type, value, 0);
}
LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- LLVMTypeRef type = LLVMTypeOf(src0);
- LLVMValueRef val;
+ LLVMTypeRef type = LLVMTypeOf(src0);
+ LLVMValueRef val;
- /* v_med3 is selected only when max is first. (LLVM bug?) */
- val = ac_build_imax(ctx, src0, ac_const_uint_vec(ctx, type, -1));
- return ac_build_imin(ctx, val, ac_const_uint_vec(ctx, type, 1));
+ /* v_med3 is selected only when max is first. (LLVM bug?) */
+ val = ac_build_imax(ctx, src0, ac_const_uint_vec(ctx, type, -1));
+ return ac_build_imin(ctx, val, ac_const_uint_vec(ctx, type, 1));
}
static LLVMValueRef ac_eliminate_negative_zero(struct ac_llvm_context *ctx, LLVMValueRef val)
{
- ac_enable_signed_zeros(ctx);
- /* (val + 0) converts negative zero to positive zero. */
- val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), "");
- ac_disable_signed_zeros(ctx);
- return val;
+ ac_enable_signed_zeros(ctx);
+ /* (val + 0) converts negative zero to positive zero. */
+ val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), "");
+ ac_disable_signed_zeros(ctx);
+ return val;
}
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src)
{
- LLVMTypeRef type = LLVMTypeOf(src);
- LLVMValueRef pos, neg, dw[2], val;
- unsigned bitsize = ac_get_elem_bits(ctx, type);
-
- /* The standard version leads to this:
- * v_cmp_ngt_f32_e64 s[0:1], s4, 0 ; D40B0000 00010004
- * v_cndmask_b32_e64 v4, 1.0, s4, s[0:1] ; D5010004 000008F2
- * v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880
- * v_cndmask_b32_e32 v4, -1.0, v4, vcc ; 020808F3
- *
- * The isign version:
- * v_add_f32_e64 v4, s4, 0 ; D5030004 00010004
- * v_med3_i32 v4, v4, -1, 1 ; D5580004 02058304
- * v_cvt_f32_i32_e32 v4, v4 ; 7E080B04
- *
- * (src0 + 0) converts negative zero to positive zero.
- * After that, int(fsign(x)) == isign(floatBitsToInt(x)).
- *
- * For FP64, use the standard version, which doesn't suffer from the huge DP rate
- * reduction. (FP64 comparisons are as fast as int64 comparisons)
- */
- if (bitsize == 16 || bitsize == 32) {
- val = ac_to_integer(ctx, ac_eliminate_negative_zero(ctx, src));
- val = ac_build_isign(ctx, val);
- return LLVMBuildSIToFP(ctx->builder, val, type, "");
- }
-
- assert(bitsize == 64);
- pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, "");
- neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, "");
- dw[0] = ctx->i32_0;
- dw[1] = LLVMBuildSelect(ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0),
- LLVMBuildSelect(ctx->builder, neg,
- LLVMConstInt(ctx->i32, 0xBFF00000, 0),
- ctx->i32_0, ""), "");
- return LLVMBuildBitCast(ctx->builder, ac_build_gather_values(ctx, dw, 2), ctx->f64, "");
+   LLVMTypeRef type = LLVMTypeOf(src);
+   unsigned bitsize = ac_get_elem_bits(ctx, type);
+
+   /* The standard version leads to this:
+    *   v_cmp_ngt_f32_e64 s[0:1], s4, 0 ; D40B0000 00010004
+    *   v_cndmask_b32_e64 v4, 1.0, s4, s[0:1] ; D5010004 000008F2
+    *   v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880
+    *   v_cndmask_b32_e32 v4, -1.0, v4, vcc ; 020808F3
+    *
+    * The isign version:
+    *   v_add_f32_e64 v4, s4, 0 ; D5030004 00010004
+    *   v_med3_i32 v4, v4, -1, 1 ; D5580004 02058304
+    *   v_cvt_f32_i32_e32 v4, v4 ; 7E080B04
+    *
+    * (src0 + 0) converts negative zero to positive zero.
+    * After that, int(fsign(x)) == isign(floatBitsToInt(x)).
+    *
+    * For FP64, use the standard version, which doesn't suffer from the huge DP rate
+    * reduction. (FP64 comparisons are as fast as int64 comparisons)
+    */
+   if (bitsize == 16 || bitsize == 32) {
+      LLVMValueRef no_neg_zero = ac_eliminate_negative_zero(ctx, src);
+      LLVMValueRef sign = ac_build_isign(ctx, ac_to_integer(ctx, no_neg_zero));
+
+      return LLVMBuildSIToFP(ctx->builder, sign, type, "");
+   }
+
+   assert(bitsize == 64);
+
+   /* Assemble the f64 result from two dwords: dw[0] is always 0, dw[1] is
+    * 0x3FF00000, 0xBFF00000 or 0 depending on whether src is > 0, < 0 or
+    * neither.
+    */
+   LLVMValueRef is_pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, "");
+   LLVMValueRef is_neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, "");
+   LLVMValueRef neg_or_zero = LLVMBuildSelect(
+      ctx->builder, is_neg, LLVMConstInt(ctx->i32, 0xBFF00000, 0), ctx->i32_0, "");
+   LLVMValueRef dw[2] = {
+      ctx->i32_0,
+      LLVMBuildSelect(ctx->builder, is_pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0), neg_or_zero,
+                      ""),
+   };
+
+   return LLVMBuildBitCast(ctx->builder, ac_build_gather_values(ctx, dw, 2), ctx->f64, "");
}
LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- LLVMValueRef result;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-
- switch (bitsize) {
- case 128:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
- break;
- case 64:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
- break;
- case 32:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- case 16:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- case 8:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
-
- return result;
-}
-
-LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
-{
- LLVMValueRef result;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-
- switch (bitsize) {
- case 64:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
- break;
- case 32:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- case 16:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- case 8:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
-
- return result;
-}
-
-#define AC_EXP_TARGET 0
+   /* Count the set bits of src0 with llvm.ctpop.* for its element size. The
+    * count is always returned as i32: wider results are truncated, narrower
+    * ones are zero-extended.
+    */
+   LLVMValueRef result;
+   unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+
+   switch (bitsize) {
+   case 128:
+      result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+      result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+      break;
+   case 64:
+      result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+
+      result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+      break;
+   case 32:
+      result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+      break;
+   case 16:
+      result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+
+      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+      break;
+   case 8:
+      result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+
+      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+      break;
+   default:
+      /* unreachable() expects the message string; "!" would turn it into the integer 0. */
+      unreachable("invalid bitsize");
+   }
+
+   return result;
+}
+
+/* Reverse the bits of src0 with llvm.bitreverse.* for its element size.
+ *
+ * The result is always returned as i32: the 64-bit result is truncated and
+ * the 8/16-bit results are zero-extended.
+ */
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0)
+{
+   LLVMValueRef result;
+   unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+
+   switch (bitsize) {
+   case 64:
+      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+
+      result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+      break;
+   case 32:
+      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+      break;
+   case 16:
+      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+
+      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+      break;
+   case 8:
+      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
+                                  AC_FUNC_ATTR_READNONE);
+
+      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+      break;
+   default:
+      /* unreachable() expects the message string; "!" would turn it into the integer 0. */
+      unreachable("invalid bitsize");
+   }
+
+   return result;
+}
+
+#define AC_EXP_TARGET 0
#define AC_EXP_ENABLED_CHANNELS 1
-#define AC_EXP_OUT0 2
+#define AC_EXP_OUT0 2
-enum ac_ir_type {
- AC_IR_UNDEF,
- AC_IR_CONST,
- AC_IR_VALUE,
+enum ac_ir_type
+{
+ AC_IR_UNDEF,
+ AC_IR_CONST,
+ AC_IR_VALUE,
};
-struct ac_vs_exp_chan
-{
- LLVMValueRef value;
- float const_float;
- enum ac_ir_type type;
+struct ac_vs_exp_chan {
+ LLVMValueRef value;
+ float const_float;
+ enum ac_ir_type type;
};
struct ac_vs_exp_inst {
- unsigned offset;
- LLVMValueRef inst;
- struct ac_vs_exp_chan chan[4];
+ unsigned offset;
+ LLVMValueRef inst;
+ struct ac_vs_exp_chan chan[4];
};
struct ac_vs_exports {
- unsigned num;
- struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
+ unsigned num;
+ struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
};
/* Return true if the PARAM export has been eliminated. */
-static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- struct ac_vs_exp_inst *exp)
-{
- unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
- bool is_zero[4] = {}, is_one[4] = {};
-
- for (i = 0; i < 4; i++) {
- /* It's a constant expression. Undef outputs are eliminated too. */
- if (exp->chan[i].type == AC_IR_UNDEF) {
- is_zero[i] = true;
- is_one[i] = true;
- } else if (exp->chan[i].type == AC_IR_CONST) {
- if (exp->chan[i].const_float == 0)
- is_zero[i] = true;
- else if (exp->chan[i].const_float == 1)
- is_one[i] = true;
- else
- return false; /* other constant */
- } else
- return false;
- }
-
- /* Only certain combinations of 0 and 1 can be eliminated. */
- if (is_zero[0] && is_zero[1] && is_zero[2])
- default_val = is_zero[3] ? 0 : 1;
- else if (is_one[0] && is_one[1] && is_one[2])
- default_val = is_zero[3] ? 2 : 3;
- else
- return false;
-
- /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
- LLVMInstructionEraseFromParent(exp->inst);
-
- /* Change OFFSET to DEFAULT_VAL. */
- for (i = 0; i < num_outputs; i++) {
- if (vs_output_param_offset[i] == exp->offset) {
- vs_output_param_offset[i] =
- AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
- break;
- }
- }
- return true;
+static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, uint32_t num_outputs,
+ struct ac_vs_exp_inst *exp)
+{
+ unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+ bool is_zero[4] = {}, is_one[4] = {};
+
+ for (i = 0; i < 4; i++) {
+ /* It's a constant expression. Undef outputs are eliminated too. */
+ if (exp->chan[i].type == AC_IR_UNDEF) {
+ is_zero[i] = true;
+ is_one[i] = true;
+ } else if (exp->chan[i].type == AC_IR_CONST) {
+ if (exp->chan[i].const_float == 0)
+ is_zero[i] = true;
+ else if (exp->chan[i].const_float == 1)
+ is_one[i] = true;
+ else
+ return false; /* other constant */
+ } else
+ return false;
+ }
+
+ /* Only certain combinations of 0 and 1 can be eliminated. */
+ if (is_zero[0] && is_zero[1] && is_zero[2])
+ default_val = is_zero[3] ? 0 : 1;
+ else if (is_one[0] && is_one[1] && is_one[2])
+ default_val = is_zero[3] ? 2 : 3;
+ else
+ return false;
+
+ /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+ LLVMInstructionEraseFromParent(exp->inst);
+
+ /* Change OFFSET to DEFAULT_VAL. */
+ for (i = 0; i < num_outputs; i++) {
+ if (vs_output_param_offset[i] == exp->offset) {
+ vs_output_param_offset[i] = AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+ break;
+ }
+ }
+ return true;
}
static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
- uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- struct ac_vs_exports *processed,
- struct ac_vs_exp_inst *exp)
-{
- unsigned p, copy_back_channels = 0;
-
- /* See if the output is already in the list of processed outputs.
- * The LLVMValueRef comparison relies on SSA.
- */
- for (p = 0; p < processed->num; p++) {
- bool different = false;
-
- for (unsigned j = 0; j < 4; j++) {
- struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
- struct ac_vs_exp_chan *c2 = &exp->chan[j];
-
- /* Treat undef as a match. */
- if (c2->type == AC_IR_UNDEF)
- continue;
-
- /* If c1 is undef but c2 isn't, we can copy c2 to c1
- * and consider the instruction duplicated.
- */
- if (c1->type == AC_IR_UNDEF) {
- copy_back_channels |= 1 << j;
- continue;
- }
-
- /* Test whether the channels are not equal. */
- if (c1->type != c2->type ||
- (c1->type == AC_IR_CONST &&
- c1->const_float != c2->const_float) ||
- (c1->type == AC_IR_VALUE &&
- c1->value != c2->value)) {
- different = true;
- break;
- }
- }
- if (!different)
- break;
-
- copy_back_channels = 0;
- }
- if (p == processed->num)
- return false;
-
- /* If a match was found, but the matching export has undef where the new
- * one has a normal value, copy the normal value to the undef channel.
- */
- struct ac_vs_exp_inst *match = &processed->exp[p];
-
- /* Get current enabled channels mask. */
- LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
- unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
-
- while (copy_back_channels) {
- unsigned chan = u_bit_scan(&copy_back_channels);
-
- assert(match->chan[chan].type == AC_IR_UNDEF);
- LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
- exp->chan[chan].value);
- match->chan[chan] = exp->chan[chan];
-
- /* Update number of enabled channels because the original mask
- * is not always 0xf.
- */
- enabled_channels |= (1 << chan);
- LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
- LLVMConstInt(ctx->i32, enabled_channels, 0));
- }
-
- /* The PARAM export is duplicated. Kill it. */
- LLVMInstructionEraseFromParent(exp->inst);
-
- /* Change OFFSET to the matching export. */
- for (unsigned i = 0; i < num_outputs; i++) {
- if (vs_output_param_offset[i] == exp->offset) {
- vs_output_param_offset[i] = match->offset;
- break;
- }
- }
- return true;
-}
-
-void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
- LLVMValueRef main_fn,
- uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- uint32_t skip_output_mask,
- uint8_t *num_param_exports)
-{
- LLVMBasicBlockRef bb;
- bool removed_any = false;
- struct ac_vs_exports exports;
-
- exports.num = 0;
-
- /* Process all LLVM instructions. */
- bb = LLVMGetFirstBasicBlock(main_fn);
- while (bb) {
- LLVMValueRef inst = LLVMGetFirstInstruction(bb);
-
- while (inst) {
- LLVMValueRef cur = inst;
- inst = LLVMGetNextInstruction(inst);
- struct ac_vs_exp_inst exp;
-
- if (LLVMGetInstructionOpcode(cur) != LLVMCall)
- continue;
-
- LLVMValueRef callee = ac_llvm_get_called_value(cur);
-
- if (!ac_llvm_is_function(callee))
- continue;
-
- const char *name = LLVMGetValueName(callee);
- unsigned num_args = LLVMCountParams(callee);
-
- /* Check if this is an export instruction. */
- if ((num_args != 9 && num_args != 8) ||
- (strcmp(name, "llvm.SI.export") &&
- strcmp(name, "llvm.amdgcn.exp.f32")))
- continue;
-
- LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
- unsigned target = LLVMConstIntGetZExtValue(arg);
-
- if (target < V_008DFC_SQ_EXP_PARAM)
- continue;
-
- target -= V_008DFC_SQ_EXP_PARAM;
-
- /* Parse the instruction. */
- memset(&exp, 0, sizeof(exp));
- exp.offset = target;
- exp.inst = cur;
-
- for (unsigned i = 0; i < 4; i++) {
- LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
-
- exp.chan[i].value = v;
-
- if (LLVMIsUndef(v)) {
- exp.chan[i].type = AC_IR_UNDEF;
- } else if (LLVMIsAConstantFP(v)) {
- LLVMBool loses_info;
- exp.chan[i].type = AC_IR_CONST;
- exp.chan[i].const_float =
- LLVMConstRealGetDouble(v, &loses_info);
- } else {
- exp.chan[i].type = AC_IR_VALUE;
- }
- }
-
- /* Eliminate constant and duplicated PARAM exports. */
- if (!((1u << target) & skip_output_mask) &&
- (ac_eliminate_const_output(vs_output_param_offset,
- num_outputs, &exp) ||
- ac_eliminate_duplicated_output(ctx,
- vs_output_param_offset,
- num_outputs, &exports,
- &exp))) {
- removed_any = true;
- } else {
- exports.exp[exports.num++] = exp;
- }
- }
- bb = LLVMGetNextBasicBlock(bb);
- }
-
- /* Remove holes in export memory due to removed PARAM exports.
- * This is done by renumbering all PARAM exports.
- */
- if (removed_any) {
- uint8_t old_offset[VARYING_SLOT_MAX];
- unsigned out, i;
-
- /* Make a copy of the offsets. We need the old version while
- * we are modifying some of them. */
- memcpy(old_offset, vs_output_param_offset,
- sizeof(old_offset));
-
- for (i = 0; i < exports.num; i++) {
- unsigned offset = exports.exp[i].offset;
-
- /* Update vs_output_param_offset. Multiple outputs can
- * have the same offset.
- */
- for (out = 0; out < num_outputs; out++) {
- if (old_offset[out] == offset)
- vs_output_param_offset[out] = i;
- }
-
- /* Change the PARAM offset in the instruction. */
- LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
- LLVMConstInt(ctx->i32,
- V_008DFC_SQ_EXP_PARAM + i, 0));
- }
- *num_param_exports = exports.num;
- }
+ uint8_t *vs_output_param_offset, uint32_t num_outputs,
+ struct ac_vs_exports *processed,
+ struct ac_vs_exp_inst *exp)
+{
+ unsigned p, copy_back_channels = 0;
+
+ /* See if the output is already in the list of processed outputs.
+ * The LLVMValueRef comparison relies on SSA.
+ */
+ for (p = 0; p < processed->num; p++) {
+ bool different = false;
+
+ for (unsigned j = 0; j < 4; j++) {
+ struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
+ struct ac_vs_exp_chan *c2 = &exp->chan[j];
+
+ /* Treat undef as a match. */
+ if (c2->type == AC_IR_UNDEF)
+ continue;
+
+ /* If c1 is undef but c2 isn't, we can copy c2 to c1
+ * and consider the instruction duplicated.
+ */
+ if (c1->type == AC_IR_UNDEF) {
+ copy_back_channels |= 1 << j;
+ continue;
+ }
+
+ /* Test whether the channels are not equal. */
+ if (c1->type != c2->type ||
+ (c1->type == AC_IR_CONST && c1->const_float != c2->const_float) ||
+ (c1->type == AC_IR_VALUE && c1->value != c2->value)) {
+ different = true;
+ break;
+ }
+ }
+ if (!different)
+ break;
+
+ copy_back_channels = 0;
+ }
+ if (p == processed->num)
+ return false;
+
+ /* If a match was found, but the matching export has undef where the new
+ * one has a normal value, copy the normal value to the undef channel.
+ */
+ struct ac_vs_exp_inst *match = &processed->exp[p];
+
+ /* Get current enabled channels mask. */
+ LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
+ unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
+
+ while (copy_back_channels) {
+ unsigned chan = u_bit_scan(&copy_back_channels);
+
+ assert(match->chan[chan].type == AC_IR_UNDEF);
+ LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, exp->chan[chan].value);
+ match->chan[chan] = exp->chan[chan];
+
+ /* Update number of enabled channels because the original mask
+ * is not always 0xf.
+ */
+ enabled_channels |= (1 << chan);
+ LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
+ LLVMConstInt(ctx->i32, enabled_channels, 0));
+ }
+
+ /* The PARAM export is duplicated. Kill it. */
+ LLVMInstructionEraseFromParent(exp->inst);
+
+ /* Change OFFSET to the matching export. */
+ for (unsigned i = 0; i < num_outputs; i++) {
+ if (vs_output_param_offset[i] == exp->offset) {
+ vs_output_param_offset[i] = match->offset;
+ break;
+ }
+ }
+ return true;
+}
+
+void ac_optimize_vs_outputs(struct ac_llvm_context *ctx, LLVMValueRef main_fn,
+ uint8_t *vs_output_param_offset, uint32_t num_outputs,
+ uint32_t skip_output_mask, uint8_t *num_param_exports)
+{
+ LLVMBasicBlockRef bb;
+ bool removed_any = false;
+ struct ac_vs_exports exports;
+
+ exports.num = 0;
+
+ /* Process all LLVM instructions. */
+ bb = LLVMGetFirstBasicBlock(main_fn);
+ while (bb) {
+ LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+ while (inst) {
+ LLVMValueRef cur = inst;
+ inst = LLVMGetNextInstruction(inst);
+ struct ac_vs_exp_inst exp;
+
+ if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+ continue;
+
+ LLVMValueRef callee = ac_llvm_get_called_value(cur);
+
+ if (!ac_llvm_is_function(callee))
+ continue;
+
+ const char *name = LLVMGetValueName(callee);
+ unsigned num_args = LLVMCountParams(callee);
+
+ /* Check if this is an export instruction. */
+ if ((num_args != 9 && num_args != 8) ||
+ (strcmp(name, "llvm.SI.export") && strcmp(name, "llvm.amdgcn.exp.f32")))
+ continue;
+
+ LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
+ unsigned target = LLVMConstIntGetZExtValue(arg);
+
+ if (target < V_008DFC_SQ_EXP_PARAM)
+ continue;
+
+ target -= V_008DFC_SQ_EXP_PARAM;
+
+ /* Parse the instruction. */
+ memset(&exp, 0, sizeof(exp));
+ exp.offset = target;
+ exp.inst = cur;
+
+ for (unsigned i = 0; i < 4; i++) {
+ LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
+
+ exp.chan[i].value = v;
+
+ if (LLVMIsUndef(v)) {
+ exp.chan[i].type = AC_IR_UNDEF;
+ } else if (LLVMIsAConstantFP(v)) {
+ LLVMBool loses_info;
+ exp.chan[i].type = AC_IR_CONST;
+ exp.chan[i].const_float = LLVMConstRealGetDouble(v, &loses_info);
+ } else {
+ exp.chan[i].type = AC_IR_VALUE;
+ }
+ }
+
+ /* Eliminate constant and duplicated PARAM exports. */
+ if (!((1u << target) & skip_output_mask) &&
+ (ac_eliminate_const_output(vs_output_param_offset, num_outputs, &exp) ||
+ ac_eliminate_duplicated_output(ctx, vs_output_param_offset, num_outputs, &exports,
+ &exp))) {
+ removed_any = true;
+ } else {
+ exports.exp[exports.num++] = exp;
+ }
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+
+ /* Remove holes in export memory due to removed PARAM exports.
+ * This is done by renumbering all PARAM exports.
+ */
+ if (removed_any) {
+ uint8_t old_offset[VARYING_SLOT_MAX];
+ unsigned out, i;
+
+ /* Make a copy of the offsets. We need the old version while
+ * we are modifying some of them. */
+ memcpy(old_offset, vs_output_param_offset, sizeof(old_offset));
+
+ for (i = 0; i < exports.num; i++) {
+ unsigned offset = exports.exp[i].offset;
+
+ /* Update vs_output_param_offset. Multiple outputs can
+ * have the same offset.
+ */
+ for (out = 0; out < num_outputs; out++) {
+ if (old_offset[out] == offset)
+ vs_output_param_offset[out] = i;
+ }
+
+ /* Change the PARAM offset in the instruction. */
+ LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
+ LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_PARAM + i, 0));
+ }
+ *num_param_exports = exports.num;
+ }
}
void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
{
- LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
- ac_build_intrinsic(ctx,
- "llvm.amdgcn.init.exec", ctx->voidt,
- &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
+ LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
+ ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt, &full_mask, 1,
+ AC_FUNC_ATTR_CONVERGENT);
}
void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
{
- unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768;
- ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
- LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS),
- "lds");
-}
-
-LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr)
-{
- return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), "");
-}
-
-void ac_lds_store(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr,
- LLVMValueRef value)
-{
- value = ac_to_integer(ctx, value);
- ac_build_indexed_store(ctx, ctx->lds,
- dw_addr, value);
-}
-
-LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
- LLVMTypeRef dst_type,
- LLVMValueRef src0)
-{
- unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
- const char *intrin_name;
- LLVMTypeRef type;
- LLVMValueRef zero;
-
- switch (src0_bitsize) {
- case 64:
- intrin_name = "llvm.cttz.i64";
- type = ctx->i64;
- zero = ctx->i64_0;
- break;
- case 32:
- intrin_name = "llvm.cttz.i32";
- type = ctx->i32;
- zero = ctx->i32_0;
- break;
- case 16:
- intrin_name = "llvm.cttz.i16";
- type = ctx->i16;
- zero = ctx->i16_0;
- break;
- case 8:
- intrin_name = "llvm.cttz.i8";
- type = ctx->i8;
- zero = ctx->i8_0;
- break;
- default:
- unreachable(!"invalid bitsize");
- }
-
- LLVMValueRef params[2] = {
- src0,
-
- /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
- * add special code to check for x=0. The reason is that
- * the LLVM behavior for x=0 is different from what we
- * need here. However, LLVM also assumes that ffs(x) is
- * in [0, 31], but GLSL expects that ffs(0) = -1, so
- * a conditional assignment to handle 0 is still required.
- *
- * The hardware already implements the correct behavior.
- */
- ctx->i1true,
- };
-
- LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type,
- params, 2,
- AC_FUNC_ATTR_READNONE);
-
- if (src0_bitsize == 64) {
- lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
- } else if (src0_bitsize < 32) {
- lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
- }
-
- /* TODO: We need an intrinsic to skip this conditional. */
- /* Check for zero: */
- return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
- LLVMIntEQ, src0,
- zero, ""),
- LLVMConstInt(ctx->i32, -1, 0), lsb, "");
+ unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768;
+ ctx->lds = LLVMBuildIntToPtr(
+ ctx->builder, ctx->i32_0,
+ LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds");
+}
+
+LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr)
+{
+ return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), "");
+}
+
+void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value)
+{
+ value = ac_to_integer(ctx, value);
+ ac_build_indexed_store(ctx, ctx->lds, dw_addr, value);
+}
+
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0)
+{
+ unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+ const char *intrin_name;
+ LLVMTypeRef type;
+ LLVMValueRef zero;
+
+ switch (src0_bitsize) {
+ case 64:
+ intrin_name = "llvm.cttz.i64";
+ type = ctx->i64;
+ zero = ctx->i64_0;
+ break;
+ case 32:
+ intrin_name = "llvm.cttz.i32";
+ type = ctx->i32;
+ zero = ctx->i32_0;
+ break;
+ case 16:
+ intrin_name = "llvm.cttz.i16";
+ type = ctx->i16;
+ zero = ctx->i16_0;
+ break;
+ case 8:
+ intrin_name = "llvm.cttz.i8";
+ type = ctx->i8;
+ zero = ctx->i8_0;
+ break;
+ default:
+ unreachable(!"invalid bitsize");
+ }
+
+ LLVMValueRef params[2] = {
+ src0,
+
+ /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
+ * add special code to check for x=0. The reason is that
+ * the LLVM behavior for x=0 is different from what we
+ * need here. However, LLVM also assumes that ffs(x) is
+ * in [0, 31], but GLSL expects that ffs(0) = -1, so
+ * a conditional assignment to handle 0 is still required.
+ *
+ * The hardware already implements the correct behavior.
+ */
+ ctx->i1true,
+ };
+
+ LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE);
+
+ if (src0_bitsize == 64) {
+ lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
+ } else if (src0_bitsize < 32) {
+ lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
+ }
+
+ /* TODO: We need an intrinsic to skip this conditional. */
+ /* Check for zero: */
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""),
+ LLVMConstInt(ctx->i32, -1, 0), lsb, "");
}
LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
{
- return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST);
+ return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST);
}
LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
{
- return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
+ return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
}
-static struct ac_llvm_flow *
-get_current_flow(struct ac_llvm_context *ctx)
+static struct ac_llvm_flow *get_current_flow(struct ac_llvm_context *ctx)
{
- if (ctx->flow->depth > 0)
- return &ctx->flow->stack[ctx->flow->depth - 1];
- return NULL;
+ if (ctx->flow->depth > 0)
+ return &ctx->flow->stack[ctx->flow->depth - 1];
+ return NULL;
}
-static struct ac_llvm_flow *
-get_innermost_loop(struct ac_llvm_context *ctx)
+static struct ac_llvm_flow *get_innermost_loop(struct ac_llvm_context *ctx)
{
- for (unsigned i = ctx->flow->depth; i > 0; --i) {
- if (ctx->flow->stack[i - 1].loop_entry_block)
- return &ctx->flow->stack[i - 1];
- }
- return NULL;
+ for (unsigned i = ctx->flow->depth; i > 0; --i) {
+ if (ctx->flow->stack[i - 1].loop_entry_block)
+ return &ctx->flow->stack[i - 1];
+ }
+ return NULL;
}
-static struct ac_llvm_flow *
-push_flow(struct ac_llvm_context *ctx)
+static struct ac_llvm_flow *push_flow(struct ac_llvm_context *ctx)
{
- struct ac_llvm_flow *flow;
+ struct ac_llvm_flow *flow;
- if (ctx->flow->depth >= ctx->flow->depth_max) {
- unsigned new_max = MAX2(ctx->flow->depth << 1,
- AC_LLVM_INITIAL_CF_DEPTH);
+ if (ctx->flow->depth >= ctx->flow->depth_max) {
+ unsigned new_max = MAX2(ctx->flow->depth << 1, AC_LLVM_INITIAL_CF_DEPTH);
- ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack));
- ctx->flow->depth_max = new_max;
- }
+ ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack));
+ ctx->flow->depth_max = new_max;
+ }
- flow = &ctx->flow->stack[ctx->flow->depth];
- ctx->flow->depth++;
+ flow = &ctx->flow->stack[ctx->flow->depth];
+ ctx->flow->depth++;
- flow->next_block = NULL;
- flow->loop_entry_block = NULL;
- return flow;
+ flow->next_block = NULL;
+ flow->loop_entry_block = NULL;
+ return flow;
}
-static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
- int label_id)
+static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int label_id)
{
- char buf[32];
- snprintf(buf, sizeof(buf), "%s%d", base, label_id);
- LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
+ char buf[32];
+ snprintf(buf, sizeof(buf), "%s%d", base, label_id);
+ LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
}
/* Append a basic block at the level of the parent flow.
*/
-static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
- const char *name)
+static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, const char *name)
{
- assert(ctx->flow->depth >= 1);
+ assert(ctx->flow->depth >= 1);
- if (ctx->flow->depth >= 2) {
- struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2];
+ if (ctx->flow->depth >= 2) {
+ struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2];
- return LLVMInsertBasicBlockInContext(ctx->context,
- flow->next_block, name);
- }
+ return LLVMInsertBasicBlockInContext(ctx->context, flow->next_block, name);
+ }
- LLVMValueRef main_fn =
- LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
- return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
+ LLVMValueRef main_fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
+ return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
}
/* Emit a branch to the given default target for the current block if
* applicable -- that is, if the current block does not already contain a
* branch from a break or continue.
*/
-static void emit_default_branch(LLVMBuilderRef builder,
- LLVMBasicBlockRef target)
+static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
{
- if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
- LLVMBuildBr(builder, target);
+ if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
+ LLVMBuildBr(builder, target);
}
void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
{
- struct ac_llvm_flow *flow = push_flow(ctx);
- flow->loop_entry_block = append_basic_block(ctx, "LOOP");
- flow->next_block = append_basic_block(ctx, "ENDLOOP");
- set_basicblock_name(flow->loop_entry_block, "loop", label_id);
- LLVMBuildBr(ctx->builder, flow->loop_entry_block);
- LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
+ struct ac_llvm_flow *flow = push_flow(ctx);
+ flow->loop_entry_block = append_basic_block(ctx, "LOOP");
+ flow->next_block = append_basic_block(ctx, "ENDLOOP");
+ set_basicblock_name(flow->loop_entry_block, "loop", label_id);
+ LLVMBuildBr(ctx->builder, flow->loop_entry_block);
+ LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
}
void ac_build_break(struct ac_llvm_context *ctx)
{
- struct ac_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(ctx->builder, flow->next_block);
+ struct ac_llvm_flow *flow = get_innermost_loop(ctx);
+ LLVMBuildBr(ctx->builder, flow->next_block);
}
void ac_build_continue(struct ac_llvm_context *ctx)
{
- struct ac_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(ctx->builder, flow->loop_entry_block);
+ struct ac_llvm_flow *flow = get_innermost_loop(ctx);
+ LLVMBuildBr(ctx->builder, flow->loop_entry_block);
}
void ac_build_else(struct ac_llvm_context *ctx, int label_id)
{
- struct ac_llvm_flow *current_branch = get_current_flow(ctx);
- LLVMBasicBlockRef endif_block;
+ struct ac_llvm_flow *current_branch = get_current_flow(ctx);
+ LLVMBasicBlockRef endif_block;
- assert(!current_branch->loop_entry_block);
+ assert(!current_branch->loop_entry_block);
- endif_block = append_basic_block(ctx, "ENDIF");
- emit_default_branch(ctx->builder, endif_block);
+ endif_block = append_basic_block(ctx, "ENDIF");
+ emit_default_branch(ctx->builder, endif_block);
- LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
- set_basicblock_name(current_branch->next_block, "else", label_id);
+ LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
+ set_basicblock_name(current_branch->next_block, "else", label_id);
- current_branch->next_block = endif_block;
+ current_branch->next_block = endif_block;
}
void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
{
- struct ac_llvm_flow *current_branch = get_current_flow(ctx);
+ struct ac_llvm_flow *current_branch = get_current_flow(ctx);
- assert(!current_branch->loop_entry_block);
+ assert(!current_branch->loop_entry_block);
- emit_default_branch(ctx->builder, current_branch->next_block);
- LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
- set_basicblock_name(current_branch->next_block, "endif", label_id);
+ emit_default_branch(ctx->builder, current_branch->next_block);
+ LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
+ set_basicblock_name(current_branch->next_block, "endif", label_id);
- ctx->flow->depth--;
+ ctx->flow->depth--;
}
void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
{
- struct ac_llvm_flow *current_loop = get_current_flow(ctx);
+ struct ac_llvm_flow *current_loop = get_current_flow(ctx);
- assert(current_loop->loop_entry_block);
+ assert(current_loop->loop_entry_block);
- emit_default_branch(ctx->builder, current_loop->loop_entry_block);
+ emit_default_branch(ctx->builder, current_loop->loop_entry_block);
- LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
- set_basicblock_name(current_loop->next_block, "endloop", label_id);
- ctx->flow->depth--;
+ LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
+ set_basicblock_name(current_loop->next_block, "endloop", label_id);
+ ctx->flow->depth--;
}
void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
{
- struct ac_llvm_flow *flow = push_flow(ctx);
- LLVMBasicBlockRef if_block;
+ struct ac_llvm_flow *flow = push_flow(ctx);
+ LLVMBasicBlockRef if_block;
- if_block = append_basic_block(ctx, "IF");
- flow->next_block = append_basic_block(ctx, "ELSE");
- set_basicblock_name(if_block, "if", label_id);
- LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
- LLVMPositionBuilderAtEnd(ctx->builder, if_block);
+ if_block = append_basic_block(ctx, "IF");
+ flow->next_block = append_basic_block(ctx, "ELSE");
+ set_basicblock_name(if_block, "if", label_id);
+ LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
+ LLVMPositionBuilderAtEnd(ctx->builder, if_block);
}
-void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
- int label_id)
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, int label_id)
{
- LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
- value, ctx->f32_0, "");
- ac_build_ifcc(ctx, cond, label_id);
+ LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value, ctx->f32_0, "");
+ ac_build_ifcc(ctx, cond, label_id);
}
-void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
- int label_id)
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, int label_id)
{
- LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- ac_to_integer(ctx, value),
- ctx->i32_0, "");
- ac_build_ifcc(ctx, cond, label_id);
+ LLVMValueRef cond =
+ LLVMBuildICmp(ctx->builder, LLVMIntNE, ac_to_integer(ctx, value), ctx->i32_0, "");
+ ac_build_ifcc(ctx, cond, label_id);
}
-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
- const char *name)
+LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name)
{
- LLVMBuilderRef builder = ac->builder;
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
- LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
- LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
- LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
- LLVMValueRef res;
+ LLVMBuilderRef builder = ac->builder;
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+ LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+ LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
+ LLVMValueRef res;
- if (first_instr) {
- LLVMPositionBuilderBefore(first_builder, first_instr);
- } else {
- LLVMPositionBuilderAtEnd(first_builder, first_block);
- }
+ if (first_instr) {
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+ } else {
+ LLVMPositionBuilderAtEnd(first_builder, first_block);
+ }
- res = LLVMBuildAlloca(first_builder, type, name);
- LLVMDisposeBuilder(first_builder);
- return res;
+ res = LLVMBuildAlloca(first_builder, type, name);
+ LLVMDisposeBuilder(first_builder);
+ return res;
}
-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac,
- LLVMTypeRef type, const char *name)
+LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name)
{
- LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name);
- LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr);
- return ptr;
+ LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name);
+ LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr);
+ return ptr;
}
-LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMTypeRef type)
+LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type)
{
- int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- return LLVMBuildBitCast(ctx->builder, ptr,
- LLVMPointerType(type, addr_space), "");
+ int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ return LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
}
-LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
- unsigned count)
+LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count)
{
- unsigned num_components = ac_get_llvm_num_components(value);
- if (count == num_components)
- return value;
+ unsigned num_components = ac_get_llvm_num_components(value);
+ if (count == num_components)
+ return value;
- LLVMValueRef masks[MAX2(count, 2)];
- masks[0] = ctx->i32_0;
- masks[1] = ctx->i32_1;
- for (unsigned i = 2; i < count; i++)
- masks[i] = LLVMConstInt(ctx->i32, i, false);
+ LLVMValueRef masks[MAX2(count, 2)];
+ masks[0] = ctx->i32_0;
+ masks[1] = ctx->i32_1;
+ for (unsigned i = 2; i < count; i++)
+ masks[i] = LLVMConstInt(ctx->i32, i, false);
- if (count == 1)
- return LLVMBuildExtractElement(ctx->builder, value, masks[0],
- "");
+ if (count == 1)
+ return LLVMBuildExtractElement(ctx->builder, value, masks[0], "");
- LLVMValueRef swizzle = LLVMConstVector(masks, count);
- return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
+ LLVMValueRef swizzle = LLVMConstVector(masks, count);
+ return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
}
+/* Extract a bitfield from a packed parameter.
+ *
+ * Shifts param right by rshift (logical shift), then keeps the low bitwidth
+ * bits. The mask is skipped when rshift + bitwidth >= 32.
+ *
+ * The shift amount and the mask are emitted as i32 constants, so param is
+ * presumably an i32 value - confirm against callers.
+ */
-LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
- unsigned rshift, unsigned bitwidth)
+LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift,
+ unsigned bitwidth)
{
- LLVMValueRef value = param;
- if (rshift)
- value = LLVMBuildLShr(ctx->builder, value,
- LLVMConstInt(ctx->i32, rshift, false), "");
+ LLVMValueRef value = param;
+ if (rshift)
+ value = LLVMBuildLShr(ctx->builder, value, LLVMConstInt(ctx->i32, rshift, false), "");
- if (rshift + bitwidth < 32) {
- unsigned mask = (1 << bitwidth) - 1;
- value = LLVMBuildAnd(ctx->builder, value,
- LLVMConstInt(ctx->i32, mask, false), "");
- }
- return value;
+ if (rshift + bitwidth < 32) {
+ /* Shift an unsigned 1: with bitwidth == 31, (1 << 31) - 1 overflows a signed int. */
+ unsigned mask = (1u << bitwidth) - 1;
+ value = LLVMBuildAnd(ctx->builder, value, LLVMConstInt(ctx->i32, mask, false), "");
+ }
+ return value;
}
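/* Adjust the sample index according to FMASK.
* The sample index should be adjusted as follows:
* addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0x7;
* Each FMASK entry is 4 bits wide, but only the low 3 bits are kept because
* 0x8 means an unknown value with EQAA. The sample index is left unchanged
* when the FMASK descriptor is invalid.
*/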
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
- LLVMValueRef *addr, bool is_array_tex)
-{
- struct ac_image_args fmask_load = {};
- fmask_load.opcode = ac_image_load;
- fmask_load.resource = fmask;
- fmask_load.dmask = 0xf;
- fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
- fmask_load.attributes = AC_FUNC_ATTR_READNONE;
-
- fmask_load.coords[0] = addr[0];
- fmask_load.coords[1] = addr[1];
- if (is_array_tex)
- fmask_load.coords[2] = addr[2];
-
- LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
- fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
- ac->i32_0, "");
-
- /* Apply the formula. */
- unsigned sample_chan = is_array_tex ? 3 : 2;
- LLVMValueRef final_sample;
- final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
- LLVMConstInt(ac->i32, 4, 0), "");
- final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, "");
- /* Mask the sample index by 0x7, because 0x8 means an unknown value
- * with EQAA, so those will map to 0. */
- final_sample = LLVMBuildAnd(ac->builder, final_sample,
- LLVMConstInt(ac->i32, 0x7, 0), "");
-
- /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
- * resource descriptor is 0 (invalid).
- */
- LLVMValueRef tmp;
- tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
- tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
- tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
-
- /* Replace the MSAA sample index. */
- addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample,
- addr[sample_chan], "");
-}
-
-static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
- LLVMValueRef lane, bool with_opt_barrier)
-{
- LLVMTypeRef type = LLVMTypeOf(src);
- LLVMValueRef result;
-
- if (with_opt_barrier)
- ac_build_optimization_barrier(ctx, &src);
-
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
- if (lane)
- lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
-
- result = ac_build_intrinsic(ctx,
- lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane",
- ctx->i32, (LLVMValueRef []) { src, lane },
- lane == NULL ? 1 : 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-
- return LLVMBuildTrunc(ctx->builder, result, type, "");
-}
-
-static LLVMValueRef
-ac_build_readlane_common(struct ac_llvm_context *ctx,
- LLVMValueRef src, LLVMValueRef lane,
- bool with_opt_barrier)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
-
- if (bits > 32) {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- LLVMValueRef ret_comp;
-
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i, 0), "");
-
- ret_comp = _ac_build_readlane(ctx, src, lane,
- with_opt_barrier);
-
- ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
- LLVMConstInt(ctx->i32, i, 0), "");
- }
- } else {
- ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
- }
-
- if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
- return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
+ bool is_array_tex)
+{
+ struct ac_image_args fmask_load = {};
+ fmask_load.opcode = ac_image_load;
+ fmask_load.resource = fmask;
+ fmask_load.dmask = 0xf;
+ fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
+ fmask_load.attributes = AC_FUNC_ATTR_READNONE;
+
+ fmask_load.coords[0] = addr[0];
+ fmask_load.coords[1] = addr[1];
+ if (is_array_tex)
+ fmask_load.coords[2] = addr[2];
+
+ LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
+ fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");
+
+ /* Apply the formula. */
+ unsigned sample_chan = is_array_tex ? 3 : 2;
+ LLVMValueRef final_sample;
+ final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], LLVMConstInt(ac->i32, 4, 0), "");
+ final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, "");
+ /* Mask the sample index by 0x7, because 0x8 means an unknown value
+ * with EQAA, so those will map to 0. */
+ final_sample = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");
+
+ /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
+ * resource descriptor is 0 (invalid).
+ */
+ LLVMValueRef tmp;
+ tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
+ tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
+ tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
+
+ /* Replace the MSAA sample index. */
+ addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample, addr[sample_chan], "");
+}
+
+static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef lane, bool with_opt_barrier)
+{
+ LLVMTypeRef type = LLVMTypeOf(src);
+ LLVMValueRef result;
+
+ if (with_opt_barrier)
+ ac_build_optimization_barrier(ctx, &src);
+
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+ if (lane)
+ lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
+
+ result =
+ ac_build_intrinsic(ctx, lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane",
+ ctx->i32, (LLVMValueRef[]){src, lane}, lane == NULL ? 1 : 2,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ return LLVMBuildTrunc(ctx->builder, result, type, "");
+}
+
+static LLVMValueRef ac_build_readlane_common(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef lane, bool with_opt_barrier)
+{
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ src = ac_to_integer(ctx, src);
+ unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+ LLVMValueRef ret;
+
+ if (bits > 32) {
+ assert(bits % 32 == 0);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+ LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+ ret = LLVMGetUndef(vec_type);
+ for (unsigned i = 0; i < bits / 32; i++) {
+ LLVMValueRef ret_comp;
+
+ src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
+
+ ret_comp = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
+
+ ret =
+ LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ } else {
+ ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
+ }
+
+ if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+ return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
+ return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
/**
* @param lane - id of the lane or NULL for the first active lane
* @return value of the lane
*/
-LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef src, LLVMValueRef lane)
-{
- return ac_build_readlane_common(ctx, src, lane, false);
-}
-
-
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
-{
- return ac_build_readlane_common(ctx, src, lane, true);
-}
-
-LLVMValueRef
-ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane)
-{
- return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
- (LLVMValueRef []) {value, lane, src}, 3,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-LLVMValueRef
-ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
-{
- if (ctx->wave_size == 32) {
- return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
- (LLVMValueRef []) { mask, ctx->i32_0 },
- 2, AC_FUNC_ATTR_READNONE);
- }
- LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, "");
- LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec,
- ctx->i32_0, "");
- LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec,
- ctx->i32_1, "");
- LLVMValueRef val =
- ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
- (LLVMValueRef []) { mask_lo, ctx->i32_0 },
- 2, AC_FUNC_ATTR_READNONE);
- val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
- (LLVMValueRef []) { mask_hi, val },
- 2, AC_FUNC_ATTR_READNONE);
- return val;
-}
-
-enum dpp_ctrl {
- _dpp_quad_perm = 0x000,
- _dpp_row_sl = 0x100,
- _dpp_row_sr = 0x110,
- _dpp_row_rr = 0x120,
- dpp_wf_sl1 = 0x130,
- dpp_wf_rl1 = 0x134,
- dpp_wf_sr1 = 0x138,
- dpp_wf_rr1 = 0x13C,
- dpp_row_mirror = 0x140,
- dpp_row_half_mirror = 0x141,
- dpp_row_bcast15 = 0x142,
- dpp_row_bcast31 = 0x143
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef lane)
+{
+ return ac_build_readlane_common(ctx, src, lane, false);
+}
+
+LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+ return ac_build_readlane_common(ctx, src, lane, true);
+}
+
+LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value,
+ LLVMValueRef lane)
+{
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
+ (LLVMValueRef[]){value, lane, src}, 3,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+}
+
+LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
+{
+ if (ctx->wave_size == 32) {
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+ (LLVMValueRef[]){mask, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE);
+ }
+ LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, "");
+ LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_0, "");
+ LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_1, "");
+ LLVMValueRef val =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+ (LLVMValueRef[]){mask_lo, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE);
+ val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, (LLVMValueRef[]){mask_hi, val},
+ 2, AC_FUNC_ATTR_READNONE);
+ return val;
+}
+
+enum dpp_ctrl
+{
+ _dpp_quad_perm = 0x000,
+ _dpp_row_sl = 0x100,
+ _dpp_row_sr = 0x110,
+ _dpp_row_rr = 0x120,
+ dpp_wf_sl1 = 0x130,
+ dpp_wf_rl1 = 0x134,
+ dpp_wf_sr1 = 0x138,
+ dpp_wf_rr1 = 0x13C,
+ dpp_row_mirror = 0x140,
+ dpp_row_half_mirror = 0x141,
+ dpp_row_bcast15 = 0x142,
+ dpp_row_bcast31 = 0x143
};
-static inline enum dpp_ctrl
-dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
- assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
- return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
-}
-
-static inline enum dpp_ctrl
-dpp_row_sl(unsigned amount)
-{
- assert(amount > 0 && amount < 16);
- return _dpp_row_sl | amount;
-}
-
-static inline enum dpp_ctrl
-dpp_row_sr(unsigned amount)
-{
- assert(amount > 0 && amount < 16);
- return _dpp_row_sr | amount;
-}
-
-static LLVMValueRef
-_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
- enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
- bool bound_ctrl)
-{
- LLVMTypeRef type = LLVMTypeOf(src);
- LLVMValueRef res;
-
- old = LLVMBuildZExt(ctx->builder, old, ctx->i32, "");
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-
- res = ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32,
- (LLVMValueRef[]) {
- old, src,
- LLVMConstInt(ctx->i32, dpp_ctrl, 0),
- LLVMConstInt(ctx->i32, row_mask, 0),
- LLVMConstInt(ctx->i32, bank_mask, 0),
- LLVMConstInt(ctx->i1, bound_ctrl, 0) },
- 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-
- return LLVMBuildTrunc(ctx->builder, res, type, "");
-}
-
-static LLVMValueRef
-ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
- enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
- bool bound_ctrl)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- old = ac_to_integer(ctx, old);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
- if (bits > 32) {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- LLVMValueRef old_vector =
- LLVMBuildBitCast(ctx->builder, old, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- old = LLVMBuildExtractElement(ctx->builder, old_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- LLVMValueRef ret_comp = _ac_build_dpp(ctx, old, src,
- dpp_ctrl,
- row_mask,
- bank_mask,
- bound_ctrl);
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ret_comp,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- }
- } else {
- ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask,
- bank_mask, bound_ctrl);
- }
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static LLVMValueRef
-_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
- bool exchange_rows, bool bound_ctrl)
-{
- LLVMTypeRef type = LLVMTypeOf(src);
- LLVMValueRef result;
-
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-
- LLVMValueRef args[6] = {
- src,
- src,
- LLVMConstInt(ctx->i32, sel, false),
- LLVMConstInt(ctx->i32, sel >> 32, false),
- ctx->i1true, /* fi */
- bound_ctrl ? ctx->i1true : ctx->i1false,
- };
-
- result = ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16"
- : "llvm.amdgcn.permlane16",
- ctx->i32, args, 6,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-
- return LLVMBuildTrunc(ctx->builder, result, type, "");
-}
-
-static LLVMValueRef
-ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
- bool exchange_rows, bool bound_ctrl)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
- if (bits > 32) {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- LLVMValueRef ret_comp =
- _ac_build_permlane16(ctx, src, sel,
- exchange_rows,
- bound_ctrl);
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ret_comp,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- }
- } else {
- ret = _ac_build_permlane16(ctx, src, sel, exchange_rows,
- bound_ctrl);
- }
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static inline unsigned
-ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
-{
- assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
- return and_mask | (or_mask << 5) | (xor_mask << 10);
-}
-
-static LLVMValueRef
-_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- LLVMValueRef ret;
-
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-
- ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32,
- (LLVMValueRef []) {
- src, LLVMConstInt(ctx->i32, mask, 0) },
- 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-
- return LLVMBuildTrunc(ctx->builder, ret, src_type, "");
-}
-
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
- if (bits > 32) {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src,
- mask);
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ret_comp,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- }
- } else {
- ret = _ac_build_ds_swizzle(ctx, src, mask);
- }
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static LLVMValueRef
-ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- unsigned bitsize = ac_get_elem_bits(ctx, src_type);
- char name[32], type[8];
- LLVMValueRef ret;
-
- src = ac_to_integer(ctx, src);
-
- if (bitsize < 32)
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-
- ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
- ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src),
- (LLVMValueRef []) { src }, 1,
- AC_FUNC_ATTR_READNONE);
-
- if (bitsize < 32)
- ret = LLVMBuildTrunc(ctx->builder, ret,
- ac_to_integer_type(ctx, src_type), "");
-
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static LLVMValueRef
-ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
- LLVMValueRef inactive)
-{
- char name[33], type[8];
- LLVMTypeRef src_type = LLVMTypeOf(src);
- unsigned bitsize = ac_get_elem_bits(ctx, src_type);
- src = ac_to_integer(ctx, src);
- inactive = ac_to_integer(ctx, inactive);
-
- if (bitsize < 32) {
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
- inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, "");
- }
-
- ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type);
- LLVMValueRef ret =
- ac_build_intrinsic(ctx, name,
- LLVMTypeOf(src), (LLVMValueRef []) {
- src, inactive }, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
- if (bitsize < 32)
- ret = LLVMBuildTrunc(ctx->builder, ret, src_type, "");
-
- return ret;
-}
-
-static LLVMValueRef
-get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size)
-{
- if (type_size == 1) {
- switch (op) {
- case nir_op_iadd: return ctx->i8_0;
- case nir_op_imul: return ctx->i8_1;
- case nir_op_imin: return LLVMConstInt(ctx->i8, INT8_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i8, UINT8_MAX, 0);
- case nir_op_imax: return LLVMConstInt(ctx->i8, INT8_MIN, 0);
- case nir_op_umax: return ctx->i8_0;
- case nir_op_iand: return LLVMConstInt(ctx->i8, -1, 0);
- case nir_op_ior: return ctx->i8_0;
- case nir_op_ixor: return ctx->i8_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- } else if (type_size == 2) {
- switch (op) {
- case nir_op_iadd: return ctx->i16_0;
- case nir_op_fadd: return ctx->f16_0;
- case nir_op_imul: return ctx->i16_1;
- case nir_op_fmul: return ctx->f16_1;
- case nir_op_imin: return LLVMConstInt(ctx->i16, INT16_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i16, UINT16_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f16, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i16, INT16_MIN, 0);
- case nir_op_umax: return ctx->i16_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f16, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i16, -1, 0);
- case nir_op_ior: return ctx->i16_0;
- case nir_op_ixor: return ctx->i16_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- } else if (type_size == 4) {
- switch (op) {
- case nir_op_iadd: return ctx->i32_0;
- case nir_op_fadd: return ctx->f32_0;
- case nir_op_imul: return ctx->i32_1;
- case nir_op_fmul: return ctx->f32_1;
- case nir_op_imin: return LLVMConstInt(ctx->i32, INT32_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f32, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i32, INT32_MIN, 0);
- case nir_op_umax: return ctx->i32_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f32, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i32, -1, 0);
- case nir_op_ior: return ctx->i32_0;
- case nir_op_ixor: return ctx->i32_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- } else { /* type_size == 64bit */
- switch (op) {
- case nir_op_iadd: return ctx->i64_0;
- case nir_op_fadd: return ctx->f64_0;
- case nir_op_imul: return ctx->i64_1;
- case nir_op_fmul: return ctx->f64_1;
- case nir_op_imin: return LLVMConstInt(ctx->i64, INT64_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f64, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i64, INT64_MIN, 0);
- case nir_op_umax: return ctx->i64_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f64, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i64, -1, 0);
- case nir_op_ior: return ctx->i64_0;
- case nir_op_ixor: return ctx->i64_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- }
-}
-
-static LLVMValueRef
-ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op)
-{
- bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
- bool _32bit = ac_get_type_size(LLVMTypeOf(lhs)) == 4;
- switch (op) {
- case nir_op_iadd: return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
- case nir_op_fadd: return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
- case nir_op_imul: return LLVMBuildMul(ctx->builder, lhs, rhs, "");
- case nir_op_fmul: return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
- case nir_op_imin: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_umin: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_fmin: return ac_build_intrinsic(ctx,
- _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16",
- _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16,
- (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
- case nir_op_imax: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_umax: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_fmax: return ac_build_intrinsic(ctx,
- _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16",
- _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16,
- (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
- case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
- case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, "");
- case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, "");
- default:
- unreachable("bad reduction intrinsic");
- }
+/* Build the DPP control value for a quad permutation.
+ *
+ * Each laneN argument must be below 4 (asserted) and is packed into its own
+ * 2-bit field on top of _dpp_quad_perm: lane0 in bits 0-1, lane1 in bits 2-3,
+ * lane2 in bits 4-5, lane3 in bits 6-7.
+ * Presumably laneN is the source lane read by lane N of each quad - confirm
+ * against the ISA documentation.
+ */
+static inline enum dpp_ctrl dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2,
+ unsigned lane3)
+{
+ assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
+ return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
+}
+
+/* Build the DPP control value _dpp_row_sl | amount.
+ * amount must be in the range 1..15 (asserted).
+ * Presumably a shift left by amount lanes within a row - confirm.
+ */
+static inline enum dpp_ctrl dpp_row_sl(unsigned amount)
+{
+ assert(amount > 0 && amount < 16);
+ return _dpp_row_sl | amount;
+}
+
+/* Build the DPP control value _dpp_row_sr | amount.
+ * amount must be in the range 1..15 (asserted).
+ * Presumably a shift right by amount lanes within a row - confirm.
+ */
+static inline enum dpp_ctrl dpp_row_sr(unsigned amount)
+{
+ assert(amount > 0 && amount < 16);
+ return _dpp_row_sr | amount;
+}
+
+static LLVMValueRef _ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
+ enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
+ bool bound_ctrl)
+{
+ LLVMTypeRef type = LLVMTypeOf(src);
+ LLVMValueRef res;
+
+ old = LLVMBuildZExt(ctx->builder, old, ctx->i32, "");
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+ res = ac_build_intrinsic(
+ ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32,
+ (LLVMValueRef[]){old, src, LLVMConstInt(ctx->i32, dpp_ctrl, 0),
+ LLVMConstInt(ctx->i32, row_mask, 0), LLVMConstInt(ctx->i32, bank_mask, 0),
+ LLVMConstInt(ctx->i1, bound_ctrl, 0)},
+ 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ return LLVMBuildTrunc(ctx->builder, res, type, "");
+}
+
+static LLVMValueRef ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
+ enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
+ bool bound_ctrl)
+{
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ src = ac_to_integer(ctx, src);
+ old = ac_to_integer(ctx, old);
+ unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+ LLVMValueRef ret;
+ if (bits > 32) {
+ assert(bits % 32 == 0);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+ LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+ LLVMValueRef old_vector = LLVMBuildBitCast(ctx->builder, old, vec_type, "");
+ ret = LLVMGetUndef(vec_type);
+ for (unsigned i = 0; i < bits / 32; i++) {
+ src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
+ old = LLVMBuildExtractElement(ctx->builder, old_vector, LLVMConstInt(ctx->i32, i, 0), "");
+ LLVMValueRef ret_comp =
+ _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
+ ret =
+ LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ } else {
+ ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
+ }
+ return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+/**
+ * Emit one llvm.amdgcn.permlane16 / llvm.amdgcn.permlanex16 call.
+ *
+ * src is zero-extended to i32 for the intrinsic and the i32 result is
+ * truncated back to the type src had on entry.
+ *
+ * \param sel 64-bit selector, passed as two i32 operands (low half first)
+ * \param exchange_rows selects the "permlanex16" intrinsic instead of
+ * "permlane16"
+ * \param bound_ctrl passed as the last i1 operand of the intrinsic
+ */
+static LLVMValueRef _ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src,
+ uint64_t sel, bool exchange_rows, bool bound_ctrl)
+{
+ LLVMTypeRef type = LLVMTypeOf(src);
+ LLVMValueRef result;
+
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+ /* src is deliberately passed for both of the first two operands. */
+ LLVMValueRef args[6] = {
+ src,
+ src,
+ LLVMConstInt(ctx->i32, sel, false),
+ LLVMConstInt(ctx->i32, sel >> 32, false),
+ ctx->i1true, /* fi */
+ bound_ctrl ? ctx->i1true : ctx->i1false,
+ };
+
+ result =
+ ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16" : "llvm.amdgcn.permlane16",
+ ctx->i32, args, 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ return LLVMBuildTrunc(ctx->builder, result, type, "");
+}
+
+/**
+ * Build a permlane16/permlanex16 operation on a value of any type
+ * ac_to_integer() can convert.
+ *
+ * Values wider than 32 bits are split into i32 components and
+ * _ac_build_permlane16() is applied to each one with the same sel,
+ * exchange_rows and bound_ctrl; narrower values are passed through directly.
+ *
+ * \return the result, bitcast back to the original type of src.
+ */
+static LLVMValueRef ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
+ bool exchange_rows, bool bound_ctrl)
+{
+ /* Remember the caller's type; src is overwritten below. */
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ src = ac_to_integer(ctx, src);
+ unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+ LLVMValueRef ret;
+ if (bits > 32) {
+ /* Only whole multiples of 32 bits can be split into i32 components. */
+ assert(bits % 32 == 0);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+ LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+ ret = LLVMGetUndef(vec_type);
+ for (unsigned i = 0; i < bits / 32; i++) {
+ src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
+ LLVMValueRef ret_comp = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
+ ret =
+ LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ } else {
+ ret = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
+ }
+ return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+/**
+ * Pack three 5-bit masks into one swizzle pattern word: and_mask in bits 0-4,
+ * or_mask in bits 5-9 and xor_mask in bits 10-14. Each mask must be below 32
+ * (checked with assert()).
+ */
+static inline unsigned ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
+{
+ assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
+ return and_mask | (or_mask << 5) | (xor_mask << 10);
+}
+
+/**
+ * Emit one llvm.amdgcn.ds.swizzle call.
+ *
+ * src is zero-extended to i32, swizzled with mask passed as an i32 constant,
+ * and the i32 result is truncated back to the type src had on entry.
+ */
+static LLVMValueRef _ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+ unsigned mask)
+{
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ LLVMValueRef ret;
+
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+ ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32,
+ (LLVMValueRef[]){src, LLVMConstInt(ctx->i32, mask, 0)}, 2,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ return LLVMBuildTrunc(ctx->builder, ret, src_type, "");
+}
+
+/**
+ * Build a ds_swizzle operation on a value of any type ac_to_integer() can
+ * convert.
+ *
+ * Values wider than 32 bits are split into i32 components and
+ * _ac_build_ds_swizzle() is applied to each one with the same mask; narrower
+ * values are passed through directly.
+ *
+ * \return the result, bitcast back to the original type of src.
+ */
+LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
+{
+ /* Remember the caller's type; src is overwritten below. */
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ src = ac_to_integer(ctx, src);
+ unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+ LLVMValueRef ret;
+ if (bits > 32) {
+ /* Only whole multiples of 32 bits can be split into i32 components. */
+ assert(bits % 32 == 0);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+ LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+ ret = LLVMGetUndef(vec_type);
+ for (unsigned i = 0; i < bits / 32; i++) {
+ src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
+ LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, mask);
+ ret =
+ LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ } else {
+ ret = _ac_build_ds_swizzle(ctx, src, mask);
+ }
+ return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+/**
+ * Wrap src in a call to the llvm.amdgcn.wwm.<type> intrinsic.
+ *
+ * src is converted to an integer value first; elements narrower than 32 bits
+ * are zero-extended to i32 for the call and truncated back afterwards.
+ *
+ * \return the intrinsic result, bitcast back to the original type of src.
+ */
+static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ unsigned bitsize = ac_get_elem_bits(ctx, src_type);
+ char name[32], type[8];
+ LLVMValueRef ret;
+
+ src = ac_to_integer(ctx, src);
+
+ if (bitsize < 32)
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+ /* The intrinsic name is suffixed with the (possibly widened) operand type. */
+ ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
+ ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1,
+ AC_FUNC_ATTR_READNONE);
+
+ /* Undo the widening in the integer domain before the final bitcast. */
+ if (bitsize < 32)
+ ret = LLVMBuildTrunc(ctx->builder, ret, ac_to_integer_type(ctx, src_type), "");
+
+ return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+/**
+ * Build a call to the llvm.amdgcn.set.inactive.<type> intrinsic with src and
+ * inactive as its two operands.
+ *
+ * Both operands are converted to integers; when the element size of src is
+ * below 32 bits they are zero-extended to i32 for the call and the result is
+ * truncated afterwards.
+ *
+ * \return the intrinsic result. For elements of 32 bits or more it has the
+ * integer type produced by ac_to_integer(); it is not bitcast back to the
+ * original type of src, so callers do that themselves.
+ */
+static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef inactive)
+{
+ char name[33], type[8];
+ LLVMTypeRef src_type = LLVMTypeOf(src);
+ unsigned bitsize = ac_get_elem_bits(ctx, src_type);
+ src = ac_to_integer(ctx, src);
+ inactive = ac_to_integer(ctx, inactive);
+
+ if (bitsize < 32) {
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+ inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, "");
+ }
+
+ /* The intrinsic name is suffixed with the (possibly widened) operand type. */
+ ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type);
+ LLVMValueRef ret =
+ ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src, inactive}, 2,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+ /* NOTE(review): this truncates to the original src_type, whereas
+ * ac_build_wwm() truncates to ac_to_integer_type(ctx, src_type). Presumably
+ * callers only pass integer-typed values narrower than 32 bits - confirm.
+ */
+ if (bitsize < 32)
+ ret = LLVMBuildTrunc(ctx->builder, ret, src_type, "");
+
+ return ret;
+}
+
+/**
+ * Return the constant that is the identity element of a reduction operation.
+ *
+ * \param op one of the integer ops iadd/imul/imin/umin/imax/umax/iand/
+ * ior/ixor or, for sizes above 1, the float ops fadd/fmul/fmin/fmax; any
+ * other op reaches unreachable()
+ * \param type_size operand size in bytes: 1, 2 and 4 are handled explicitly,
+ * every other value is treated as 8
+ * \return an i8/i16/i32/i64 or f16/f32/f64 constant matching op and size.
+ */
+static LLVMValueRef get_reduction_identity(struct ac_llvm_context *ctx, nir_op op,
+ unsigned type_size)
+{
+ if (type_size == 1) {
+ /* 8-bit: integer operations only, no float cases. */
+ switch (op) {
+ case nir_op_iadd:
+ return ctx->i8_0;
+ case nir_op_imul:
+ return ctx->i8_1;
+ case nir_op_imin:
+ return LLVMConstInt(ctx->i8, INT8_MAX, 0);
+ case nir_op_umin:
+ return LLVMConstInt(ctx->i8, UINT8_MAX, 0);
+ case nir_op_imax:
+ return LLVMConstInt(ctx->i8, INT8_MIN, 0);
+ case nir_op_umax:
+ return ctx->i8_0;
+ case nir_op_iand:
+ return LLVMConstInt(ctx->i8, -1, 0);
+ case nir_op_ior:
+ return ctx->i8_0;
+ case nir_op_ixor:
+ return ctx->i8_0;
+ default:
+ unreachable("bad reduction intrinsic");
+ }
+ } else if (type_size == 2) {
+ /* 16-bit integers and f16. */
+ switch (op) {
+ case nir_op_iadd:
+ return ctx->i16_0;
+ case nir_op_fadd:
+ return ctx->f16_0;
+ case nir_op_imul:
+ return ctx->i16_1;
+ case nir_op_fmul:
+ return ctx->f16_1;
+ case nir_op_imin:
+ return LLVMConstInt(ctx->i16, INT16_MAX, 0);
+ case nir_op_umin:
+ return LLVMConstInt(ctx->i16, UINT16_MAX, 0);
+ case nir_op_fmin:
+ return LLVMConstReal(ctx->f16, INFINITY);
+ case nir_op_imax:
+ return LLVMConstInt(ctx->i16, INT16_MIN, 0);
+ case nir_op_umax:
+ return ctx->i16_0;
+ case nir_op_fmax:
+ return LLVMConstReal(ctx->f16, -INFINITY);
+ case nir_op_iand:
+ return LLVMConstInt(ctx->i16, -1, 0);
+ case nir_op_ior:
+ return ctx->i16_0;
+ case nir_op_ixor:
+ return ctx->i16_0;
+ default:
+ unreachable("bad reduction intrinsic");
+ }
+ } else if (type_size == 4) {
+ /* 32-bit integers and f32. */
+ switch (op) {
+ case nir_op_iadd:
+ return ctx->i32_0;
+ case nir_op_fadd:
+ return ctx->f32_0;
+ case nir_op_imul:
+ return ctx->i32_1;
+ case nir_op_fmul:
+ return ctx->f32_1;
+ case nir_op_imin:
+ return LLVMConstInt(ctx->i32, INT32_MAX, 0);
+ case nir_op_umin:
+ return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
+ case nir_op_fmin:
+ return LLVMConstReal(ctx->f32, INFINITY);
+ case nir_op_imax:
+ return LLVMConstInt(ctx->i32, INT32_MIN, 0);
+ case nir_op_umax:
+ return ctx->i32_0;
+ case nir_op_fmax:
+ return LLVMConstReal(ctx->f32, -INFINITY);
+ case nir_op_iand:
+ return LLVMConstInt(ctx->i32, -1, 0);
+ case nir_op_ior:
+ return ctx->i32_0;
+ case nir_op_ixor:
+ return ctx->i32_0;
+ default:
+ unreachable("bad reduction intrinsic");
+ }
+ } else { /* type_size == 64bit */
+ /* Fallback for every remaining size: 64-bit integers and f64. */
+ switch (op) {
+ case nir_op_iadd:
+ return ctx->i64_0;
+ case nir_op_fadd:
+ return ctx->f64_0;
+ case nir_op_imul:
+ return ctx->i64_1;
+ case nir_op_fmul:
+ return ctx->f64_1;
+ case nir_op_imin:
+ return LLVMConstInt(ctx->i64, INT64_MAX, 0);
+ case nir_op_umin:
+ return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+ case nir_op_fmin:
+ return LLVMConstReal(ctx->f64, INFINITY);
+ case nir_op_imax:
+ return LLVMConstInt(ctx->i64, INT64_MIN, 0);
+ case nir_op_umax:
+ return ctx->i64_0;
+ case nir_op_fmax:
+ return LLVMConstReal(ctx->f64, -INFINITY);
+ case nir_op_iand:
+ return LLVMConstInt(ctx->i64, -1, 0);
+ case nir_op_ior:
+ return ctx->i64_0;
+ case nir_op_ixor:
+ return ctx->i64_0;
+ default:
+ unreachable("bad reduction intrinsic");
+ }
+ }
+}
+
+/**
+ * Build the binary operation "lhs op rhs" for one of the reduction ops.
+ *
+ * Integer min/max are emitted as an icmp followed by a select; float min/max
+ * call the llvm.minnum / llvm.maxnum intrinsic whose suffix (f64, f32 or f16)
+ * is picked from the byte size of lhs. Any op not listed reaches
+ * unreachable().
+ */
+static LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs,
+ nir_op op)
+{
+ /* Only consulted by fmin/fmax; a size that is neither 8 nor 4 means f16. */
+ bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
+ bool _32bit = ac_get_type_size(LLVMTypeOf(lhs)) == 4;
+ switch (op) {
+ case nir_op_iadd:
+ return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
+ case nir_op_fadd:
+ return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
+ case nir_op_imul:
+ return LLVMBuildMul(ctx->builder, lhs, rhs, "");
+ case nir_op_fmul:
+ return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
+ case nir_op_imin:
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
+ lhs, rhs, "");
+ case nir_op_umin:
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
+ lhs, rhs, "");
+ case nir_op_fmin:
+ return ac_build_intrinsic(
+ ctx, _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16",
+ _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2,
+ AC_FUNC_ATTR_READNONE);
+ case nir_op_imax:
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
+ lhs, rhs, "");
+ case nir_op_umax:
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
+ lhs, rhs, "");
+ case nir_op_fmax:
+ return ac_build_intrinsic(
+ ctx, _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16",
+ _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2,
+ AC_FUNC_ATTR_READNONE);
+ case nir_op_iand:
+ return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
+ case nir_op_ior:
+ return LLVMBuildOr(ctx->builder, lhs, rhs, "");
+ case nir_op_ixor:
+ return LLVMBuildXor(ctx->builder, lhs, rhs, "");
+ default:
+ unreachable("bad reduction intrinsic");
+ }
}
/**
* \param maxprefix specifies that the result only needs to be correct for a
* prefix of this many threads
* \return src, shifted 1 lane up, and identity shifted into lane 0.
*/
-static LLVMValueRef
-ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
- LLVMValueRef identity, unsigned maxprefix)
-{
- if (ctx->chip_class >= GFX10) {
- /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
- LLVMValueRef active, tmp1, tmp2;
- LLVMValueRef tid = ac_get_thread_id(ctx);
-
- tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
-
- tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
-
- if (maxprefix > 32) {
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
- LLVMConstInt(ctx->i32, 32, false), "");
-
- tmp2 = LLVMBuildSelect(ctx->builder, active,
- ac_build_readlane(ctx, src,
- LLVMConstInt(ctx->i32, 31, false)),
- tmp2, "");
-
- active = LLVMBuildOr(ctx->builder, active,
- LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid,
- LLVMConstInt(ctx->i32, 0x1f, false), ""),
- LLVMConstInt(ctx->i32, 0x10, false), ""), "");
- return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- } else if (maxprefix > 16) {
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
- LLVMConstInt(ctx->i32, 16, false), "");
-
- return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- }
- } else if (ctx->chip_class >= GFX8) {
- return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
- }
-
- /* wavefront shift_right by 1 on SI/CI */
- LLVMValueRef active, tmp1, tmp2;
- LLVMValueRef tid = ac_get_thread_id(ctx);
- tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
- tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
- LLVMConstInt(ctx->i32, 0x4, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
- LLVMConstInt(ctx->i32, 0x8, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
- LLVMConstInt(ctx->i32, 0x10, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
- return LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");
+static LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef identity, unsigned maxprefix)
+{
+ if (ctx->chip_class >= GFX10) {
+ /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
+ LLVMValueRef active, tmp1, tmp2;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+
+ /* tmp1: shift right by one within each row, with identity as the "old" value. */
+ tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
+
+ /* tmp2: permlanex16 of src with an all-ones selector. */
+ tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
+
+ if (maxprefix > 32) {
+ active =
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, false), "");
+
+ /* Lane 32 takes its value from lane 31 via readlane. */
+ tmp2 = LLVMBuildSelect(ctx->builder, active,
+ ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, false)),
+ tmp2, "");
+
+ /* Use tmp2 in lane 32 and in lanes where (tid & 0x1f) == 0x10; tmp1 elsewhere. */
+ active = LLVMBuildOr(
+ ctx->builder, active,
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, false), ""),
+ LLVMConstInt(ctx->i32, 0x10, false), ""),
+ "");
+ return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ } else if (maxprefix > 16) {
+ /* Only lane 16 needs the cross-row value. */
+ active =
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 16, false), "");
+
+ return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ }
+ /* NOTE(review): with maxprefix <= 16 neither branch above returns, so
+ * control falls through to the ds_swizzle sequence below and tmp1/tmp2
+ * are unused - confirm this is intended.
+ */
+ } else if (ctx->chip_class >= GFX8) {
+ return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
+ }
+
+ /* wavefront shift_right by 1 on SI/CI */
+ LLVMValueRef active, tmp1, tmp2;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ /* Start from a quad permute (0, 0, 1, 2), then patch selected lanes below. */
+ tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
+ /* Lanes with (tid & 0x7) == 0x4 take the swizzle with and=0x18, or=0x03. */
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
+ LLVMConstInt(ctx->i32, 0x4, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ /* Lanes with (tid & 0xf) == 0x8 take the swizzle with and=0x10, or=0x07. */
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
+ LLVMConstInt(ctx->i32, 0x8, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ /* Lanes with (tid & 0x1f) == 0x10 take the swizzle with and=0x00, or=0x0f. */
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
+ LLVMConstInt(ctx->i32, 0x10, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ /* Lane 32 takes the value read from lane 31. */
+ tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ /* Lane 0 has no predecessor and receives identity. */
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
+ return LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");
}
/**
* \param maxprefix specifies that the result only needs to be correct for a
* prefix of this many threads
*/
-static LLVMValueRef
-ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
- unsigned maxprefix, bool inclusive)
-{
- LLVMValueRef result, tmp;
-
- if (!inclusive)
- src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix);
-
- result = src;
-
- if (ctx->chip_class <= GFX7) {
- assert(maxprefix == 64);
- LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMValueRef active;
- tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""),
- ctx->i32_0, "");
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
- result = ac_build_alu_op(ctx, result, tmp, op);
- tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""),
- ctx->i32_0, "");
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
- result = ac_build_alu_op(ctx, result, tmp, op);
- tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""),
- ctx->i32_0, "");
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
- result = ac_build_alu_op(ctx, result, tmp, op);
- tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""),
- ctx->i32_0, "");
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
- result = ac_build_alu_op(ctx, result, tmp, op);
- tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 0x0f, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""),
- ctx->i32_0, "");
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
- result = ac_build_alu_op(ctx, result, tmp, op);
- tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0));
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 32, 0), ""),
- ctx->i32_0, "");
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
- result = ac_build_alu_op(ctx, result, tmp, op);
- return result;
- }
-
- if (maxprefix <= 1)
- return result;
- tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 2)
- return result;
- tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 3)
- return result;
- tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 4)
- return result;
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 8)
- return result;
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 16)
- return result;
-
- if (ctx->chip_class >= GFX10) {
- LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMValueRef active;
-
- tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false);
-
- active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
- LLVMBuildAnd(ctx->builder, tid,
- LLVMConstInt(ctx->i32, 16, false), ""),
- ctx->i32_0, "");
-
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
-
- result = ac_build_alu_op(ctx, result, tmp, op);
-
- if (maxprefix <= 32)
- return result;
-
- tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
-
- active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid,
- LLVMConstInt(ctx->i32, 32, false), "");
-
- tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
-
- result = ac_build_alu_op(ctx, result, tmp, op);
- return result;
- }
-
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 32)
- return result;
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- return result;
-}
-
-LLVMValueRef
-ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
-{
- LLVMValueRef result;
-
- if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
- LLVMBuilderRef builder = ctx->builder;
- src = LLVMBuildZExt(builder, src, ctx->i32, "");
- result = ac_build_ballot(ctx, src);
- result = ac_build_mbcnt(ctx, result);
- result = LLVMBuildAdd(builder, result, src, "");
- return result;
- }
-
- ac_build_optimization_barrier(ctx, &src);
-
- LLVMValueRef identity =
- get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
- result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
- LLVMTypeOf(identity), "");
- result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true);
-
- return ac_build_wwm(ctx, result);
-}
-
-LLVMValueRef
-ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
-{
- LLVMValueRef result;
-
- if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
- LLVMBuilderRef builder = ctx->builder;
- src = LLVMBuildZExt(builder, src, ctx->i32, "");
- result = ac_build_ballot(ctx, src);
- result = ac_build_mbcnt(ctx, result);
- return result;
- }
-
- ac_build_optimization_barrier(ctx, &src);
-
- LLVMValueRef identity =
- get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
- result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
- LLVMTypeOf(identity), "");
- result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false);
-
- return ac_build_wwm(ctx, result);
-}
-
-LLVMValueRef
-ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
-{
- if (cluster_size == 1) return src;
- ac_build_optimization_barrier(ctx, &src);
- LLVMValueRef result, swap;
- LLVMValueRef identity = get_reduction_identity(ctx, op,
- ac_get_type_size(LLVMTypeOf(src)));
- result = LLVMBuildBitCast(ctx->builder,
- ac_build_set_inactive(ctx, src, identity),
- LLVMTypeOf(identity), "");
- swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 2) return ac_build_wwm(ctx, result);
-
- swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 4) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX8)
- swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
- else
- swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 8) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX8)
- swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
- else
- swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 16) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX10)
- swap = ac_build_permlane16(ctx, result, 0, true, false);
- else if (ctx->chip_class >= GFX8 && cluster_size != 32)
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
- else
- swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 32) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX8) {
- if (ctx->wave_size == 64) {
- if (ctx->chip_class >= GFX10)
- swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
- else
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
- result = ac_build_alu_op(ctx, result, swap, op);
- result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
- }
-
- return ac_build_wwm(ctx, result);
- } else {
- swap = ac_build_readlane(ctx, result, ctx->i32_0);
- result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
- result = ac_build_alu_op(ctx, result, swap, op);
- return ac_build_wwm(ctx, result);
- }
+static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src,
+ LLVMValueRef identity, unsigned maxprefix, bool inclusive)
+{
+ LLVMValueRef result, tmp;
+
+ /* An exclusive scan is computed as a scan of src shifted up by one lane. */
+ if (!inclusive)
+ src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix);
+
+ result = src;
+
+ if (ctx->chip_class <= GFX7) {
+ /* GFX7 and older: ds_swizzle based path, only written for maxprefix == 64.
+ * Each step fetches a value with ds_swizzle (readlane for the last step),
+ * replaces it by identity in lanes whose tid bit for that step is clear,
+ * and accumulates it into result. The tested bits are 1, 2, 4, 8, 16, 32.
+ */
+ assert(maxprefix == 64);
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMValueRef active;
+ tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""), ctx->i32_0, "");
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""),
+ ctx->i32_0, "");
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""),
+ ctx->i32_0, "");
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""),
+ ctx->i32_0, "");
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 0x0f, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""),
+ ctx->i32_0, "");
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 32, 0), ""),
+ ctx->i32_0, "");
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ return result;
+ }
+
+ /* DPP path: stop as soon as enough steps were emitted for maxprefix lanes.
+ * The first three steps shift the original src by 1, 2 and 3 lanes.
+ */
+ if (maxprefix <= 1)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 2)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 3)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 4)
+ return result;
+ /* From here on the running result is shifted, with bank_mask 0xe, then 0xc. */
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 8)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 16)
+ return result;
+
+ if (ctx->chip_class >= GFX10) {
+ /* GFX10+ uses permlanex16 and readlane here instead of the row_bcast
+ * DPP controls used further down.
+ */
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMValueRef active;
+
+ tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false);
+
+ /* Only lanes with tid bit 16 set accumulate the permuted value. */
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, false), ""),
+ ctx->i32_0, "");
+
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+
+ result = ac_build_alu_op(ctx, result, tmp, op);
+
+ if (maxprefix <= 32)
+ return result;
+
+ /* Lanes 32 and above additionally accumulate the value held by lane 31. */
+ tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
+
+ active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid, LLVMConstInt(ctx->i32, 32, false), "");
+
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ return result;
+ }
+
+ /* Before GFX10: row_bcast15 with row_mask 0xa, then row_bcast31 with row_mask 0xc. */
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 32)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ return result;
+}
+
+/**
+ * Build an inclusive scan of src with operation op across the wave.
+ *
+ * An i1 source combined with iadd takes a shortcut: ballot + mbcnt, plus the
+ * zero-extended source itself. Every other case replaces the value of
+ * inactive lanes by the identity of op, runs ac_build_scan() over
+ * ctx->wave_size lanes and wraps the result with ac_build_wwm().
+ */
+LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
+{
+ LLVMValueRef result;
+
+ if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
+ LLVMBuilderRef builder = ctx->builder;
+ src = LLVMBuildZExt(builder, src, ctx->i32, "");
+ result = ac_build_ballot(ctx, src);
+ result = ac_build_mbcnt(ctx, result);
+ /* Adding src makes the count include the current lane. */
+ result = LLVMBuildAdd(builder, result, src, "");
+ return result;
+ }
+
+ ac_build_optimization_barrier(ctx, &src);
+
+ LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
+ /* The scan operates on values of the identity's type. */
+ result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
+ LLVMTypeOf(identity), "");
+ result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true);
+
+ return ac_build_wwm(ctx, result);
+}
+
+/**
+ * Build an exclusive scan of src with operation op across the wave.
+ *
+ * An i1 source combined with iadd takes a shortcut: ballot + mbcnt of the
+ * zero-extended source. Every other case replaces the value of inactive lanes
+ * by the identity of op, runs ac_build_scan() in exclusive mode over
+ * ctx->wave_size lanes and wraps the result with ac_build_wwm().
+ */
+LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
+{
+ LLVMValueRef result;
+
+ if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
+ LLVMBuilderRef builder = ctx->builder;
+ src = LLVMBuildZExt(builder, src, ctx->i32, "");
+ result = ac_build_ballot(ctx, src);
+ result = ac_build_mbcnt(ctx, result);
+ return result;
+ }
+
+ ac_build_optimization_barrier(ctx, &src);
+
+ LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
+ /* The scan operates on values of the identity's type. */
+ result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
+ LLVMTypeOf(identity), "");
+ result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false);
+
+ return ac_build_wwm(ctx, result);
+}
+
+/**
+ * Build a reduction of src with operation op over clusters of lanes.
+ *
+ * \param cluster_size 1 returns src unchanged; 2, 4, 8, 16 and 32 return
+ * after the matching number of combine steps; any other value runs the
+ * final whole-wave step as well
+ * \return the reduced value wrapped with ac_build_wwm() (src itself when
+ * cluster_size is 1).
+ */
+LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op,
+ unsigned cluster_size)
+{
+ if (cluster_size == 1)
+ return src;
+ ac_build_optimization_barrier(ctx, &src);
+ LLVMValueRef result, swap;
+ LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
+ /* Inactive lanes contribute the identity; work in the identity's type. */
+ result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
+ LLVMTypeOf(identity), "");
+ /* Step 1: combine with the quad swizzle (1, 0, 3, 2). */
+ swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
+ result = ac_build_alu_op(ctx, result, swap, op);
+ if (cluster_size == 2)
+ return ac_build_wwm(ctx, result);
+
+ /* Step 2: combine with the quad swizzle (2, 3, 0, 1). */
+ swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
+ result = ac_build_alu_op(ctx, result, swap, op);
+ if (cluster_size == 4)
+ return ac_build_wwm(ctx, result);
+
+ /* Step 3: DPP row_half_mirror on GFX8+, ds_swizzle with xor_mask 0x04 before. */
+ if (ctx->chip_class >= GFX8)
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
+ else
+ swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
+ result = ac_build_alu_op(ctx, result, swap, op);
+ if (cluster_size == 8)
+ return ac_build_wwm(ctx, result);
+
+ /* Step 4: DPP row_mirror on GFX8+, ds_swizzle with xor_mask 0x08 before. */
+ if (ctx->chip_class >= GFX8)
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
+ else
+ swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
+ result = ac_build_alu_op(ctx, result, swap, op);
+ if (cluster_size == 16)
+ return ac_build_wwm(ctx, result);
+
+ /* Step 5: permlanex16 on GFX10+; row_bcast15 on GFX8+ unless the cluster is
+ * exactly 32 lanes; ds_swizzle with xor_mask 0x10 in the remaining cases.
+ */
+ if (ctx->chip_class >= GFX10)
+ swap = ac_build_permlane16(ctx, result, 0, true, false);
+ else if (ctx->chip_class >= GFX8 && cluster_size != 32)
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
+ else
+ swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
+ result = ac_build_alu_op(ctx, result, swap, op);
+ if (cluster_size == 32)
+ return ac_build_wwm(ctx, result);
+
+ /* Final step for every other cluster_size. */
+ if (ctx->chip_class >= GFX8) {
+ if (ctx->wave_size == 64) {
+ /* Combine with lane 31 (GFX10+) or row_bcast31, then read lane 63. */
+ if (ctx->chip_class >= GFX10)
+ swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
+ else
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
+ result = ac_build_alu_op(ctx, result, swap, op);
+ result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
+ }
+
+ return ac_build_wwm(ctx, result);
+ } else {
+ /* Before GFX8: combine the values read from lanes 0 and 32. */
+ swap = ac_build_readlane(ctx, result, ctx->i32_0);
+ result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
+ result = ac_build_alu_op(ctx, result, swap, op);
+ return ac_build_wwm(ctx, result);
+ }
}
/**
* The source value must be present in the highest lane of the wave, and the
* highest lane must be live.
*/
-void
-ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
- if (ws->maxwaves <= 1)
- return;
+ /* With at most one wave no code is emitted here. */
+ if (ws->maxwaves <= 1)
+ return;
- const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMValueRef tmp;
+ const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMValueRef tmp;
- tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, "");
- ac_build_ifcc(ctx, tmp, 1000);
- LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
- ac_build_endif(ctx, 1000);
+ /* Only the lane whose thread id equals wave_size - 1 stores ws->src, into
+ * the ws->scratch element indexed by ws->waveidx.
+ */
+ tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, "");
+ ac_build_ifcc(ctx, tmp, 1000);
+ LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
+ ac_build_endif(ctx, 1000);
}
/**
*
* The caller must place a barrier between the top and bottom halves.
*/
-void
-ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- const LLVMTypeRef type = LLVMTypeOf(ws->src);
- const LLVMValueRef identity =
- get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
-
- if (ws->maxwaves <= 1) {
- ws->result_reduce = ws->src;
- ws->result_inclusive = ws->src;
- ws->result_exclusive = identity;
- return;
- }
- assert(ws->maxwaves <= 32);
-
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMBasicBlockRef bbs[2];
- LLVMValueRef phivalues_scan[2];
- LLVMValueRef tmp, tmp2;
-
- bbs[0] = LLVMGetInsertBlock(builder);
- phivalues_scan[0] = LLVMGetUndef(type);
-
- if (ws->enable_reduce)
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
- else if (ws->enable_inclusive)
- tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
- else
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
- ac_build_ifcc(ctx, tmp, 1001);
- {
- tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
-
- ac_build_optimization_barrier(ctx, &tmp);
-
- bbs[1] = LLVMGetInsertBlock(builder);
- phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
- }
- ac_build_endif(ctx, 1001);
-
- const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
-
- if (ws->enable_reduce) {
- tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
- ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
- }
- if (ws->enable_inclusive)
- ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
- if (ws->enable_exclusive) {
- tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
- tmp = ac_build_readlane(ctx, scan, tmp);
- tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, "");
- ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
- }
+void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+ const LLVMTypeRef type = LLVMTypeOf(ws->src);
+ const LLVMValueRef identity = get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
+
+ /* Single-wave case: no scratch traffic. The reduce and inclusive results
+  * are ws->src itself and the exclusive result is the identity of ws->op. */
+ if (ws->maxwaves <= 1) {
+ ws->result_reduce = ws->src;
+ ws->result_inclusive = ws->src;
+ ws->result_exclusive = identity;
+ return;
+ }
+ assert(ws->maxwaves <= 32);
+
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMBasicBlockRef bbs[2];
+ LLVMValueRef phivalues_scan[2];
+ LLVMValueRef tmp, tmp2;
+
+ /* Incoming phi value for lanes that skip the branch below: undef. */
+ bbs[0] = LLVMGetInsertBlock(builder);
+ phivalues_scan[0] = LLVMGetUndef(type);
+
+ /* Choose which lanes take part: lanes below numwaves when a reduction is
+  * requested, otherwise lanes up to and including waveidx (inclusive) or
+  * strictly below waveidx (exclusive only). */
+ if (ws->enable_reduce)
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
+ else if (ws->enable_inclusive)
+ tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
+ else
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
+ ac_build_ifcc(ctx, tmp, 1001);
+ {
+ /* Each participating lane loads the scratch entry at its own thread id. */
+ tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
+
+ ac_build_optimization_barrier(ctx, &tmp);
+
+ /* Scan the loaded values with ws->op over ws->maxwaves entries. */
+ bbs[1] = LLVMGetInsertBlock(builder);
+ phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
+ }
+ ac_build_endif(ctx, 1001);
+
+ /* Merge the scanned value with the undef from the skipped path. */
+ const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
+
+ /* Reduction: read the scan from lane numwaves - 1. */
+ if (ws->enable_reduce) {
+ tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
+ ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
+ }
+ /* Inclusive: read the scan from the lane matching this wave's index. */
+ if (ws->enable_inclusive)
+ ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
+ /* Exclusive: read lane waveidx - 1, except that waveidx == 0 selects the
+  * identity instead. */
+ if (ws->enable_exclusive) {
+ tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
+ tmp = ac_build_readlane(ctx, scan, tmp);
+ tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, "");
+ ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
+ }
}
/**
* of the workgroup are live. (This requirement cannot easily be relaxed in a
* useful manner because of the barrier in the algorithm.)
*/
-void
-ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
- ac_build_wg_wavescan_top(ctx, ws);
- ac_build_s_barrier(ctx);
- ac_build_wg_wavescan_bottom(ctx, ws);
+ /* Top half, then a barrier, then the bottom half: the bottom half loads
+  * from ws->scratch, which the top half stores to. */
+ ac_build_wg_wavescan_top(ctx, ws);
+ ac_build_s_barrier(ctx);
+ ac_build_wg_wavescan_bottom(ctx, ws);
}
/**
*
* All lanes must be active when this code runs.
*/
-void
-ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- if (ws->enable_exclusive) {
- ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
- if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
- ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
- ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
- } else {
- ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
- }
-
- bool enable_inclusive = ws->enable_inclusive;
- bool enable_exclusive = ws->enable_exclusive;
- ws->enable_inclusive = false;
- ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
- ac_build_wg_wavescan_top(ctx, ws);
- ws->enable_inclusive = enable_inclusive;
- ws->enable_exclusive = enable_exclusive;
+void ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+ /* Replace ws->src with the scan result for this wave. When an exclusive
+  * result is wanted, the exclusive scan is kept in ws->extra and ws->src
+  * becomes "extra op src"; an i1 source used with nir_op_iadd is
+  * zero-extended to i32 first. Otherwise ws->src becomes the inclusive scan. */
+ if (ws->enable_exclusive) {
+ ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
+ if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
+ ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
+ ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
+ } else {
+ ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
+ }
+
+ /* Run the wave-level top half with inclusive disabled and exclusive
+  * enabled whenever either was requested, then restore the caller's flags. */
+ bool enable_inclusive = ws->enable_inclusive;
+ bool enable_exclusive = ws->enable_exclusive;
+ ws->enable_inclusive = false;
+ ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
+ ac_build_wg_wavescan_top(ctx, ws);
+ ws->enable_inclusive = enable_inclusive;
+ ws->enable_exclusive = enable_exclusive;
}
/**
*
* The caller must place a barrier between the top and bottom halves.
*/
-void
-ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
- bool enable_inclusive = ws->enable_inclusive;
- bool enable_exclusive = ws->enable_exclusive;
- ws->enable_inclusive = false;
- ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
- ac_build_wg_wavescan_bottom(ctx, ws);
- ws->enable_inclusive = enable_inclusive;
- ws->enable_exclusive = enable_exclusive;
+   /* Run the wave-level bottom half with inclusive disabled and exclusive
+    * enabled whenever either was requested, so that ws->result_exclusive
+    * holds the combined value of all preceding waves. The caller's flags
+    * are restored afterwards. */
+   bool enable_inclusive = ws->enable_inclusive;
+   bool enable_exclusive = ws->enable_exclusive;
+   ws->enable_inclusive = false;
+   ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
+   ac_build_wg_wavescan_bottom(ctx, ws);
+   ws->enable_inclusive = enable_inclusive;
+   ws->enable_exclusive = enable_exclusive;
- /* ws->result_reduce is already the correct value */
- if (ws->enable_inclusive)
- ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op);
- if (ws->enable_exclusive)
- ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
+   /* ws->result_reduce is already the correct value */
+
+   /* Inclusive result: prefix of the preceding waves combined with this
+    * lane's in-wave value (ws->src, as left by ac_build_wg_scan_top).
+    * The prefix lives in ws->result_exclusive: the wave-level pass ran with
+    * inclusive disabled, so ws->result_inclusive is either unwritten
+    * (maxwaves > 1) or equal to ws->src (maxwaves <= 1) and must not be
+    * used as the left operand. This has to happen before
+    * ws->result_exclusive is overwritten below. */
+   if (ws->enable_inclusive)
+      ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op);
+   /* Exclusive result: the same prefix combined with the in-wave exclusive
+    * scan saved in ws->extra. */
+   if (ws->enable_exclusive)
+      ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
}
/**
* The caller must ensure that all lanes are active when this code runs
* (WWM is insufficient!), because there is an implied barrier.
*/
-void
-ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- ac_build_wg_scan_top(ctx, ws);
- ac_build_s_barrier(ctx);
- ac_build_wg_scan_bottom(ctx, ws);
-}
-
-LLVMValueRef
-ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
- unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
- unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
- if (ctx->chip_class >= GFX8) {
- return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
- } else {
- return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
- }
-}
-
-LLVMValueRef
-ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
-{
- LLVMTypeRef type = LLVMTypeOf(src);
- LLVMValueRef result;
-
- index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
- src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-
- result = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32,
- (LLVMValueRef []) {index, src}, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
- return LLVMBuildTrunc(ctx->builder, result, type, "");
-}
-
-LLVMValueRef
-ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.frexp.exp.i16.f16";
- type = ctx->i16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.frexp.exp.i32.f32";
- type = ctx->i32;
- } else {
- intr = "llvm.amdgcn.frexp.exp.i32.f64";
- type = ctx->i32;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
-}
-LLVMValueRef
-ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.frexp.mant.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.frexp.mant.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.amdgcn.frexp.mant.f64";
- type = ctx->f64;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef
-ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.canonicalize.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.canonicalize.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.canonicalize.f64";
- type = ctx->f64;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
+void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+ /* Top half, barrier, bottom half, in that order. */
+ ac_build_wg_scan_top(ctx, ws);
+ ac_build_s_barrier(ctx);
+ ac_build_wg_scan_bottom(ctx, ws);
+}
+
+/* Permute src using the selector built by dpp_quad_perm() from lane0..lane3. */
+LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
+                                   unsigned lane1, unsigned lane2, unsigned lane3)
+{
+   /* The same selector is used by both code paths. */
+   const unsigned quad_perm = dpp_quad_perm(lane0, lane1, lane2, lane3);
+
+   /* Before GFX8: ds_swizzle, with bit 15 set in the pattern. */
+   if (ctx->chip_class < GFX8)
+      return ac_build_ds_swizzle(ctx, src, (1 << 15) | quad_perm);
+
+   /* GFX8 and later: DPP with src as both the old value and the source. */
+   return ac_build_dpp(ctx, src, src, quad_perm, 0xf, 0xf, false);
+}
+
+/* Emit llvm.amdgcn.ds.bpermute on src with the given lane index and return
+ * the result truncated back to the type of src. */
+LLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
+{
+   const LLVMTypeRef src_type = LLVMTypeOf(src);
+   LLVMValueRef bpermute_args[2];
+
+   /* Operand 0: the index multiplied by 4. Operand 1: src zero-extended to i32. */
+   bpermute_args[0] = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
+   bpermute_args[1] = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+   LLVMValueRef permuted =
+      ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, bpermute_args, 2,
+                         AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+   return LLVMBuildTrunc(ctx->builder, permuted, src_type, "");
+}
+
+/* Emit the llvm.amdgcn.frexp.exp intrinsic matching bitsize for src0.
+ *
+ * bitsize 16 yields an i16 result; bitsize 32 and every other value (treated
+ * as 64) yield an i32 result.
+ */
+LLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
+{
+   LLVMTypeRef type;
+   /* Points at string literals only, so it must be pointer-to-const. */
+   const char *intr;
+
+   if (bitsize == 16) {
+      intr = "llvm.amdgcn.frexp.exp.i16.f16";
+      type = ctx->i16;
+   } else if (bitsize == 32) {
+      intr = "llvm.amdgcn.frexp.exp.i32.f32";
+      type = ctx->i32;
+   } else {
+      intr = "llvm.amdgcn.frexp.exp.i32.f64";
+      type = ctx->i32;
+   }
+
+   LLVMValueRef params[] = {
+      src0,
+   };
+   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
+}
+/* Emit the llvm.amdgcn.frexp.mant intrinsic matching bitsize for src0.
+ *
+ * The result type is f16 for bitsize 16, f32 for bitsize 32, and f64 for
+ * every other value.
+ */
+LLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
+{
+   LLVMTypeRef type;
+   /* Points at string literals only, so it must be pointer-to-const. */
+   const char *intr;
+
+   if (bitsize == 16) {
+      intr = "llvm.amdgcn.frexp.mant.f16";
+      type = ctx->f16;
+   } else if (bitsize == 32) {
+      intr = "llvm.amdgcn.frexp.mant.f32";
+      type = ctx->f32;
+   } else {
+      intr = "llvm.amdgcn.frexp.mant.f64";
+      type = ctx->f64;
+   }
+
+   LLVMValueRef params[] = {
+      src0,
+   };
+   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
+}
+
+/* Emit the llvm.canonicalize intrinsic matching bitsize for src0.
+ *
+ * The result type is f16 for bitsize 16, f32 for bitsize 32, and f64 for
+ * every other value.
+ */
+LLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
+{
+   LLVMTypeRef type;
+   /* Points at string literals only, so it must be pointer-to-const. */
+   const char *intr;
+
+   if (bitsize == 16) {
+      intr = "llvm.canonicalize.f16";
+      type = ctx->f16;
+   } else if (bitsize == 32) {
+      intr = "llvm.canonicalize.f32";
+      type = ctx->f32;
+   } else {
+      intr = "llvm.canonicalize.f64";
+      type = ctx->f64;
+   }
+
+   LLVMValueRef params[] = {
+      src0,
+   };
+   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
}
/*
* and works out the X and Y derivatives.
* it returns DDX(I), DDX(J), DDY(I), DDY(J).
*/
-LLVMValueRef
-ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
-{
- LLVMValueRef result[4], a;
- unsigned i;
-
- for (i = 0; i < 2; i++) {
- a = LLVMBuildExtractElement(ctx->builder, interp_ij,
- LLVMConstInt(ctx->i32, i, false), "");
- result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a);
- result[2+i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a);
- }
- return ac_build_gather_values(ctx, result, 4);
-}
-
-LLVMValueRef
-ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
-{
- LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
- ctx->i1, NULL, 0,
- AC_FUNC_ATTR_READNONE);
- result = LLVMBuildNot(ctx->builder, result, "");
- return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
-}
-
-LLVMValueRef
-ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
-{
- if (!ctx->postponed_kill)
- return ac_build_load_helper_invocation(ctx);
-
- /* !(exact && postponed) */
- LLVMValueRef exact = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
- ctx->i1, NULL, 0,
- AC_FUNC_ATTR_READNONE);
-
- LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
- LLVMValueRef result = LLVMBuildAnd(ctx->builder, exact, postponed, "");
-
- return LLVMBuildSelect(ctx->builder, result, ctx->i32_0,
- LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), "");
-}
-
-LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
- LLVMValueRef *args, unsigned num_args)
-{
- LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, "");
- LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
- return ret;
-}
-
-void
-ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
- LLVMValueRef stencil, LLVMValueRef samplemask,
- struct ac_export_args *args)
-{
- unsigned mask = 0;
- unsigned format = ac_get_spi_shader_z_format(depth != NULL,
- stencil != NULL,
- samplemask != NULL);
-
- assert(depth || stencil || samplemask);
-
- memset(args, 0, sizeof(*args));
-
- args->valid_mask = 1; /* whether the EXEC mask is valid */
- args->done = 1; /* DONE bit */
-
- /* Specify the target we are exporting */
- args->target = V_008DFC_SQ_EXP_MRTZ;
-
- args->compr = 0; /* COMP flag */
- args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
- args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
- args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
- args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
-
- if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
- assert(!depth);
- args->compr = 1; /* COMPR flag */
-
- if (stencil) {
- /* Stencil should be in X[23:16]. */
- stencil = ac_to_integer(ctx, stencil);
- stencil = LLVMBuildShl(ctx->builder, stencil,
- LLVMConstInt(ctx->i32, 16, 0), "");
- args->out[0] = ac_to_float(ctx, stencil);
- mask |= 0x3;
- }
- if (samplemask) {
- /* SampleMask should be in Y[15:0]. */
- args->out[1] = samplemask;
- mask |= 0xc;
- }
- } else {
- if (depth) {
- args->out[0] = depth;
- mask |= 0x1;
- }
- if (stencil) {
- args->out[1] = stencil;
- mask |= 0x2;
- }
- if (samplemask) {
- args->out[2] = samplemask;
- mask |= 0x4;
- }
- }
-
- /* GFX6 (except OLAND and HAINAN) has a bug that it only looks
- * at the X writemask component. */
- if (ctx->chip_class == GFX6 &&
- ctx->family != CHIP_OLAND &&
- ctx->family != CHIP_HAINAN)
- mask |= 0x1;
-
- /* Specify which components to enable */
- args->enabled_channels = mask;
+/* For each of the first two elements of interp_ij, build the derivatives
+ * selected by ac_build_ddxy() index 1 and index 2, and gather the four
+ * values into one vector. */
+LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
+{
+   LLVMValueRef derivs[4];
+
+   for (unsigned chan = 0; chan < 2; chan++) {
+      LLVMValueRef chan_idx = LLVMConstInt(ctx->i32, chan, false);
+      LLVMValueRef coord = LLVMBuildExtractElement(ctx->builder, interp_ij, chan_idx, "");
+
+      /* Slots 0..1 take the index-1 derivative, slots 2..3 the index-2 one. */
+      derivs[chan] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, coord);
+      derivs[chan + 2] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, coord);
+   }
+
+   return ac_build_gather_values(ctx, derivs, 4);
+}
+
+/* Return the negation of llvm.amdgcn.ps.live, sign-extended from i1 to i32. */
+LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
+{
+   LLVMValueRef live =
+      ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE);
+   LLVMValueRef not_live = LLVMBuildNot(ctx->builder, live, "");
+
+   return LLVMBuildSExt(ctx->builder, not_live, ctx->i32, "");
+}
+
+/* Like ac_build_load_helper_invocation(), but when ctx->postponed_kill is set
+ * the value loaded from it is also taken into account. */
+LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
+{
+   if (ctx->postponed_kill) {
+      /* !(exact && postponed) */
+      LLVMValueRef live =
+         ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE);
+      LLVMValueRef kill_flag = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
+      LLVMValueRef live_and_flag = LLVMBuildAnd(ctx->builder, live, kill_flag, "");
+      LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);
+
+      /* 0 when both bits are set, all ones otherwise. */
+      return LLVMBuildSelect(ctx->builder, live_and_flag, ctx->i32_0, all_ones, "");
+   }
+
+   return ac_build_load_helper_invocation(ctx);
+}
+
+/* Build a call to func and give the call instruction the calling convention
+ * of the callee. */
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
+                           unsigned num_args)
+{
+   LLVMValueRef call_inst = LLVMBuildCall(ctx->builder, func, args, num_args, "");
+   const unsigned callee_conv = LLVMGetFunctionCallConv(func);
+
+   LLVMSetInstructionCallConv(call_inst, callee_conv);
+   return call_inst;
+}
+
+/* Fill *args for an export to the MRTZ target from the optional depth,
+ * stencil and samplemask values (a NULL value means "not exported"; at least
+ * one must be non-NULL). */
+void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask, struct ac_export_args *args)
+{
+ unsigned mask = 0;
+ unsigned format = ac_get_spi_shader_z_format(depth != NULL, stencil != NULL, samplemask != NULL);
+
+ assert(depth || stencil || samplemask);
+
+ memset(args, 0, sizeof(*args));
+
+ args->valid_mask = 1; /* whether the EXEC mask is valid */
+ args->done = 1; /* DONE bit */
+
+ /* Specify the target we are exporting */
+ args->target = V_008DFC_SQ_EXP_MRTZ;
+
+ args->compr = 0; /* COMPR flag */
+ args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
+ args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
+ args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
+ args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
+
+ if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+ /* Compressed layout: depth is not allowed here. */
+ assert(!depth);
+ args->compr = 1; /* COMPR flag */
+
+ if (stencil) {
+ /* Stencil should be in X[23:16]. */
+ stencil = ac_to_integer(ctx, stencil);
+ stencil = LLVMBuildShl(ctx->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), "");
+ args->out[0] = ac_to_float(ctx, stencil);
+ mask |= 0x3;
+ }
+ if (samplemask) {
+ /* SampleMask should be in Y[15:0]. */
+ args->out[1] = samplemask;
+ mask |= 0xc;
+ }
+ } else {
+ /* Uncompressed layout: one output and one mask bit per value. */
+ if (depth) {
+ args->out[0] = depth;
+ mask |= 0x1;
+ }
+ if (stencil) {
+ args->out[1] = stencil;
+ mask |= 0x2;
+ }
+ if (samplemask) {
+ args->out[2] = samplemask;
+ mask |= 0x4;
+ }
+ }
+
+ /* GFX6 (except OLAND and HAINAN) has a bug that it only looks
+ * at the X writemask component. */
+ if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN)
+ mask |= 0x1;
+
+ /* Specify which components to enable */
+ args->enabled_channels = mask;
}
/* Send GS Alloc Req message from the first wave of the group to SPI.
* - bits 12..22: primitives in group
*/
void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id,
- LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt)
-{
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef tmp;
- bool export_dummy_prim = false;
-
- /* HW workaround for a GPU hang with 100% culling.
- * We always have to export at least 1 primitive.
- * Export a degenerate triangle using vertex 0 for all 3 vertices.
- */
- if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
- assert(vtx_cnt == ctx->i32_0);
- prim_cnt = ctx->i32_1;
- vtx_cnt = ctx->i32_1;
- export_dummy_prim = true;
- }
-
- ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
-
- tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false),"");
- tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
- ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
-
- if (export_dummy_prim) {
- struct ac_ngg_prim prim = {};
- /* The vertex indices are 0,0,0. */
- prim.passthrough = ctx->i32_0;
-
- struct ac_export_args pos = {};
- pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
- pos.target = V_008DFC_SQ_EXP_POS;
- pos.enabled_channels = 0xf;
- pos.done = true;
-
- ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx),
- ctx->i32_0, ""), 5021);
- ac_build_export_prim(ctx, &prim);
- ac_build_export(ctx, &pos);
- ac_build_endif(ctx, 5021);
- }
-
- ac_build_endif(ctx, 5020);
-}
-
-LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx,
- const struct ac_ngg_prim *prim)
-{
- /* The prim export format is:
- * - bits 0..8: index 0
- * - bit 9: edge flag 0
- * - bits 10..18: index 1
- * - bit 19: edge flag 1
- * - bits 20..28: index 2
- * - bit 29: edge flag 2
- * - bit 31: null primitive (skip)
- */
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, "");
- LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), "");
-
- for (unsigned i = 0; i < prim->num_vertices; ++i) {
- tmp = LLVMBuildShl(builder, prim->index[i],
- LLVMConstInt(ctx->i32, 10 * i, false), "");
- result = LLVMBuildOr(builder, result, tmp, "");
- tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->i32, "");
- tmp = LLVMBuildShl(builder, tmp,
- LLVMConstInt(ctx->i32, 10 * i + 9, false), "");
- result = LLVMBuildOr(builder, result, tmp, "");
- }
- return result;
-}
-
-void ac_build_export_prim(struct ac_llvm_context *ctx,
- const struct ac_ngg_prim *prim)
-{
- struct ac_export_args args;
-
- if (prim->passthrough) {
- args.out[0] = prim->passthrough;
- } else {
- args.out[0] = ac_pack_prim_export(ctx, prim);
- }
-
- args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, "");
- args.out[1] = LLVMGetUndef(ctx->f32);
- args.out[2] = LLVMGetUndef(ctx->f32);
- args.out[3] = LLVMGetUndef(ctx->f32);
-
- args.target = V_008DFC_SQ_EXP_PRIM;
- args.enabled_channels = 1;
- args.done = true;
- args.valid_mask = false;
- args.compr = false;
-
- ac_build_export(ctx, &args);
-}
-
-static LLVMTypeRef
-arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
-{
- if (type == AC_ARG_FLOAT) {
- return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
- } else if (type == AC_ARG_INT) {
- return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
- } else {
- LLVMTypeRef ptr_type;
- switch (type) {
- case AC_ARG_CONST_PTR:
- ptr_type = ctx->i8;
- break;
- case AC_ARG_CONST_FLOAT_PTR:
- ptr_type = ctx->f32;
- break;
- case AC_ARG_CONST_PTR_PTR:
- ptr_type = ac_array_in_const32_addr_space(ctx->i8);
- break;
- case AC_ARG_CONST_DESC_PTR:
- ptr_type = ctx->v4i32;
- break;
- case AC_ARG_CONST_IMAGE_PTR:
- ptr_type = ctx->v8i32;
- break;
- default:
- unreachable("unknown arg type");
- }
- if (size == 1) {
- return ac_array_in_const32_addr_space(ptr_type);
- } else {
- assert(size == 2);
- return ac_array_in_const_addr_space(ptr_type);
- }
- }
-}
-
-LLVMValueRef
-ac_build_main(const struct ac_shader_args *args,
- struct ac_llvm_context *ctx,
- enum ac_llvm_calling_convention convention,
- const char *name, LLVMTypeRef ret_type,
- LLVMModuleRef module)
-{
- LLVMTypeRef arg_types[AC_MAX_ARGS];
-
- for (unsigned i = 0; i < args->arg_count; i++) {
- arg_types[i] = arg_llvm_type(args->args[i].type,
- args->args[i].size, ctx);
- }
-
- LLVMTypeRef main_function_type =
- LLVMFunctionType(ret_type, arg_types, args->arg_count, 0);
-
- LLVMValueRef main_function =
- LLVMAddFunction(module, name, main_function_type);
- LLVMBasicBlockRef main_function_body =
- LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body");
- LLVMPositionBuilderAtEnd(ctx->builder, main_function_body);
-
- LLVMSetFunctionCallConv(main_function, convention);
- for (unsigned i = 0; i < args->arg_count; ++i) {
- LLVMValueRef P = LLVMGetParam(main_function, i);
-
- if (args->args[i].file != AC_ARG_SGPR)
- continue;
-
- ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG);
-
- if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
- ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
- ac_add_attr_dereferenceable(P, UINT64_MAX);
- ac_add_attr_alignment(P, 32);
- }
- }
-
- ctx->main_function = main_function;
-
- if (LLVM_VERSION_MAJOR >= 11) {
- /* Enable denormals for FP16 and FP64: */
- LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math",
- "ieee,ieee");
- /* Disable denormals for FP32: */
- LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
- "preserve-sign,preserve-sign");
- }
- return main_function;
+ LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt)
+{
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef tmp;
+ bool export_dummy_prim = false;
+
+ /* HW workaround for a GPU hang with 100% culling.
+ * We always have to export at least 1 primitive.
+ * Export a degenerate triangle using vertex 0 for all 3 vertices.
+ *
+ * The test compares prim_cnt against the ctx->i32_0 value itself, so it
+ * only triggers when the caller passed that constant.
+ */
+ if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
+ assert(vtx_cnt == ctx->i32_0);
+ prim_cnt = ctx->i32_1;
+ vtx_cnt = ctx->i32_1;
+ export_dummy_prim = true;
+ }
+
+ /* Everything below is emitted under "wave_id == 0". */
+ ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
+
+ /* Message payload: (prim_cnt << 12) | vtx_cnt. */
+ tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), "");
+ tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
+ ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
+
+ if (export_dummy_prim) {
+ struct ac_ngg_prim prim = {};
+ /* The vertex indices are 0,0,0. */
+ prim.passthrough = ctx->i32_0;
+
+ /* Position export with all four channels set to ctx->f32_0. */
+ struct ac_export_args pos = {};
+ pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
+ pos.target = V_008DFC_SQ_EXP_POS;
+ pos.enabled_channels = 0xf;
+ pos.done = true;
+
+ /* Only the thread with id 0 emits the dummy primitive and position. */
+ ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""),
+ 5021);
+ ac_build_export_prim(ctx, &prim);
+ ac_build_export(ctx, &pos);
+ ac_build_endif(ctx, 5021);
+ }
+
+ ac_build_endif(ctx, 5020);
+}
+
+/* Pack prim->isnull and, for each of prim->num_vertices vertices, its index
+ * and edge flag into a single i32 value. */
+LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
+{
+   /* The prim export format is:
+    *  - bits 0..8: index 0
+    *  - bit 9: edge flag 0
+    *  - bits 10..18: index 1
+    *  - bit 19: edge flag 1
+    *  - bits 20..28: index 2
+    *  - bit 29: edge flag 2
+    *  - bit 31: null primitive (skip)
+    */
+   LLVMBuilderRef builder = ctx->builder;
+
+   /* Start with the null-primitive flag shifted to bit 31. */
+   LLVMValueRef packed = LLVMBuildZExt(builder, prim->isnull, ctx->i32, "");
+   packed = LLVMBuildShl(builder, packed, LLVMConstInt(ctx->i32, 31, false), "");
+
+   for (unsigned v = 0; v < prim->num_vertices; v++) {
+      /* Each vertex owns a 10-bit field: index at the bottom, edge flag at bit 9. */
+      const unsigned index_shift = 10 * v;
+      const unsigned edgeflag_shift = index_shift + 9;
+      LLVMValueRef field;
+
+      field = LLVMBuildShl(builder, prim->index[v], LLVMConstInt(ctx->i32, index_shift, false), "");
+      packed = LLVMBuildOr(builder, packed, field, "");
+
+      field = LLVMBuildZExt(builder, prim->edgeflag[v], ctx->i32, "");
+      field = LLVMBuildShl(builder, field, LLVMConstInt(ctx->i32, edgeflag_shift, false), "");
+      packed = LLVMBuildOr(builder, packed, field, "");
+   }
+
+   return packed;
+}
+
+/* Export one primitive to the PRIM target: prim->passthrough when it is set,
+ * otherwise the value packed by ac_pack_prim_export(). */
+void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
+{
+   struct ac_export_args args;
+   LLVMValueRef payload = prim->passthrough ? prim->passthrough : ac_pack_prim_export(ctx, prim);
+
+   /* Only channel 0 is enabled; it carries the payload bitcast to f32. */
+   args.out[0] = LLVMBuildBitCast(ctx->builder, payload, ctx->f32, "");
+   for (unsigned chan = 1; chan < 4; chan++)
+      args.out[chan] = LLVMGetUndef(ctx->f32);
+
+   args.target = V_008DFC_SQ_EXP_PRIM;
+   args.enabled_channels = 1;
+   args.compr = false;
+   args.valid_mask = false;
+   args.done = true;
+
+   ac_build_export(ctx, &args);
+}
+
+/* Map a shader argument description (type and size) to its LLVM type. */
+static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
+{
+   LLVMTypeRef pointee;
+
+   /* Plain values: a scalar for size 1, otherwise a vector of `size` elements. */
+   if (type == AC_ARG_FLOAT)
+      return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
+   if (type == AC_ARG_INT)
+      return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
+
+   /* Everything else is a pointer kind; pick the element type first. */
+   switch (type) {
+   case AC_ARG_CONST_PTR:
+      pointee = ctx->i8;
+      break;
+   case AC_ARG_CONST_FLOAT_PTR:
+      pointee = ctx->f32;
+      break;
+   case AC_ARG_CONST_PTR_PTR:
+      pointee = ac_array_in_const32_addr_space(ctx->i8);
+      break;
+   case AC_ARG_CONST_DESC_PTR:
+      pointee = ctx->v4i32;
+      break;
+   case AC_ARG_CONST_IMAGE_PTR:
+      pointee = ctx->v8i32;
+      break;
+   default:
+      unreachable("unknown arg type");
+   }
+
+   /* Size 1 uses the const32 address space helper, size 2 the const one. */
+   if (size != 1) {
+      assert(size == 2);
+      return ac_array_in_const_addr_space(pointee);
+   }
+   return ac_array_in_const32_addr_space(pointee);
+}
+
+/* Add a function called `name` to `module`, with return type ret_type and one
+ * parameter per entry of args->args, append a "main_body" block to it and
+ * position ctx->builder at the end of that block. The function is stored in
+ * ctx->main_function and returned. */
+LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
+ enum ac_llvm_calling_convention convention, const char *name,
+ LLVMTypeRef ret_type, LLVMModuleRef module)
+{
+ LLVMTypeRef arg_types[AC_MAX_ARGS];
+
+ /* Translate each argument description into an LLVM parameter type. */
+ for (unsigned i = 0; i < args->arg_count; i++) {
+ arg_types[i] = arg_llvm_type(args->args[i].type, args->args[i].size, ctx);
+ }
+
+ LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, args->arg_count, 0);
+
+ LLVMValueRef main_function = LLVMAddFunction(module, name, main_function_type);
+ LLVMBasicBlockRef main_function_body =
+ LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body");
+ LLVMPositionBuilderAtEnd(ctx->builder, main_function_body);
+
+ LLVMSetFunctionCallConv(main_function, convention);
+ /* Only SGPR arguments receive attributes: all of them get INREG, and
+ * pointer-typed ones additionally get NOALIAS, dereferenceable(UINT64_MAX)
+ * and an alignment of 32.
+ * NOTE(review): the attribute index is i + 1, presumably because index 0
+ * does not address a parameter; confirm against ac_add_function_attr. */
+ for (unsigned i = 0; i < args->arg_count; ++i) {
+ LLVMValueRef P = LLVMGetParam(main_function, i);
+
+ if (args->args[i].file != AC_ARG_SGPR)
+ continue;
+
+ ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG);
+
+ if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
+ ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
+ ac_add_attr_dereferenceable(P, UINT64_MAX);
+ ac_add_attr_alignment(P, 32);
+ }
+ }
+
+ ctx->main_function = main_function;
+
+ /* The denormal-mode function attributes are only set for LLVM 11 and newer. */
+ if (LLVM_VERSION_MAJOR >= 11) {
+ /* Enable denormals for FP16 and FP64: */
+ LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee");
+ /* Disable denormals for FP32: */
+ LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
+ "preserve-sign,preserve-sign");
+ }
+ return main_function;
}
void ac_build_s_endpgm(struct ac_llvm_context *ctx)
{
- LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
- LLVMValueRef code = LLVMConstInlineAsm(calltype, "s_endpgm", "", true, false);
- LLVMBuildCall(ctx->builder, code, NULL, 0, "");
+ LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+ LLVMValueRef code = LLVMConstInlineAsm(calltype, "s_endpgm", "", true, false);
+ LLVMBuildCall(ctx->builder, code, NULL, 0, "");
}
-LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx,
- LLVMValueRef mask, LLVMValueRef index)
+LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef index)
{
- LLVMBuilderRef builder = ctx->builder;
- LLVMTypeRef type = LLVMTypeOf(mask);
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMTypeRef type = LLVMTypeOf(mask);
- LLVMValueRef bit = LLVMBuildShl(builder, LLVMConstInt(type, 1, 0),
- LLVMBuildZExt(builder, index, type, ""), "");
- LLVMValueRef prefix_bits = LLVMBuildSub(builder, bit, LLVMConstInt(type, 1, 0), "");
- LLVMValueRef prefix_mask = LLVMBuildAnd(builder, mask, prefix_bits, "");
- return ac_build_bit_count(ctx, prefix_mask);
+ LLVMValueRef bit =
+ LLVMBuildShl(builder, LLVMConstInt(type, 1, 0), LLVMBuildZExt(builder, index, type, ""), "");
+ LLVMValueRef prefix_bits = LLVMBuildSub(builder, bit, LLVMConstInt(type, 1, 0), "");
+ LLVMValueRef prefix_mask = LLVMBuildAnd(builder, mask, prefix_bits, "");
+ return ac_build_bit_count(ctx, prefix_mask);
}
/* Compute the prefix sum of the "mask" bit array with 128 elements (bits). */
-LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx,
- LLVMValueRef mask[2], LLVMValueRef index)
+LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx, LLVMValueRef mask[2],
+ LLVMValueRef index)
{
- LLVMBuilderRef builder = ctx->builder;
+ LLVMBuilderRef builder = ctx->builder;
#if 0
/* Reference version using i128. */
LLVMValueRef input_mask =
return ac_prefix_bitcount(ctx, input_mask, index);
#else
- /* Optimized version using 2 64-bit masks. */
- LLVMValueRef is_hi, is_0, c64, c128, all_bits;
- LLVMValueRef prefix_mask[2], shift[2], mask_bcnt0, prefix_bcnt[2];
-
- /* Compute the 128-bit prefix mask. */
- c64 = LLVMConstInt(ctx->i32, 64, 0);
- c128 = LLVMConstInt(ctx->i32, 128, 0);
- all_bits = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
- /* The first index that can have non-zero high bits in the prefix mask is 65. */
- is_hi = LLVMBuildICmp(builder, LLVMIntUGT, index, c64, "");
- is_0 = LLVMBuildICmp(builder, LLVMIntEQ, index, ctx->i32_0, "");
- mask_bcnt0 = ac_build_bit_count(ctx, mask[0]);
-
- for (unsigned i = 0; i < 2; i++) {
- shift[i] = LLVMBuildSub(builder, i ? c128 : c64, index, "");
- /* For i==0, index==0, the right shift by 64 doesn't give the desired result,
- * so we handle it by the is_0 select.
- * For i==1, index==64, same story, so we handle it by the last is_hi select.
- * For i==0, index==64, we shift by 0, which is what we want.
- */
- prefix_mask[i] = LLVMBuildLShr(builder, all_bits,
- LLVMBuildZExt(builder, shift[i], ctx->i64, ""), "");
- prefix_mask[i] = LLVMBuildAnd(builder, mask[i], prefix_mask[i], "");
- prefix_bcnt[i] = ac_build_bit_count(ctx, prefix_mask[i]);
- }
-
- prefix_bcnt[0] = LLVMBuildSelect(builder, is_0, ctx->i32_0, prefix_bcnt[0], "");
- prefix_bcnt[0] = LLVMBuildSelect(builder, is_hi, mask_bcnt0, prefix_bcnt[0], "");
- prefix_bcnt[1] = LLVMBuildSelect(builder, is_hi, prefix_bcnt[1], ctx->i32_0, "");
-
- return LLVMBuildAdd(builder, prefix_bcnt[0], prefix_bcnt[1], "");
+ /* Optimized version using 2 64-bit masks. */
+ LLVMValueRef is_hi, is_0, c64, c128, all_bits;
+ LLVMValueRef prefix_mask[2], shift[2], mask_bcnt0, prefix_bcnt[2];
+
+ /* Compute the 128-bit prefix mask. */
+ c64 = LLVMConstInt(ctx->i32, 64, 0);
+ c128 = LLVMConstInt(ctx->i32, 128, 0);
+ all_bits = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+ /* The first index that can have non-zero high bits in the prefix mask is 65. */
+ is_hi = LLVMBuildICmp(builder, LLVMIntUGT, index, c64, "");
+ is_0 = LLVMBuildICmp(builder, LLVMIntEQ, index, ctx->i32_0, "");
+ mask_bcnt0 = ac_build_bit_count(ctx, mask[0]);
+
+ for (unsigned i = 0; i < 2; i++) {
+ shift[i] = LLVMBuildSub(builder, i ? c128 : c64, index, "");
+ /* For i==0, index==0, the right shift by 64 doesn't give the desired result,
+ * so we handle it by the is_0 select.
+ * For i==1, index==64, same story, so we handle it by the last is_hi select.
+ * For i==0, index==64, we shift by 0, which is what we want.
+ */
+ prefix_mask[i] =
+ LLVMBuildLShr(builder, all_bits, LLVMBuildZExt(builder, shift[i], ctx->i64, ""), "");
+ prefix_mask[i] = LLVMBuildAnd(builder, mask[i], prefix_mask[i], "");
+ prefix_bcnt[i] = ac_build_bit_count(ctx, prefix_mask[i]);
+ }
+
+ prefix_bcnt[0] = LLVMBuildSelect(builder, is_0, ctx->i32_0, prefix_bcnt[0], "");
+ prefix_bcnt[0] = LLVMBuildSelect(builder, is_hi, mask_bcnt0, prefix_bcnt[0], "");
+ prefix_bcnt[1] = LLVMBuildSelect(builder, is_hi, prefix_bcnt[1], ctx->i32_0, "");
+
+ return LLVMBuildAdd(builder, prefix_bcnt[0], prefix_bcnt[1], "");
#endif
}
* Convert triangle strip indices to triangle indices. This is used to decompose
* triangle strips into triangles.
*/
-void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx,
- LLVMValueRef is_odd,
- LLVMValueRef flatshade_first,
- LLVMValueRef index[3])
-{
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef out[3];
-
- /* We need to change the vertex order for odd triangles to get correct
- * front/back facing by swapping 2 vertex indices, but we also have to
- * keep the provoking vertex in the same place.
- *
- * If the first vertex is provoking, swap index 1 and 2.
- * If the last vertex is provoking, swap index 0 and 1.
- */
- out[0] = LLVMBuildSelect(builder, flatshade_first,
- index[0],
- LLVMBuildSelect(builder, is_odd,
- index[1], index[0], ""), "");
- out[1] = LLVMBuildSelect(builder, flatshade_first,
- LLVMBuildSelect(builder, is_odd,
- index[2], index[1], ""),
- LLVMBuildSelect(builder, is_odd,
- index[0], index[1], ""), "");
- out[2] = LLVMBuildSelect(builder, flatshade_first,
- LLVMBuildSelect(builder, is_odd,
- index[1], index[2], ""),
- index[2], "");
- memcpy(index, out, sizeof(out));
+void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd,
+ LLVMValueRef flatshade_first,
+ LLVMValueRef index[3])
+{
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef out[3];
+
+ /* We need to change the vertex order for odd triangles to get correct
+ * front/back facing by swapping 2 vertex indices, but we also have to
+ * keep the provoking vertex in the same place.
+ *
+ * If the first vertex is provoking, swap index 1 and 2.
+ * If the last vertex is provoking, swap index 0 and 1.
+ */
+ out[0] = LLVMBuildSelect(builder, flatshade_first, index[0],
+ LLVMBuildSelect(builder, is_odd, index[1], index[0], ""), "");
+ out[1] = LLVMBuildSelect(builder, flatshade_first,
+ LLVMBuildSelect(builder, is_odd, index[2], index[1], ""),
+ LLVMBuildSelect(builder, is_odd, index[0], index[1], ""), "");
+ out[2] = LLVMBuildSelect(builder, flatshade_first,
+ LLVMBuildSelect(builder, is_odd, index[1], index[2], ""), index[2], "");
+ memcpy(index, out, sizeof(out));
}
#ifndef AC_LLVM_BUILD_H
#define AC_LLVM_BUILD_H
-#include <stdbool.h>
-#include <llvm-c/Core.h>
-#include "compiler/nir/nir.h"
-#include "amd_family.h"
-#include "ac_shader_util.h"
-#include "ac_shader_args.h"
#include "ac_shader_abi.h"
+#include "ac_shader_args.h"
+#include "ac_shader_util.h"
+#include "amd_family.h"
+#include "compiler/nir/nir.h"
+#include <llvm-c/Core.h>
+
+#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
-enum {
- AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
- AC_ADDR_SPACE_GLOBAL = 1,
- AC_ADDR_SPACE_GDS = 2,
- AC_ADDR_SPACE_LDS = 3,
- AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
- AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
+enum
+{
+ AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
+ AC_ADDR_SPACE_GLOBAL = 1,
+ AC_ADDR_SPACE_GDS = 2,
+ AC_ADDR_SPACE_LDS = 3,
+ AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
+ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
-#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
-#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
-#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
+#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
struct ac_llvm_flow;
struct ac_llvm_compiler;
enum ac_float_mode;
struct ac_llvm_flow_state {
- struct ac_llvm_flow *stack;
- unsigned depth_max;
- unsigned depth;
+ struct ac_llvm_flow *stack;
+ unsigned depth_max;
+ unsigned depth;
};
struct ac_llvm_context {
- LLVMContextRef context;
- LLVMModuleRef module;
- LLVMBuilderRef builder;
-
- LLVMValueRef main_function;
-
- LLVMTypeRef voidt;
- LLVMTypeRef i1;
- LLVMTypeRef i8;
- LLVMTypeRef i16;
- LLVMTypeRef i32;
- LLVMTypeRef i64;
- LLVMTypeRef i128;
- LLVMTypeRef intptr;
- LLVMTypeRef f16;
- LLVMTypeRef f32;
- LLVMTypeRef f64;
- LLVMTypeRef v2i16;
- LLVMTypeRef v4i16;
- LLVMTypeRef v2f16;
- LLVMTypeRef v4f16;
- LLVMTypeRef v2i32;
- LLVMTypeRef v3i32;
- LLVMTypeRef v4i32;
- LLVMTypeRef v2f32;
- LLVMTypeRef v3f32;
- LLVMTypeRef v4f32;
- LLVMTypeRef v8i32;
- LLVMTypeRef iN_wavemask;
- LLVMTypeRef iN_ballotmask;
-
- LLVMValueRef i8_0;
- LLVMValueRef i8_1;
- LLVMValueRef i16_0;
- LLVMValueRef i16_1;
- LLVMValueRef i32_0;
- LLVMValueRef i32_1;
- LLVMValueRef i64_0;
- LLVMValueRef i64_1;
- LLVMValueRef i128_0;
- LLVMValueRef i128_1;
- LLVMValueRef f16_0;
- LLVMValueRef f16_1;
- LLVMValueRef f32_0;
- LLVMValueRef f32_1;
- LLVMValueRef f64_0;
- LLVMValueRef f64_1;
- LLVMValueRef i1true;
- LLVMValueRef i1false;
-
- /* Temporary helper to implement demote_to_helper:
- * True = live lanes
- * False = demoted lanes
- */
- LLVMValueRef postponed_kill;
-
- /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
- * are two ac_llvm_contexts. Declare a pointer here, so that the control
- * flow stack is shared by both ac_llvm_contexts.
- */
- struct ac_llvm_flow_state *flow;
-
- unsigned range_md_kind;
- unsigned invariant_load_md_kind;
- unsigned uniform_md_kind;
- LLVMValueRef empty_md;
-
- enum chip_class chip_class;
- enum radeon_family family;
-
- unsigned wave_size;
- unsigned ballot_mask_bits;
-
- unsigned float_mode;
-
- LLVMValueRef lds;
+ LLVMContextRef context;
+ LLVMModuleRef module;
+ LLVMBuilderRef builder;
+
+ LLVMValueRef main_function;
+
+ LLVMTypeRef voidt;
+ LLVMTypeRef i1;
+ LLVMTypeRef i8;
+ LLVMTypeRef i16;
+ LLVMTypeRef i32;
+ LLVMTypeRef i64;
+ LLVMTypeRef i128;
+ LLVMTypeRef intptr;
+ LLVMTypeRef f16;
+ LLVMTypeRef f32;
+ LLVMTypeRef f64;
+ LLVMTypeRef v2i16;
+ LLVMTypeRef v4i16;
+ LLVMTypeRef v2f16;
+ LLVMTypeRef v4f16;
+ LLVMTypeRef v2i32;
+ LLVMTypeRef v3i32;
+ LLVMTypeRef v4i32;
+ LLVMTypeRef v2f32;
+ LLVMTypeRef v3f32;
+ LLVMTypeRef v4f32;
+ LLVMTypeRef v8i32;
+ LLVMTypeRef iN_wavemask;
+ LLVMTypeRef iN_ballotmask;
+
+ LLVMValueRef i8_0;
+ LLVMValueRef i8_1;
+ LLVMValueRef i16_0;
+ LLVMValueRef i16_1;
+ LLVMValueRef i32_0;
+ LLVMValueRef i32_1;
+ LLVMValueRef i64_0;
+ LLVMValueRef i64_1;
+ LLVMValueRef i128_0;
+ LLVMValueRef i128_1;
+ LLVMValueRef f16_0;
+ LLVMValueRef f16_1;
+ LLVMValueRef f32_0;
+ LLVMValueRef f32_1;
+ LLVMValueRef f64_0;
+ LLVMValueRef f64_1;
+ LLVMValueRef i1true;
+ LLVMValueRef i1false;
+
+ /* Temporary helper to implement demote_to_helper:
+ * True = live lanes
+ * False = demoted lanes
+ */
+ LLVMValueRef postponed_kill;
+
+ /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
+ * are two ac_llvm_contexts. Declare a pointer here, so that the control
+ * flow stack is shared by both ac_llvm_contexts.
+ */
+ struct ac_llvm_flow_state *flow;
+
+ unsigned range_md_kind;
+ unsigned invariant_load_md_kind;
+ unsigned uniform_md_kind;
+ LLVMValueRef empty_md;
+
+ enum chip_class chip_class;
+ enum radeon_family family;
+
+ unsigned wave_size;
+ unsigned ballot_mask_bits;
+
+ unsigned float_mode;
+
+ LLVMValueRef lds;
};
-void
-ac_llvm_context_init(struct ac_llvm_context *ctx,
- struct ac_llvm_compiler *compiler,
- enum chip_class chip_class, enum radeon_family family,
- enum ac_float_mode float_mode, unsigned wave_size,
- unsigned ballot_mask_bits);
+void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
+ enum chip_class chip_class, enum radeon_family family,
+ enum ac_float_mode float_mode, unsigned wave_size,
+ unsigned ballot_mask_bits);
-void
-ac_llvm_context_dispose(struct ac_llvm_context *ctx);
+void ac_llvm_context_dispose(struct ac_llvm_context *ctx);
-int
-ac_get_llvm_num_components(LLVMValueRef value);
+int ac_get_llvm_num_components(LLVMValueRef value);
-int
-ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
+int ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
-LLVMValueRef
-ac_llvm_extract_elem(struct ac_llvm_context *ac,
- LLVMValueRef value,
- int index);
+LLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index);
unsigned ac_get_type_size(LLVMTypeRef type);
LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
-LLVMValueRef
-ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
- LLVMTypeRef return_type, LLVMValueRef *params,
- unsigned param_count, unsigned attrib_mask);
+LLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
+ LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count,
+ unsigned attrib_mask);
void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);
-LLVMValueRef
-ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
- unsigned count_incoming, LLVMValueRef *values,
- LLVMBasicBlockRef *blocks);
+LLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming,
+ LLVMValueRef *values, LLVMBasicBlockRef *blocks);
void ac_build_s_barrier(struct ac_llvm_context *ctx);
-void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef *pvgpr);
+void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pvgpr);
-LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx,
- nir_scope scope);
+LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope);
LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
-LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
- LLVMValueRef value);
+LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value);
LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
-LLVMValueRef
-ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
- unsigned value_count, unsigned component);
-
-LLVMValueRef
-ac_build_gather_values_extended(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count,
- unsigned value_stride,
- bool load,
- bool always_vector);
-LLVMValueRef
-ac_build_gather_values(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count);
-
-LLVMValueRef
-ac_extract_components(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned start,
- unsigned channels);
-
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned num_channels);
+LLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned component);
+
+LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned value_stride, bool load,
+ bool always_vector);
+LLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count);
+
+LLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start,
+ unsigned channels);
+
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value,
+ unsigned num_channels);
LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
-LLVMValueRef
-ac_build_fdiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef den);
-
-LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment);
-LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment);
-LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef post_shift);
-
-void
-ac_prepare_cube_coords(struct ac_llvm_context *ctx,
- bool is_deriv, bool is_array, bool is_lod,
- LLVMValueRef *coords_arg,
- LLVMValueRef *derivs_arg);
-
-
-LLVMValueRef
-ac_build_fs_interp(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j);
-
-LLVMValueRef
-ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j);
-
-LLVMValueRef
-ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
- LLVMValueRef parameter,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params);
-
-LLVMValueRef
-ac_build_gep_ptr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index);
-
-LLVMValueRef
-ac_build_gep0(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index);
+LLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den);
+
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num,
+ LLVMValueRef multiplier, LLVMValueRef pre_shift,
+ LLVMValueRef post_shift, LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num,
+ LLVMValueRef multiplier, LLVMValueRef pre_shift,
+ LLVMValueRef post_shift, LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num,
+ LLVMValueRef multiplier, LLVMValueRef post_shift);
+
+void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
+ LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg);
+
+LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
+ LLVMValueRef j);
+
+LLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
+ LLVMValueRef j);
+
+LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter,
+ LLVMValueRef llvm_chan, LLVMValueRef attr_number,
+ LLVMValueRef params);
+
+LLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index);
+
+LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index);
LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef index);
-
-void
-ac_build_indexed_store(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- LLVMValueRef value);
-
-LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
- LLVMValueRef index);
-LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
-LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
+ LLVMValueRef index);
+
+void ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index,
+ LLVMValueRef value);
+
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index);
LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
-
-void
-ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy);
-
-void
-ac_build_buffer_store_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef data,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_build_buffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- int num_channels,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy,
- bool can_speculate,
- bool allow_smem);
-
-LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- unsigned cache_policy,
- bool can_speculate,
- bool d16);
-
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate);
-
-LLVMValueRef
-ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate);
+ LLVMValueRef base_ptr, LLVMValueRef index);
+
+void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
+ unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned inst_offset, unsigned cache_policy);
+
+void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
+ LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy);
+
+LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
+ LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned inst_offset, unsigned cache_policy, bool can_speculate,
+ bool allow_smem);
+
+LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ unsigned num_channels, unsigned cache_policy,
+ bool can_speculate, bool d16);
+
+LLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ LLVMValueRef immoffset, unsigned cache_policy);
+
+LLVMValueRef ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ LLVMValueRef immoffset, unsigned cache_policy);
+
+LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy, bool can_speculate);
+
+LLVMValueRef ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef voffset, LLVMValueRef soffset,
+ LLVMValueRef immoffset, unsigned num_channels, unsigned dfmt,
+ unsigned nfmt, unsigned cache_policy, bool can_speculate);
/* For ac_build_fetch_format.
*
* Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
*/
-enum {
- AC_FETCH_FORMAT_FLOAT = 0,
- AC_FETCH_FORMAT_FIXED,
- AC_FETCH_FORMAT_UNORM,
- AC_FETCH_FORMAT_SNORM,
- AC_FETCH_FORMAT_USCALED,
- AC_FETCH_FORMAT_SSCALED,
- AC_FETCH_FORMAT_UINT,
- AC_FETCH_FORMAT_SINT,
+enum
+{
+ AC_FETCH_FORMAT_FLOAT = 0,
+ AC_FETCH_FORMAT_FIXED,
+ AC_FETCH_FORMAT_UNORM,
+ AC_FETCH_FORMAT_SNORM,
+ AC_FETCH_FORMAT_USCALED,
+ AC_FETCH_FORMAT_SSCALED,
+ AC_FETCH_FORMAT_UINT,
+ AC_FETCH_FORMAT_SINT,
};
-LLVMValueRef
-ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
- unsigned log_size,
- unsigned num_channels,
- unsigned format,
- bool reverse,
- bool known_aligned,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy,
- bool can_speculate);
-
-void
-ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy);
-
-void
-ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy);
-
-void
-ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy);
-
-void
-ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_get_thread_id(struct ac_llvm_context *ctx);
+LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size,
+ unsigned num_channels, unsigned format, bool reverse,
+ bool known_aligned, LLVMValueRef rsrc,
+ LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, unsigned cache_policy,
+ bool can_speculate);
+
+void ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset,
+ unsigned cache_policy);
+
+void ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
+ LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy);
+
+void ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
+ LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
+ LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy);
+
+void ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
+ LLVMValueRef voffset, LLVMValueRef soffset, LLVMValueRef immoffset,
+ unsigned num_channels, unsigned dfmt, unsigned nfmt,
+ unsigned cache_policy);
+
+LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx);
#define AC_TID_MASK_TOP_LEFT 0xfffffffc
#define AC_TID_MASK_TOP 0xfffffffd
#define AC_TID_MASK_LEFT 0xfffffffe
-LLVMValueRef
-ac_build_ddxy(struct ac_llvm_context *ctx,
- uint32_t mask,
- int idx,
- LLVMValueRef val);
+LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val);
-#define AC_SENDMSG_GS 2
-#define AC_SENDMSG_GS_DONE 3
+#define AC_SENDMSG_GS 2
+#define AC_SENDMSG_GS_DONE 3
#define AC_SENDMSG_GS_ALLOC_REQ 9
#define AC_SENDMSG_GS_OP_NOP (0 << 4)
#define AC_SENDMSG_GS_OP_EMIT (2 << 4)
#define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4)
-void ac_build_sendmsg(struct ac_llvm_context *ctx,
- uint32_t msg,
- LLVMValueRef wave_id);
-
-LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type);
-
-LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type);
-LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
+void ac_build_sendmsg(struct ac_llvm_context *ctx, uint32_t msg, LLVMValueRef wave_id);
+
+LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type);
+
+LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type);
+LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
struct ac_export_args {
- LLVMValueRef out[4];
- unsigned target;
- unsigned enabled_channels;
- bool compr;
- bool done;
- bool valid_mask;
+ LLVMValueRef out[4];
+ unsigned target;
+ unsigned enabled_channels;
+ bool compr;
+ bool done;
+ bool valid_mask;
};
void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);
void ac_build_export_null(struct ac_llvm_context *ctx);
-enum ac_image_opcode {
- ac_image_sample,
- ac_image_gather4,
- ac_image_load,
- ac_image_load_mip,
- ac_image_store,
- ac_image_store_mip,
- ac_image_get_lod,
- ac_image_get_resinfo,
- ac_image_atomic,
- ac_image_atomic_cmpswap,
+enum ac_image_opcode
+{
+ ac_image_sample,
+ ac_image_gather4,
+ ac_image_load,
+ ac_image_load_mip,
+ ac_image_store,
+ ac_image_store_mip,
+ ac_image_get_lod,
+ ac_image_get_resinfo,
+ ac_image_atomic,
+ ac_image_atomic_cmpswap,
};
-enum ac_atomic_op {
- ac_atomic_swap,
- ac_atomic_add,
- ac_atomic_sub,
- ac_atomic_smin,
- ac_atomic_umin,
- ac_atomic_smax,
- ac_atomic_umax,
- ac_atomic_and,
- ac_atomic_or,
- ac_atomic_xor,
- ac_atomic_inc_wrap,
- ac_atomic_dec_wrap,
+enum ac_atomic_op
+{
+ ac_atomic_swap,
+ ac_atomic_add,
+ ac_atomic_sub,
+ ac_atomic_smin,
+ ac_atomic_umin,
+ ac_atomic_smax,
+ ac_atomic_umax,
+ ac_atomic_and,
+ ac_atomic_or,
+ ac_atomic_xor,
+ ac_atomic_inc_wrap,
+ ac_atomic_dec_wrap,
};
/* These cache policy bits match the definitions used by the LLVM intrinsics. */
-enum ac_image_cache_policy {
- ac_glc = 1 << 0, /* per-CU cache control */
- ac_slc = 1 << 1, /* global L2 cache control */
- ac_dlc = 1 << 2, /* per-shader-array cache control */
- ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
+enum ac_image_cache_policy
+{
+ ac_glc = 1 << 0, /* per-CU cache control */
+ ac_slc = 1 << 1, /* global L2 cache control */
+ ac_dlc = 1 << 2, /* per-shader-array cache control */
+ ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
};
/* Arguments for ac_build_image_opcode(): a group of bitfields that select and
 * qualify the operation, followed by the LLVM values it operates on.
 * The enum bitfields must stay wide enough for every enumerator of their type.
 * NOTE(review): which value fields are read for a given opcode is decided by
 * ac_build_image_opcode(), which is not visible here. */
struct ac_image_args {
- enum ac_image_opcode opcode : 4;
- enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
- enum ac_image_dim dim : 3;
- unsigned dmask : 4;
- unsigned cache_policy : 3;
- bool unorm : 1;
- bool level_zero : 1;
- bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */
- unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
-
- LLVMValueRef resource;
- LLVMValueRef sampler;
- LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
- LLVMValueRef offset;
- LLVMValueRef bias;
- LLVMValueRef compare;
- LLVMValueRef derivs[6];
- LLVMValueRef coords[4];
- LLVMValueRef lod; // also used by ac_image_get_resinfo
- LLVMValueRef min_lod;
+ enum ac_image_opcode opcode : 4;
+ enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
+ enum ac_image_dim dim : 3;
+ unsigned dmask : 4;
+ unsigned cache_policy : 3;
+ bool unorm : 1;
+ bool level_zero : 1;
+ bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */
+ unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
+
+ LLVMValueRef resource;
+ LLVMValueRef sampler;
+ LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
+ LLVMValueRef offset;
+ LLVMValueRef bias;
+ LLVMValueRef compare;
+ LLVMValueRef derivs[6];
+ LLVMValueRef coords[4];
+ LLVMValueRef lod; // also used by ac_image_get_resinfo
+ LLVMValueRef min_lod;
};
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
- struct ac_image_args *a);
-LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc);
-LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi);
-LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi);
+LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a);
+LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc);
+LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
+ bool hi);
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
+ bool hi);
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
-LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
- LLVMValueRef offset, LLVMValueRef width,
- bool is_signed);
-LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2);
-LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset,
+ LLVMValueRef width, bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
+ LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
+ LLVMValueRef s2);
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
-LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
+LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize);
LLVMValueRef ac_const_uint_vec(struct ac_llvm_context *ctx, LLVMTypeRef type, uint64_t value);
LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0);
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src);
LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
-LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
- LLVMValueRef src0);
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0);
-void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
- LLVMValueRef main_fn,
- uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- uint32_t skip_output_mask,
- uint8_t *num_param_exports);
+void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn,
+ uint8_t *vs_output_param_offset, uint32_t num_outputs,
+ uint32_t skip_output_mask, uint8_t *num_param_exports);
void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
-LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr);
-void ac_lds_store(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr, LLVMValueRef value);
+LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr);
+void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value);
-LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
- LLVMTypeRef dst_type,
- LLVMValueRef src0);
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0);
LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
-void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
- int lable_id);
-void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
- int lable_id);
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, int lable_id);
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, int lable_id);
-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
- const char *name);
-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
- const char *name);
+LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name);
+LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name);
-LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMTypeRef type);
+LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type);
-LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
- unsigned count);
+LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count);
-LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
- unsigned rshift, unsigned bitwidth);
+LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift,
+ unsigned bitwidth);
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
- LLVMValueRef *addr, bool is_array_tex);
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
+ bool is_array_tex);
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
+LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
-LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef src, LLVMValueRef lane);
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef lane);
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
+LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
-LLVMValueRef
-ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
+LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value,
+ LLVMValueRef lane);
-LLVMValueRef
-ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
+LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
-LLVMValueRef
-ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
+LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
-LLVMValueRef
-ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
+LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
-LLVMValueRef
-ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
+LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op,
+ unsigned cluster_size);
/**
 * Common arguments for a scan/reduce operation that accumulates per-wave
 * values across an entire workgroup, while respecting the order of waves.
 *
 * Passed by non-const pointer to the ac_build_wg_wavescan*() and
 * ac_build_wg_scan*() functions, so callees may write to it (src is
 * documented below as clobbered).
 * NOTE(review): presumably each enable_* flag requests the matching result_*
 * member as an output; confirm against the ac_build_wg_scan*() definitions.
 */
struct ac_wg_scan {
- bool enable_reduce;
- bool enable_exclusive;
- bool enable_inclusive;
- nir_op op;
- LLVMValueRef src; /* clobbered! */
- LLVMValueRef result_reduce;
- LLVMValueRef result_exclusive;
- LLVMValueRef result_inclusive;
- LLVMValueRef extra;
- LLVMValueRef waveidx;
- LLVMValueRef numwaves; /* only needed for "reduce" operations */
-
- /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
- LLVMValueRef scratch;
- unsigned maxwaves;
+ bool enable_reduce;
+ bool enable_exclusive;
+ bool enable_inclusive;
+ nir_op op;
+ LLVMValueRef src; /* clobbered! */
+ LLVMValueRef result_reduce;
+ LLVMValueRef result_exclusive;
+ LLVMValueRef result_inclusive;
+ LLVMValueRef extra;
+ LLVMValueRef waveidx;
+ LLVMValueRef numwaves; /* only needed for "reduce" operations */
+
+ /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
+ LLVMValueRef scratch;
+ unsigned maxwaves;
};
-void
-ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-LLVMValueRef
-ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
- unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
+LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
+ unsigned lane1, unsigned lane2, unsigned lane3);
-LLVMValueRef
-ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
+LLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
-LLVMValueRef
-ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
+LLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize);
-LLVMValueRef
-ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
+LLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize);
-LLVMValueRef
-ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
+LLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
-LLVMValueRef
-ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
+LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
-LLVMValueRef
-ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
+LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
-LLVMValueRef
-ac_build_is_helper_invocation(struct ac_llvm_context *ctx);
+LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx);
-LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
- LLVMValueRef *args, unsigned num_args);
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
+ unsigned num_args);
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
- LLVMValueRef ptr, LLVMValueRef val,
- const char *sync_scope);
+ LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope);
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef cmp, LLVMValueRef val,
- const char *sync_scope);
+ LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope);
-void
-ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
- LLVMValueRef stencil, LLVMValueRef samplemask,
- struct ac_export_args *args);
+void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask, struct ac_export_args *args);
void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id,
- LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt);
+ LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt);
/* Description of one primitive, read through a const pointer by
 * ac_pack_prim_export() and ac_build_export_prim().
 * index[] and edgeflag[] have room for three vertices.
 * NOTE(review): presumably only the first num_vertices entries are
 * meaningful, and isnull/passthrough change how the export is packed;
 * confirm against ac_pack_prim_export(). */
struct ac_ngg_prim {
- unsigned num_vertices;
- LLVMValueRef isnull;
- LLVMValueRef index[3];
- LLVMValueRef edgeflag[3];
- LLVMValueRef passthrough;
+ unsigned num_vertices;
+ LLVMValueRef isnull;
+ LLVMValueRef index[3];
+ LLVMValueRef edgeflag[3];
+ LLVMValueRef passthrough;
};
-LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx,
- const struct ac_ngg_prim *prim);
-void ac_build_export_prim(struct ac_llvm_context *ctx,
- const struct ac_ngg_prim *prim);
+LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim);
+void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim);
/* Return the LLVM parameter of ctx->main_function at position arg.arg_index.
 * The argument must be marked as used (arg.used). This is only checked by
 * assert(), so it is not enforced when NDEBUG is defined. */
-static inline LLVMValueRef
-ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg)
+static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg)
{
- assert(arg.used);
- return LLVMGetParam(ctx->main_function, arg.arg_index);
+ assert(arg.used);
+ return LLVMGetParam(ctx->main_function, arg.arg_index);
}
/* Calling-convention IDs accepted by ac_build_main() through its
 * "convention" parameter.
 * NOTE(review): the numeric values presumably mirror LLVM's AMDGPU_* calling
 * convention IDs (llvm/IR/CallingConv.h) and must not be renumbered;
 * confirm against the LLVM version in use. */
-enum ac_llvm_calling_convention {
- AC_LLVM_AMDGPU_VS = 87,
- AC_LLVM_AMDGPU_GS = 88,
- AC_LLVM_AMDGPU_PS = 89,
- AC_LLVM_AMDGPU_CS = 90,
- AC_LLVM_AMDGPU_HS = 93,
+enum ac_llvm_calling_convention
+{
+ AC_LLVM_AMDGPU_VS = 87,
+ AC_LLVM_AMDGPU_GS = 88,
+ AC_LLVM_AMDGPU_PS = 89,
+ AC_LLVM_AMDGPU_CS = 90,
+ AC_LLVM_AMDGPU_HS = 93,
};
-LLVMValueRef ac_build_main(const struct ac_shader_args *args,
- struct ac_llvm_context *ctx,
- enum ac_llvm_calling_convention convention,
- const char *name, LLVMTypeRef ret_type,
- LLVMModuleRef module);
+LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
+ enum ac_llvm_calling_convention convention, const char *name,
+ LLVMTypeRef ret_type, LLVMModuleRef module);
void ac_build_s_endpgm(struct ac_llvm_context *ctx);
-LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx,
- LLVMValueRef mask, LLVMValueRef index);
-LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx,
- LLVMValueRef mask[2], LLVMValueRef index);
-void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx,
- LLVMValueRef is_odd,
- LLVMValueRef flatshade_first,
- LLVMValueRef index[3]);
+LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef index);
+LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx, LLVMValueRef mask[2],
+ LLVMValueRef index);
+void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd,
+ LLVMValueRef flatshade_first,
+ LLVMValueRef index[3]);
#ifdef __cplusplus
}
*/
#include "ac_llvm_cull.h"
+
#include <llvm-c/Core.h>
/* Per-triangle facts about the W components of the three vertices.
 * Every member is filled in by ac_analyze_position_w() from "W < 0" compares
 * and boolean operations on their results.
 * The compare is ordered, so a W of zero or NaN is not counted as negative:
 * all_w_positive is simply the negation of any_w_negative. */
struct ac_position_w_info {
- /* If a primitive intersects the W=0 plane, it causes a reflection
- * of the determinant used for face culling. Every vertex behind
- * the W=0 plane negates the determinant, so having 2 vertices behind
- * the plane has no effect. This is i1 true if the determinant should be
- * negated.
- */
- LLVMValueRef w_reflection;
-
- /* If we simplify the "-w <= p <= w" view culling equation, we get
- * "-w <= w", which can't be satisfied when w is negative.
- * In perspective projection, a negative W means that the primitive
- * is behind the viewer, but the equation is independent of the type
- * of projection.
- *
- * w_accepted is false when all W are negative and therefore
- * the primitive is invisible.
- */
- LLVMValueRef w_accepted;
-
- LLVMValueRef all_w_positive;
- LLVMValueRef any_w_negative;
+ /* If a primitive intersects the W=0 plane, it causes a reflection
+ * of the determinant used for face culling. Every vertex behind
+ * the W=0 plane negates the determinant, so having 2 vertices behind
+ * the plane has no effect. This is i1 true if the determinant should be
+ * negated.
+ */
+ LLVMValueRef w_reflection;
+
+ /* If we simplify the "-w <= p <= w" view culling equation, we get
+ * "-w <= w", which can't be satisfied when w is negative.
+ * In perspective projection, a negative W means that the primitive
+ * is behind the viewer, but the equation is independent of the type
+ * of projection.
+ *
+ * w_accepted is false when all W are negative and therefore
+ * the primitive is invisible.
+ */
+ LLVMValueRef w_accepted;
+
+ LLVMValueRef all_w_positive;
+ LLVMValueRef any_w_negative;
};
/* Classify the W component (pos[i][3]) of the three vertices and fill in
 * every member of *w:
 *   w_reflection   = XOR of the three "W < 0" tests, i.e. true when an odd
 *                    number of vertices has negative W
 *   any_w_negative = OR of the three tests
 *   all_w_positive = NOT any_w_negative
 *   w_accepted     = NOT (AND of the three tests)
 * Each test is an ordered compare (LLVMRealOLT) against ctx->f32_0.
 */
-static void ac_analyze_position_w(struct ac_llvm_context *ctx,
- LLVMValueRef pos[3][4],
- struct ac_position_w_info *w)
+static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
+ struct ac_position_w_info *w)
{
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef all_w_negative = ctx->i1true;
-
- w->w_reflection = ctx->i1false;
- w->any_w_negative = ctx->i1false;
-
- for (unsigned i = 0; i < 3; i++) {
- LLVMValueRef neg_w;
-
- neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
- /* If neg_w is true, negate w_reflection. */
- w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
- w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
- all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
- }
- w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
- w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef all_w_negative = ctx->i1true;
+
+ w->w_reflection = ctx->i1false;
+ w->any_w_negative = ctx->i1false;
+
+ for (unsigned i = 0; i < 3; i++) {
+ LLVMValueRef neg_w;
+
+ neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
+ /* If neg_w is true, negate w_reflection. */
+ w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
+ w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
+ all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
+ }
+ w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
+ w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
}
/* Perform front/back face culling and return true if the primitive is accepted.
 *
 * Returns the constant ctx->i1false when both faces are culled, and the
 * constant ctx->i1true when neither face culling nor zero-area culling is
 * requested; no instructions are emitted in those two cases.
 *
 * Otherwise, with xi = pos[i][0] and yi = pos[i][1], the sign of
 *   det = (x2 - x0) * (y1 - y0) - (x0 - x1) * (y0 - y2)
 * is tested, after negating det when w->w_reflection is true:
 *   cull_front          : accept det > 0 (det >= 0 unless cull_zero_area)
 *   cull_back           : accept det < 0 (det <= 0 unless cull_zero_area)
 *   cull_zero_area only : accept det != 0
 * All compares are ordered, so a NaN determinant is never accepted.
 */
-static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
- LLVMValueRef pos[3][4],
- struct ac_position_w_info *w,
- bool cull_front,
- bool cull_back,
- bool cull_zero_area)
+static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
+ struct ac_position_w_info *w, bool cull_front, bool cull_back,
+ bool cull_zero_area)
{
- LLVMBuilderRef builder = ctx->builder;
-
- if (cull_front && cull_back)
- return ctx->i1false;
-
- if (!cull_front && !cull_back && !cull_zero_area)
- return ctx->i1true;
-
- /* Front/back face culling. Also if the determinant == 0, the triangle
- * area is 0.
- */
- LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
- LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
- LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
- LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
- LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
- LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
- LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
-
- /* Negative W negates the determinant. */
- det = LLVMBuildSelect(builder, w->w_reflection,
- LLVMBuildFNeg(builder, det, ""),
- det, "");
-
- LLVMValueRef accepted = NULL;
- if (cull_front) {
- LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
- accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
- } else if (cull_back) {
- LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
- accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
- } else if (cull_zero_area) {
- accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
- }
- return accepted;
+ LLVMBuilderRef builder = ctx->builder;
+
+ if (cull_front && cull_back)
+ return ctx->i1false;
+
+ if (!cull_front && !cull_back && !cull_zero_area)
+ return ctx->i1true;
+
+ /* Front/back face culling. Also if the determinant == 0, the triangle
+ * area is 0.
+ */
+ LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
+ LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
+ LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
+ LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
+ LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
+ LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
+ LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
+
+ /* Negative W negates the determinant. */
+ det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, "");
+
+ LLVMValueRef accepted = NULL;
+ if (cull_front) {
+ LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
+ accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
+ } else if (cull_back) {
+ LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
+ accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
+ } else if (cull_zero_area) {
+ accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
+ }
+ return accepted;
}
/* Perform view culling and small primitive elimination and return true
 * if the primitive is accepted and initially_accepted == true.
 *
 * If none of the four cull_* flags is set, initially_accepted is returned
 * unchanged and nothing is emitted. Otherwise the tests are emitted inside a
 * conditional block that only runs when initially_accepted and
 * w->all_w_positive are both true; when the block is skipped the result is
 * initially_accepted.
 *
 * View culling rejects the primitive when, on a tested axis, its bounding box
 * maximum is below the lower bound (-1, or 0 for Z when use_halfz_clip_space)
 * or its bounding box minimum is above 1. X/Y are tested when cull_view_xy is
 * set; Z is tested against the lower bound when cull_view_near_z is set and
 * against the upper bound when cull_view_far_z is set.
 *
 * Small primitive elimination maps the X/Y bounding box through
 * vp_scale/vp_translate, widens it by small_prim_precision, rounds both ends
 * and rejects the primitive when they are equal on either axis.
 *
 * The Z extent of the bounding box is only computed when near or far Z
 * culling is enabled; those are also the only cases that read it.
 */
-static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
- LLVMValueRef pos[3][4],
- LLVMValueRef initially_accepted,
- struct ac_position_w_info *w,
- LLVMValueRef vp_scale[2],
- LLVMValueRef vp_translate[2],
- LLVMValueRef small_prim_precision,
- bool cull_view_xy,
- bool cull_view_near_z,
- bool cull_view_far_z,
- bool cull_small_prims,
- bool use_halfz_clip_space)
+static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
+ LLVMValueRef initially_accepted, struct ac_position_w_info *w,
+ LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
+ LLVMValueRef small_prim_precision, bool cull_view_xy,
+ bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
+ bool use_halfz_clip_space)
{
- LLVMBuilderRef builder = ctx->builder;
-
- if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
- return initially_accepted;
-
- /* Skip the culling if the primitive has already been rejected or
- * if any W is negative. The bounding box culling doesn't work when
- * W is negative.
- */
- LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
- w->all_w_positive, "");
- LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
- LLVMBuildStore(builder, initially_accepted, accepted_var);
-
- ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
- {
- LLVMValueRef bbox_min[3], bbox_max[3];
- LLVMValueRef accepted = initially_accepted;
-
- /* Compute the primitive bounding box for easy culling. */
- for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
- bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
- bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
-
- bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
- bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
- }
-
- /* View culling. */
- if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
- for (unsigned chan = 0; chan < 3; chan++) {
- LLVMValueRef visible;
-
- if ((cull_view_xy && chan <= 1) ||
- (cull_view_near_z && chan == 2)) {
- float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
- visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
- LLVMConstReal(ctx->f32, t), "");
- accepted = LLVMBuildAnd(builder, accepted, visible, "");
- }
-
- if ((cull_view_xy && chan <= 1) ||
- (cull_view_far_z && chan == 2)) {
- visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
- ctx->f32_1, "");
- accepted = LLVMBuildAnd(builder, accepted, visible, "");
- }
- }
- }
-
- /* Small primitive elimination. */
- if (cull_small_prims) {
- /* Assuming a sample position at (0.5, 0.5), if we round
- * the bounding box min/max extents and the results of
- * the rounding are equal in either the X or Y direction,
- * the bounding box does not intersect the sample.
- *
- * See these GDC slides for pictures:
- * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
- */
- LLVMValueRef min, max, not_equal[2], visible;
-
- for (unsigned chan = 0; chan < 2; chan++) {
- /* Convert the position to screen-space coordinates. */
- min = ac_build_fmad(ctx, bbox_min[chan],
- vp_scale[chan], vp_translate[chan]);
- max = ac_build_fmad(ctx, bbox_max[chan],
- vp_scale[chan], vp_translate[chan]);
- /* Scale the bounding box according to the precision of
- * the rasterizer and the number of MSAA samples. */
- min = LLVMBuildFSub(builder, min, small_prim_precision, "");
- max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
-
- /* Determine if the bbox intersects the sample point.
- * It also works for MSAA, but vp_scale, vp_translate,
- * and small_prim_precision are computed differently.
- */
- min = ac_build_round(ctx, min);
- max = ac_build_round(ctx, max);
- not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
- }
- visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
- accepted = LLVMBuildAnd(builder, accepted, visible, "");
- }
-
- LLVMBuildStore(builder, accepted, accepted_var);
- }
- ac_build_endif(ctx, 10000000);
-
- return LLVMBuildLoad(builder, accepted_var, "");
+ LLVMBuilderRef builder = ctx->builder;
+
+ if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
+ return initially_accepted;
+
+ /* Skip the culling if the primitive has already been rejected or
+ * if any W is negative. The bounding box culling doesn't work when
+ * W is negative.
+ */
+ LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, w->all_w_positive, "");
+ LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
+ LLVMBuildStore(builder, initially_accepted, accepted_var);
+
+ ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
+ {
+ LLVMValueRef bbox_min[3], bbox_max[3];
+ LLVMValueRef accepted = initially_accepted;
+
+ /* Compute the primitive bounding box for easy culling. */
+ for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
+ bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
+ bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
+
+ bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
+ bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
+ }
+
+ /* View culling. */
+ if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
+ for (unsigned chan = 0; chan < 3; chan++) {
+ LLVMValueRef visible;
+
+ if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) {
+ float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
+ visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
+ LLVMConstReal(ctx->f32, t), "");
+ accepted = LLVMBuildAnd(builder, accepted, visible, "");
+ }
+
+ if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) {
+ visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, "");
+ accepted = LLVMBuildAnd(builder, accepted, visible, "");
+ }
+ }
+ }
+
+ /* Small primitive elimination. */
+ if (cull_small_prims) {
+ /* Assuming a sample position at (0.5, 0.5), if we round
+ * the bounding box min/max extents and the results of
+ * the rounding are equal in either the X or Y direction,
+ * the bounding box does not intersect the sample.
+ *
+ * See these GDC slides for pictures:
+ * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
+ */
+ LLVMValueRef min, max, not_equal[2], visible;
+
+ for (unsigned chan = 0; chan < 2; chan++) {
+ /* Convert the position to screen-space coordinates. */
+ min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
+ max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
+ /* Scale the bounding box according to the precision of
+ * the rasterizer and the number of MSAA samples. */
+ min = LLVMBuildFSub(builder, min, small_prim_precision, "");
+ max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
+
+ /* Determine if the bbox intersects the sample point.
+ * It also works for MSAA, but vp_scale, vp_translate,
+ * and small_prim_precision are computed differently.
+ */
+ min = ac_build_round(ctx, min);
+ max = ac_build_round(ctx, max);
+ not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
+ }
+ visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
+ accepted = LLVMBuildAnd(builder, accepted, visible, "");
+ }
+
+ LLVMBuildStore(builder, accepted, accepted_var);
+ }
+ ac_build_endif(ctx, 10000000);
+
+ return LLVMBuildLoad(builder, accepted_var, "");
}
/**
 * Emit the culling tests for one triangle and return whether it is accepted.
 *
 * The result combines, in this order: W culling (w_accepted, only when
 * options->cull_w is set), initially_accepted, ac_cull_face() and cull_bbox().
 * The first three are ANDed together and the result is handed to cull_bbox()
 * as its initially_accepted input.
 *
 * subpixel_bits are defined by the quantization mode.
 * NOTE(review): there is no subpixel_bits parameter here; presumably this
 * refers to how the caller derives small_prim_precision. Confirm.
 * \param options See ac_cull_options.
 */
-LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
- LLVMValueRef pos[3][4],
- LLVMValueRef initially_accepted,
- LLVMValueRef vp_scale[2],
- LLVMValueRef vp_translate[2],
- LLVMValueRef small_prim_precision,
- struct ac_cull_options *options)
+LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
+ LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
+ LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
+ struct ac_cull_options *options)
{
- struct ac_position_w_info w;
- ac_analyze_position_w(ctx, pos, &w);
-
- /* W culling. */
- LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
- accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
-
- /* Face culling. */
- accepted = LLVMBuildAnd(ctx->builder, accepted,
- ac_cull_face(ctx, pos, &w,
- options->cull_front,
- options->cull_back,
- options->cull_zero_area), "");
-
- /* View culling and small primitive elimination. */
- accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate,
- small_prim_precision,
- options->cull_view_xy,
- options->cull_view_near_z,
- options->cull_view_far_z,
- options->cull_small_prims,
- options->use_halfz_clip_space);
- return accepted;
+ struct ac_position_w_info w;
+ ac_analyze_position_w(ctx, pos, &w);
+
+ /* W culling. */
+ LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
+ accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
+
+ /* Face culling. */
+ accepted = LLVMBuildAnd(
+ ctx->builder, accepted,
+ ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area),
+ "");
+
+ /* View culling and small primitive elimination. */
+ accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
+ options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
+ options->cull_small_prims, options->use_halfz_clip_space);
+ return accepted;
}
#include "ac_llvm_build.h"
/* Switches selecting which culling tests ac_cull_triangle() applies.
 * NOTE(review): use_halfz_clip_space presumably selects a clip space whose
 * Z range is [0, 1] rather than [-1, 1] -- confirm against cull_bbox().
 */
struct ac_cull_options {
- /* In general, I recommend setting all to true except view Z culling,
- * which isn't so effective because W culling is cheaper and partially
- * replaces near Z culling, and you don't need to set Position.z
- * if Z culling is disabled.
- *
- * If something doesn't work, turn some of these off to find out what.
- */
- bool cull_front;
- bool cull_back;
- bool cull_view_xy;
- bool cull_view_near_z;
- bool cull_view_far_z;
- bool cull_small_prims;
- bool cull_zero_area;
- bool cull_w; /* cull primitives with all W < 0 */
+ /* In general, I recommend setting all to true except view Z culling,
+ * which isn't so effective because W culling is cheaper and partially
+ * replaces near Z culling, and you don't need to set Position.z
+ * if Z culling is disabled.
+ *
+ * If something doesn't work, turn some of these off to find out what.
+ */
+ bool cull_front;
+ bool cull_back;
+ bool cull_view_xy;
+ bool cull_view_near_z;
+ bool cull_view_far_z;
+ bool cull_small_prims;
+ bool cull_zero_area;
+ bool cull_w; /* cull primitives with all W < 0 */
- bool use_halfz_clip_space;
+ bool use_halfz_clip_space;
};
/* Builds the culling test for one triangle; `options` selects which of the
 * individual tests are applied.
 */
-LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
- LLVMValueRef pos[3][4],
- LLVMValueRef initially_accepted,
- LLVMValueRef vp_scale[2],
- LLVMValueRef vp_translate[2],
- LLVMValueRef small_prim_precision,
- struct ac_cull_options *options);
+LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
+ LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
+ LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
+ struct ac_cull_options *options);
#endif
*
*/
-#include <cstring>
-
#include <llvm-c/Core.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/IRBuilder.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/IPO.h>
-#include <llvm/IR/LegacyPassManager.h>
+#include <cstring>
/* DO NOT REORDER THE HEADERS
* The LLVM headers need to all be included before any Mesa header,
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
-
#include "util/macros.h"
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
/* Attaches an alignment attribute of `bytes` to the function argument `val`.
 * Only implemented when built against LLVM >= 10; with older versions the
 * function does nothing.
 */
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
{
#if LLVM_VERSION_MAJOR >= 10
- llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
- A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
+ llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
+ A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
#else
- /* Avoid unused parameter warnings. */
- (void)val;
- (void)bytes;
+ /* Avoid unused parameter warnings. */
+ (void)val;
+ (void)bytes;
#endif
}
/* Returns true when the function argument `arg` carries the InReg attribute
 * in its parent function's attribute list. The lookup uses ArgNo + 1 because
 * argument attribute indices start at 1 in llvm::AttributeList (index 0 is
 * the return value).
 */
bool ac_is_sgpr_param(LLVMValueRef arg)
{
- llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
- llvm::AttributeList AS = A->getParent()->getAttributes();
- unsigned ArgNo = A->getArgNo();
- return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
+ llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
+ llvm::AttributeList AS = A->getParent()->getAttributes();
+ unsigned ArgNo = A->getArgNo();
+ return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
}
/* Thin wrapper: returns LLVMGetCalledValue() for the call instruction `call`. */
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
{
- return LLVMGetCalledValue(call);
+ return LLVMGetCalledValue(call);
}
/* Returns true when the value kind of `v` is LLVMFunctionValueKind. */
bool ac_llvm_is_function(LLVMValueRef v)
{
- return LLVMGetValueKind(v) == LLVMFunctionValueKind;
+ return LLVMGetValueKind(v) == LLVMFunctionValueKind;
}
/* Creates a module named "mesa-shader" in `ctx` and sets its target triple to
 * the triple of the target machine `tm`.
 */
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
- llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
+ llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
return module;
}
/* Creates an IR builder in `ctx`. For AC_FLOAT_MODE_DEFAULT_OPENGL the
 * builder's fast-math flags are set to nsz + arcp; for the other handled
 * modes the builder keeps its default flags.
 */
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
- enum ac_float_mode float_mode)
+LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
{
- LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
+ LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
- llvm::FastMathFlags flags;
+ llvm::FastMathFlags flags;
- switch (float_mode) {
- case AC_FLOAT_MODE_DEFAULT:
- case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
- break;
+ switch (float_mode) {
+ case AC_FLOAT_MODE_DEFAULT:
+ case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
+ break;
- case AC_FLOAT_MODE_DEFAULT_OPENGL:
- /* Allow optimizations to treat the sign of a zero argument or
- * result as insignificant.
- */
- flags.setNoSignedZeros(); /* nsz */
+ case AC_FLOAT_MODE_DEFAULT_OPENGL:
+ /* Allow optimizations to treat the sign of a zero argument or
+ * result as insignificant.
+ */
+ flags.setNoSignedZeros(); /* nsz */
- /* Allow optimizations to use the reciprocal of an argument
- * rather than perform division.
- */
- flags.setAllowReciprocal(); /* arcp */
+ /* Allow optimizations to use the reciprocal of an argument
+ * rather than perform division.
+ */
+ flags.setAllowReciprocal(); /* arcp */
- llvm::unwrap(builder)->setFastMathFlags(flags);
- break;
- }
+ llvm::unwrap(builder)->setFastMathFlags(flags);
+ break;
+ }
- return builder;
+ return builder;
}
/* Clears the nsz (no-signed-zeros) fast-math flag on ctx->builder so that
 * subsequently built instructions honor the sign of zero. Only has an effect
 * when ctx->float_mode is AC_FLOAT_MODE_DEFAULT_OPENGL.
 */
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
{
- if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
- auto *b = llvm::unwrap(ctx->builder);
- llvm::FastMathFlags flags = b->getFastMathFlags();
-
- /* This disables the optimization of (x + 0), which is used
- * to convert negative zero to positive zero.
- */
- flags.setNoSignedZeros(false);
- b->setFastMathFlags(flags);
- }
+ if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
+ auto *b = llvm::unwrap(ctx->builder);
+ llvm::FastMathFlags flags = b->getFastMathFlags();
+
+ /* This disables the optimization of (x + 0), which is used
+ * to convert negative zero to positive zero.
+ */
+ flags.setNoSignedZeros(false);
+ b->setFastMathFlags(flags);
+ }
}
/* Sets the nsz (no-signed-zeros) fast-math flag on ctx->builder again.
 * Only has an effect when ctx->float_mode is AC_FLOAT_MODE_DEFAULT_OPENGL.
 */
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
{
- if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
- auto *b = llvm::unwrap(ctx->builder);
- llvm::FastMathFlags flags = b->getFastMathFlags();
+ if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
+ auto *b = llvm::unwrap(ctx->builder);
+ llvm::FastMathFlags flags = b->getFastMathFlags();
- flags.setNoSignedZeros();
- b->setFastMathFlags(flags);
- }
+ flags.setNoSignedZeros();
+ b->setFastMathFlags(flags);
+ }
}
/* Heap-allocates an llvm::TargetLibraryInfoImpl for `triple` and returns it
 * as an opaque LLVMTargetLibraryInfoRef. The object is allocated with `new`;
 * ac_dispose_target_library_info() is the matching release function.
 */
-LLVMTargetLibraryInfoRef
-ac_create_target_library_info(const char *triple)
+LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
{
- return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
+ return reinterpret_cast<LLVMTargetLibraryInfoRef>(
+ new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
}
/* Deletes `library_info`, treating it as a pointer to an
 * llvm::TargetLibraryInfoImpl.
 */
-void
-ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
+void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
{
- delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
+ delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
}
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
 * better compatibility with C code.
 *
 * - The stream is unbuffered (SetUnbuffered() in the constructor) and flush()
 *   is deleted.
 * - write_impl() appends at `written`. It aborts if written + size wraps
 *   around or if realloc() fails. When more room is needed, the capacity
 *   becomes the largest of 1024, the required size and bufsize / 3 * 4.
 * - pwrite_impl() overwrites bytes that were already written; its bounds are
 *   checked only through assert().
 * - take() hands the buffer and its length to the caller and resets the
 *   stream to empty; a buffer that was not taken is free()d by the destructor.
 * - clear() only resets `written`; the allocation is kept.
 */
struct raw_memory_ostream : public llvm::raw_pwrite_stream {
- char *buffer;
- size_t written;
- size_t bufsize;
-
- raw_memory_ostream()
- {
- buffer = NULL;
- written = 0;
- bufsize = 0;
- SetUnbuffered();
- }
-
- ~raw_memory_ostream()
- {
- free(buffer);
- }
-
- void clear()
- {
- written = 0;
- }
-
- void take(char *&out_buffer, size_t &out_size)
- {
- out_buffer = buffer;
- out_size = written;
- buffer = NULL;
- written = 0;
- bufsize = 0;
- }
-
- void flush() = delete;
-
- void write_impl(const char *ptr, size_t size) override
- {
- if (unlikely(written + size < written))
- abort();
- if (written + size > bufsize) {
- bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
- buffer = (char *)realloc(buffer, bufsize);
- if (!buffer) {
- fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
- abort();
- }
- }
- memcpy(buffer + written, ptr, size);
- written += size;
- }
-
- void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
- {
- assert(offset == (size_t)offset &&
- offset + size >= offset && offset + size <= written);
- memcpy(buffer + offset, ptr, size);
- }
-
- uint64_t current_pos() const override
- {
- return written;
- }
+ char *buffer;
+ size_t written;
+ size_t bufsize;
+
+ raw_memory_ostream()
+ {
+ buffer = NULL;
+ written = 0;
+ bufsize = 0;
+ SetUnbuffered();
+ }
+
+ ~raw_memory_ostream()
+ {
+ free(buffer);
+ }
+
+ void clear()
+ {
+ written = 0;
+ }
+
+ void take(char *&out_buffer, size_t &out_size)
+ {
+ out_buffer = buffer;
+ out_size = written;
+ buffer = NULL;
+ written = 0;
+ bufsize = 0;
+ }
+
+ void flush() = delete;
+
+ void write_impl(const char *ptr, size_t size) override
+ {
+ if (unlikely(written + size < written))
+ abort();
+ if (written + size > bufsize) {
+ bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
+ buffer = (char *)realloc(buffer, bufsize);
+ if (!buffer) {
+ fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
+ abort();
+ }
+ }
+ memcpy(buffer + written, ptr, size);
+ written += size;
+ }
+
+ void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
+ {
+ assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
+ memcpy(buffer + offset, ptr, size);
+ }
+
+ uint64_t current_pos() const override
+ {
+ return written;
+ }
};
/* The LLVM compiler is represented as a pass manager containing passes for
 * optimizations, instruction selection, and code generation.
 *
 * ac_create_llvm_passes() registers the emit passes so that `passmgr` writes
 * its output into `ostream`.
 */
struct ac_compiler_passes {
- raw_memory_ostream ostream; /* ELF shader binary stream */
- llvm::legacy::PassManager passmgr; /* list of passes */
+ raw_memory_ostream ostream; /* ELF shader binary stream */
+ llvm::legacy::PassManager passmgr; /* list of passes */
};
/* Allocates an ac_compiler_passes and asks the target machine `tm` to add the
 * passes that emit an object file into p->ostream. If the target machine
 * reports that it cannot emit that file type, a message is printed to stderr,
 * the object is deleted and NULL is returned.
 *
 * NOTE(review): a plain `new` expression reports allocation failure by
 * throwing rather than returning null, so the `if (!p)` check is likely dead
 * code -- confirm whether this file is built with a non-throwing allocator.
 */
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
{
- struct ac_compiler_passes *p = new ac_compiler_passes();
- if (!p)
- return NULL;
+ struct ac_compiler_passes *p = new ac_compiler_passes();
+ if (!p)
+ return NULL;
- llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
+ llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
- if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
- nullptr,
+ if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
#if LLVM_VERSION_MAJOR >= 10
- llvm::CGFT_ObjectFile)) {
+ llvm::CGFT_ObjectFile)) {
#else
- llvm::TargetMachine::CGFT_ObjectFile)) {
+ llvm::TargetMachine::CGFT_ObjectFile)) {
#endif
- fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
- delete p;
- return NULL;
- }
- return p;
+ fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
+ delete p;
+ return NULL;
+ }
+ return p;
}
/* Deletes an ac_compiler_passes object created by ac_create_llvm_passes(). */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
- delete p;
+ delete p;
}
/* Runs the pass manager in `p` on `module`, then transfers the resulting
 * buffer and its size from p->ostream to *pelf_buffer / *pelf_size.
 * The buffer is the one grown with realloc() by raw_memory_ostream, so the
 * caller presumably releases it with free() -- confirm against callers.
 *
 * The return value is meant to be false on failure.
 * NOTE(review): as written the function unconditionally returns true; the
 * result of passmgr.run() is not used to detect failure.
 */
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
+ char **pelf_buffer, size_t *pelf_size)
{
- p->passmgr.run(*llvm::unwrap(module));
- p->ostream.take(*pelf_buffer, *pelf_size);
- return true;
+ p->passmgr.run(*llvm::unwrap(module));
+ p->ostream.take(*pelf_buffer, *pelf_size);
+ return true;
}
/* Appends the pass returned by llvm::createBarrierNoopPass() to `passmgr`. */
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
{
- llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
+ llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
}
/* Calls setGlobalISel(true) on the target machine `tm`. */
void ac_enable_global_isel(LLVMTargetMachineRef tm)
{
- reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
+ reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
}
/* Builds an atomic read-modify-write instruction with ctx->builder.
 *
 * The LLVM-C opcode `op` is translated to the matching
 * llvm::AtomicRMWInst::BinOp; any other value reaches unreachable().
 * The instruction uses sequentially consistent ordering and the sync scope
 * registered in ctx->context under the name `sync_scope`.
 */
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
- LLVMValueRef ptr, LLVMValueRef val,
- const char *sync_scope) {
- llvm::AtomicRMWInst::BinOp binop;
- switch (op) {
- case LLVMAtomicRMWBinOpXchg:
- binop = llvm::AtomicRMWInst::Xchg;
- break;
- case LLVMAtomicRMWBinOpAdd:
- binop = llvm::AtomicRMWInst::Add;
- break;
- case LLVMAtomicRMWBinOpSub:
- binop = llvm::AtomicRMWInst::Sub;
- break;
- case LLVMAtomicRMWBinOpAnd:
- binop = llvm::AtomicRMWInst::And;
- break;
- case LLVMAtomicRMWBinOpNand:
- binop = llvm::AtomicRMWInst::Nand;
- break;
- case LLVMAtomicRMWBinOpOr:
- binop = llvm::AtomicRMWInst::Or;
- break;
- case LLVMAtomicRMWBinOpXor:
- binop = llvm::AtomicRMWInst::Xor;
- break;
- case LLVMAtomicRMWBinOpMax:
- binop = llvm::AtomicRMWInst::Max;
- break;
- case LLVMAtomicRMWBinOpMin:
- binop = llvm::AtomicRMWInst::Min;
- break;
- case LLVMAtomicRMWBinOpUMax:
- binop = llvm::AtomicRMWInst::UMax;
- break;
- case LLVMAtomicRMWBinOpUMin:
- binop = llvm::AtomicRMWInst::UMin;
- break;
- default:
- unreachable(!"invalid LLVMAtomicRMWBinOp");
- break;
- }
- unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
- return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
- binop, llvm::unwrap(ptr), llvm::unwrap(val),
- llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+ LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
+{
+ llvm::AtomicRMWInst::BinOp binop;
+ switch (op) {
+ case LLVMAtomicRMWBinOpXchg:
+ binop = llvm::AtomicRMWInst::Xchg;
+ break;
+ case LLVMAtomicRMWBinOpAdd:
+ binop = llvm::AtomicRMWInst::Add;
+ break;
+ case LLVMAtomicRMWBinOpSub:
+ binop = llvm::AtomicRMWInst::Sub;
+ break;
+ case LLVMAtomicRMWBinOpAnd:
+ binop = llvm::AtomicRMWInst::And;
+ break;
+ case LLVMAtomicRMWBinOpNand:
+ binop = llvm::AtomicRMWInst::Nand;
+ break;
+ case LLVMAtomicRMWBinOpOr:
+ binop = llvm::AtomicRMWInst::Or;
+ break;
+ case LLVMAtomicRMWBinOpXor:
+ binop = llvm::AtomicRMWInst::Xor;
+ break;
+ case LLVMAtomicRMWBinOpMax:
+ binop = llvm::AtomicRMWInst::Max;
+ break;
+ case LLVMAtomicRMWBinOpMin:
+ binop = llvm::AtomicRMWInst::Min;
+ break;
+ case LLVMAtomicRMWBinOpUMax:
+ binop = llvm::AtomicRMWInst::UMax;
+ break;
+ case LLVMAtomicRMWBinOpUMin:
+ binop = llvm::AtomicRMWInst::UMin;
+ break;
+ default:
+ unreachable(!"invalid LLVMAtomicRMWBinOp");
+ break;
+ }
+ unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+ return llvm::wrap(llvm::unwrap(ctx->builder)
+ ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
+ llvm::AtomicOrdering::SequentiallyConsistent, SSID));
}
/* Builds an atomic compare-and-exchange on `ptr` (compare value `cmp`, new
 * value `val`) with ctx->builder. Both ordering arguments are sequentially
 * consistent, and the sync scope is the one registered in ctx->context under
 * the name `sync_scope`.
 */
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef cmp, LLVMValueRef val,
- const char *sync_scope) {
- unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
- return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
- llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
- llvm::AtomicOrdering::SequentiallyConsistent,
- llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+ LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
+{
+ unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+ return llvm::wrap(llvm::unwrap(ctx->builder)
+ ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
+ llvm::unwrap(val),
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering::SequentiallyConsistent, SSID));
}
*/
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_util.h"
+
#include "ac_llvm_build.h"
+#include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
#include "util/bitscan.h"
+#include "util/u_math.h"
#include <llvm-c/Core.h>
#include <llvm-c/Support.h>
#include <llvm-c/Transforms/IPO.h>
#include <llvm-c/Transforms/Scalar.h>
#include <llvm-c/Transforms/Utils.h>
-#include "c11/threads.h"
-#include "gallivm/lp_bld_misc.h"
-#include "util/u_math.h"
#include <assert.h>
#include <stdio.h>
/* Initializes the AMDGPU LLVM components (target info, target, MC, asm
 * printer, asm parser, disassembler) and then passes a fixed set of
 * command-line options to LLVM. It is invoked through call_once() by the
 * ac_init_*_llvm_once() wrappers.
 */
static void ac_init_llvm_target()
{
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetMC();
- LLVMInitializeAMDGPUAsmPrinter();
-
- /* For inline assembly. */
- LLVMInitializeAMDGPUAsmParser();
-
- /* For ACO disassembly. */
- LLVMInitializeAMDGPUDisassembler();
-
- /* Workaround for bug in llvm 4.0 that causes image intrinsics
- * to disappear.
- * https://reviews.llvm.org/D26348
- *
- * "mesa" is the prefix for error messages.
- *
- * -global-isel-abort=2 is a no-op unless global isel has been enabled.
- * This option tells the backend to fall-back to SelectionDAG and print
- * a diagnostic message if global isel fails.
- */
- const char *argv[] = {
- "mesa",
- "-simplifycfg-sink-common=false",
- "-global-isel-abort=2",
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetMC();
+ LLVMInitializeAMDGPUAsmPrinter();
+
+ /* For inline assembly. */
+ LLVMInitializeAMDGPUAsmParser();
+
+ /* For ACO disassembly. */
+ LLVMInitializeAMDGPUDisassembler();
+
+ /* Workaround for bug in llvm 4.0 that causes image intrinsics
+ * to disappear.
+ * https://reviews.llvm.org/D26348
+ *
+ * "mesa" is the prefix for error messages.
+ *
+ * -global-isel-abort=2 is a no-op unless global isel has been enabled.
+ * This option tells the backend to fall-back to SelectionDAG and print
+ * a diagnostic message if global isel fails.
+ */
+ const char *argv[] = {
+ "mesa",
+ "-simplifycfg-sink-common=false",
+ "-global-isel-abort=2",
#if LLVM_VERSION_MAJOR >= 10
- /* Atomic optimizations require LLVM 10.0 for gfx10 support. */
- "-amdgpu-atomic-optimizations=true",
+ /* Atomic optimizations require LLVM 10.0 for gfx10 support. */
+ "-amdgpu-atomic-optimizations=true",
#endif
#if LLVM_VERSION_MAJOR >= 11
- /* This was disabled by default in: https://reviews.llvm.org/D77228 */
- "-structurizecfg-skip-uniform-regions",
+ /* This was disabled by default in: https://reviews.llvm.org/D77228 */
+ "-structurizecfg-skip-uniform-regions",
#endif
- };
- LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
+ };
+ LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
}
/* Runs ac_init_llvm_target() through call_once(), guarded by a function-local
 * static once_flag. Exported with PUBLIC.
 */
PUBLIC void ac_init_shared_llvm_once(void)
{
- static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
- call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
+ static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+ call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
}
/* Variant used when LLVM is not a shared library: runs ac_init_llvm_target()
 * through call_once() with a file-local once_flag.
 */
#if !LLVM_IS_SHARED
static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT;
static void ac_init_static_llvm_once(void)
{
- call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
+ call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
}
#endif
/* One-time LLVM initialization entry point: dispatches to the shared or the
 * static once-initializer depending on LLVM_IS_SHARED.
 */
void ac_init_llvm_once(void)
{
#if LLVM_IS_SHARED
- ac_init_shared_llvm_once();
+ ac_init_shared_llvm_once();
#else
- ac_init_static_llvm_once();
+ ac_init_static_llvm_once();
#endif
}
/* Looks up the LLVM target for `triple`. On failure it prints a diagnostic to
 * stderr (including LLVM's error message when one was provided), disposes the
 * message and returns NULL.
 */
static LLVMTargetRef ac_get_llvm_target(const char *triple)
{
- LLVMTargetRef target = NULL;
- char *err_message = NULL;
-
- if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
- fprintf(stderr, "Cannot find target for triple %s ", triple);
- if (err_message) {
- fprintf(stderr, "%s\n", err_message);
- }
- LLVMDisposeMessage(err_message);
- return NULL;
- }
- return target;
+ LLVMTargetRef target = NULL;
+ char *err_message = NULL;
+
+ if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
+ fprintf(stderr, "Cannot find target for triple %s ", triple);
+ if (err_message) {
+ fprintf(stderr, "%s\n", err_message);
+ }
+ LLVMDisposeMessage(err_message);
+ return NULL;
+ }
+ return target;
}
/* Maps a radeon_family value to the processor name string handed to LLVM.
 * Several families share one name (e.g. POLARIS11/POLARIS12/VEGAM all map to
 * "polaris11"). Families without a case return the empty string.
 */
const char *ac_get_llvm_processor_name(enum radeon_family family)
{
- switch (family) {
- case CHIP_TAHITI:
- return "tahiti";
- case CHIP_PITCAIRN:
- return "pitcairn";
- case CHIP_VERDE:
- return "verde";
- case CHIP_OLAND:
- return "oland";
- case CHIP_HAINAN:
- return "hainan";
- case CHIP_BONAIRE:
- return "bonaire";
- case CHIP_KABINI:
- return "kabini";
- case CHIP_KAVERI:
- return "kaveri";
- case CHIP_HAWAII:
- return "hawaii";
- case CHIP_TONGA:
- return "tonga";
- case CHIP_ICELAND:
- return "iceland";
- case CHIP_CARRIZO:
- return "carrizo";
- case CHIP_FIJI:
- return "fiji";
- case CHIP_STONEY:
- return "stoney";
- case CHIP_POLARIS10:
- return "polaris10";
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- return "polaris11";
- case CHIP_VEGA10:
- return "gfx900";
- case CHIP_RAVEN:
- return "gfx902";
- case CHIP_VEGA12:
- return "gfx904";
- case CHIP_VEGA20:
- return "gfx906";
- case CHIP_RAVEN2:
- case CHIP_RENOIR:
- return "gfx909";
- case CHIP_ARCTURUS:
- return "gfx908";
- case CHIP_NAVI10:
- return "gfx1010";
- case CHIP_NAVI12:
- return "gfx1011";
- case CHIP_NAVI14:
- return "gfx1012";
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- return "gfx1030";
- default:
- return "";
- }
+ switch (family) {
+ case CHIP_TAHITI:
+ return "tahiti";
+ case CHIP_PITCAIRN:
+ return "pitcairn";
+ case CHIP_VERDE:
+ return "verde";
+ case CHIP_OLAND:
+ return "oland";
+ case CHIP_HAINAN:
+ return "hainan";
+ case CHIP_BONAIRE:
+ return "bonaire";
+ case CHIP_KABINI:
+ return "kabini";
+ case CHIP_KAVERI:
+ return "kaveri";
+ case CHIP_HAWAII:
+ return "hawaii";
+ case CHIP_TONGA:
+ return "tonga";
+ case CHIP_ICELAND:
+ return "iceland";
+ case CHIP_CARRIZO:
+ return "carrizo";
+ case CHIP_FIJI:
+ return "fiji";
+ case CHIP_STONEY:
+ return "stoney";
+ case CHIP_POLARIS10:
+ return "polaris10";
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ return "polaris11";
+ case CHIP_VEGA10:
+ return "gfx900";
+ case CHIP_RAVEN:
+ return "gfx902";
+ case CHIP_VEGA12:
+ return "gfx904";
+ case CHIP_VEGA20:
+ return "gfx906";
+ case CHIP_RAVEN2:
+ case CHIP_RENOIR:
+ return "gfx909";
+ case CHIP_ARCTURUS:
+ return "gfx908";
+ case CHIP_NAVI10:
+ return "gfx1010";
+ case CHIP_NAVI12:
+ return "gfx1011";
+ case CHIP_NAVI14:
+ return "gfx1012";
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
+ return "gfx1030";
+ default:
+ return "";
+ }
}
/* Creates the LLVM target machine for `family`.
 *
 * - The triple is "amdgcn-mesa-mesa3d" when AC_TM_SUPPORTS_SPILL is set and
 *   "amdgcn--" otherwise; it is also stored in *out_triple when out_triple is
 *   non-NULL.
 * - The feature string always starts with "+DumpCode"; denormal, wavefront
 *   size, xnack and promote-alloca features are appended depending on the
 *   LLVM version, `family` and `tm_options`.
 * - Global instruction selection is enabled on the result when
 *   AC_TM_ENABLE_GLOBAL_ISEL is set.
 *
 * NOTE(review): ac_get_llvm_target() returns NULL on failure and that value
 * is passed to LLVMCreateTargetMachine() unchecked -- confirm whether callers
 * can ever hit this.
 */
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
- enum ac_target_machine_options tm_options,
- LLVMCodeGenOptLevel level,
- const char **out_triple)
+ enum ac_target_machine_options tm_options,
+ LLVMCodeGenOptLevel level,
+ const char **out_triple)
{
- assert(family >= CHIP_TAHITI);
- char features[256];
- const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
- LLVMTargetRef target = ac_get_llvm_target(triple);
-
- snprintf(features, sizeof(features),
- "+DumpCode%s%s%s%s%s",
- LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals",
- family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
- ",+wavefrontsize64,-wavefrontsize32" : "",
- family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
- family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
- tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
-
- LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
- target,
- triple,
- ac_get_llvm_processor_name(family),
- features,
- level,
- LLVMRelocDefault,
- LLVMCodeModelDefault);
-
- if (out_triple)
- *out_triple = triple;
- if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
- ac_enable_global_isel(tm);
- return tm;
+ assert(family >= CHIP_TAHITI);
+ char features[256];
+ const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
+ LLVMTargetRef target = ac_get_llvm_target(triple);
+
+ snprintf(features, sizeof(features), "+DumpCode%s%s%s%s%s",
+ LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals",
+ family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32)
+ ? ",+wavefrontsize64,-wavefrontsize32"
+ : "",
+ family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+ family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+ tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
+
+ LLVMTargetMachineRef tm =
+ LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level,
+ LLVMRelocDefault, LLVMCodeModelDefault);
+
+ if (out_triple)
+ *out_triple = triple;
+ if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
+ ac_enable_global_isel(tm);
+ return tm;
}
/* Creates a pass manager and fills it with a fixed pass pipeline.
 *
 * `target_library_info` is registered with the pass manager when non-NULL,
 * and a verifier pass is added first when `check_ir` is true.
 * Returns NULL if the pass manager could not be created.
 */
static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
- bool check_ir)
+ bool check_ir)
{
- LLVMPassManagerRef passmgr = LLVMCreatePassManager();
- if (!passmgr)
- return NULL;
-
- if (target_library_info)
- LLVMAddTargetLibraryInfo(target_library_info,
- passmgr);
-
- if (check_ir)
- LLVMAddVerifierPass(passmgr);
- LLVMAddAlwaysInlinerPass(passmgr);
- /* Normally, the pass manager runs all passes on one function before
- * moving onto another. Adding a barrier no-op pass forces the pass
- * manager to run the inliner on all functions first, which makes sure
- * that the following passes are only run on the remaining non-inline
- * function, so it removes useless work done on dead inline functions.
- */
- ac_llvm_add_barrier_noop_pass(passmgr);
- /* This pass should eliminate all the load and store instructions. */
- LLVMAddPromoteMemoryToRegisterPass(passmgr);
- LLVMAddScalarReplAggregatesPass(passmgr);
- LLVMAddLICMPass(passmgr);
- LLVMAddAggressiveDCEPass(passmgr);
- LLVMAddCFGSimplificationPass(passmgr);
- /* This is recommended by the instruction combining pass. */
- LLVMAddEarlyCSEMemSSAPass(passmgr);
- LLVMAddInstructionCombiningPass(passmgr);
- return passmgr;
+ LLVMPassManagerRef passmgr = LLVMCreatePassManager();
+ if (!passmgr)
+ return NULL;
+
+ if (target_library_info)
+ LLVMAddTargetLibraryInfo(target_library_info, passmgr);
+
+ if (check_ir)
+ LLVMAddVerifierPass(passmgr);
+ LLVMAddAlwaysInlinerPass(passmgr);
+ /* Normally, the pass manager runs all passes on one function before
+ * moving onto another. Adding a barrier no-op pass forces the pass
+ * manager to run the inliner on all functions first, which makes sure
+ * that the following passes are only run on the remaining non-inline
+ * function, so it removes useless work done on dead inline functions.
+ */
+ ac_llvm_add_barrier_noop_pass(passmgr);
+ /* This pass should eliminate all the load and store instructions. */
+ LLVMAddPromoteMemoryToRegisterPass(passmgr);
+ LLVMAddScalarReplAggregatesPass(passmgr);
+ LLVMAddLICMPass(passmgr);
+ LLVMAddAggressiveDCEPass(passmgr);
+ LLVMAddCFGSimplificationPass(passmgr);
+ /* This is recommended by the instruction combining pass. */
+ LLVMAddEarlyCSEMemSSAPass(passmgr);
+ LLVMAddInstructionCombiningPass(passmgr);
+ return passmgr;
}
/* Returns the LLVM attribute name for a single ac_func_attr value.
 * For a value without a case it prints a diagnostic to stderr and returns 0
 * (a null pointer).
 * NOTE(review): callers must cope with that null result.
 */
static const char *attr_to_str(enum ac_func_attr attr)
{
switch (attr) {
- case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
- case AC_FUNC_ATTR_INREG: return "inreg";
- case AC_FUNC_ATTR_NOALIAS: return "noalias";
- case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
- case AC_FUNC_ATTR_READNONE: return "readnone";
- case AC_FUNC_ATTR_READONLY: return "readonly";
- case AC_FUNC_ATTR_WRITEONLY: return "writeonly";
- case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly";
- case AC_FUNC_ATTR_CONVERGENT: return "convergent";
+ case AC_FUNC_ATTR_ALWAYSINLINE:
+ return "alwaysinline";
+ case AC_FUNC_ATTR_INREG:
+ return "inreg";
+ case AC_FUNC_ATTR_NOALIAS:
+ return "noalias";
+ case AC_FUNC_ATTR_NOUNWIND:
+ return "nounwind";
+ case AC_FUNC_ATTR_READNONE:
+ return "readnone";
+ case AC_FUNC_ATTR_READONLY:
+ return "readonly";
+ case AC_FUNC_ATTR_WRITEONLY:
+ return "writeonly";
+ case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY:
+ return "inaccessiblememonly";
+ case AC_FUNC_ATTR_CONVERGENT:
+ return "convergent";
default:
- fprintf(stderr, "Unhandled function attribute: %x\n", attr);
- return 0;
+ fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+ return 0;
}
}
/* Resolves `attr` to an LLVM enum attribute (by name, with value 0) and
 * attaches it to `function` at attribute index `attr_idx`.
 *
 * NOTE(review): attr_to_str() returns a null pointer for an unhandled
 * attribute, and that pointer is passed straight to strlen() here -- confirm
 * that no caller can pass such a value.
 */
-void
-ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
- int attr_idx, enum ac_func_attr attr)
+void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
+ enum ac_func_attr attr)
{
const char *attr_name = attr_to_str(attr);
- unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
- strlen(attr_name));
+ unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
if (LLVMIsAFunction(function))
LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
}
/* Adds every attribute whose bit is set in `attrib_mask` to `function` via
 * ac_add_function_attr() with attribute index -1. AC_FUNC_ATTR_NOUNWIND is
 * always added and the AC_FUNC_ATTR_LEGACY bits are masked out first.
 */
-void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
- unsigned attrib_mask)
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask)
{
- attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
- attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
+ attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
+ attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
- while (attrib_mask) {
- enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
- ac_add_function_attr(ctx, function, -1, attr);
- }
+ while (attrib_mask) {
+ enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+ ac_add_function_attr(ctx, function, -1, attr);
+ }
}
/* Prints the textual IR of `module` to stderr and disposes the temporary
 * string returned by LLVM.
 */
-void
-ac_dump_module(LLVMModuleRef module)
+void ac_dump_module(LLVMModuleRef module)
{
- char *str = LLVMPrintModuleToString(module);
- fprintf(stderr, "%s", str);
- LLVMDisposeMessage(str);
+ char *str = LLVMPrintModuleToString(module);
+ fprintf(stderr, "%s", str);
+ LLVMDisposeMessage(str);
}
/* Adds the target-dependent attribute `name` to function `F`, with `value`
 * formatted as a hexadecimal string ("0x%x").
 */
-void
-ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
- const char *name, unsigned value)
+void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value)
{
- char str[16];
+ char str[16];
- snprintf(str, sizeof(str), "0x%x", value);
- LLVMAddTargetDependentFunctionAttr(F, name, str);
+ snprintf(str, sizeof(str), "0x%x", value);
+ LLVMAddTargetDependentFunctionAttr(F, name, str);
}
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
{
- if (!size)
- return;
+ if (!size)
+ return;
- char str[32];
- snprintf(str, sizeof(str), "%u,%u", size, size);
- LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
+ char str[32];
+ snprintf(str, sizeof(str), "%u,%u", size, size);
+ LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
}
-unsigned
-ac_count_scratch_private_memory(LLVMValueRef function)
+unsigned ac_count_scratch_private_memory(LLVMValueRef function)
{
- unsigned private_mem_vgprs = 0;
-
- /* Process all LLVM instructions. */
- LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
- while (bb) {
- LLVMValueRef next = LLVMGetFirstInstruction(bb);
-
- while (next) {
- LLVMValueRef inst = next;
- next = LLVMGetNextInstruction(next);
-
- if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
- continue;
-
- LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
- /* No idea why LLVM aligns allocas to 4 elements. */
- unsigned alignment = LLVMGetAlignment(inst);
- unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
- private_mem_vgprs += dw_size;
- }
- bb = LLVMGetNextBasicBlock(bb);
- }
-
- return private_mem_vgprs;
+ unsigned private_mem_vgprs = 0;
+
+ /* Process all LLVM instructions. */
+ LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
+ while (bb) {
+ LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+ while (next) {
+ LLVMValueRef inst = next;
+ next = LLVMGetNextInstruction(next);
+
+ if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+ continue;
+
+ LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
+ /* No idea why LLVM aligns allocas to 4 elements. */
+ unsigned alignment = LLVMGetAlignment(inst);
+ unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
+ private_mem_vgprs += dw_size;
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+
+ return private_mem_vgprs;
}
-bool
-ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options)
+bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
+ enum ac_target_machine_options tm_options)
{
- const char *triple;
- memset(compiler, 0, sizeof(*compiler));
-
- compiler->tm = ac_create_target_machine(family, tm_options,
- LLVMCodeGenLevelDefault,
- &triple);
- if (!compiler->tm)
- return false;
-
- if (tm_options & AC_TM_CREATE_LOW_OPT) {
- compiler->low_opt_tm =
- ac_create_target_machine(family, tm_options,
- LLVMCodeGenLevelLess, NULL);
- if (!compiler->low_opt_tm)
- goto fail;
- }
-
- if (family >= CHIP_NAVI10) {
- assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
- compiler->tm_wave32 = ac_create_target_machine(family,
- tm_options | AC_TM_WAVE32,
- LLVMCodeGenLevelDefault,
- NULL);
- if (!compiler->tm_wave32)
- goto fail;
- }
-
- compiler->target_library_info =
- ac_create_target_library_info(triple);
- if (!compiler->target_library_info)
- goto fail;
-
- compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
- tm_options & AC_TM_CHECK_IR);
- if (!compiler->passmgr)
- goto fail;
-
- return true;
+ const char *triple;
+ memset(compiler, 0, sizeof(*compiler));
+
+ compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple);
+ if (!compiler->tm)
+ return false;
+
+ if (tm_options & AC_TM_CREATE_LOW_OPT) {
+ compiler->low_opt_tm =
+ ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL);
+ if (!compiler->low_opt_tm)
+ goto fail;
+ }
+
+ if (family >= CHIP_NAVI10) {
+ assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
+ compiler->tm_wave32 =
+ ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL);
+ if (!compiler->tm_wave32)
+ goto fail;
+ }
+
+ compiler->target_library_info = ac_create_target_library_info(triple);
+ if (!compiler->target_library_info)
+ goto fail;
+
+ compiler->passmgr =
+ ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);
+ if (!compiler->passmgr)
+ goto fail;
+
+ return true;
fail:
- ac_destroy_llvm_compiler(compiler);
- return false;
+ ac_destroy_llvm_compiler(compiler);
+ return false;
}
-void
-ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
+void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
{
- ac_destroy_llvm_passes(compiler->passes);
- ac_destroy_llvm_passes(compiler->passes_wave32);
- ac_destroy_llvm_passes(compiler->low_opt_passes);
-
- if (compiler->passmgr)
- LLVMDisposePassManager(compiler->passmgr);
- if (compiler->target_library_info)
- ac_dispose_target_library_info(compiler->target_library_info);
- if (compiler->low_opt_tm)
- LLVMDisposeTargetMachine(compiler->low_opt_tm);
- if (compiler->tm)
- LLVMDisposeTargetMachine(compiler->tm);
- if (compiler->tm_wave32)
- LLVMDisposeTargetMachine(compiler->tm_wave32);
+ ac_destroy_llvm_passes(compiler->passes);
+ ac_destroy_llvm_passes(compiler->passes_wave32);
+ ac_destroy_llvm_passes(compiler->low_opt_passes);
+
+ if (compiler->passmgr)
+ LLVMDisposePassManager(compiler->passmgr);
+ if (compiler->target_library_info)
+ ac_dispose_target_library_info(compiler->target_library_info);
+ if (compiler->low_opt_tm)
+ LLVMDisposeTargetMachine(compiler->low_opt_tm);
+ if (compiler->tm)
+ LLVMDisposeTargetMachine(compiler->tm);
+ if (compiler->tm_wave32)
+ LLVMDisposeTargetMachine(compiler->tm_wave32);
}
#ifndef AC_LLVM_UTIL_H
#define AC_LLVM_UTIL_H
-#include <stdbool.h>
+#include "amd_family.h"
#include <llvm-c/TargetMachine.h>
#include <llvm/Config/llvm-config.h>
-#include "amd_family.h"
+#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
struct ac_compiler_passes;
struct ac_llvm_context;
-enum ac_func_attr {
- AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
- AC_FUNC_ATTR_INREG = (1 << 2),
- AC_FUNC_ATTR_NOALIAS = (1 << 3),
- AC_FUNC_ATTR_NOUNWIND = (1 << 4),
- AC_FUNC_ATTR_READNONE = (1 << 5),
- AC_FUNC_ATTR_READONLY = (1 << 6),
- AC_FUNC_ATTR_WRITEONLY = (1 << 7),
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
- AC_FUNC_ATTR_CONVERGENT = (1 << 9),
-
- /* Legacy intrinsic that needs attributes on function declarations
- * and they must match the internal LLVM definition exactly, otherwise
- * intrinsic selection fails.
- */
- AC_FUNC_ATTR_LEGACY = (1u << 31),
+enum ac_func_attr
+{
+ AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
+ AC_FUNC_ATTR_INREG = (1 << 2),
+ AC_FUNC_ATTR_NOALIAS = (1 << 3),
+ AC_FUNC_ATTR_NOUNWIND = (1 << 4),
+ AC_FUNC_ATTR_READNONE = (1 << 5),
+ AC_FUNC_ATTR_READONLY = (1 << 6),
+ AC_FUNC_ATTR_WRITEONLY = (1 << 7),
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
+ AC_FUNC_ATTR_CONVERGENT = (1 << 9),
+
+ /* Legacy intrinsic that needs attributes on function declarations
+ * and they must match the internal LLVM definition exactly, otherwise
+ * intrinsic selection fails.
+ */
+ AC_FUNC_ATTR_LEGACY = (1u << 31),
};
-enum ac_target_machine_options {
- AC_TM_SUPPORTS_SPILL = (1 << 0),
- AC_TM_FORCE_ENABLE_XNACK = (1 << 1),
- AC_TM_FORCE_DISABLE_XNACK = (1 << 2),
- AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
- AC_TM_CHECK_IR = (1 << 4),
- AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
- AC_TM_CREATE_LOW_OPT = (1 << 6),
- AC_TM_WAVE32 = (1 << 7),
+enum ac_target_machine_options
+{
+ AC_TM_SUPPORTS_SPILL = (1 << 0),
+ AC_TM_FORCE_ENABLE_XNACK = (1 << 1),
+ AC_TM_FORCE_DISABLE_XNACK = (1 << 2),
+ AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
+ AC_TM_CHECK_IR = (1 << 4),
+ AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
+ AC_TM_CREATE_LOW_OPT = (1 << 6),
+ AC_TM_WAVE32 = (1 << 7),
};
-enum ac_float_mode {
- AC_FLOAT_MODE_DEFAULT,
- AC_FLOAT_MODE_DEFAULT_OPENGL,
- AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO,
+enum ac_float_mode
+{
+ AC_FLOAT_MODE_DEFAULT,
+ AC_FLOAT_MODE_DEFAULT_OPENGL,
+ AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO,
};
/* Per-thread persistent LLVM objects. */
struct ac_llvm_compiler {
- LLVMTargetLibraryInfoRef target_library_info;
- LLVMPassManagerRef passmgr;
-
- /* Default compiler. */
- LLVMTargetMachineRef tm;
- struct ac_compiler_passes *passes;
-
- /* Wave32 compiler for GFX10. */
- LLVMTargetMachineRef tm_wave32;
- struct ac_compiler_passes *passes_wave32;
-
- /* Optional compiler for faster compilation with fewer optimizations.
- * LLVM modules can be created with "tm" too. There is no difference.
- */
- LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
- struct ac_compiler_passes *low_opt_passes;
+ LLVMTargetLibraryInfoRef target_library_info;
+ LLVMPassManagerRef passmgr;
+
+ /* Default compiler. */
+ LLVMTargetMachineRef tm;
+ struct ac_compiler_passes *passes;
+
+ /* Wave32 compiler for GFX10. */
+ LLVMTargetMachineRef tm_wave32;
+ struct ac_compiler_passes *passes_wave32;
+
+ /* Optional compiler for faster compilation with fewer optimizations.
+ * LLVM modules can be created with "tm" too. There is no difference.
+ */
+ LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
+ struct ac_compiler_passes *low_opt_passes;
};
const char *ac_get_llvm_processor_name(enum radeon_family family);
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes);
bool ac_is_sgpr_param(LLVMValueRef param);
-void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
- int attr_idx, enum ac_func_attr attr);
-void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
- unsigned attrib_mask);
+void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
+ enum ac_func_attr attr);
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask);
void ac_dump_module(LLVMModuleRef module);
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
bool ac_llvm_is_function(LLVMValueRef v);
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
- enum ac_float_mode float_mode);
+LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode);
void ac_enable_signed_zeros(struct ac_llvm_context *ctx);
void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
-void
-ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
- const char *name, unsigned value);
+void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
-static inline unsigned
-ac_get_load_intr_attribs(bool can_speculate)
+static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
{
- /* READNONE means writes can't affect it, while READONLY means that
- * writes can affect it. */
- return can_speculate ? AC_FUNC_ATTR_READNONE :
- AC_FUNC_ATTR_READONLY;
+ /* READNONE means writes can't affect it, while READONLY means that
+ * writes can affect it. */
+ return can_speculate ? AC_FUNC_ATTR_READNONE : AC_FUNC_ATTR_READONLY;
}
-unsigned
-ac_count_scratch_private_memory(LLVMValueRef function);
+unsigned ac_count_scratch_private_memory(LLVMValueRef function);
LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
void ac_init_shared_llvm_once(void); /* Do not use directly, use ac_init_llvm_once */
void ac_init_llvm_once(void);
-
-bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options);
+bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
+ enum ac_target_machine_options tm_options);
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size);
+ char **pelf_buffer, size_t *pelf_size);
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
void ac_enable_global_isel(LLVMTargetMachineRef tm);
-static inline bool
-ac_has_vec3_support(enum chip_class chip, bool use_format)
+static inline bool ac_has_vec3_support(enum chip_class chip, bool use_format)
{
- if (chip == GFX6 && !use_format) {
- /* GFX6 only supports vec3 with load/store format. */
- return false;
- }
+ if (chip == GFX6 && !use_format) {
+ /* GFX6 only supports vec3 with load/store format. */
+ return false;
+ }
- return LLVM_VERSION_MAJOR >= 9;
+ return LLVM_VERSION_MAJOR >= 9;
}
#ifdef __cplusplus
* IN THE SOFTWARE.
*/
-#include <llvm/Config/llvm-config.h>
-
#include "ac_nir_to_llvm.h"
+
+#include "ac_binary.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
-#include "ac_binary.h"
-#include "sid.h"
+#include "ac_shader_abi.h"
+#include "ac_shader_util.h"
#include "nir/nir.h"
#include "nir/nir_deref.h"
+#include "sid.h"
#include "util/bitscan.h"
#include "util/u_math.h"
-#include "ac_shader_abi.h"
-#include "ac_shader_util.h"
+#include <llvm/Config/llvm-config.h>
struct ac_nir_context {
- struct ac_llvm_context ac;
- struct ac_shader_abi *abi;
- const struct ac_shader_args *args;
+ struct ac_llvm_context ac;
+ struct ac_shader_abi *abi;
+ const struct ac_shader_args *args;
- gl_shader_stage stage;
- shader_info *info;
+ gl_shader_stage stage;
+ shader_info *info;
- LLVMValueRef *ssa_defs;
+ LLVMValueRef *ssa_defs;
- LLVMValueRef scratch;
- LLVMValueRef constant_data;
+ LLVMValueRef scratch;
+ LLVMValueRef constant_data;
- struct hash_table *defs;
- struct hash_table *phis;
- struct hash_table *vars;
- struct hash_table *verified_interp;
+ struct hash_table *defs;
+ struct hash_table *phis;
+ struct hash_table *vars;
+ struct hash_table *verified_interp;
- LLVMValueRef main_function;
- LLVMBasicBlockRef continue_block;
- LLVMBasicBlockRef break_block;
+ LLVMValueRef main_function;
+ LLVMBasicBlockRef continue_block;
+ LLVMBasicBlockRef break_block;
- int num_locals;
- LLVMValueRef *locals;
+ int num_locals;
+ LLVMValueRef *locals;
};
-static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- const nir_instr *instr,
- bool image);
-
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- enum ac_descriptor_type desc_type,
- const nir_instr *instr,
- LLVMValueRef index,
- bool image, bool write);
-
-static void
-build_store_values_extended(struct ac_llvm_context *ac,
- LLVMValueRef *values,
- unsigned value_count,
- unsigned value_stride,
- LLVMValueRef vec)
+static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
+ const nir_instr *instr, bool image);
+
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
+ enum ac_descriptor_type desc_type, const nir_instr *instr,
+ LLVMValueRef index, bool image, bool write);
+
+static void build_store_values_extended(struct ac_llvm_context *ac, LLVMValueRef *values,
+ unsigned value_count, unsigned value_stride,
+ LLVMValueRef vec)
{
- LLVMBuilderRef builder = ac->builder;
- unsigned i;
-
- for (i = 0; i < value_count; i++) {
- LLVMValueRef ptr = values[i * value_stride];
- LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
- LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
- LLVMBuildStore(builder, value, ptr);
- }
+ LLVMBuilderRef builder = ac->builder;
+ unsigned i;
+
+ for (i = 0; i < value_count; i++) {
+ LLVMValueRef ptr = values[i * value_stride];
+ LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
+ LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
+ LLVMBuildStore(builder, value, ptr);
+ }
}
-static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
- const nir_ssa_def *def)
+static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
{
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
- if (def->num_components > 1) {
- type = LLVMVectorType(type, def->num_components);
- }
- return type;
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
+ if (def->num_components > 1) {
+ type = LLVMVectorType(type, def->num_components);
+ }
+ return type;
}
static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
- assert(src.is_ssa);
- return nir->ssa_defs[src.ssa->index];
+ assert(src.is_ssa);
+ return nir->ssa_defs[src.ssa->index];
}
-static LLVMValueRef
-get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
+static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
{
- LLVMValueRef ptr = get_src(ctx, src);
- ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
- int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ LLVMValueRef ptr = get_src(ctx, src);
+ ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
+ int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
- return LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(type, addr_space), "");
+ return LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(type, addr_space), "");
}
-static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
- const struct nir_block *b)
+static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b)
{
- struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
- return (LLVMBasicBlockRef)entry->data;
+ struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
+ return (LLVMBasicBlockRef)entry->data;
}
-static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
- nir_alu_src src,
+static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src,
unsigned num_components)
{
- LLVMValueRef value = get_src(ctx, src.src);
- bool need_swizzle = false;
-
- assert(value);
- unsigned src_components = ac_get_llvm_num_components(value);
- for (unsigned i = 0; i < num_components; ++i) {
- assert(src.swizzle[i] < src_components);
- if (src.swizzle[i] != i)
- need_swizzle = true;
- }
-
- if (need_swizzle || num_components != src_components) {
- LLVMValueRef masks[] = {
- LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
- LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
- LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
- LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
-
- if (src_components > 1 && num_components == 1) {
- value = LLVMBuildExtractElement(ctx->ac.builder, value,
- masks[0], "");
- } else if (src_components == 1 && num_components > 1) {
- LLVMValueRef values[] = {value, value, value, value};
- value = ac_build_gather_values(&ctx->ac, values, num_components);
- } else {
- LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
- value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
- swizzle, "");
- }
- }
- assert(!src.negate);
- assert(!src.abs);
- return value;
+ LLVMValueRef value = get_src(ctx, src.src);
+ bool need_swizzle = false;
+
+ assert(value);
+ unsigned src_components = ac_get_llvm_num_components(value);
+ for (unsigned i = 0; i < num_components; ++i) {
+ assert(src.swizzle[i] < src_components);
+ if (src.swizzle[i] != i)
+ need_swizzle = true;
+ }
+
+ if (need_swizzle || num_components != src_components) {
+ LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
+
+ if (src_components > 1 && num_components == 1) {
+ value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], "");
+ } else if (src_components == 1 && num_components > 1) {
+ LLVMValueRef values[] = {value, value, value, value};
+ value = ac_build_gather_values(&ctx->ac, values, num_components);
+ } else {
+ LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
+ value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, "");
+ }
+ }
+ assert(!src.negate);
+ assert(!src.abs);
+ return value;
}
-static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
- LLVMIntPredicate pred, LLVMValueRef src0,
- LLVMValueRef src1)
+static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
+ LLVMValueRef src0, LLVMValueRef src1)
{
- LLVMTypeRef src0_type = LLVMTypeOf(src0);
- LLVMTypeRef src1_type = LLVMTypeOf(src1);
-
- if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
- LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
- src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
- } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
- LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
- src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
- }
-
- LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
- return LLVMBuildSelect(ctx->builder, result,
- LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
- ctx->i32_0, "");
+ LLVMTypeRef src0_type = LLVMTypeOf(src0);
+ LLVMTypeRef src1_type = LLVMTypeOf(src1);
+
+ if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+ src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
+ } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
+ src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
+ }
+
+ LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
+ return LLVMBuildSelect(ctx->builder, result, LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
+ ctx->i32_0, "");
}
-static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
- LLVMRealPredicate pred, LLVMValueRef src0,
- LLVMValueRef src1)
+static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred,
+ LLVMValueRef src0, LLVMValueRef src1)
{
- LLVMValueRef result;
- src0 = ac_to_float(ctx, src0);
- src1 = ac_to_float(ctx, src1);
- result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
- return LLVMBuildSelect(ctx->builder, result,
- LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
- ctx->i32_0, "");
+ LLVMValueRef result;
+ src0 = ac_to_float(ctx, src0);
+ src1 = ac_to_float(ctx, src1);
+ result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
+ return LLVMBuildSelect(ctx->builder, result, LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
+ ctx->i32_0, "");
}
-static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
- const char *intrin,
- LLVMTypeRef result_type,
- LLVMValueRef src0)
+static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0)
{
- char name[64], type[64];
- LLVMValueRef params[] = {
- ac_to_float(ctx, src0),
- };
-
- ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
- assert(length < sizeof(name));
- return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ ac_to_float(ctx, src0),
+ };
+
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}
-static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx,
- const char *intrin,
- LLVMTypeRef result_type,
- LLVMValueRef src0)
+static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0)
{
- if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
- return emit_intrin_1f_param(ctx, intrin, result_type, src0);
-
- LLVMTypeRef elem_type = LLVMGetElementType(result_type);
- LLVMValueRef ret = LLVMGetUndef(result_type);
-
- /* Scalarize the intrinsic, because vectors are not supported. */
- for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
- char name[64], type[64];
- LLVMValueRef params[] = {
- ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
- };
-
- ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
- assert(length < sizeof(name));
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ac_build_intrinsic(ctx, name, elem_type, params,
- 1, AC_FUNC_ATTR_READNONE),
- LLVMConstInt(ctx->i32, i, 0), "");
- }
- return ret;
+ if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
+ return emit_intrin_1f_param(ctx, intrin, result_type, src0);
+
+ LLVMTypeRef elem_type = LLVMGetElementType(result_type);
+ LLVMValueRef ret = LLVMGetUndef(result_type);
+
+ /* Scalarize the intrinsic, because vectors are not supported. */
+ for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
+ };
+
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ ret = LLVMBuildInsertElement(
+ ctx->builder, ret,
+ ac_build_intrinsic(ctx, name, elem_type, params, 1, AC_FUNC_ATTR_READNONE),
+ LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ return ret;
}
-static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
- const char *intrin,
- LLVMTypeRef result_type,
- LLVMValueRef src0, LLVMValueRef src1)
+static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0,
+ LLVMValueRef src1)
{
- char name[64], type[64];
- LLVMValueRef params[] = {
- ac_to_float(ctx, src0),
- ac_to_float(ctx, src1),
- };
-
- ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
- assert(length < sizeof(name));
- return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ ac_to_float(ctx, src0),
+ ac_to_float(ctx, src1),
+ };
+
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}
-static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
- const char *intrin,
- LLVMTypeRef result_type,
- LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
+static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0,
+ LLVMValueRef src1, LLVMValueRef src2)
{
- char name[64], type[64];
- LLVMValueRef params[] = {
- ac_to_float(ctx, src0),
- ac_to_float(ctx, src1),
- ac_to_float(ctx, src2),
- };
-
- ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
- assert(length < sizeof(name));
- return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ ac_to_float(ctx, src0),
+ ac_to_float(ctx, src1),
+ ac_to_float(ctx, src2),
+ };
+
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}
-static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
- LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
+static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
+ LLVMValueRef src2)
{
- LLVMTypeRef src1_type = LLVMTypeOf(src1);
- LLVMTypeRef src2_type = LLVMTypeOf(src2);
-
- if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
- LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
- src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
- } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
- LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
- src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
- }
-
- LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
- LLVMConstNull(LLVMTypeOf(src0)), "");
- return LLVMBuildSelect(ctx->builder, v,
- ac_to_integer_or_pointer(ctx, src1),
- ac_to_integer_or_pointer(ctx, src2), "");
+ LLVMTypeRef src1_type = LLVMTypeOf(src1);
+ LLVMTypeRef src2_type = LLVMTypeOf(src2);
+
+ if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
+ src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
+ } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+ src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
+ }
+
+ LLVMValueRef v =
+ LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, LLVMConstNull(LLVMTypeOf(src0)), "");
+ return LLVMBuildSelect(ctx->builder, v, ac_to_integer_or_pointer(ctx, src1),
+ ac_to_integer_or_pointer(ctx, src2), "");
}
-static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
+static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
+ return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
}
-static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
- const char *intrin,
- LLVMValueRef src0, LLVMValueRef src1)
+static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin,
+ LLVMValueRef src0, LLVMValueRef src1)
{
- LLVMTypeRef ret_type;
- LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
- LLVMValueRef res;
- LLVMValueRef params[] = { src0, src1 };
- ret_type = LLVMStructTypeInContext(ctx->context, types,
- 2, true);
-
- res = ac_build_intrinsic(ctx, intrin, ret_type,
- params, 2, AC_FUNC_ATTR_READNONE);
-
- res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
- res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
- return res;
+ LLVMTypeRef ret_type;
+ LLVMTypeRef types[] = {ctx->i32, ctx->i1};
+ LLVMValueRef res;
+ LLVMValueRef params[] = {src0, src1};
+ ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
+
+ res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE);
+
+ res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
+ res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
+ return res;
}
-static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
- LLVMValueRef src0,
- unsigned bitsize)
+static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
- assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 32);
- LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
- ac_const_uint_vec(ctx, LLVMTypeOf(src0), 0x3f800000),
- "");
- result = ac_to_float(ctx, result);
-
- switch (bitsize) {
- case 16: {
- bool vec2 = LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind;
- return LLVMBuildFPTrunc(ctx->builder, result, vec2 ? ctx->v2f16 : ctx->f16, "");
- }
- case 32:
- return result;
- case 64:
- return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
- default:
- unreachable("Unsupported bit size.");
- }
+ assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 32);
+ LLVMValueRef result =
+ LLVMBuildAnd(ctx->builder, src0, ac_const_uint_vec(ctx, LLVMTypeOf(src0), 0x3f800000), "");
+ result = ac_to_float(ctx, result);
+
+ switch (bitsize) {
+ case 16: {
+ bool vec2 = LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind;
+ return LLVMBuildFPTrunc(ctx->builder, result, vec2 ? ctx->v2f16 : ctx->f16, "");
+ }
+ case 32:
+ return result;
+ case 64:
+ return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
+ default:
+ unreachable("Unsupported bit size.");
+ }
}
-static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
+/* Convert a float to a 32-bit boolean.
+ *
+ * src0 is converted with ac_to_float() and compared against a null constant of
+ * its own type using the unordered-or-not-equal predicate (LLVMRealUNE). The
+ * comparison result is sign-extended to i32.
+ */
+static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- src0 = ac_to_float(ctx, src0);
- LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
- return LLVMBuildSExt(ctx->builder,
- LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
- ctx->i32, "");
+ src0 = ac_to_float(ctx, src0);
+ LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
+ return LLVMBuildSExt(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
+ ctx->i32, "");
}
-static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
- LLVMValueRef src0,
- unsigned bitsize)
+/* Convert a boolean to an integer of the requested bit size.
+ *
+ * src0 is ANDed with ctx->i32_1 (presumably the i32 constant 1 - confirm where
+ * the context is initialized). The result is truncated to i8 or i16, returned
+ * unchanged for 32 bits, or zero-extended to i64. Any other bit size is
+ * unreachable.
+ */
+static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
- LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
-
- switch (bitsize) {
- case 8:
- return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
- case 16:
- return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
- case 32:
- return result;
- case 64:
- return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
- default:
- unreachable("Unsupported bit size.");
- }
+ LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
+
+ switch (bitsize) {
+ case 8:
+ return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
+ case 16:
+ return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
+ case 32:
+ return result;
+ case 64:
+ return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
+ default:
+ unreachable("Unsupported bit size.");
+ }
}
-static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
+/* Convert an integer to a 32-bit boolean: compare src0 for inequality against
+ * a null constant of its own type and sign-extend the comparison result to i32.
+ */
+static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
- return LLVMBuildSExt(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
- ctx->i32, "");
+ LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
+ return LLVMBuildSExt(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
+ ctx->i32, "");
}
-static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
+/* Round-trip src0 through f16 and return it as f32, flushing f16 denormals
+ * to zero.
+ *
+ * src0 is truncated to f16 and extended back to f32. On GFX8 and newer the
+ * f16 value is tested with llvm.amdgcn.class.f16 using the
+ * N_SUBNORMAL | P_SUBNORMAL mask, and the result is replaced by ctx->f32_0
+ * when the test is true. On older chips the extended value is replaced by
+ * ctx->f32_0 when its absolute value is below 0x38800000 (as a float) and not
+ * equal to zero.
+ */
+static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- LLVMValueRef result;
- LLVMValueRef cond = NULL;
-
- src0 = ac_to_float(ctx, src0);
- result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
-
- if (ctx->chip_class >= GFX8) {
- LLVMValueRef args[2];
- /* Check if the result is a denormal - and flush to 0 if so. */
- args[0] = result;
- args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
- cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
- }
-
- /* need to convert back up to f32 */
- result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
-
- if (ctx->chip_class >= GFX8)
- result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
- else {
- /* for GFX6-GFX7 */
- /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
- * so compare the result and flush to 0 if it's smaller.
- */
- LLVMValueRef temp, cond2;
- temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
- cond = LLVMBuildFCmp(ctx->builder, LLVMRealOGT,
- LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
- temp, "");
- cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE,
- temp, ctx->f32_0, "");
- cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
- result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
- }
- return result;
+ LLVMValueRef result;
+ LLVMValueRef cond = NULL;
+
+ src0 = ac_to_float(ctx, src0);
+ result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+ if (ctx->chip_class >= GFX8) {
+ LLVMValueRef args[2];
+ /* Check if the result is a denormal - and flush to 0 if so. */
+ args[0] = result;
+ args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
+ cond =
+ ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
+ }
+
+ /* need to convert back up to f32 */
+ result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+
+ if (ctx->chip_class >= GFX8)
+ result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
+ else {
+ /* for GFX6-GFX7 */
+ /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
+ * so compare the result and flush to 0 if it's smaller.
+ */
+ LLVMValueRef temp, cond2;
+ temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
+ cond = LLVMBuildFCmp(
+ ctx->builder, LLVMRealOGT,
+ LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
+ temp, "");
+ cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, "");
+ cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
+ result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
+ }
+ return result;
}
-static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
- LLVMValueRef src0, LLVMValueRef src1)
+/* Return the upper 32 bits of the unsigned product of src0 and src1: both
+ * operands are zero-extended to i64, multiplied, shifted right logically by
+ * 32 and truncated to i32.
+ */
+static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1)
{
- LLVMValueRef dst64, result;
- src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
- src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
-
- dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
- dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
- result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
- return result;
+ LLVMValueRef dst64, result;
+ src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
+ src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
+
+ dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
+ dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
+ result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
+ return result;
}
-static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
- LLVMValueRef src0, LLVMValueRef src1)
+/* Return the upper 32 bits of the signed product of src0 and src1: both
+ * operands are sign-extended to i64, multiplied, shifted right arithmetically
+ * by 32 and truncated to i32.
+ */
+static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1)
{
- LLVMValueRef dst64, result;
- src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
- src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
-
- dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
- dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
- result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
- return result;
+ LLVMValueRef dst64, result;
+ src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
+ src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
+
+ dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
+ dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
+ result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
+ return result;
}
-static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
- LLVMValueRef bits, LLVMValueRef offset)
+/* Build a bitfield mask from a width (bits) and a starting position (offset)
+ * using two shifts and a subtraction on ctx->i32_1.
+ */
+static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
{
- /* mask = ((1 << bits) - 1) << offset */
- return LLVMBuildShl(ctx->builder,
- LLVMBuildSub(ctx->builder,
- LLVMBuildShl(ctx->builder,
- ctx->i32_1,
- bits, ""),
- ctx->i32_1, ""),
- offset, "");
+ /* mask = ((1 << bits) - 1) << offset */
+ return LLVMBuildShl(
+ ctx->builder,
+ LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
+ offset, "");
}
-static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
- LLVMValueRef mask, LLVMValueRef insert,
- LLVMValueRef base)
+/* Combine insert and base bit by bit under control of mask: the result takes
+ * bits from insert where mask is set and from base elsewhere. It is emitted
+ * as one AND and two XORs.
+ */
+static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask,
+ LLVMValueRef insert, LLVMValueRef base)
{
- /* Calculate:
- * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
- * Use the right-hand side, which the LLVM backend can convert to V_BFI.
- */
- return LLVMBuildXor(ctx->builder, base,
- LLVMBuildAnd(ctx->builder, mask,
- LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
+ /* Calculate:
+ * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
+ * Use the right-hand side, which the LLVM backend can convert to V_BFI.
+ */
+ return LLVMBuildXor(
+ ctx->builder, base,
+ LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
}
-static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
- LLVMValueRef src0,
- LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]))
+/* Pack the first two components of src0 into a single i32.
+ *
+ * src0 is converted with ac_to_float(), its elements 0 and 1 are extracted and
+ * handed to the pack callback, and the callback's result is bitcast to i32.
+ */
+static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]))
{
- LLVMValueRef comp[2];
+ LLVMValueRef comp[2];
- src0 = ac_to_float(ctx, src0);
- comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
- comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
+ src0 = ac_to_float(ctx, src0);
+ comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
+ comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
- return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
+ return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
}
-static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
+/* Unpack two f16 values stored in one 32-bit integer into two f32 values.
+ *
+ * Iteration 0 uses src0 as is (low 16 bits after truncation); iteration 1
+ * first shifts src0 right by 16. Each half is truncated to i16, bitcast to
+ * f16 and extended to f32; the two results are combined with
+ * ac_build_gather_values().
+ */
+static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
- LLVMValueRef temps[2], val;
- int i;
-
- for (i = 0; i < 2; i++) {
- val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
- val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
- val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
- temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
- }
- return ac_build_gather_values(ctx, temps, 2);
+ LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
+ LLVMValueRef temps[2], val;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
+ val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
+ val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
+ temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
+ }
+ return ac_build_gather_values(ctx, temps, 2);
}
-static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
- nir_op op,
- LLVMValueRef src0)
+/* Emit a screen-space derivative for one of the nir_op_fddx / nir_op_fddy
+ * opcodes by choosing a thread-id mask and a neighbour index and forwarding
+ * them to ac_build_ddxy().
+ *
+ * The mask is AC_TID_MASK_LEFT for fddx_fine, AC_TID_MASK_TOP for fddy_fine
+ * and AC_TID_MASK_TOP_LEFT for every other opcode. The index is 1 for the
+ * three fddx variants and 2 otherwise.
+ */
+static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0)
{
- unsigned mask;
- int idx;
- LLVMValueRef result;
-
- if (op == nir_op_fddx_fine)
- mask = AC_TID_MASK_LEFT;
- else if (op == nir_op_fddy_fine)
- mask = AC_TID_MASK_TOP;
- else
- mask = AC_TID_MASK_TOP_LEFT;
-
- /* for DDX we want to next X pixel, DDY next Y pixel. */
- if (op == nir_op_fddx_fine ||
- op == nir_op_fddx_coarse ||
- op == nir_op_fddx)
- idx = 1;
- else
- idx = 2;
-
- result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
- return result;
+ unsigned mask;
+ int idx;
+ LLVMValueRef result;
+
+ if (op == nir_op_fddx_fine)
+ mask = AC_TID_MASK_LEFT;
+ else if (op == nir_op_fddy_fine)
+ mask = AC_TID_MASK_TOP;
+ else
+ mask = AC_TID_MASK_TOP_LEFT;
+
+ /* For DDX we want the next X pixel (idx 1), for DDY the next Y pixel (idx 2). */
+ if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
+ idx = 1;
+ else
+ idx = 2;
+
+ result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
+ return result;
}
struct waterfall_context {
- LLVMBasicBlockRef phi_bb[2];
- bool use_waterfall;
+ /* Insert blocks captured by enter_waterfall() ([0]) and exit_waterfall()
+ * ([1]); used as the incoming blocks of the phis built in exit_waterfall().
+ */
+ LLVMBasicBlockRef phi_bb[2];
+ /* Set by enter_waterfall(); when false, exit_waterfall() returns its value
+ * argument unchanged and emits nothing.
+ */
+ bool use_waterfall;
};
/* To deal with divergent descriptors we can create a loop that handles all
*
 * These helpers create the begin and end of the loop leaving the caller
* to implement the body.
- *
+ *
* params:
 * - ctx is the usual nir context
* - wctx is a temporary struct containing some loop info. Can be left uninitialized.
* - divergent is whether value is actually divergent. If false we just pass
* things through.
*/
-static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx,
- struct waterfall_context *wctx,
- LLVMValueRef value, bool divergent)
+/* Open a waterfall loop for value.
+ *
+ * When value is NULL or divergent is false, nothing is emitted and value is
+ * returned unchanged. Otherwise a loop (label 6000) is started, a scalar is
+ * obtained from value with ac_build_readlane(), and an if-block (label 6001)
+ * is opened on "value == scalar". The scalar is returned for use in the body.
+ * wctx->use_waterfall and wctx->phi_bb[0] are filled in for exit_waterfall().
+ */
+static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
+ LLVMValueRef value, bool divergent)
{
- /* If the app claims the value is divergent but it is constant we can
- * end up with a dynamic index of NULL. */
- if (!value)
- divergent = false;
+ /* If the app claims the value is divergent but it is constant we can
+ * end up with a dynamic index of NULL. */
+ if (!value)
+ divergent = false;
- wctx->use_waterfall = divergent;
- if (!divergent)
- return value;
+ wctx->use_waterfall = divergent;
+ if (!divergent)
+ return value;
- ac_build_bgnloop(&ctx->ac, 6000);
+ ac_build_bgnloop(&ctx->ac, 6000);
- LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
+ /* NOTE(review): the lane argument is NULL; presumably this reads the first
+ * active lane - confirm against ac_build_readlane(). */
+ LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
- LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value,
- scalar_value, "uniform_active");
+ LLVMValueRef active =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value, scalar_value, "uniform_active");
- wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
- ac_build_ifcc(&ctx->ac, active, 6001);
+ wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
+ ac_build_ifcc(&ctx->ac, active, 6001);
- return scalar_value;
+ return scalar_value;
}
-static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx,
- struct waterfall_context *wctx,
- LLVMValueRef value)
+/* Close the waterfall loop opened by enter_waterfall().
+ *
+ * When wctx->use_waterfall is false, value is returned unchanged. Otherwise
+ * the if-block 6001 is ended and, if value is non-NULL, a phi is built that
+ * yields undef from wctx->phi_bb[0] and value from wctx->phi_bb[1]. A second
+ * phi yields 0 or 0xffffffff from the same two blocks; after an optimization
+ * barrier, a non-zero result breaks out of the loop, and the loop 6000 is
+ * ended.
+ *
+ * Returns the value phi, or NULL when value was NULL.
+ */
+static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
+ LLVMValueRef value)
{
- LLVMValueRef ret = NULL;
- LLVMValueRef phi_src[2];
- LLVMValueRef cc_phi_src[2] = {
- LLVMConstInt(ctx->ac.i32, 0, false),
- LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
- };
-
- if (!wctx->use_waterfall)
- return value;
-
- wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
-
- ac_build_endif(&ctx->ac, 6001);
-
- if (value) {
- phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
- phi_src[1] = value;
-
- ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
- }
-
- /*
- * By using the optimization barrier on the exit decision, we decouple
- * the operations from the break, and hence avoid LLVM hoisting the
- * opteration into the break block.
- */
- LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
- ac_build_optimization_barrier(&ctx->ac, &cc);
-
- LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
- ac_build_ifcc(&ctx->ac, active, 6002);
- ac_build_break(&ctx->ac);
- ac_build_endif(&ctx->ac, 6002);
-
- ac_build_endloop(&ctx->ac, 6000);
- return ret;
+ LLVMValueRef ret = NULL;
+ LLVMValueRef phi_src[2];
+ LLVMValueRef cc_phi_src[2] = {
+ LLVMConstInt(ctx->ac.i32, 0, false),
+ LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
+ };
+
+ if (!wctx->use_waterfall)
+ return value;
+
+ wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
+
+ ac_build_endif(&ctx->ac, 6001);
+
+ if (value) {
+ phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
+ phi_src[1] = value;
+
+ ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
+ }
+
+ /*
+ * By using the optimization barrier on the exit decision, we decouple
+ * the operations from the break, and hence avoid LLVM hoisting the
+ * operation into the break block.
+ */
+ LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
+ ac_build_optimization_barrier(&ctx->ac, &cc);
+
+ LLVMValueRef active =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
+ ac_build_ifcc(&ctx->ac, active, 6002);
+ ac_build_break(&ctx->ac);
+ ac_build_endif(&ctx->ac, 6002);
+
+ ac_build_endloop(&ctx->ac, 6000);
+ return ret;
}
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
- LLVMValueRef src[4], result = NULL;
- unsigned num_components = instr->dest.dest.ssa.num_components;
- unsigned src_components;
- LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
-
- assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
- switch (instr->op) {
- case nir_op_vec2:
- case nir_op_vec3:
- case nir_op_vec4:
- src_components = 1;
- break;
- case nir_op_pack_half_2x16:
- case nir_op_pack_snorm_2x16:
- case nir_op_pack_unorm_2x16:
- src_components = 2;
- break;
- case nir_op_unpack_half_2x16:
- src_components = 1;
- break;
- case nir_op_cube_face_coord:
- case nir_op_cube_face_index:
- src_components = 3;
- break;
- default:
- src_components = num_components;
- break;
- }
- for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
- src[i] = get_alu_src(ctx, instr->src[i], src_components);
-
- switch (instr->op) {
- case nir_op_mov:
- result = src[0];
- break;
- case nir_op_fneg:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
- if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
- /* fneg will be optimized by backend compiler with sign
- * bit removed via XOR. This is probably a LLVM bug.
- */
- result = ac_build_canonicalize(&ctx->ac, result,
- instr->dest.dest.ssa.bit_size);
- }
- break;
- case nir_op_ineg:
- result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
- break;
- case nir_op_inot:
- result = LLVMBuildNot(ctx->ac.builder, src[0], "");
- break;
- case nir_op_iadd:
- result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_fadd:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_fsub:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_isub:
- result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_imul:
- result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_imod:
- result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_umod:
- result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_irem:
- result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_idiv:
- result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_udiv:
- result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_fmul:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_frcp:
- /* For doubles, we need precise division to pass GLCTS. */
- if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
- ac_get_type_size(def_type) == 8) {
- result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
- ac_to_float(&ctx->ac, src[0]), "");
- } else {
- result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- }
- if (ctx->abi->clamp_div_by_zero)
- result = ac_build_fmin(&ctx->ac, result,
- LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
- break;
- case nir_op_iand:
- result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ior:
- result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ixor:
- result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ishl:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ishr:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ushr:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ilt32:
- result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
- break;
- case nir_op_ine32:
- result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
- break;
- case nir_op_ieq32:
- result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
- break;
- case nir_op_ige32:
- result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
- break;
- case nir_op_ult32:
- result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
- break;
- case nir_op_uge32:
- result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
- break;
- case nir_op_feq32:
- result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
- break;
- case nir_op_fneu32:
- result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
- break;
- case nir_op_flt32:
- result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
- break;
- case nir_op_fge32:
- result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
- break;
- case nir_op_fabs:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
- /* fabs will be optimized by backend compiler with sign
- * bit removed via AND.
- */
- result = ac_build_canonicalize(&ctx->ac, result,
- instr->dest.dest.ssa.bit_size);
- }
- break;
- case nir_op_iabs:
- result = emit_iabs(&ctx->ac, src[0]);
- break;
- case nir_op_imax:
- result = ac_build_imax(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_imin:
- result = ac_build_imin(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_umax:
- result = ac_build_umax(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_umin:
- result = ac_build_umin(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_isign:
- result = ac_build_isign(&ctx->ac, src[0]);
- break;
- case nir_op_fsign:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fsign(&ctx->ac, src[0]);
- break;
- case nir_op_ffloor:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_ftrunc:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fceil:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fround_even:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
- ac_to_float_type(&ctx->ac, def_type),src[0]);
- break;
- case nir_op_ffract:
- result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fsin:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fcos:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fsqrt:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fexp2:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_flog2:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_frsq:
- result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- if (ctx->abi->clamp_div_by_zero)
- result = ac_build_fmin(&ctx->ac, result,
- LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
- break;
- case nir_op_frexp_exp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_frexp_exp(&ctx->ac, src[0],
- ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
- result = LLVMBuildSExt(ctx->ac.builder, result,
- ctx->ac.i32, "");
- break;
- case nir_op_frexp_sig:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_frexp_mant(&ctx->ac, src[0],
- instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_fpow:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- break;
- case nir_op_fmax:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- if (ctx->ac.chip_class < GFX9 &&
- instr->dest.dest.ssa.bit_size == 32) {
- /* Only pre-GFX9 chips do not flush denorms. */
- result = ac_build_canonicalize(&ctx->ac, result,
- instr->dest.dest.ssa.bit_size);
- }
- break;
- case nir_op_fmin:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- if (ctx->ac.chip_class < GFX9 &&
- instr->dest.dest.ssa.bit_size == 32) {
- /* Only pre-GFX9 chips do not flush denorms. */
- result = ac_build_canonicalize(&ctx->ac, result,
- instr->dest.dest.ssa.bit_size);
- }
- break;
- case nir_op_ffma:
- /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
- result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
- break;
- case nir_op_ldexp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
- else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
- else
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
- break;
- case nir_op_bfm:
- result = emit_bfm(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_bitfield_select:
- result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
- break;
- case nir_op_ubfe:
- result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
- break;
- case nir_op_ibfe:
- result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
- break;
- case nir_op_bitfield_reverse:
- result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
- break;
- case nir_op_bit_count:
- result = ac_build_bit_count(&ctx->ac, src[0]);
- break;
- case nir_op_vec2:
- case nir_op_vec3:
- case nir_op_vec4:
- for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
- src[i] = ac_to_integer(&ctx->ac, src[i]);
- result = ac_build_gather_values(&ctx->ac, src, num_components);
- break;
- case nir_op_f2i8:
- case nir_op_f2i16:
- case nir_op_f2i32:
- case nir_op_f2i64:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_f2u8:
- case nir_op_f2u16:
- case nir_op_f2u32:
- case nir_op_f2u64:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_i2f16:
- case nir_op_i2f32:
- case nir_op_i2f64:
- result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- break;
- case nir_op_u2f16:
- case nir_op_u2f32:
- case nir_op_u2f64:
- result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- break;
- case nir_op_f2f16_rtz:
- case nir_op_f2f16:
- case nir_op_f2fmp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
-
- /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
- * all f32->f16 conversions have to round towards zero, because both scalar
- * and vec2 down-conversions have to round equally.
- */
- if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL ||
- instr->op == nir_op_f2f16_rtz) {
- src[0] = ac_to_float(&ctx->ac, src[0]);
-
- if (LLVMTypeOf(src[0]) == ctx->ac.f64)
- src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
-
- /* Fast path conversion. This only works if NIR is vectorized
- * to vec2 16.
- */
- if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
- LLVMValueRef args[] = {
- ac_llvm_extract_elem(&ctx->ac, src[0], 0),
- ac_llvm_extract_elem(&ctx->ac, src[0], 1),
- };
- result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
- break;
- }
-
- assert(ac_get_llvm_num_components(src[0]) == 1);
- LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) };
- result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
- } else {
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- else
- result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- }
- break;
- case nir_op_f2f16_rtne:
- case nir_op_f2f32:
- case nir_op_f2f64:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- else
- result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- break;
- case nir_op_u2u8:
- case nir_op_u2u16:
- case nir_op_u2ump:
- case nir_op_u2u32:
- case nir_op_u2u64:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
- else
- result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_i2i8:
- case nir_op_i2i16:
- case nir_op_i2imp:
- case nir_op_i2i32:
- case nir_op_i2i64:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
- else
- result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_b32csel:
- result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
- break;
- case nir_op_find_lsb:
- result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
- break;
- case nir_op_ufind_msb:
- result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
- break;
- case nir_op_ifind_msb:
- result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
- break;
- case nir_op_uadd_carry:
- result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
- break;
- case nir_op_usub_borrow:
- result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
- break;
- case nir_op_b2f16:
- case nir_op_b2f32:
- case nir_op_b2f64:
- result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_f2b32:
- result = emit_f2b(&ctx->ac, src[0]);
- break;
- case nir_op_b2i8:
- case nir_op_b2i16:
- case nir_op_b2i32:
- case nir_op_b2i64:
- result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_i2b32:
- result = emit_i2b(&ctx->ac, src[0]);
- break;
- case nir_op_fquantize2f16:
- result = emit_f2f16(&ctx->ac, src[0]);
- break;
- case nir_op_umul_high:
- result = emit_umul_high(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_imul_high:
- result = emit_imul_high(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_pack_half_2x16:
- result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
- break;
- case nir_op_pack_snorm_2x16:
- result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
- break;
- case nir_op_pack_unorm_2x16:
- result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
- break;
- case nir_op_unpack_half_2x16:
- result = emit_unpack_half_2x16(&ctx->ac, src[0]);
- break;
- case nir_op_fddx:
- case nir_op_fddy:
- case nir_op_fddx_fine:
- case nir_op_fddy_fine:
- case nir_op_fddx_coarse:
- case nir_op_fddy_coarse:
- result = emit_ddxy(ctx, instr->op, src[0]);
- break;
-
- case nir_op_unpack_64_2x32_split_x: {
- assert(ac_get_llvm_num_components(src[0]) == 1);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i32,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_0, "");
- break;
- }
-
- case nir_op_unpack_64_2x32_split_y: {
- assert(ac_get_llvm_num_components(src[0]) == 1);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i32,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_1, "");
- break;
- }
-
- case nir_op_pack_64_2x32_split: {
- LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
- result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
- break;
- }
-
- case nir_op_pack_32_2x16_split: {
- LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
- result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
-
- case nir_op_unpack_32_2x16_split_x: {
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i16,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_0, "");
- break;
- }
-
- case nir_op_unpack_32_2x16_split_y: {
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i16,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_1, "");
- break;
- }
-
- case nir_op_cube_face_coord: {
- src[0] = ac_to_float(&ctx->ac, src[0]);
- LLVMValueRef results[2];
- LLVMValueRef in[3];
- for (unsigned chan = 0; chan < 3; chan++)
- in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
- results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
- results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
- LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
- results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
- results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
- result = ac_build_gather_values(&ctx->ac, results, 2);
- break;
- }
-
- case nir_op_cube_face_index: {
- src[0] = ac_to_float(&ctx->ac, src[0]);
- LLVMValueRef in[3];
- for (unsigned chan = 0; chan < 3; chan++)
- in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- break;
- }
-
- default:
- fprintf(stderr, "Unknown NIR alu instr: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- abort();
- }
-
- if (result) {
- assert(instr->dest.dest.is_ssa);
- result = ac_to_integer_or_pointer(&ctx->ac, result);
- ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
- }
+ LLVMValueRef src[4], result = NULL;
+ unsigned num_components = instr->dest.dest.ssa.num_components;
+ unsigned src_components;
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+
+ assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
+ switch (instr->op) {
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ src_components = 1;
+ break;
+ case nir_op_pack_half_2x16:
+ case nir_op_pack_snorm_2x16:
+ case nir_op_pack_unorm_2x16:
+ src_components = 2;
+ break;
+ case nir_op_unpack_half_2x16:
+ src_components = 1;
+ break;
+ case nir_op_cube_face_coord:
+ case nir_op_cube_face_index:
+ src_components = 3;
+ break;
+ default:
+ src_components = num_components;
+ break;
+ }
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ src[i] = get_alu_src(ctx, instr->src[i], src_components);
+
+ switch (instr->op) {
+ case nir_op_mov:
+ result = src[0];
+ break;
+ case nir_op_fneg:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
+ /* fneg will be optimized by backend compiler with sign
+ * bit removed via XOR. This is probably a LLVM bug.
+ */
+ result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
+ }
+ break;
+ case nir_op_ineg:
+ result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
+ break;
+ case nir_op_inot:
+ result = LLVMBuildNot(ctx->ac.builder, src[0], "");
+ break;
+ case nir_op_iadd:
+ result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_fadd:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_fsub:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_isub:
+ result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_imul:
+ result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_imod:
+ result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_umod:
+ result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_irem:
+ result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_idiv:
+ result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_udiv:
+ result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_fmul:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_frcp:
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
+ result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), "");
+ } else {
+ result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ }
+ if (ctx->abi->clamp_div_by_zero)
+ result = ac_build_fmin(&ctx->ac, result,
+ LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
+ break;
+ case nir_op_iand:
+ result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ior:
+ result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ixor:
+ result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ishl:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
+ else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
+ result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ishr:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
+ else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
+ result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ushr:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
+ else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
+ result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ilt32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
+ break;
+ case nir_op_ine32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
+ break;
+ case nir_op_ieq32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
+ break;
+ case nir_op_ige32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
+ break;
+ case nir_op_ult32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
+ break;
+ case nir_op_uge32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
+ break;
+ case nir_op_feq32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
+ break;
+ case nir_op_fneu32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
+ break;
+ case nir_op_flt32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
+ break;
+ case nir_op_fge32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
+ break;
+ case nir_op_fabs:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
+ /* fabs will be optimized by backend compiler with sign
+ * bit removed via AND.
+ */
+ result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
+ }
+ break;
+ case nir_op_iabs:
+ result = emit_iabs(&ctx->ac, src[0]);
+ break;
+ case nir_op_imax:
+ result = ac_build_imax(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_imin:
+ result = ac_build_imin(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_umax:
+ result = ac_build_umax(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_umin:
+ result = ac_build_umin(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_isign:
+ result = ac_build_isign(&ctx->ac, src[0]);
+ break;
+ case nir_op_fsign:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_fsign(&ctx->ac, src[0]);
+ break;
+ case nir_op_ffloor:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_ftrunc:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fceil:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fround_even:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_ffract:
+ result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fsin:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fcos:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fsqrt:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fexp2:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_flog2:
+ result =
+ emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_frsq:
+ result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ if (ctx->abi->clamp_div_by_zero)
+ result = ac_build_fmin(&ctx->ac, result,
+ LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
+ break;
+ case nir_op_frexp_exp:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
+ result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
+ break;
+ case nir_op_frexp_sig:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_fpow:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
+ src[0], src[1]);
+ break;
+ case nir_op_fmax:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
+ src[0], src[1]);
+ if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
+ /* Only pre-GFX9 chips do not flush denorms. */
+ result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
+ }
+ break;
+ case nir_op_fmin:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
+ src[0], src[1]);
+ if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
+ /* Only pre-GFX9 chips do not flush denorms. */
+ result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
+ }
+ break;
+ case nir_op_ffma:
+ /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
+ result =
+ emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+ break;
+ case nir_op_ldexp:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2,
+ AC_FUNC_ATTR_READNONE);
+ else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2,
+ AC_FUNC_ATTR_READNONE);
+ else
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2,
+ AC_FUNC_ATTR_READNONE);
+ break;
+ case nir_op_bfm:
+ result = emit_bfm(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_bitfield_select:
+ result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
+ break;
+ case nir_op_ubfe:
+ result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
+ break;
+ case nir_op_ibfe:
+ result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
+ break;
+ case nir_op_bitfield_reverse:
+ result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
+ break;
+ case nir_op_bit_count:
+ result = ac_build_bit_count(&ctx->ac, src[0]);
+ break;
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ src[i] = ac_to_integer(&ctx->ac, src[i]);
+ result = ac_build_gather_values(&ctx->ac, src, num_components);
+ break;
+ case nir_op_f2i8:
+ case nir_op_f2i16:
+ case nir_op_f2i32:
+ case nir_op_f2i64:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_f2u8:
+ case nir_op_f2u16:
+ case nir_op_f2u32:
+ case nir_op_f2u64:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_i2f16:
+ case nir_op_i2f32:
+ case nir_op_i2f64:
+ result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ break;
+ case nir_op_u2f16:
+ case nir_op_u2f32:
+ case nir_op_u2f64:
+ result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ break;
+ case nir_op_f2f16_rtz:
+ case nir_op_f2f16:
+ case nir_op_f2fmp:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+
+ /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
+ * all f32->f16 conversions have to round towards zero, because both scalar
+ * and vec2 down-conversions have to round equally.
+ */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+
+ if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
+
+ /* Fast path conversion. This only works if NIR is vectorized
+ * to vec2 16.
+ */
+ if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
+ LLVMValueRef args[] = {
+ ac_llvm_extract_elem(&ctx->ac, src[0], 0),
+ ac_llvm_extract_elem(&ctx->ac, src[0], 1),
+ };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
+ break;
+ }
+
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ } else {
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result =
+ LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ else
+ result =
+ LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ }
+ break;
+ case nir_op_f2f16_rtne:
+ case nir_op_f2f32:
+ case nir_op_f2f64:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ else
+ result =
+ LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ break;
+ case nir_op_u2u8:
+ case nir_op_u2u16:
+ case nir_op_u2ump:
+ case nir_op_u2u32:
+ case nir_op_u2u64:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
+ else
+ result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_i2i8:
+ case nir_op_i2i16:
+ case nir_op_i2imp:
+ case nir_op_i2i32:
+ case nir_op_i2i64:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
+ else
+ result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_b32csel:
+ result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
+ break;
+ case nir_op_find_lsb:
+ result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
+ break;
+ case nir_op_ufind_msb:
+ result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
+ break;
+ case nir_op_ifind_msb:
+ result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
+ break;
+ case nir_op_uadd_carry:
+ result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
+ break;
+ case nir_op_usub_borrow:
+ result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
+ break;
+ case nir_op_b2f16:
+ case nir_op_b2f32:
+ case nir_op_b2f64:
+ result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_f2b32:
+ result = emit_f2b(&ctx->ac, src[0]);
+ break;
+ case nir_op_b2i8:
+ case nir_op_b2i16:
+ case nir_op_b2i32:
+ case nir_op_b2i64:
+ result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_i2b32:
+ result = emit_i2b(&ctx->ac, src[0]);
+ break;
+ case nir_op_fquantize2f16:
+ result = emit_f2f16(&ctx->ac, src[0]);
+ break;
+ case nir_op_umul_high:
+ result = emit_umul_high(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_imul_high:
+ result = emit_imul_high(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_pack_half_2x16:
+ result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
+ break;
+ case nir_op_pack_snorm_2x16:
+ result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
+ break;
+ case nir_op_pack_unorm_2x16:
+ result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
+ break;
+ case nir_op_unpack_half_2x16:
+ result = emit_unpack_half_2x16(&ctx->ac, src[0]);
+ break;
+ case nir_op_fddx:
+ case nir_op_fddy:
+ case nir_op_fddx_fine:
+ case nir_op_fddy_fine:
+ case nir_op_fddx_coarse:
+ case nir_op_fddy_coarse:
+ result = emit_ddxy(ctx, instr->op, src[0]);
+ break;
+
+ case nir_op_unpack_64_2x32_split_x: {
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_64_2x32_split_y: {
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
+ break;
+ }
+
+ case nir_op_pack_64_2x32_split: {
+ LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
+ result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
+ break;
+ }
+
+ case nir_op_pack_32_2x16_split: {
+ LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
+ result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_x: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_y: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
+ break;
+ }
+
+ case nir_op_cube_face_coord: {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ LLVMValueRef results[2];
+ LLVMValueRef in[3];
+ for (unsigned chan = 0; chan < 3; chan++)
+ in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
+ results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
+ AC_FUNC_ATTR_READNONE);
+ results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
+ AC_FUNC_ATTR_READNONE);
+ LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3,
+ AC_FUNC_ATTR_READNONE);
+ results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
+ results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
+ LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
+ results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
+ results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
+ result = ac_build_gather_values(&ctx->ac, results, 2);
+ break;
+ }
+
+ case nir_op_cube_face_index: {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ LLVMValueRef in[3];
+ for (unsigned chan = 0; chan < 3; chan++)
+ in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3,
+ AC_FUNC_ATTR_READNONE);
+ break;
+ }
+
+ default:
+ fprintf(stderr, "Unknown NIR alu instr: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+
+ if (result) {
+ assert(instr->dest.dest.is_ssa);
+ result = ac_to_integer_or_pointer(&ctx->ac, result);
+ ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
+ }
}
/* Translate a NIR load_const instruction into an LLVM constant and record it
* in ctx->ssa_defs at instr->def.index.
*
* Every component becomes an integer constant whose width is
* instr->def.bit_size; the matching member of instr->value[i] (u8, u16, u32
* or u64) supplies the bits, and LLVMConstInt is called with its last
* argument set to false. Bit sizes other than 8, 16, 32 and 64 print a
* message to stderr and abort().
*
* A def with more than one component is stored as an LLVMConstVector of the
* per-component constants; otherwise the single scalar constant is stored.
*
* NOTE(review): values[] has room for 4 entries while the loop runs up to
* instr->def.num_components, so this assumes num_components <= 4 - confirm
* against the maximum NIR vector width. It also reads values[0] when
* num_components is not greater than 1, so presumably num_components is
* never 0 - confirm.
*/
-static void visit_load_const(struct ac_nir_context *ctx,
- const nir_load_const_instr *instr)
+static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
{
- LLVMValueRef values[4], value = NULL;
- LLVMTypeRef element_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
-
- for (unsigned i = 0; i < instr->def.num_components; ++i) {
- switch (instr->def.bit_size) {
- case 8:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u8, false);
- break;
- case 16:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u16, false);
- break;
- case 32:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u32, false);
- break;
- case 64:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u64, false);
- break;
- default:
- fprintf(stderr,
- "unsupported nir load_const bit_size: %d\n",
- instr->def.bit_size);
- abort();
- }
- }
- if (instr->def.num_components > 1) {
- value = LLVMConstVector(values, instr->def.num_components);
- } else
- value = values[0];
-
- ctx->ssa_defs[instr->def.index] = value;
+ LLVMValueRef values[4], value = NULL;
+ LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
+
+ for (unsigned i = 0; i < instr->def.num_components; ++i) {
+ switch (instr->def.bit_size) {
+ case 8:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
+ break;
+ case 16:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
+ break;
+ case 32:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
+ break;
+ case 64:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
+ break;
+ default:
+ fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
+ abort();
+ }
+ }
+ if (instr->def.num_components > 1) {
+ value = LLVMConstVector(values, instr->def.num_components);
+ } else
+ value = values[0];
+
+ ctx->ssa_defs[instr->def.index] = value;
}
/* Build the LLVM value holding the size stored in a buffer descriptor.
*
* The size is read from element 2 of `descriptor`. When `in_elements` is
* true and the chip class is GFX8, that value is divided (unsigned) by the
* stride, which is decoded from element 1 of the descriptor by shifting
* right by 16 and masking with 0x3fff. In every other case the element-2
* value is returned unchanged.
*
* NOTE(review): `descriptor` is indexed with LLVMBuildExtractElement, so it
* is presumably an LLVM vector of i32 dwords - confirm with the callers.
*/
-static LLVMValueRef
-get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
+static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor,
+ bool in_elements)
{
- LLVMValueRef size =
- LLVMBuildExtractElement(ctx->ac.builder, descriptor,
- LLVMConstInt(ctx->ac.i32, 2, false), "");
-
- /* GFX8 only */
- if (ctx->ac.chip_class == GFX8 && in_elements) {
- /* On GFX8, the descriptor contains the size in bytes,
- * but TXQ must return the size in elements.
- * The stride is always non-zero for resources using TXQ.
- */
- LLVMValueRef stride =
- LLVMBuildExtractElement(ctx->ac.builder, descriptor,
- ctx->ac.i32_1, "");
- stride = LLVMBuildLShr(ctx->ac.builder, stride,
- LLVMConstInt(ctx->ac.i32, 16, false), "");
- stride = LLVMBuildAnd(ctx->ac.builder, stride,
- LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
-
- size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
- }
- return size;
+ LLVMValueRef size =
+ LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), "");
+
+ /* GFX8 only */
+ if (ctx->ac.chip_class == GFX8 && in_elements) {
+ /* On GFX8, the descriptor contains the size in bytes,
+ * but TXQ must return the size in elements.
+ * The stride is always non-zero for resources using TXQ.
+ */
+ LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, descriptor, ctx->ac.i32_1, "");
+ stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, false), "");
+ stride = LLVMBuildAnd(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
+
+ size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
+ }
+ return size;
}
/* Gather4 should follow the same rules as bilinear filtering, but the hardware
* runtime. In this case, return an i1 value that indicates whether the
* descriptor was overridden (and hence a fixup of the sampler result is needed).
*/
-static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
- nir_variable *var,
- struct ac_image_args *args,
- const nir_tex_instr *instr)
+static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, nir_variable *var,
+ struct ac_image_args *args, const nir_tex_instr *instr)
{
- const struct glsl_type *type = glsl_without_array(var->type);
- enum glsl_base_type stype = glsl_get_sampler_result_type(type);
- LLVMValueRef wa_8888 = NULL;
- LLVMValueRef half_texel[2];
- LLVMValueRef result;
-
- assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
- LLVMValueRef formats;
- LLVMValueRef data_format;
- LLVMValueRef wa_formats;
-
- formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
-
- data_format = LLVMBuildLShr(ctx->builder, formats,
- LLVMConstInt(ctx->i32, 20, false), "");
- data_format = LLVMBuildAnd(ctx->builder, data_format,
- LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
- wa_8888 = LLVMBuildICmp(
- ctx->builder, LLVMIntEQ, data_format,
- LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
- "");
-
- uint32_t wa_num_format =
- stype == GLSL_TYPE_UINT ?
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
- wa_formats = LLVMBuildAnd(ctx->builder, formats,
- LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
- "");
- wa_formats = LLVMBuildOr(ctx->builder, wa_formats,
- LLVMConstInt(ctx->i32, wa_num_format, false), "");
-
- formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
- args->resource = LLVMBuildInsertElement(
- ctx->builder, args->resource, formats, ctx->i32_1, "");
- }
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
- assert(!wa_8888);
- half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
- } else {
- struct ac_image_args resinfo = {};
- LLVMBasicBlockRef bbs[2];
-
- LLVMValueRef unnorm = NULL;
- LLVMValueRef default_offset = ctx->f32_0;
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D &&
- !instr->is_array) {
- /* In vulkan, whether the sampler uses unnormalized
- * coordinates or not is a dynamic property of the
- * sampler. Hence, to figure out whether or not we
- * need to divide by the texture size, we need to test
- * the sampler at runtime. This tests the bit set by
- * radv_init_sampler().
- */
- LLVMValueRef sampler0 =
- LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
- sampler0 = LLVMBuildLShr(ctx->builder, sampler0,
- LLVMConstInt(ctx->i32, 15, false), "");
- sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
- unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
- default_offset = LLVMConstReal(ctx->f32, -0.5);
- }
-
- bbs[0] = LLVMGetInsertBlock(ctx->builder);
- if (wa_8888 || unnorm) {
- assert(!(wa_8888 && unnorm));
- LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
- /* Skip the texture size query entirely if we don't need it. */
- ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
- bbs[1] = LLVMGetInsertBlock(ctx->builder);
- }
-
- /* Query the texture size. */
- resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
- resinfo.opcode = ac_image_get_resinfo;
- resinfo.dmask = 0xf;
- resinfo.lod = ctx->i32_0;
- resinfo.resource = args->resource;
- resinfo.attributes = AC_FUNC_ATTR_READNONE;
- LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
-
- /* Compute -0.5 / size. */
- for (unsigned c = 0; c < 2; c++) {
- half_texel[c] =
- LLVMBuildExtractElement(ctx->builder, size,
- LLVMConstInt(ctx->i32, c, 0), "");
- half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
- half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
- half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
- LLVMConstReal(ctx->f32, -0.5), "");
- }
-
- if (wa_8888 || unnorm) {
- ac_build_endif(ctx, 2000);
-
- for (unsigned c = 0; c < 2; c++) {
- LLVMValueRef values[2] = { default_offset, half_texel[c] };
- half_texel[c] = ac_build_phi(ctx, ctx->f32, 2,
- values, bbs);
- }
- }
- }
-
- for (unsigned c = 0; c < 2; c++) {
- LLVMValueRef tmp;
- tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
- args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
- }
-
- args->attributes = AC_FUNC_ATTR_READNONE;
- result = ac_build_image_opcode(ctx, args);
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
- LLVMValueRef tmp, tmp2;
-
- /* if the cube workaround is in place, f2i the result. */
- for (unsigned c = 0; c < 4; c++) {
- tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
- if (stype == GLSL_TYPE_UINT)
- tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
- else
- tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
- tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
- tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
- tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
- tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
- result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
- }
- }
- return result;
+ const struct glsl_type *type = glsl_without_array(var->type);
+ enum glsl_base_type stype = glsl_get_sampler_result_type(type);
+ LLVMValueRef wa_8888 = NULL;
+ LLVMValueRef half_texel[2];
+ LLVMValueRef result;
+
+ assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef formats;
+ LLVMValueRef data_format;
+ LLVMValueRef wa_formats;
+
+ formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
+
+ data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), "");
+ data_format =
+ LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
+ wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format,
+ LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
+
+ uint32_t wa_num_format = stype == GLSL_TYPE_UINT
+ ? S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED)
+ : S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
+ wa_formats = LLVMBuildAnd(ctx->builder, formats,
+ LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
+ wa_formats =
+ LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), "");
+
+ formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
+ args->resource =
+ LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, "");
+ }
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ assert(!wa_8888);
+ half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
+ } else {
+ struct ac_image_args resinfo = {};
+ LLVMBasicBlockRef bbs[2];
+
+ LLVMValueRef unnorm = NULL;
+ LLVMValueRef default_offset = ctx->f32_0;
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
+ /* In vulkan, whether the sampler uses unnormalized
+ * coordinates or not is a dynamic property of the
+ * sampler. Hence, to figure out whether or not we
+ * need to divide by the texture size, we need to test
+ * the sampler at runtime. This tests the bit set by
+ * radv_init_sampler().
+ */
+ LLVMValueRef sampler0 =
+ LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
+ sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), "");
+ sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
+ unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
+ default_offset = LLVMConstReal(ctx->f32, -0.5);
+ }
+
+ bbs[0] = LLVMGetInsertBlock(ctx->builder);
+ if (wa_8888 || unnorm) {
+ assert(!(wa_8888 && unnorm));
+ LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
+ /* Skip the texture size query entirely if we don't need it. */
+ ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
+ bbs[1] = LLVMGetInsertBlock(ctx->builder);
+ }
+
+ /* Query the texture size. */
+ resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
+ resinfo.opcode = ac_image_get_resinfo;
+ resinfo.dmask = 0xf;
+ resinfo.lod = ctx->i32_0;
+ resinfo.resource = args->resource;
+ resinfo.attributes = AC_FUNC_ATTR_READNONE;
+ LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
+
+ /* Compute -0.5 / size. */
+ for (unsigned c = 0; c < 2; c++) {
+ half_texel[c] =
+ LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), "");
+ half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
+ half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
+ half_texel[c] =
+ LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), "");
+ }
+
+ if (wa_8888 || unnorm) {
+ ac_build_endif(ctx, 2000);
+
+ for (unsigned c = 0; c < 2; c++) {
+ LLVMValueRef values[2] = {default_offset, half_texel[c]};
+ half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs);
+ }
+ }
+ }
+
+ for (unsigned c = 0; c < 2; c++) {
+ LLVMValueRef tmp;
+ tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
+ args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
+ }
+
+ args->attributes = AC_FUNC_ATTR_READNONE;
+ result = ac_build_image_opcode(ctx, args);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef tmp, tmp2;
+
+ /* if the cube workaround is in place, f2i the result. */
+ for (unsigned c = 0; c < 4; c++) {
+ tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
+ if (stype == GLSL_TYPE_UINT)
+ tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
+ else
+ tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
+ tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
+ tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
+ result =
+ LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
+ }
+ }
+ return result;
}
/* Return the deref of the source tagged nir_tex_src_texture_deref in a NIR
 * texture instruction, or NULL when no source carries that type. The scan does
 * not stop at the first match, so if several sources had that type the last
 * one would be returned.
 */
static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
{
- nir_deref_instr *texture_deref_instr = NULL;
-
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_texture_deref:
- texture_deref_instr = nir_src_as_deref(instr->src[i].src);
- break;
- default:
- break;
- }
- }
- return texture_deref_instr;
+ nir_deref_instr *texture_deref_instr = NULL; /* stays NULL when nothing matches */
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_texture_deref:
+ texture_deref_instr = nir_src_as_deref(instr->src[i].src);
+ break; /* leaves the switch only; the loop keeps scanning */
+ default:
+ break;
+ }
+ }
+ return texture_deref_instr;
}
/* Emit the LLVM value for a NIR texture instruction from pre-filled image
 * arguments. *args is modified in place (opcode, level_zero, lod, coords[1],
 * attributes).
 *
 * - GLSL_SAMPLER_DIM_BUF is handled by ac_build_buffer_load_format() and
 *   returns early.
 * - Otherwise the opcode defaults to ac_image_sample and is overridden per
 *   instr->op by the switch below.
 * - Integer tg4 on chip_class <= GFX8 is delegated to lower_gather4_integer().
 * - nir_texop_lod on GFX9 gets coords[1] = 0 when the image dim is 2D or
 *   2D-array and coords[1] is unset.
 * - The call is marked READNONE, plus CONVERGENT for tex/txb/lod in fragment
 *   shaders or in compute shaders with a derivative group.
 *
 * Returns the result of the emitted load/sample call.
 */
-static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
- const nir_tex_instr *instr,
- struct ac_image_args *args)
+static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr,
+ struct ac_image_args *args)
{
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
-
- assert(instr->dest.is_ssa);
- return ac_build_buffer_load_format(&ctx->ac,
- args->resource,
- args->coords[0],
- ctx->ac.i32_0,
- util_last_bit(mask),
- 0, true,
- instr->dest.ssa.bit_size == 16);
- }
-
- args->opcode = ac_image_sample;
-
- switch (instr->op) {
- case nir_texop_txf:
- case nir_texop_txf_ms:
- case nir_texop_samples_identical:
- args->opcode = args->level_zero ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
- ac_image_load : ac_image_load_mip;
- args->level_zero = false;
- break;
- case nir_texop_txs:
- case nir_texop_query_levels:
- args->opcode = ac_image_get_resinfo;
- if (!args->lod)
- args->lod = ctx->ac.i32_0;
- args->level_zero = false;
- break;
- case nir_texop_tex:
- if (ctx->stage != MESA_SHADER_FRAGMENT) {
- assert(!args->lod);
- args->level_zero = true;
- }
- break;
- case nir_texop_tg4:
- args->opcode = ac_image_gather4;
- if (!args->lod && !args->bias)
- args->level_zero = true;
- break;
- case nir_texop_lod:
- args->opcode = ac_image_get_lod;
- break;
- case nir_texop_fragment_fetch:
- case nir_texop_fragment_mask_fetch:
- args->opcode = ac_image_load;
- args->level_zero = false;
- break;
- default:
- break;
- }
-
- if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
- nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
- nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
- const struct glsl_type *type = glsl_without_array(var->type);
- enum glsl_base_type stype = glsl_get_sampler_result_type(type);
- if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
- return lower_gather4_integer(&ctx->ac, var, args, instr);
- }
- }
-
- /* Fixup for GFX9 which allocates 1D textures as 2D. */
- if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
- if ((args->dim == ac_image_2darray ||
- args->dim == ac_image_2d) && !args->coords[1]) {
- args->coords[1] = ctx->ac.i32_0;
- }
- }
-
- args->attributes = AC_FUNC_ATTR_READNONE;
- bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE &&
- ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
- if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
- /* Prevent texture instructions with implicit derivatives from being
- * sinked into branches. */
- switch (instr->op) {
- case nir_texop_tex:
- case nir_texop_txb:
- case nir_texop_lod:
- args->attributes |= AC_FUNC_ATTR_CONVERGENT;
- break;
- default:
- break;
- }
- }
-
- return ac_build_image_opcode(&ctx->ac, args);
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+
+ assert(instr->dest.is_ssa); /* NOTE(review): checked only after dest.ssa was read above */
+ return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
+ util_last_bit(mask), 0, true,
+ instr->dest.ssa.bit_size == 16);
+ }
+
+ args->opcode = ac_image_sample; /* default; the switch below overrides it per op */
+
+ switch (instr->op) {
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
+ args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS
+ ? ac_image_load
+ : ac_image_load_mip;
+ args->level_zero = false;
+ break;
+ case nir_texop_txs:
+ case nir_texop_query_levels:
+ args->opcode = ac_image_get_resinfo;
+ if (!args->lod)
+ args->lod = ctx->ac.i32_0;
+ args->level_zero = false;
+ break;
+ case nir_texop_tex:
+ if (ctx->stage != MESA_SHADER_FRAGMENT) {
+ assert(!args->lod);
+ args->level_zero = true;
+ }
+ break;
+ case nir_texop_tg4:
+ args->opcode = ac_image_gather4;
+ if (!args->lod && !args->bias)
+ args->level_zero = true;
+ break;
+ case nir_texop_lod:
+ args->opcode = ac_image_get_lod;
+ break;
+ case nir_texop_fragment_fetch:
+ case nir_texop_fragment_mask_fetch:
+ args->opcode = ac_image_load;
+ args->level_zero = false;
+ break;
+ default:
+ break;
+ }
+
+ if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
+ nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
+ nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); /* NOTE(review): the helper can return NULL; presumably a texture deref source always exists here - confirm */
+ const struct glsl_type *type = glsl_without_array(var->type);
+ enum glsl_base_type stype = glsl_get_sampler_result_type(type);
+ if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
+ return lower_gather4_integer(&ctx->ac, var, args, instr);
+ }
+ }
+
+ /* Fixup for GFX9 which allocates 1D textures as 2D. */
+ if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
+ if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
+ args->coords[1] = ctx->ac.i32_0;
+ }
+ }
+
+ args->attributes = AC_FUNC_ATTR_READNONE;
+ bool cs_derivs =
+ ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
+ if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
+ /* Prevent texture instructions with implicit derivatives from being
+ * sunk into branches. */
+ switch (instr->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_lod:
+ args->attributes |= AC_FUNC_ATTR_CONVERGENT;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return ac_build_image_opcode(&ctx->ac, args);
}
/* Offset the pointer value of src[0] by the index value of src[1] with a
 * single-index GEP, attach the context's uniform_md_kind metadata (using
 * empty_md) to the result, and return it.
 */
static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
- LLVMValueRef ptr = get_src(ctx, instr->src[0]);
- LLVMValueRef index = get_src(ctx, instr->src[1]);
+ LLVMValueRef ptr = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[1]);
- LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
- LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
- return result;
+ LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
+ LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+ return result;
}
/* Emit a push-constant load for the intrinsic. The address is the
 * intrinsic's base plus the value of src[0].
 *
 * - If src[0] is an LLVM constant and the destination is 32-bit, the value is
 *   gathered directly from the inline push-constant arguments when the
 *   requested dword range fits inside them.
 * - Otherwise the address is applied to the push_constants argument via GEP:
 *     8-bit:  load 1 or 2 dwords, combine them with llvm.amdgcn.alignbyte
 *             using the address, then truncate to num_components bytes.
 *     16-bit: load num_components / 2 + 1 dwords as an i16 vector and select
 *             between a shuffle starting at element 0 and one starting at
 *             element 1, depending on bit 1 of the address.
 *     other:  cast the pointer to the destination type and load.
 *
 * Returns the loaded value.
 */
-static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- LLVMValueRef ptr, addr;
- LLVMValueRef src0 = get_src(ctx, instr->src[0]);
- unsigned index = nir_intrinsic_base(instr);
-
- addr = LLVMConstInt(ctx->ac.i32, index, 0);
- addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
-
- /* Load constant values from user SGPRS when possible, otherwise
- * fallback to the default path that loads directly from memory.
- */
- if (LLVMIsConstant(src0) &&
- instr->dest.ssa.bit_size == 32) {
- unsigned count = instr->dest.ssa.num_components;
- unsigned offset = index;
-
- offset += LLVMConstIntGetZExtValue(src0);
- offset /= 4;
-
- offset -= ctx->args->base_inline_push_consts;
-
- unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
- if (offset + count <= num_inline_push_consts) {
- LLVMValueRef push_constants[num_inline_push_consts];
- for (unsigned i = 0; i < num_inline_push_consts; i++)
- push_constants[i] = ac_get_arg(&ctx->ac,
- ctx->args->inline_push_consts[i]);
- return ac_build_gather_values(&ctx->ac,
- push_constants + offset,
- count);
- }
- }
-
- ptr = LLVMBuildGEP(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");
-
- if (instr->dest.ssa.bit_size == 8) {
- unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
- LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
- ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
- LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
-
- LLVMValueRef params[3];
- if (load_dwords > 1) {
- LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
- params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
- params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
- } else {
- res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
- params[0] = ctx->ac.i32_0;
- params[1] = res;
- }
- params[2] = addr;
- res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
-
- res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
- if (instr->dest.ssa.num_components > 1)
- res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
- return res;
- } else if (instr->dest.ssa.bit_size == 16) {
- unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
- LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
- ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
- LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
- LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
- cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
- LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
- LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
- LLVMConstInt(ctx->ac.i32, 4, false)};
- LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
- LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
- LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
- LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
- res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
- return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
- }
-
- ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
-
- return LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ LLVMValueRef ptr, addr;
+ LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+ unsigned index = nir_intrinsic_base(instr);
+
+ addr = LLVMConstInt(ctx->ac.i32, index, 0);
+ addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
+
+ /* Load constant values from user SGPRs when possible, otherwise
+ * fall back to the default path that loads directly from memory.
+ */
+ if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size == 32) {
+ unsigned count = instr->dest.ssa.num_components;
+ unsigned offset = index;
+
+ offset += LLVMConstIntGetZExtValue(src0);
+ offset /= 4; /* divide by 4: presumably bytes to dwords - confirm */
+
+ offset -= ctx->args->base_inline_push_consts; /* NOTE(review): unsigned subtraction; would wrap if offset < base - presumably callers guarantee otherwise, confirm */
+
+ unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
+ if (offset + count <= num_inline_push_consts) {
+ LLVMValueRef push_constants[num_inline_push_consts]; /* VLA sized by the inline push-constant count */
+ for (unsigned i = 0; i < num_inline_push_consts; i++)
+ push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]);
+ return ac_build_gather_values(&ctx->ac, push_constants + offset, count);
+ }
+ }
+
+ ptr =
+ LLVMBuildGEP(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");
+
+ if (instr->dest.ssa.bit_size == 8) {
+ unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
+ ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
+ LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+
+ LLVMValueRef params[3]; /* alignbyte operands: element 1 (or 0), element 0, address */
+ if (load_dwords > 1) {
+ LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
+ params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
+ LLVMConstInt(ctx->ac.i32, 1, false), "");
+ params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ } else {
+ res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
+ params[0] = ctx->ac.i32_0;
+ params[1] = res;
+ }
+ params[2] = addr;
+ res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
+
+ res = LLVMBuildTrunc(
+ ctx->ac.builder, res,
+ LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
+ if (instr->dest.ssa.num_components > 1)
+ res = LLVMBuildBitCast(ctx->ac.builder, res,
+ LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
+ return res;
+ } else if (instr->dest.ssa.bit_size == 16) {
+ unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
+ ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
+ LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
+ LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
+ cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); /* cond = bit 1 of addr */
+ LLVMValueRef mask[] = {
+ LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
+ LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
+ LLVMConstInt(ctx->ac.i32, 4, false)};
+ LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
+ LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
+ LLVMValueRef shuffle_aligned =
+ LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
+ LLVMValueRef shuffle_unaligned =
+ LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
+ res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
+ return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
+ }
+
+ ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
+
+ return LLVMBuildLoad(ctx->ac.builder, ptr, "");
}
/* Return get_buffer_size() applied to the descriptor that the ABI's load_ssbo
 * callback yields for the index in src[0]. Both trailing boolean arguments are
 * passed as false.
 */
static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
- LLVMValueRef index = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[0]);
- return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
+ return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
}
/* Expand every set bit i of mask into a run of `multiplier` consecutive set
 * bits starting at bit i * multiplier, and return the combined mask.
 * Example: mask = 0b101, multiplier = 2 gives 0b110011.
 *
 * NOTE(review): the shift amounts are not range-checked; presumably callers
 * keep multiplier and i * multiplier below 32 - confirm.
 */
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
- uint32_t new_mask = 0;
- for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
- if (mask & (1u << i))
- new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
- return new_mask;
+ uint32_t new_mask = 0;
+ for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) /* stops once no higher bit can be set */
+ if (mask & (1u << i))
+ new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
+ return new_mask;
}
/* Return `count` consecutive components of `src`, starting at `start`.
 *
 * - count equal to the component count of src: src is returned unchanged
 *   (start is asserted to be 0).
 * - count == 1: a single element is extracted.
 * - otherwise: a shufflevector with constant indices start .. start+count-1.
 *
 * The constant index table has four entries, so only indices 0..3 can be
 * addressed through it; the shuffle path asserts count <= 4.
 */
static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
unsigned start, unsigned count)
{
- LLVMValueRef mask[] = {
- ctx->i32_0, ctx->i32_1,
- LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };
-
- unsigned src_elements = ac_get_llvm_num_components(src);
-
- if (count == src_elements) {
- assert(start == 0);
- return src;
- } else if (count == 1) {
- assert(start < src_elements);
- return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
- } else {
- assert(start + count <= src_elements);
- assert(count <= 4);
- LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
- return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
- }
+ LLVMValueRef mask[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false),
+ LLVMConstInt(ctx->i32, 3, false)};
+
+ unsigned src_elements = ac_get_llvm_num_components(src);
+
+ if (count == src_elements) {
+ assert(start == 0);
+ return src;
+ } else if (count == 1) {
+ assert(start < src_elements);
+ return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
+ } else {
+ assert(start + count <= src_elements);
+ assert(count <= 4);
+ LLVMValueRef swizzle = LLVMConstVector(&mask[start], count); /* indices start .. start+count-1 */
+ return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
+ }
}
/* Compute the cache-policy flag set for a memory access.
 *
 * ac_glc is set when any of these holds:
 *   - may_store_unaligned is true and the chip class is GFX6,
 *   - writeonly_memory is true,
 *   - access contains ACCESS_COHERENT or ACCESS_VOLATILE.
 * ac_slc | ac_glc is additionally set when access contains
 * ACCESS_STREAM_CACHE_POLICY.
 *
 * Returns the resulting flags (0 when none apply).
 */
-static unsigned get_cache_policy(struct ac_nir_context *ctx,
- enum gl_access_qualifier access,
- bool may_store_unaligned,
- bool writeonly_memory)
+static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
+ bool may_store_unaligned, bool writeonly_memory)
{
- unsigned cache_policy = 0;
-
- /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
- * store opcodes not aligned to a dword are affected. The only way to
- * get unaligned stores is through shader images.
- */
- if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
- /* If this is write-only, don't keep data in L1 to prevent
- * evicting L1 cache lines that may be needed by other
- * instructions.
- */
- writeonly_memory ||
- access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
- cache_policy |= ac_glc;
- }
-
- if (access & ACCESS_STREAM_CACHE_POLICY)
- cache_policy |= ac_slc | ac_glc;
-
- return cache_policy;
+ unsigned cache_policy = 0;
+
+ /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
+ * store opcodes not aligned to a dword are affected. The only way to
+ * get unaligned stores is through shader images.
+ */
+ if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
+ /* If this is write-only, don't keep data in L1 to prevent
+ * evicting L1 cache lines that may be needed by other
+ * instructions.
+ */
+ writeonly_memory || access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
+ cache_policy |= ac_glc;
+ }
+
+ if (access & ACCESS_STREAM_CACHE_POLICY)
+ cache_policy |= ac_slc | ac_glc;
+
+ return cache_policy;
}
/* Convenience wrapper around enter_waterfall() for SSBO intrinsics: passes the
 * LLVM value of `src` and, as the last argument, whether the intrinsic's
 * access qualifier contains ACCESS_NON_UNIFORM. Returns whatever
 * enter_waterfall() returns.
 */
-static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx,
- struct waterfall_context *wctx,
- const nir_intrinsic_instr *instr,
- nir_src src)
+static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr, nir_src src)
{
- return enter_waterfall(ctx, wctx, get_src(ctx, src),
- nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+ return enter_waterfall(ctx, wctx, get_src(ctx, src),
+ nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
/* Emit the stores for an SSBO store intrinsic: src[0] is the data, src[1] the
 * buffer (entered through a waterfall via enter_waterfall_ssbo) and src[2] the
 * base offset.
 *
 * When ctx->ac.postponed_kill is set, the whole sequence is wrapped in an
 * if-block conditioned on the value loaded from it (label 7000).
 *
 * The write mask is consumed one consecutive run at a time. A run is shortened
 * (and its remainder pushed back into the mask) when:
 *   - it has 3 elements and either the element size is not 4 bytes or
 *     ac_has_vec3_support() reports no support: reduced to 2 elements,
 *   - it exceeds 16 bytes: reduced to 2 elements,
 *   - it is a multi-element 16-bit run starting at an odd element: 1 element,
 *   - the chip is GFX6 and the elements are smaller than 4 bytes: 1 element.
 * Each run is then written with a byte, short or dword buffer store at
 * base offset + start * element size.
 */
-static void visit_store_ssbo(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7000);
- }
-
- LLVMValueRef src_data = get_src(ctx, instr->src[0]);
- int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
- unsigned writemask = nir_intrinsic_write_mask(instr);
- enum gl_access_qualifier access = nir_intrinsic_access(instr);
- bool writeonly_memory = access & ACCESS_NON_READABLE;
- unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
-
- struct waterfall_context wctx;
- LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
-
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
- LLVMValueRef base_data = src_data;
- base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
- LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
-
- while (writemask) {
- int start, count;
- LLVMValueRef data, offset;
- LLVMTypeRef data_type;
-
- u_bit_scan_consecutive_range(&writemask, &start, &count);
-
- /* Due to an LLVM limitation with LLVM < 9, split 3-element
- * writes into a 2-element and a 1-element write. */
- if (count == 3 &&
- (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
- writemask |= 1 << (start + 2);
- count = 2;
- }
- int num_bytes = count * elem_size_bytes; /* count in bytes */
-
- /* we can only store 4 DWords at the same time.
- * can only happen for 64 Bit vectors. */
- if (num_bytes > 16) {
- writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
- count = 2;
- num_bytes = 16;
- }
-
- /* check alignment of 16 Bit stores */
- if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
- writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
- count = 1;
- num_bytes = 2;
- }
-
- /* Due to alignment issues, split stores of 8-bit/16-bit
- * vectors.
- */
- if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
- writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
- count = 1;
- num_bytes = elem_size_bytes;
- }
-
- data = extract_vector_range(&ctx->ac, base_data, start, count);
-
- offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
- LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
-
- if (num_bytes == 1) {
- ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
- offset, ctx->ac.i32_0,
- cache_policy);
- } else if (num_bytes == 2) {
- ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
- offset, ctx->ac.i32_0,
- cache_policy);
- } else {
- int num_channels = num_bytes / 4;
-
- switch (num_bytes) {
- case 16: /* v4f32 */
- data_type = ctx->ac.v4f32;
- break;
- case 12: /* v3f32 */
- data_type = ctx->ac.v3f32;
- break;
- case 8: /* v2f32 */
- data_type = ctx->ac.v2f32;
- break;
- case 4: /* f32 */
- data_type = ctx->ac.f32;
- break;
- default:
- unreachable("Malformed vector store.");
- }
- data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
-
- ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
- num_channels, offset,
- ctx->ac.i32_0, 0,
- cache_policy);
- }
- }
-
- exit_waterfall(ctx, &wctx, NULL);
-
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7000);
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7000); /* closed by the ac_build_endif at the end */
+ }
+
+ LLVMValueRef src_data = get_src(ctx, instr->src[0]);
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
+ unsigned writemask = nir_intrinsic_write_mask(instr);
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
+ bool writeonly_memory = access & ACCESS_NON_READABLE;
+ unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
+
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
+
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
+ LLVMValueRef base_data = src_data;
+ base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
+ LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
+
+ while (writemask) {
+ int start, count;
+ LLVMValueRef data, offset;
+ LLVMTypeRef data_type;
+
+ u_bit_scan_consecutive_range(&writemask, &start, &count); /* takes the next run of set bits out of writemask */
+
+ /* Due to an LLVM limitation with LLVM < 9, split 3-element
+ * writes into a 2-element and a 1-element write. */
+ if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
+ writemask |= 1 << (start + 2); /* re-queue the third element */
+ count = 2;
+ }
+ int num_bytes = count * elem_size_bytes; /* count in bytes */
+
+ /* we can only store 4 DWords at the same time.
+ * can only happen for 64 Bit vectors. */
+ if (num_bytes > 16) {
+ writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
+ count = 2;
+ num_bytes = 16;
+ }
+
+ /* check alignment of 16 Bit stores */
+ if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+ count = 1;
+ num_bytes = 2;
+ }
+
+ /* Due to alignment issues, split stores of 8-bit/16-bit
+ * vectors.
+ */
+ if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+ count = 1;
+ num_bytes = elem_size_bytes;
+ }
+
+ data = extract_vector_range(&ctx->ac, base_data, start, count);
+
+ offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
+ LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
+
+ if (num_bytes == 1) {
+ ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
+ } else if (num_bytes == 2) {
+ ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
+ } else {
+ int num_channels = num_bytes / 4;
+
+ switch (num_bytes) {
+ case 16: /* v4f32 */
+ data_type = ctx->ac.v4f32;
+ break;
+ case 12: /* v3f32 */
+ data_type = ctx->ac.v3f32;
+ break;
+ case 8: /* v2f32 */
+ data_type = ctx->ac.v2f32;
+ break;
+ case 4: /* f32 */
+ data_type = ctx->ac.f32;
+ break;
+ default:
+ unreachable("Malformed vector store.");
+ }
+ data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
+
+ ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, offset, ctx->ac.i32_0, 0,
+ cache_policy);
+ }
+ }
+
+ exit_waterfall(ctx, &wctx, NULL);
+
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7000);
}
/* Emit a 64-bit compare-and-swap on a buffer described by `descriptor`, at
 * byte `offset`.
 *
 * The address is built from descriptor element 0 (low dword) and the low 16
 * bits of element 1, sign-extended from i16 (high dword). The zero-extended
 * offset is added, and the sum is converted to an i64 pointer in
 * AC_ADDR_SPACE_GLOBAL. The atomic cmpxchg uses the "singlethread-one-as"
 * sync scope, and element 0 of its result is returned.
 *
 * When ctx->abi->robust_buffer_access is set, the operation only runs if
 * offset is unsigned-less-than descriptor element 2. The return value is then
 * a phi yielding i64 0 on the skipped path and the cmpxchg result otherwise.
 */
-static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
- LLVMValueRef descriptor,
- LLVMValueRef offset,
- LLVMValueRef compare,
- LLVMValueRef exchange)
+static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor,
+ LLVMValueRef offset, LLVMValueRef compare,
+ LLVMValueRef exchange)
{
- LLVMBasicBlockRef start_block = NULL, then_block = NULL;
- if (ctx->abi->robust_buffer_access) {
- LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
-
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
- start_block = LLVMGetInsertBlock(ctx->ac.builder);
-
- ac_build_ifcc(&ctx->ac, cond, -1);
-
- then_block = LLVMGetInsertBlock(ctx->ac.builder);
- }
-
- LLVMValueRef ptr_parts[2] = {
- ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
- LLVMBuildAnd(ctx->ac.builder,
- ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
- LLVMConstInt(ctx->ac.i32, 65535, 0), "")
- };
-
- ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
- ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
-
- offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
-
- LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
- ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
- ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), "");
-
- LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
- result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
-
- if (ctx->abi->robust_buffer_access) {
- ac_build_endif(&ctx->ac, -1);
-
- LLVMBasicBlockRef incoming_blocks[2] = {
- start_block,
- then_block,
- };
-
- LLVMValueRef incoming_values[2] = {
- LLVMConstInt(ctx->ac.i64, 0, 0),
- result,
- };
- LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
- LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
- return ret;
- } else {
- return result;
- }
+ LLVMBasicBlockRef start_block = NULL, then_block = NULL; /* only set on the robust path */
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+ start_block = LLVMGetInsertBlock(ctx->ac.builder);
+
+ ac_build_ifcc(&ctx->ac, cond, -1);
+
+ then_block = LLVMGetInsertBlock(ctx->ac.builder);
+ }
+
+ LLVMValueRef ptr_parts[2] = {
+ ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
+ LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
+ LLVMConstInt(ctx->ac.i32, 65535, 0), "")};
+
+ ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
+ ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, ""); /* sign-extend the 16-bit high part */
+
+ offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
+
+ LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
+ ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
+ ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL),
+ "");
+
+ LLVMValueRef result =
+ ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
+ result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
+
+ if (ctx->abi->robust_buffer_access) {
+ ac_build_endif(&ctx->ac, -1);
+
+ LLVMBasicBlockRef incoming_blocks[2] = {
+ start_block,
+ then_block,
+ };
+
+ LLVMValueRef incoming_values[2] = {
+ LLVMConstInt(ctx->ac.i64, 0, 0), /* value when the bounds check failed */
+ result,
+ };
+ LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+ LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+ return ret;
+ } else {
+ return result;
+ }
}
-static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+/* Emit an SSBO atomic (add, imin/umin, imax/umax, and, or, xor, exchange,
+ * comp_swap).
+ *
+ * The NIR intrinsic selects the "op" part of the llvm.amdgcn buffer-atomic
+ * intrinsic name; any other intrinsic aborts. A comp_swap whose return type is
+ * i64 is routed to emit_ssbo_comp_swap_64() instead of a buffer intrinsic.
+ * When ctx->ac.postponed_kill is set, the operation is wrapped in
+ * ac_build_ifcc(..., 7001) / ac_build_endif(..., 7001) on the loaded value.
+ * Returns the value produced by exit_waterfall().
+ */
+static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7001);
- }
-
- LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
- const char *op;
- char name[64], type[8];
- LLVMValueRef params[6], descriptor;
- LLVMValueRef result;
- int arg_count = 0;
-
- struct waterfall_context wctx;
- LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
-
- switch (instr->intrinsic) {
- case nir_intrinsic_ssbo_atomic_add:
- op = "add";
- break;
- case nir_intrinsic_ssbo_atomic_imin:
- op = "smin";
- break;
- case nir_intrinsic_ssbo_atomic_umin:
- op = "umin";
- break;
- case nir_intrinsic_ssbo_atomic_imax:
- op = "smax";
- break;
- case nir_intrinsic_ssbo_atomic_umax:
- op = "umax";
- break;
- case nir_intrinsic_ssbo_atomic_and:
- op = "and";
- break;
- case nir_intrinsic_ssbo_atomic_or:
- op = "or";
- break;
- case nir_intrinsic_ssbo_atomic_xor:
- op = "xor";
- break;
- case nir_intrinsic_ssbo_atomic_exchange:
- op = "swap";
- break;
- case nir_intrinsic_ssbo_atomic_comp_swap:
- op = "cmpswap";
- break;
- default:
- abort();
- }
-
- descriptor = ctx->abi->load_ssbo(ctx->abi,
- rsrc_base,
- true);
-
- if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
- return_type == ctx->ac.i64) {
- result = emit_ssbo_comp_swap_64(ctx, descriptor,
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]),
- get_src(ctx, instr->src[3]));
- } else {
- if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
- }
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
- params[arg_count++] = descriptor;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- /* XXX: The new raw/struct atomic intrinsics are buggy with
- * LLVM 8, see r358579.
- */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i32_0; /* soffset */
- params[arg_count++] = ctx->ac.i32_0; /* slc */
-
- ac_build_type_name_for_intr(return_type, type, sizeof(type));
- snprintf(name, sizeof(name),
- "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
- } else {
- params[arg_count++] = ctx->ac.i32_0; /* vindex */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i1false; /* slc */
-
- assert(return_type == ctx->ac.i32);
- snprintf(name, sizeof(name),
- "llvm.amdgcn.buffer.atomic.%s", op);
- }
-
- result = ac_build_intrinsic(&ctx->ac, name, return_type, params,
- arg_count, 0);
- }
-
- result = exit_waterfall(ctx, &wctx, result);
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7001);
- return result;
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7001);
+ }
+
+ /* The type of src[2] is used as the atomic's return type. */
+ LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
+ const char *op;
+ char name[64], type[8];
+ LLVMValueRef params[6], descriptor;
+ LLVMValueRef result;
+ int arg_count = 0;
+
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
+
+ /* Map the NIR intrinsic to the operation name used in the LLVM intrinsic. */
+ switch (instr->intrinsic) {
+ case nir_intrinsic_ssbo_atomic_add:
+ op = "add";
+ break;
+ case nir_intrinsic_ssbo_atomic_imin:
+ op = "smin";
+ break;
+ case nir_intrinsic_ssbo_atomic_umin:
+ op = "umin";
+ break;
+ case nir_intrinsic_ssbo_atomic_imax:
+ op = "smax";
+ break;
+ case nir_intrinsic_ssbo_atomic_umax:
+ op = "umax";
+ break;
+ case nir_intrinsic_ssbo_atomic_and:
+ op = "and";
+ break;
+ case nir_intrinsic_ssbo_atomic_or:
+ op = "or";
+ break;
+ case nir_intrinsic_ssbo_atomic_xor:
+ op = "xor";
+ break;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ op = "swap";
+ break;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ op = "cmpswap";
+ break;
+ default:
+ abort();
+ }
+
+ descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
+
+ if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) {
+ result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]),
+ get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3]));
+ } else {
+ /* Argument order: for comp_swap src[3] goes first, then src[2], then the
+ * descriptor, followed by the version-dependent offset/slc operands.
+ */
+ if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
+ }
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+ params[arg_count++] = descriptor;
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* XXX: The new raw/struct atomic intrinsics are buggy with
+ * LLVM 8, see r358579.
+ */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i32_0; /* soffset */
+ params[arg_count++] = ctx->ac.i32_0; /* slc */
+
+ ac_build_type_name_for_intr(return_type, type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
+ } else {
+ params[arg_count++] = ctx->ac.i32_0; /* vindex */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i1false; /* slc */
+
+ /* The legacy intrinsic name carries no type suffix; only i32 is accepted here. */
+ assert(return_type == ctx->ac.i32);
+ snprintf(name, sizeof(name), "llvm.amdgcn.buffer.atomic.%s", op);
+ }
+
+ result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0);
+ }
+
+ result = exit_waterfall(ctx, &wctx, result);
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7001);
+ return result;
}
-static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+/* Load instr->num_components values from the buffer resource selected by
+ * src[0], at the offset given by src[1].
+ *
+ * The load is split into pieces of at most 16 bytes. Elements smaller than
+ * 4 bytes are loaded one at a time unless the intrinsic's alignment is a
+ * multiple of 4. 1- and 2-byte pieces go through the tbuffer byte/short
+ * helpers; everything else uses ac_build_buffer_load(). Each piece is trimmed
+ * to load_bytes, bitcast to a vector of the destination element type and
+ * scattered into results[] before the final gather.
+ */
+static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- struct waterfall_context wctx;
- LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
-
- int elem_size_bytes = instr->dest.ssa.bit_size / 8;
- int num_components = instr->num_components;
- enum gl_access_qualifier access = nir_intrinsic_access(instr);
- unsigned cache_policy = get_cache_policy(ctx, access, false, false);
-
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
- LLVMValueRef vindex = ctx->ac.i32_0;
-
- LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
- LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
-
- LLVMValueRef results[4];
- for (int i = 0; i < num_components;) {
- int num_elems = num_components - i;
- if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
- num_elems = 1;
- if (num_elems * elem_size_bytes > 16)
- num_elems = 16 / elem_size_bytes;
- int load_bytes = num_elems * elem_size_bytes;
-
- LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
-
- LLVMValueRef ret;
-
- if (load_bytes == 1) {
- ret = ac_build_tbuffer_load_byte(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- cache_policy);
- } else if (load_bytes == 2) {
- ret = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- cache_policy);
- } else {
- int num_channels = util_next_power_of_two(load_bytes) / 4;
- bool can_speculate = access & ACCESS_CAN_REORDER;
-
- ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
- vindex, offset, immoffset, 0,
- cache_policy, can_speculate, false);
- }
-
- LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
- ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
- ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
-
- LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
- ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
-
- for (unsigned j = 0; j < num_elems; j++) {
- results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
- }
- i += num_elems;
- }
-
- LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
- return exit_waterfall(ctx, &wctx, ret);
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
+
+ int elem_size_bytes = instr->dest.ssa.bit_size / 8;
+ int num_components = instr->num_components;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
+ unsigned cache_policy = get_cache_policy(ctx, access, false, false);
+
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
+ LLVMValueRef vindex = ctx->ac.i32_0;
+
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
+
+ /* NOTE(review): results[] has room for 4 entries; this assumes
+ * num_components <= 4 — confirm against the callers/NIR limits.
+ */
+ LLVMValueRef results[4];
+ /* No increment in the for-header: i advances by num_elems at the end of each pass. */
+ for (int i = 0; i < num_components;) {
+ int num_elems = num_components - i;
+ if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
+ num_elems = 1;
+ if (num_elems * elem_size_bytes > 16)
+ num_elems = 16 / elem_size_bytes;
+ int load_bytes = num_elems * elem_size_bytes;
+
+ /* Byte offset of this piece relative to the start of the whole load. */
+ LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
+
+ LLVMValueRef ret;
+
+ if (load_bytes == 1) {
+ ret = ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
+ cache_policy);
+ } else if (load_bytes == 2) {
+ ret = ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
+ cache_policy);
+ } else {
+ int num_channels = util_next_power_of_two(load_bytes) / 4;
+ bool can_speculate = access & ACCESS_CAN_REORDER;
+
+ ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0,
+ cache_policy, can_speculate, false);
+ }
+
+ /* View the loaded value as bytes so it can be cut down to exactly load_bytes. */
+ LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+ ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
+
+ LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
+
+ for (unsigned j = 0; j < num_elems; j++) {
+ results[i + j] =
+ LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
+ }
+ i += num_elems;
+ }
+
+ LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
+ return exit_waterfall(ctx, &wctx, ret);
}
-static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx,
- struct waterfall_context *wctx,
- const nir_intrinsic_instr *instr)
+/* Call enter_waterfall() on the value of src[0] of a UBO intrinsic.
+ *
+ * The last argument passed to enter_waterfall() is whether the intrinsic's
+ * access flags contain ACCESS_NON_UNIFORM. Returns enter_waterfall()'s result.
+ */
+static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr)
{
- return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
- nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+ return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
+ nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
-static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+/* Load instr->num_components values from a UBO at the offset given by src[1].
+ *
+ * The resource from enter_waterfall_ubo() is passed through
+ * ctx->abi->load_ubo() when that hook is set. 8- and 16-bit destinations are
+ * loaded one component at a time with the tbuffer byte/short helpers; all
+ * other sizes use one ac_build_buffer_load() (with the component count
+ * doubled for 64-bit destinations) followed by a trim. The result is bitcast
+ * to the destination type and handed to exit_waterfall().
+ */
+static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- struct waterfall_context wctx;
- LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
-
- LLVMValueRef ret;
- LLVMValueRef rsrc = rsrc_base;
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- int num_components = instr->num_components;
-
- if (ctx->abi->load_ubo)
- rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
-
- if (instr->dest.ssa.bit_size == 64)
- num_components *= 2;
-
- if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
- unsigned load_bytes = instr->dest.ssa.bit_size / 8;
- LLVMValueRef results[num_components];
- for (unsigned i = 0; i < num_components; ++i) {
- LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32,
- load_bytes * i, 0);
-
- if (load_bytes == 1) {
- results[i] = ac_build_tbuffer_load_byte(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- 0);
- } else {
- assert(load_bytes == 2);
- results[i] = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- 0);
- }
- }
- ret = ac_build_gather_values(&ctx->ac, results, num_components);
- } else {
- ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
- NULL, 0, 0, true, true);
-
- ret = ac_trim_vector(&ctx->ac, ret, num_components);
- }
-
- ret = LLVMBuildBitCast(ctx->ac.builder, ret,
- get_def_type(ctx, &instr->dest.ssa), "");
-
- return exit_waterfall(ctx, &wctx, ret);
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
+
+ LLVMValueRef ret;
+ LLVMValueRef rsrc = rsrc_base;
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ int num_components = instr->num_components;
+
+ /* load_ubo is an optional ABI hook; without it the waterfall value is used directly. */
+ if (ctx->abi->load_ubo)
+ rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
+
+ if (instr->dest.ssa.bit_size == 64)
+ num_components *= 2;
+
+ if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
+ unsigned load_bytes = instr->dest.ssa.bit_size / 8;
+ /* Variable-length array sized by the component count. */
+ LLVMValueRef results[num_components];
+ for (unsigned i = 0; i < num_components; ++i) {
+ LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, load_bytes * i, 0);
+
+ if (load_bytes == 1) {
+ results[i] =
+ ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0);
+ } else {
+ assert(load_bytes == 2);
+ results[i] =
+ ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0);
+ }
+ }
+ ret = ac_build_gather_values(&ctx->ac, results, num_components);
+ } else {
+ ret =
+ ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0, 0, true, true);
+
+ ret = ac_trim_vector(&ctx->ac, ret, num_components);
+ }
+
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+
+ return exit_waterfall(ctx, &wctx, ret);
}
-static void
-get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr,
- bool vs_in, unsigned *vertex_index_out,
- LLVMValueRef *vertex_index_ref,
- unsigned *const_out, LLVMValueRef *indir_out)
+/* Walk the deref chain of `instr` and compute its offset in attribute slots.
+ *
+ * vs_in is forwarded to glsl_count_attribute_slots().
+ *
+ * If vertex_index_out or vertex_index_ref is non-NULL, the first array level
+ * of the path is consumed as the vertex index: with vertex_index_ref it is
+ * returned as an LLVM value (and *vertex_index_out, if given, is set to 0);
+ * otherwise it is read as a constant into *vertex_index_out.
+ *
+ * *const_out receives the accumulated constant offset. *indir_out receives
+ * the accumulated dynamic offset, or NULL when every index was constant; when
+ * both a dynamic and a non-zero constant part exist, the constant is also
+ * added into *indir_out.
+ *
+ * For compact variables only the array index of `instr` itself is used, as a
+ * constant offset.
+ */
+static void get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr, bool vs_in,
+ unsigned *vertex_index_out, LLVMValueRef *vertex_index_ref,
+ unsigned *const_out, LLVMValueRef *indir_out)
{
- nir_variable *var = nir_deref_instr_get_variable(instr);
- nir_deref_path path;
- unsigned idx_lvl = 1;
-
- nir_deref_path_init(&path, instr, NULL);
-
- if (vertex_index_out != NULL || vertex_index_ref != NULL) {
- if (vertex_index_ref) {
- *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
- if (vertex_index_out)
- *vertex_index_out = 0;
- } else {
- *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
- }
- ++idx_lvl;
- }
-
- uint32_t const_offset = 0;
- LLVMValueRef offset = NULL;
-
- if (var->data.compact) {
- assert(instr->deref_type == nir_deref_type_array);
- const_offset = nir_src_as_uint(instr->arr.index);
- goto out;
- }
-
- for (; path.path[idx_lvl]; ++idx_lvl) {
- const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
- if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
- unsigned index = path.path[idx_lvl]->strct.index;
-
- for (unsigned i = 0; i < index; i++) {
- const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
- const_offset += glsl_count_attribute_slots(ft, vs_in);
- }
- } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
- unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
- if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
- const_offset += size *
- nir_src_as_uint(path.path[idx_lvl]->arr.index);
- } else {
- LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
- get_src(ctx, path.path[idx_lvl]->arr.index), "");
- if (offset)
- offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
- else
- offset = array_off;
- }
- } else
- unreachable("Uhandled deref type in get_deref_instr_offset");
- }
+ nir_variable *var = nir_deref_instr_get_variable(instr);
+ nir_deref_path path;
+ /* path.path[0] is skipped; the walk starts at the first deref level. */
+ unsigned idx_lvl = 1;
+
+ nir_deref_path_init(&path, instr, NULL);
+
+ if (vertex_index_out != NULL || vertex_index_ref != NULL) {
+ if (vertex_index_ref) {
+ *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
+ if (vertex_index_out)
+ *vertex_index_out = 0;
+ } else {
+ *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
+ }
+ ++idx_lvl;
+ }
+
+ uint32_t const_offset = 0;
+ LLVMValueRef offset = NULL;
+
+ if (var->data.compact) {
+ assert(instr->deref_type == nir_deref_type_array);
+ const_offset = nir_src_as_uint(instr->arr.index);
+ goto out;
+ }
+
+ for (; path.path[idx_lvl]; ++idx_lvl) {
+ const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
+ if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
+ unsigned index = path.path[idx_lvl]->strct.index;
+
+ /* Skip over the slots of every field that precedes the selected one. */
+ for (unsigned i = 0; i < index; i++) {
+ const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
+ const_offset += glsl_count_attribute_slots(ft, vs_in);
+ }
+ } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
+ unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
+ if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
+ const_offset += size * nir_src_as_uint(path.path[idx_lvl]->arr.index);
+ } else {
+ LLVMValueRef array_off =
+ LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
+ get_src(ctx, path.path[idx_lvl]->arr.index), "");
+ if (offset)
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
+ else
+ offset = array_off;
+ }
+ } else {
+ unreachable("Unhandled deref type in get_deref_offset");
+ }
+ }
out:
- nir_deref_path_finish(&path);
+ nir_deref_path_finish(&path);
- if (const_offset && offset)
- offset = LLVMBuildAdd(ctx->ac.builder, offset,
- LLVMConstInt(ctx->ac.i32, const_offset, 0),
- "");
+ /* Fold the constant part into the dynamic offset when both are present. */
+ if (const_offset && offset)
+ offset =
+ LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, const_offset, 0), "");
- *const_out = const_offset;
- *indir_out = offset;
+ *const_out = const_offset;
+ *indir_out = offset;
}
-static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr,
- bool load_inputs)
+/* Load a tessellation varying through the ctx->abi->load_tess_varyings hook.
+ *
+ * The variable behind src[0] supplies location, driver_location,
+ * location_frac and the patch/compact flags. A variable is treated as
+ * per-patch when data.patch is set or its location is one of the tess-level
+ * slots; in that case no vertex index is requested from get_deref_offset().
+ * load_inputs is forwarded to the hook unchanged. For 16-bit destinations the
+ * hook's result is converted to integer and truncated to the destination
+ * type; the returned value is always bitcast to the destination type.
+ */
+static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
+ bool load_inputs)
{
- LLVMValueRef result;
- LLVMValueRef vertex_index = NULL;
- LLVMValueRef indir_index = NULL;
- unsigned const_index = 0;
-
- nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-
- unsigned location = var->data.location;
- unsigned driver_location = var->data.driver_location;
- const bool is_patch = var->data.patch ||
- var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
- var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
- const bool is_compact = var->data.compact;
-
- get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- false, NULL, is_patch ? NULL : &vertex_index,
- &const_index, &indir_index);
-
- LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
-
- LLVMTypeRef src_component_type;
- if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
- src_component_type = LLVMGetElementType(dest_type);
- else
- src_component_type = dest_type;
-
- result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
- vertex_index, indir_index,
- const_index, location, driver_location,
- var->data.location_frac,
- instr->num_components,
- is_patch, is_compact, load_inputs);
- if (instr->dest.ssa.bit_size == 16) {
- result = ac_to_integer(&ctx->ac, result);
- result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
- }
- return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+ LLVMValueRef result;
+ LLVMValueRef vertex_index = NULL;
+ LLVMValueRef indir_index = NULL;
+ unsigned const_index = 0;
+
+ nir_variable *var =
+ nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+
+ unsigned location = var->data.location;
+ unsigned driver_location = var->data.driver_location;
+ const bool is_patch = var->data.patch || var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
+ const bool is_compact = var->data.compact;
+
+ /* Per-patch variables pass NULL so that no deref level is consumed as a vertex index. */
+ get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), false, NULL,
+ is_patch ? NULL : &vertex_index, &const_index, &indir_index);
+
+ LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
+
+ /* The hook is given the scalar element type, even for vector destinations. */
+ LLVMTypeRef src_component_type;
+ if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
+ src_component_type = LLVMGetElementType(dest_type);
+ else
+ src_component_type = dest_type;
+
+ result =
+ ctx->abi->load_tess_varyings(ctx->abi, src_component_type, vertex_index, indir_index,
+ const_index, location, driver_location, var->data.location_frac,
+ instr->num_components, is_patch, is_compact, load_inputs);
+ if (instr->dest.ssa.bit_size == 16) {
+ result = ac_to_integer(&ctx->ac, result);
+ result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
+ }
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
-static unsigned
-type_scalar_size_bytes(const struct glsl_type *type)
+/* Size in bytes of one scalar of `type`: 4 for booleans, otherwise the GLSL
+ * bit size divided by 8. `type` must be a scalar, vector or matrix type
+ * (asserted).
+ */
+static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
- assert(glsl_type_is_vector_or_scalar(type) ||
- glsl_type_is_matrix(type));
+ assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
-static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+/* Translate a NIR variable load (deref in src[0]) into LLVM IR.
+ *
+ * Dispatch is on the variable's mode (or the deref's mode when no variable
+ * can be resolved):
+ * - shader_in: tess stages go to load_tess_varyings(), geometry goes to
+ * ctx->abi->load_inputs(), other stages read ctx->abi->inputs[].
+ * - function_temp: loads from ctx->locals[].
+ * - shader_out: tess-control goes to load_tess_varyings(), fragment
+ * fb-fetch goes to ctx->abi->emit_fbfetch(), otherwise loads from
+ * ctx->abi->outputs[].
+ * - mem_global: loads through the pointer in src[0], per component when the
+ * stride is not the natural one or on GFX6 with sub-dword elements.
+ * Any other mode is unreachable.
+ *
+ * For the array-backed modes the gathered channels are bitcast to the
+ * destination type. 64-bit destinations occupy two channels per component.
+ */
+static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
- nir_variable *var = nir_deref_instr_get_variable(deref);
-
- LLVMValueRef values[8];
- int idx = 0;
- int ve = instr->dest.ssa.num_components;
- unsigned comp = 0;
- LLVMValueRef indir_index;
- LLVMValueRef ret;
- unsigned const_index;
- unsigned stride = 4;
- int mode = deref->mode;
-
- if (var) {
- bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
- var->data.mode == nir_var_shader_in;
- idx = var->data.driver_location;
- comp = var->data.location_frac;
- mode = var->data.mode;
-
- get_deref_offset(ctx, deref, vs_in, NULL, NULL,
- &const_index, &indir_index);
-
- if (var->data.compact) {
- stride = 1;
- const_index += comp;
- comp = 0;
- }
- }
-
- if (instr->dest.ssa.bit_size == 64 &&
- (deref->mode == nir_var_shader_in ||
- deref->mode == nir_var_shader_out ||
- deref->mode == nir_var_function_temp))
- ve *= 2;
-
- switch (mode) {
- case nir_var_shader_in:
- /* TODO: remove this after RADV switches to lowered IO */
- if (ctx->stage == MESA_SHADER_TESS_CTRL ||
- ctx->stage == MESA_SHADER_TESS_EVAL) {
- return load_tess_varyings(ctx, instr, true);
- }
-
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMValueRef indir_index;
- unsigned const_index, vertex_index;
- get_deref_offset(ctx, deref, false, &vertex_index, NULL,
- &const_index, &indir_index);
- assert(indir_index == NULL);
-
- return ctx->abi->load_inputs(ctx->abi, var->data.location,
- var->data.driver_location,
- var->data.location_frac,
- instr->num_components, vertex_index, const_index, type);
- }
-
- for (unsigned chan = comp; chan < ve + comp; chan++) {
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type,
- ctx->stage == MESA_SHADER_VERTEX);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->abi->inputs + idx + chan, count,
- stride, false, true);
-
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- tmp_vec,
- indir_index, "");
- } else
- values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
- }
- break;
- case nir_var_function_temp:
- for (unsigned chan = 0; chan < ve; chan++) {
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->locals + idx + chan, count,
- stride, true, true);
-
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- tmp_vec,
- indir_index, "");
- } else {
- values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
- }
- }
- break;
- case nir_var_shader_out:
- /* TODO: remove this after RADV switches to lowered IO */
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- return load_tess_varyings(ctx, instr, false);
- }
-
- if (ctx->stage == MESA_SHADER_FRAGMENT &&
- var->data.fb_fetch_output &&
- ctx->abi->emit_fbfetch)
- return ctx->abi->emit_fbfetch(ctx->abi);
-
- for (unsigned chan = comp; chan < ve + comp; chan++) {
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->abi->outputs + idx + chan, count,
- stride, true, true);
-
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- tmp_vec,
- indir_index, "");
- } else {
- values[chan] = LLVMBuildLoad(ctx->ac.builder,
- ctx->abi->outputs[idx + chan + const_index * stride],
- "");
- }
- }
- break;
- case nir_var_mem_global: {
- LLVMValueRef address = get_src(ctx, instr->src[0]);
- LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
- unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
- unsigned natural_stride = type_scalar_size_bytes(deref->type);
- unsigned stride = explicit_stride ? explicit_stride : natural_stride;
- int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
- bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
-
- if (stride != natural_stride || split_loads) {
- if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
- result_type = LLVMGetElementType(result_type);
-
- LLVMTypeRef ptr_type = LLVMPointerType(result_type,
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
-
- for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
- values[i] = LLVMBuildLoad(ctx->ac.builder,
- ac_build_gep_ptr(&ctx->ac, address, offset), "");
-
- if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
- LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
- }
- return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
- } else {
- LLVMTypeRef ptr_type = LLVMPointerType(result_type,
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
- LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
-
- if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
- LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
- return val;
- }
- }
- default:
- unreachable("unhandle variable mode");
- }
- ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
- return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ LLVMValueRef values[8];
+ int idx = 0;
+ int ve = instr->dest.ssa.num_components;
+ unsigned comp = 0;
+ /* Initialised because get_deref_offset() only fills these in when var is
+ * non-NULL, while the switch below reads them unconditionally.
+ */
+ LLVMValueRef indir_index = NULL;
+ LLVMValueRef ret;
+ unsigned const_index = 0;
+ unsigned stride = 4;
+ int mode = deref->mode;
+
+ if (var) {
+ bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in;
+ idx = var->data.driver_location;
+ comp = var->data.location_frac;
+ mode = var->data.mode;
+
+ get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index);
+
+ /* Compact variables are indexed per channel: stride 1, with the
+ * component folded into the constant index.
+ */
+ if (var->data.compact) {
+ stride = 1;
+ const_index += comp;
+ comp = 0;
+ }
+ }
+
+ /* 64-bit values take two channels per component in the array-backed modes. */
+ if (instr->dest.ssa.bit_size == 64 &&
+ (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out ||
+ deref->mode == nir_var_function_temp))
+ ve *= 2;
+
+ switch (mode) {
+ case nir_var_shader_in:
+ /* TODO: remove this after RADV switches to lowered IO */
+ if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
+ return load_tess_varyings(ctx, instr, true);
+ }
+
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ /* These intentionally shadow the outer variables: the offset is
+ * recomputed here with a constant vertex index.
+ */
+ LLVMValueRef indir_index;
+ unsigned const_index, vertex_index;
+ get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index);
+ assert(indir_index == NULL);
+
+ return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location,
+ var->data.location_frac, instr->num_components, vertex_index,
+ const_index, type);
+ }
+
+ for (unsigned chan = comp; chan < ve + comp; chan++) {
+ if (indir_index) {
+ unsigned count =
+ glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->inputs + idx + chan, count, stride, false, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, "");
+ } else
+ values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
+ }
+ break;
+ case nir_var_function_temp:
+ for (unsigned chan = 0; chan < ve; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->locals + idx + chan, count, stride, true, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, "");
+ } else {
+ values[chan] =
+ LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
+ }
+ }
+ break;
+ case nir_var_shader_out:
+ /* TODO: remove this after RADV switches to lowered IO */
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ return load_tess_varyings(ctx, instr, false);
+ }
+
+ if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch)
+ return ctx->abi->emit_fbfetch(ctx->abi);
+
+ for (unsigned chan = comp; chan < ve + comp; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->outputs + idx + chan, count, stride, true, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, "");
+ } else {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder,
+ ctx->abi->outputs[idx + chan + const_index * stride], "");
+ }
+ }
+ break;
+ case nir_var_mem_global: {
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
+ unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+ unsigned natural_stride = type_scalar_size_bytes(deref->type);
+ unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
+ bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
+
+ if (stride != natural_stride || split_loads) {
+ /* Load one scalar at a time, stepping by stride / natural_stride elements. */
+ if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
+ result_type = LLVMGetElementType(result_type);
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
+
+ for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
+ values[i] =
+ LLVMBuildLoad(ctx->ac.builder, ac_build_gep_ptr(&ctx->ac, address, offset), "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
+ }
+ return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
+ } else {
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
+ LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
+ return val;
+ }
+ }
+ default:
+ unreachable("unhandled variable mode");
+ }
+ ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
+ return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}
-static void
-visit_store_var(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+/* Emit LLVM IR for a NIR store through a variable deref.
+ *
+ * instr->src[0] is the deref, instr->src[1] the value to store and
+ * instr->const_index[0] the write mask. The modes handled are
+ * nir_var_shader_out, nir_var_function_temp and nir_var_mem_global; any other
+ * mode calls abort().
+ *
+ * When ctx->ac.postponed_kill is set, the emitted stores are wrapped in an
+ * ac_build_ifcc()/ac_build_endif() pair (label 7002) conditioned on the value
+ * loaded from postponed_kill.
+ */
+static void visit_store_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7002);
- }
-
- nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
- nir_variable *var = nir_deref_instr_get_variable(deref);
-
- LLVMValueRef temp_ptr, value;
- int idx = 0;
- unsigned comp = 0;
- LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
- int writemask = instr->const_index[0];
- LLVMValueRef indir_index;
- unsigned const_index;
-
- if (var) {
- get_deref_offset(ctx, deref, false,
- NULL, NULL, &const_index, &indir_index);
- idx = var->data.driver_location;
- comp = var->data.location_frac;
-
- if (var->data.compact) {
- const_index += comp;
- comp = 0;
- }
- }
-
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
- (deref->mode == nir_var_shader_out ||
- deref->mode == nir_var_function_temp)) {
-
- src = LLVMBuildBitCast(ctx->ac.builder, src,
- LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
- "");
-
- writemask = widen_mask(writemask, 2);
- }
-
- writemask = writemask << comp;
-
- switch (deref->mode) {
- case nir_var_shader_out:
- /* TODO: remove this after RADV switches to lowered IO */
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- LLVMValueRef vertex_index = NULL;
- LLVMValueRef indir_index = NULL;
- unsigned const_index = 0;
- const bool is_patch = var->data.patch ||
- var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
- var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
-
- get_deref_offset(ctx, deref, false, NULL,
- is_patch ? NULL : &vertex_index,
- &const_index, &indir_index);
-
- ctx->abi->store_tcs_outputs(ctx->abi, var,
- vertex_index, indir_index,
- const_index, src, writemask,
- var->data.location_frac,
- var->data.driver_location);
- break;
- }
-
- for (unsigned chan = 0; chan < 8; chan++) {
- int stride = 4;
- if (!(writemask & (1 << chan)))
- continue;
-
- value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
-
- if (var->data.compact)
- stride = 1;
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->abi->outputs + idx + chan, count,
- stride, true, true);
-
- tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
- value, indir_index, "");
- build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
- count, stride, tmp_vec);
-
- } else {
- temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
-
- LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
- }
- }
- break;
- case nir_var_function_temp:
- for (unsigned chan = 0; chan < 8; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
-
- value = ac_llvm_extract_elem(&ctx->ac, src, chan);
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->locals + idx + chan, count,
- 4, true, true);
-
- tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
- value, indir_index, "");
- build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
- count, 4, tmp_vec);
- } else {
- temp_ptr = ctx->locals[idx + chan + const_index * 4];
-
- LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
- }
- }
- break;
-
- case nir_var_mem_global: {
- int writemask = instr->const_index[0];
- LLVMValueRef address = get_src(ctx, instr->src[0]);
- LLVMValueRef val = get_src(ctx, instr->src[1]);
-
- unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
- unsigned natural_stride = type_scalar_size_bytes(deref->type);
- unsigned stride = explicit_stride ? explicit_stride : natural_stride;
- int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
- bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
-
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
-
- if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
- stride == natural_stride && !split_stores) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
-
- val = LLVMBuildBitCast(ctx->ac.builder, val,
- LLVMGetElementType(LLVMTypeOf(address)), "");
- LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address);
-
- if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
- LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
- } else {
- LLVMTypeRef val_type = LLVMTypeOf(val);
- if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
- val_type = LLVMGetElementType(val_type);
-
- LLVMTypeRef ptr_type = LLVMPointerType(val_type,
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
- for (unsigned chan = 0; chan < 4; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
-
- LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
- LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
- chan);
- src = LLVMBuildBitCast(ctx->ac.builder, src,
- LLVMGetElementType(LLVMTypeOf(ptr)), "");
- LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr);
-
- if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
- LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
- }
- }
- break;
- }
- default:
- abort();
- break;
- }
-
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7002);
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7002);
+ }
+
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ LLVMValueRef temp_ptr, value;
+ int idx = 0;
+ unsigned comp = 0;
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
+ int writemask = instr->const_index[0];
+ LLVMValueRef indir_index;
+ unsigned const_index;
+
+ /* indir_index and const_index are only assigned when the deref resolves to
+ * a variable; the shader_out and function_temp cases below read them and
+ * also dereference var.
+ */
+ if (var) {
+ get_deref_offset(ctx, deref, false, NULL, NULL, &const_index, &indir_index);
+ idx = var->data.driver_location;
+ comp = var->data.location_frac;
+
+ /* For compact variables the component offset is folded into const_index. */
+ if (var->data.compact) {
+ const_index += comp;
+ comp = 0;
+ }
+ }
+
+ /* 64-bit sources stored to outputs/temporaries are bitcast to an f32 vector
+ * with twice as many components, and the write mask is widened with
+ * widen_mask(writemask, 2) to match.
+ */
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
+ (deref->mode == nir_var_shader_out || deref->mode == nir_var_function_temp)) {
+
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), "");
+
+ writemask = widen_mask(writemask, 2);
+ }
+
+ /* Shift the mask by the starting component (0 for compact variables). */
+ writemask = writemask << comp;
+
+ switch (deref->mode) {
+ case nir_var_shader_out:
+ /* TODO: remove this after RADV switches to lowered IO */
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ /* These locals shadow the outer indir_index/const_index; the offset is
+ * recomputed here, requesting a vertex index unless the output is
+ * per-patch.
+ */
+ LLVMValueRef vertex_index = NULL;
+ LLVMValueRef indir_index = NULL;
+ unsigned const_index = 0;
+ const bool is_patch = var->data.patch ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
+
+ get_deref_offset(ctx, deref, false, NULL, is_patch ? NULL : &vertex_index, &const_index,
+ &indir_index);
+
+ ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src,
+ writemask, var->data.location_frac, var->data.driver_location);
+ break;
+ }
+
+ for (unsigned chan = 0; chan < 8; chan++) {
+ int stride = 4;
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ /* The mask was shifted by comp above, so undo that to index src. */
+ value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
+
+ if (var->data.compact)
+ stride = 1;
+ if (indir_index) {
+ /* Indirect index: gather the output slots for this channel into a
+ * vector, insert the value at the dynamic index, then store the
+ * whole vector back.
+ */
+ unsigned count = glsl_count_attribute_slots(var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->outputs + idx + chan, count, stride, true, true);
+
+ tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, "");
+ build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan, count, stride,
+ tmp_vec);
+
+ } else {
+ temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
+
+ LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
+ }
+ }
+ break;
+ case nir_var_function_temp:
+ /* Same scheme as the output case, but against ctx->locals with a fixed
+ * stride of 4 and no component offset applied when indexing src.
+ */
+ for (unsigned chan = 0; chan < 8; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ value = ac_llvm_extract_elem(&ctx->ac, src, chan);
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->locals + idx + chan, count, 4, true, true);
+
+ tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, "");
+ build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, count, 4, tmp_vec);
+ } else {
+ temp_ptr = ctx->locals[idx + chan + const_index * 4];
+
+ LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
+ }
+ }
+ break;
+
+ case nir_var_mem_global: {
+ /* This writemask shadows the outer one and re-reads the raw mask; val is
+ * fetched again without the ac_to_float() conversion applied to src.
+ */
+ int writemask = instr->const_index[0];
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMValueRef val = get_src(ctx, instr->src[1]);
+
+ unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+ unsigned natural_stride = type_scalar_size_bytes(deref->type);
+ unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
+ bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
+
+ /* Single store of the whole value: taken only when every component is
+ * written, the stride is the natural one and no GFX6 split is required.
+ */
+ if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 && stride == natural_stride &&
+ !split_stores) {
+ /* NOTE(review): this pointer type and bitcast repeat the ones built
+ * just before the if; looks redundant -- confirm.
+ */
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
+
+ val = LLVMBuildBitCast(ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), "");
+ LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address);
+
+ /* Coherent/volatile accesses get monotonic atomic ordering. */
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
+ } else {
+ /* Per-component stores through a pointer to the scalar element type. */
+ LLVMTypeRef val_type = LLVMTypeOf(val);
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
+ val_type = LLVMGetElementType(val_type);
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(val_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
+ /* NOTE(review): only channels 0..3 are visited here -- confirm wider
+ * vectors cannot reach this path.
+ */
+ for (unsigned chan = 0; chan < 4; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ /* Element offset of this channel, scaled by the stride ratio. */
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
+
+ LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
+ LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, chan);
+ src = LLVMBuildBitCast(ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), "");
+ LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
+ }
+ }
+ break;
+ }
+ default:
+ abort();
+ break;
+ }
+
+ /* Close the conditional block opened at the top of the function. */
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7002);
}
-static void
-visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
+/* Emit LLVM IR for an output store intrinsic.
+ *
+ * instr->src[0] is the value to store; the base, write mask and component are
+ * read from the intrinsic. A constant offset source is asserted to be 0; a
+ * non-constant one becomes an indirect index, which is only accepted on the
+ * tessellation-control path (asserted to be NULL otherwise). Only 32-bit and
+ * 64-bit element sizes are handled.
+ */
+static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7002);
- }
-
- unsigned base = nir_intrinsic_base(instr);
- unsigned writemask = nir_intrinsic_write_mask(instr);
- unsigned component = nir_intrinsic_component(instr);
- LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
- nir_src offset = *nir_get_io_offset_src(instr);
- LLVMValueRef indir_index = NULL;
-
- if (nir_src_is_const(offset))
- assert(nir_src_as_uint(offset) == 0);
- else
- indir_index = get_src(ctx, offset);
-
- switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
- case 32:
- break;
- case 64:
- writemask = widen_mask(writemask, 2);
- src = LLVMBuildBitCast(ctx->ac.builder, src,
- LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
- "");
- break;
- default:
- unreachable("unhandled store_output bit size");
- return;
- }
-
- writemask <<= component;
-
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
- LLVMValueRef vertex_index =
- vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
-
- ctx->abi->store_tcs_outputs(ctx->abi, NULL,
- vertex_index, indir_index,
- 0, src, writemask,
- component, base * 4);
- return;
- }
-
- /* No indirect indexing is allowed after this point. */
- assert(!indir_index);
-
- for (unsigned chan = 0; chan < 8; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
-
- LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
- LLVMBuildStore(ctx->ac.builder, value,
- ctx->abi->outputs[base * 4 + chan]);
- }
-
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7002);
+ /* With a postponed kill pending, wrap the stores in a conditional block. */
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7002);
+ }
+
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned writemask = nir_intrinsic_write_mask(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+ nir_src offset = *nir_get_io_offset_src(instr);
+ LLVMValueRef indir_index = NULL;
+
+ if (nir_src_is_const(offset))
+ assert(nir_src_as_uint(offset) == 0);
+ else
+ indir_index = get_src(ctx, offset);
+
+ switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
+ case 32:
+ break;
+ case 64:
+ /* Reinterpret 64-bit data as twice as many f32 components and widen the
+ * write mask accordingly.
+ */
+ writemask = widen_mask(writemask, 2);
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), "");
+ break;
+ default:
+ unreachable("unhandled store_output bit size");
+ return;
+ }
+
+ /* Align the mask with the starting component. */
+ writemask <<= component;
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
+ LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
+
+ ctx->abi->store_tcs_outputs(ctx->abi, NULL, vertex_index, indir_index, 0, src, writemask,
+ component, base * 4);
+ /* NOTE(review): this return (and the one in the default case above)
+ * skips the ac_build_endif(..., 7002) at the end of the function --
+ * confirm postponed_kill cannot be set on these paths.
+ */
+ return;
+ }
+
+ /* No indirect indexing is allowed after this point. */
+ assert(!indir_index);
+
+ /* Outputs are addressed as base * 4 + chan; src is indexed relative to the
+ * starting component because the mask was shifted above.
+ */
+ for (unsigned chan = 0; chan < 8; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
+ LLVMBuildStore(ctx->ac.builder, value, ctx->abi->outputs[base * 4 + chan]);
+ }
+
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7002);
}
+/* Return the component count associated with a sampler dimension, taking
+ * array-ness into account for 1D, 2D and MS. Dimensions not listed in the
+ * switch yield 0. get_image_coords() uses the result as its coordinate count;
+ * for multisampled dimensions it subtracts one before extracting coordinates
+ * and appends the sample index afterwards.
+ */
static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
{
- switch (dim) {
- case GLSL_SAMPLER_DIM_BUF:
- return 1;
- case GLSL_SAMPLER_DIM_1D:
- return array ? 2 : 1;
- case GLSL_SAMPLER_DIM_2D:
- return array ? 3 : 2;
- case GLSL_SAMPLER_DIM_MS:
- return array ? 4 : 3;
- case GLSL_SAMPLER_DIM_3D:
- case GLSL_SAMPLER_DIM_CUBE:
- return 3;
- case GLSL_SAMPLER_DIM_RECT:
- case GLSL_SAMPLER_DIM_SUBPASS:
- return 2;
- case GLSL_SAMPLER_DIM_SUBPASS_MS:
- return 3;
- default:
- break;
- }
- return 0;
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_BUF:
+ return 1;
+ case GLSL_SAMPLER_DIM_1D:
+ return array ? 2 : 1;
+ case GLSL_SAMPLER_DIM_2D:
+ return array ? 3 : 2;
+ case GLSL_SAMPLER_DIM_MS:
+ return array ? 4 : 3;
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ return 3;
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_SUBPASS:
+ return 2;
+ case GLSL_SAMPLER_DIM_SUBPASS_MS:
+ return 3;
+ default:
+ break;
+ }
+ /* Unlisted dimension. */
+ return 0;
}
+/* Run the sample index through ac_apply_fmask_to_sample() and return the
+ * resulting value.
+ *
+ * A 4-entry address array is built from the coordinates; the sample index is
+ * placed in slot 3 when coord_z is non-NULL and in slot 2 otherwise (where it
+ * overwrites the NULL coord_z entry). The final argument passed to
+ * ac_apply_fmask_to_sample() is whether coord_z was supplied.
+ */
static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
- LLVMValueRef coord_x, LLVMValueRef coord_y,
- LLVMValueRef coord_z,
- LLVMValueRef sample_index,
- LLVMValueRef fmask_desc_ptr)
+ LLVMValueRef coord_x, LLVMValueRef coord_y,
+ LLVMValueRef coord_z, LLVMValueRef sample_index,
+ LLVMValueRef fmask_desc_ptr)
{
- unsigned sample_chan = coord_z ? 3 : 2;
- LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
- addr[sample_chan] = sample_index;
+ unsigned sample_chan = coord_z ? 3 : 2;
+ LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
+ addr[sample_chan] = sample_index;
- ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
- return addr[sample_chan];
+ /* The helper receives addr by pointer; the sample slot is read back below. */
+ ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
+ return addr[sample_chan];
}
+/* Return the deref instruction that produces instr->src[0].
+ * Asserts that src[0] is an SSA source.
+ */
static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
{
- assert(instr->src[0].is_ssa);
- return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ assert(instr->src[0].is_ssa);
+ return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
}
+/* Fetch a descriptor of the requested desc_type for an image intrinsic by
+ * forwarding to get_sampler_desc().
+ *
+ * The deref passed on is the parent of instr->src[0] when that parent is a
+ * deref instruction, and NULL otherwise. dynamic_index and write are
+ * forwarded unchanged.
+ */
static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
 const nir_intrinsic_instr *instr,
 LLVMValueRef dynamic_index,
- enum ac_descriptor_type desc_type,
- bool write)
+ enum ac_descriptor_type desc_type, bool write)
{
- nir_deref_instr *deref_instr =
- instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ?
- nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL;
+ nir_deref_instr *deref_instr = instr->src[0].ssa->parent_instr->type == nir_instr_type_deref
+ ? nir_instr_as_deref(instr->src[0].ssa->parent_instr)
+ : NULL;
- return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
+ /* NOTE(review): the literal true presumably marks this as an image (not
+ * sampler) lookup -- confirm against get_sampler_desc().
+ */
+ return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
}
-static void get_image_coords(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- LLVMValueRef dynamic_desc_index,
- struct ac_image_args *args,
- enum glsl_sampler_dim dim,
- bool is_array)
+/* Fill args->coords for an image intrinsic.
+ *
+ * instr->src[1] is the coordinate source and element 0 of instr->src[2] is
+ * the sample index. The GFX9 2D non-array path reads args->resource, so it
+ * must already be set when that path is taken. Subpass dimensions are
+ * asserted not to reach this function. The final coordinate count is local
+ * and is not reported back to the caller.
+ */
+static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_desc_index, struct ac_image_args *args,
+ enum glsl_sampler_dim dim, bool is_array)
{
- LLVMValueRef src0 = get_src(ctx, instr->src[1]);
- LLVMValueRef masks[] = {
- LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
- LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
- };
- LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
-
- int count;
- ASSERTED bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
- dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
- bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
- dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
- bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
- assert(!add_frag_pos && "Input attachments should be lowered by this point.");
- count = image_type_to_components_count(dim, is_array);
-
- if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
- instr->intrinsic == nir_intrinsic_bindless_image_load)) {
- LLVMValueRef fmask_load_address[3];
-
- fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
- fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
- if (is_array)
- fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
- else
- fmask_load_address[2] = NULL;
-
- sample_index = adjust_sample_index_using_fmask(&ctx->ac,
- fmask_load_address[0],
- fmask_load_address[1],
- fmask_load_address[2],
- sample_index,
- get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- AC_DESC_FMASK, &instr->instr, dynamic_desc_index, true, false));
- }
- if (count == 1 && !gfx9_1d) {
- if (instr->src[1].ssa->num_components)
- args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
- else
- args->coords[0] = src0;
- } else {
- int chan;
- if (is_ms)
- count--;
- for (chan = 0; chan < count; ++chan) {
- args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
- }
-
- if (gfx9_1d) {
- if (is_array) {
- args->coords[2] = args->coords[1];
- args->coords[1] = ctx->ac.i32_0;
- } else
- args->coords[1] = ctx->ac.i32_0;
- count++;
- }
- if (ctx->ac.chip_class == GFX9 &&
- dim == GLSL_SAMPLER_DIM_2D &&
- !is_array) {
- /* The hw can't bind a slice of a 3D image as a 2D
- * image, because it ignores BASE_ARRAY if the target
- * is 3D. The workaround is to read BASE_ARRAY and set
- * it as the 3rd address operand for all 2D images.
- */
- LLVMValueRef first_layer, const5, mask;
-
- const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
- mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
- first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
- first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
-
- args->coords[count] = first_layer;
- count++;
- }
-
-
- if (is_ms) {
- args->coords[count] = sample_index;
- count++;
- }
- }
+ LLVMValueRef src0 = get_src(ctx, instr->src[1]);
+ /* i32 constants 0..3, used as element indices for LLVMBuildExtractElement. */
+ LLVMValueRef masks[] = {
+ LLVMConstInt(ctx->ac.i32, 0, false),
+ LLVMConstInt(ctx->ac.i32, 1, false),
+ LLVMConstInt(ctx->ac.i32, 2, false),
+ LLVMConstInt(ctx->ac.i32, 3, false),
+ };
+ LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+
+ int count;
+ ASSERTED bool add_frag_pos =
+ (dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
+ bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
+ bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
+ assert(!add_frag_pos && "Input attachments should be lowered by this point.");
+ count = image_type_to_components_count(dim, is_array);
+
+ /* Multisampled loads: remap the sample index through the FMASK descriptor
+ * before it is used as a coordinate.
+ */
+ if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
+ instr->intrinsic == nir_intrinsic_bindless_image_load)) {
+ LLVMValueRef fmask_load_address[3];
+
+ fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
+ fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
+ if (is_array)
+ fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
+ else
+ fmask_load_address[2] = NULL;
+
+ sample_index = adjust_sample_index_using_fmask(
+ &ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],
+ sample_index,
+ get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), AC_DESC_FMASK,
+ &instr->instr, dynamic_desc_index, true, false));
+ }
+ if (count == 1 && !gfx9_1d) {
+ /* NOTE(review): this tests num_components for non-zero, so the else
+ * branch looks unreachable for a valid SSA def -- confirm intent.
+ */
+ if (instr->src[1].ssa->num_components)
+ args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
+ else
+ args->coords[0] = src0;
+ } else {
+ int chan;
+ /* For MS the count is reduced by one here; the sample index is appended
+ * as the last coordinate at the end of this branch.
+ */
+ if (is_ms)
+ count--;
+ for (chan = 0; chan < count; ++chan) {
+ args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
+ }
+
+ /* GFX9 1D: insert a zero as coords[1]; for arrays the previous coords[1]
+ * moves to coords[2].
+ */
+ if (gfx9_1d) {
+ if (is_array) {
+ args->coords[2] = args->coords[1];
+ args->coords[1] = ctx->ac.i32_0;
+ } else
+ args->coords[1] = ctx->ac.i32_0;
+ count++;
+ }
+ if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {
+ /* The hw can't bind a slice of a 3D image as a 2D
+ * image, because it ignores BASE_ARRAY if the target
+ * is 3D. The workaround is to read BASE_ARRAY and set
+ * it as the 3rd address operand for all 2D images.
+ */
+ LLVMValueRef first_layer, const5, mask;
+
+ /* BASE_ARRAY is taken from element 5 of the resource descriptor. */
+ const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
+ mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
+ first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
+ first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
+
+ args->coords[count] = first_layer;
+ count++;
+ }
+
+ if (is_ms) {
+ args->coords[count] = sample_index;
+ count++;
+ }
+ }
}
+/* Fetch the AC_DESC_BUFFER descriptor for an image intrinsic.
+ *
+ * For atomics on GFX9 built against LLVM older than 9, element 2 of the
+ * descriptor is replaced by the unsigned maximum of itself and the upper 16
+ * bits of element 1 (named elem_count and stride by the locals below).
+ * Otherwise the descriptor is returned as fetched.
+ */
static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
 const nir_intrinsic_instr *instr,
- LLVMValueRef dynamic_index,
- bool write, bool atomic)
+ LLVMValueRef dynamic_index, bool write, bool atomic)
{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
- if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
- LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
- LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
- stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
-
- LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
- LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
- elem_count, stride, "");
-
- rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
- LLVMConstInt(ctx->ac.i32, 2, 0), "");
- }
- return rsrc;
+ LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
+ if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
+ LLVMValueRef elem_count =
+ LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
+ LLVMValueRef stride =
+ LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
+ /* Keep only the bits above bit 15 of descriptor element 1. */
+ stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
+
+ /* new_elem_count = elem_count > stride ? elem_count : stride (unsigned). */
+ LLVMValueRef new_elem_count = LLVMBuildSelect(
+ ctx->ac.builder, LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
+ elem_count, stride, "");
+
+ rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
+ LLVMConstInt(ctx->ac.i32, 2, 0), "");
+ }
+ return rsrc;
}
/* Open a waterfall section for an image intrinsic.
 *
 * If src[0] is produced by a deref instruction, that deref is handed to
 * get_sampler_desc_index(); otherwise NULL is passed. The value it returns is
 * given to enter_waterfall() together with a flag that is non-zero when the
 * intrinsic's access qualifiers contain ACCESS_NON_UNIFORM.
 *
 * Returns whatever enter_waterfall() returns; the callers in this file use it
 * as the dynamic_index argument of the descriptor/coordinate helpers.
 */
static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
- struct waterfall_context *wctx,
- const nir_intrinsic_instr *instr)
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr)
{
- nir_deref_instr *deref_instr = NULL;
+ nir_deref_instr *deref_instr = NULL; /* stays NULL when src[0] does not come from a deref */
- if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
- deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
+ deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
- LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
- return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+ LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
+ return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
/* Emit LLVM IR for an image load intrinsic and return the loaded value.
 *
 * bindless selects where the image dimension and array-ness come from: the
 * intrinsic's own indices when true, the image deref's GLSL type otherwise
 * (in which case the variable's access qualifiers are OR-ed into access).
 *
 * - GLSL_SAMPLER_DIM_BUF: a formatted buffer load indexed by element 0 of
 *   src[1]. num_channels is util_last_bit() of the mask of destination
 *   components that are read; the result is expanded to vec4, trimmed to the
 *   destination's component count and converted to integer.
 * - Any other dimension: an image load with dmask 15, using ac_image_load
 *   when src[3] is the constant 0 and ac_image_load_mip with src[3] as LOD
 *   otherwise.
 *
 * On both paths "destination bit size == 16" is forwarded (last argument of
 * ac_build_buffer_load_format() / args.d16). The whole sequence is bracketed
 * by enter_waterfall_image() and exit_waterfall(), whose result is returned.
 */
-static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
+static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
+ bool bindless)
{
- LLVMValueRef res;
-
- enum glsl_sampler_dim dim;
- enum gl_access_qualifier access = nir_intrinsic_access(instr);
- bool is_array;
- if (bindless) {
- dim = nir_intrinsic_image_dim(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const nir_deref_instr *image_deref = get_image_deref(instr);
- const struct glsl_type *type = image_deref->type;
- const nir_variable *var = nir_deref_instr_get_variable(image_deref);
- dim = glsl_get_sampler_dim(type);
- access |= var->data.access;
- is_array = glsl_sampler_type_is_array(type);
- }
-
- struct waterfall_context wctx;
- LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
-
- struct ac_image_args args = {};
-
- args.cache_policy = get_cache_policy(ctx, access, false, false);
-
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
- unsigned num_channels = util_last_bit(mask);
- LLVMValueRef rsrc, vindex;
-
- rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
- vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
- ctx->ac.i32_0, "");
-
- assert(instr->dest.is_ssa);
- bool can_speculate = access & ACCESS_CAN_REORDER;
- res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
- ctx->ac.i32_0, num_channels,
- args.cache_policy,
- can_speculate,
- instr->dest.ssa.bit_size == 16);
- res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
-
- res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
- res = ac_to_integer(&ctx->ac, res);
- } else {
- bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
-
- args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
- args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
- get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- if (!level_zero)
- args.lod = get_src(ctx, instr->src[3]);
- args.dmask = 15;
- args.attributes = AC_FUNC_ATTR_READONLY;
-
- assert(instr->dest.is_ssa);
- args.d16 = instr->dest.ssa.bit_size == 16;
-
- res = ac_build_image_opcode(&ctx->ac, &args);
- }
- return exit_waterfall(ctx, &wctx, res);
+ LLVMValueRef res;
+
+ enum glsl_sampler_dim dim;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
+ bool is_array;
+ if (bindless) {
+ dim = nir_intrinsic_image_dim(instr);
+ is_array = nir_intrinsic_image_array(instr);
+ } else {
+ const nir_deref_instr *image_deref = get_image_deref(instr);
+ const struct glsl_type *type = image_deref->type;
+ const nir_variable *var = nir_deref_instr_get_variable(image_deref);
+ dim = glsl_get_sampler_dim(type);
+ access |= var->data.access; /* merge the variable's qualifiers into the intrinsic's */
+ is_array = glsl_sampler_type_is_array(type);
+ }
+
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
+ struct ac_image_args args = {};
+
+ args.cache_policy = get_cache_policy(ctx, access, false, false);
+
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ unsigned num_channels = util_last_bit(mask);
+ LLVMValueRef rsrc, vindex;
+
+ rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
+ vindex =
+ LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
+
+ assert(instr->dest.is_ssa);
+ bool can_speculate = access & ACCESS_CAN_REORDER;
+ res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels,
+ args.cache_policy, can_speculate,
+ instr->dest.ssa.bit_size == 16);
+ res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
+
+ res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
+ res = ac_to_integer(&ctx->ac, res);
+ } else {
+ bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
+
+ args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
+ args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ if (!level_zero)
+ args.lod = get_src(ctx, instr->src[3]); /* LOD is only supplied for the _mip opcode */
+ args.dmask = 15; /* 0xf: all four mask bits set */
+ args.attributes = AC_FUNC_ATTR_READONLY;
+
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
+ res = ac_build_image_opcode(&ctx->ac, &args);
+ }
+ return exit_waterfall(ctx, &wctx, res);
}
/* Emit LLVM IR for an image store intrinsic.
 *
 * If ctx->ac.postponed_kill is set, its loaded value is used as the condition
 * of an if-block (label 7003) that wraps the whole store.
 *
 * bindless selects where the image dimension and array-ness come from: the
 * intrinsic's own indices when true, the image deref's GLSL type otherwise
 * (in which case the variable's access qualifiers are OR-ed into access).
 * ACCESS_NON_READABLE in the merged qualifiers is passed to
 * get_cache_policy() as writeonly_memory.
 *
 * - GLSL_SAMPLER_DIM_BUF: src[3] is converted to float (3-component data is
 *   expanded to vec4) and written with ac_build_buffer_store_format(),
 *   indexed by element 0 of src[1].
 * - Any other dimension: an image store with dmask 15, using ac_image_store
 *   when src[4] is the constant 0 and ac_image_store_mip with src[4] as LOD
 *   otherwise; d16 is set when the data's element size is 16 bits.
 */
-static void visit_image_store(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
+static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
+ bool bindless)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7003);
- }
-
- enum glsl_sampler_dim dim;
- enum gl_access_qualifier access = nir_intrinsic_access(instr);
- bool is_array;
-
- if (bindless) {
- dim = nir_intrinsic_image_dim(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const nir_deref_instr *image_deref = get_image_deref(instr);
- const struct glsl_type *type = image_deref->type;
- const nir_variable *var = nir_deref_instr_get_variable(image_deref);
- dim = glsl_get_sampler_dim(type);
- access |= var->data.access;
- is_array = glsl_sampler_type_is_array(type);
- }
-
- struct waterfall_context wctx;
- LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
-
- bool writeonly_memory = access & ACCESS_NON_READABLE;
- struct ac_image_args args = {};
-
- args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
-
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
- LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
- unsigned src_channels = ac_get_llvm_num_components(src);
- LLVMValueRef vindex;
-
- if (src_channels == 3)
- src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
-
- vindex = LLVMBuildExtractElement(ctx->ac.builder,
- get_src(ctx, instr->src[1]),
- ctx->ac.i32_0, "");
-
- ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
- ctx->ac.i32_0, args.cache_policy);
- } else {
- bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
-
- args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
- args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
- args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- if (!level_zero)
- args.lod = get_src(ctx, instr->src[4]);
- args.dmask = 15;
- args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
-
- ac_build_image_opcode(&ctx->ac, &args);
- }
-
- exit_waterfall(ctx, &wctx, NULL);
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7003);
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7003); /* closed by ac_build_endif(7003) at the end */
+ }
+
+ enum glsl_sampler_dim dim;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
+ bool is_array;
+
+ if (bindless) {
+ dim = nir_intrinsic_image_dim(instr);
+ is_array = nir_intrinsic_image_array(instr);
+ } else {
+ const nir_deref_instr *image_deref = get_image_deref(instr);
+ const struct glsl_type *type = image_deref->type;
+ const nir_variable *var = nir_deref_instr_get_variable(image_deref);
+ dim = glsl_get_sampler_dim(type);
+ access |= var->data.access; /* merge the variable's qualifiers into the intrinsic's */
+ is_array = glsl_sampler_type_is_array(type);
+ }
+
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
+ bool writeonly_memory = access & ACCESS_NON_READABLE;
+ struct ac_image_args args = {};
+
+ args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
+
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
+ unsigned src_channels = ac_get_llvm_num_components(src);
+ LLVMValueRef vindex;
+
+ if (src_channels == 3)
+ src = ac_build_expand_to_vec4(&ctx->ac, src, 3); /* 3-component data is widened to vec4 */
+
+ vindex =
+ LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
+
+ ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy);
+ } else {
+ bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
+
+ args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
+ args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
+ args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ if (!level_zero)
+ args.lod = get_src(ctx, instr->src[4]); /* LOD is only supplied for the _mip opcode */
+ args.dmask = 15; /* 0xf: all four mask bits set */
+ args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
+
+ ac_build_image_opcode(&ctx->ac, &args);
+ }
+
+ exit_waterfall(ctx, &wctx, NULL); /* a store yields no value to carry out of the waterfall */
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7003);
}
/* Emit LLVM IR for an image atomic intrinsic and return its result.
 *
 * If ctx->ac.postponed_kill is set, its loaded value is used as the condition
 * of an if-block (label 7004) that wraps the whole operation.
 *
 * The switch maps each bindless/deref intrinsic pair to the name fragment
 * used in the buffer intrinsic string (atomic_name) and to the ac_atomic_op
 * used on the image path (atomic_subop); any other intrinsic aborts.
 *
 * Operands: src[3] is the data operand; for compare-and-swap src[4] is pushed
 * into params[] ahead of it.
 *
 * - GLSL_SAMPLER_DIM_BUF: the remaining params are the buffer descriptor,
 *   vindex (element 0 of src[1]), voffset and, depending on
 *   LLVM_VERSION_MAJOR, either soffset + slc (struct.buffer.atomic, >= 9) or
 *   a single i1 slc (legacy buffer.atomic). The call returns an i32.
 * - Any other dimension: an ac_image_args based ac_image_atomic /
 *   ac_image_atomic_cmpswap.
 *
 * For bindless min/max atomics the image format is asserted to be GL_R32UI or
 * GL_R32I.
 */
-static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
+static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
+ bool bindless)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7004);
- }
-
- LLVMValueRef params[7];
- int param_count = 0;
-
- bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
- const char *atomic_name;
- char intrinsic_name[64];
- enum ac_atomic_op atomic_subop;
- ASSERTED int length;
-
- enum glsl_sampler_dim dim;
- bool is_array;
- if (bindless) {
- if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) {
- ASSERTED const GLenum format = nir_intrinsic_format(instr);
- assert(format == GL_R32UI || format == GL_R32I);
- }
- dim = nir_intrinsic_image_dim(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const struct glsl_type *type = get_image_deref(instr)->type;
- dim = glsl_get_sampler_dim(type);
- is_array = glsl_sampler_type_is_array(type);
- }
-
- struct waterfall_context wctx;
- LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
-
- switch (instr->intrinsic) {
- case nir_intrinsic_bindless_image_atomic_add:
- case nir_intrinsic_image_deref_atomic_add:
- atomic_name = "add";
- atomic_subop = ac_atomic_add;
- break;
- case nir_intrinsic_bindless_image_atomic_imin:
- case nir_intrinsic_image_deref_atomic_imin:
- atomic_name = "smin";
- atomic_subop = ac_atomic_smin;
- break;
- case nir_intrinsic_bindless_image_atomic_umin:
- case nir_intrinsic_image_deref_atomic_umin:
- atomic_name = "umin";
- atomic_subop = ac_atomic_umin;
- break;
- case nir_intrinsic_bindless_image_atomic_imax:
- case nir_intrinsic_image_deref_atomic_imax:
- atomic_name = "smax";
- atomic_subop = ac_atomic_smax;
- break;
- case nir_intrinsic_bindless_image_atomic_umax:
- case nir_intrinsic_image_deref_atomic_umax:
- atomic_name = "umax";
- atomic_subop = ac_atomic_umax;
- break;
- case nir_intrinsic_bindless_image_atomic_and:
- case nir_intrinsic_image_deref_atomic_and:
- atomic_name = "and";
- atomic_subop = ac_atomic_and;
- break;
- case nir_intrinsic_bindless_image_atomic_or:
- case nir_intrinsic_image_deref_atomic_or:
- atomic_name = "or";
- atomic_subop = ac_atomic_or;
- break;
- case nir_intrinsic_bindless_image_atomic_xor:
- case nir_intrinsic_image_deref_atomic_xor:
- atomic_name = "xor";
- atomic_subop = ac_atomic_xor;
- break;
- case nir_intrinsic_bindless_image_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_exchange:
- atomic_name = "swap";
- atomic_subop = ac_atomic_swap;
- break;
- case nir_intrinsic_bindless_image_atomic_comp_swap:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- atomic_name = "cmpswap";
- atomic_subop = 0; /* not used */
- break;
- case nir_intrinsic_bindless_image_atomic_inc_wrap:
- case nir_intrinsic_image_deref_atomic_inc_wrap: {
- atomic_name = "inc";
- atomic_subop = ac_atomic_inc_wrap;
- break;
- }
- case nir_intrinsic_bindless_image_atomic_dec_wrap:
- case nir_intrinsic_image_deref_atomic_dec_wrap:
- atomic_name = "dec";
- atomic_subop = ac_atomic_dec_wrap;
- break;
- default:
- abort();
- }
-
- if (cmpswap)
- params[param_count++] = get_src(ctx, instr->src[4]);
- params[param_count++] = get_src(ctx, instr->src[3]);
-
- LLVMValueRef result;
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
- params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
- ctx->ac.i32_0, ""); /* vindex */
- params[param_count++] = ctx->ac.i32_0; /* voffset */
- if (LLVM_VERSION_MAJOR >= 9) {
- /* XXX: The new raw/struct atomic intrinsics are buggy
- * with LLVM 8, see r358579.
- */
- params[param_count++] = ctx->ac.i32_0; /* soffset */
- params[param_count++] = ctx->ac.i32_0; /* slc */
-
- length = snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
- } else {
- params[param_count++] = ctx->ac.i1false; /* slc */
-
- length = snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.buffer.atomic.%s", atomic_name);
- }
-
- assert(length < sizeof(intrinsic_name));
- result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
- params, param_count, 0);
- } else {
- struct ac_image_args args = {};
- args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
- args.atomic = atomic_subop;
- args.data[0] = params[0];
- if (cmpswap)
- args.data[1] = params[1];
- args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
-
- result = ac_build_image_opcode(&ctx->ac, &args);
- }
-
- result = exit_waterfall(ctx, &wctx, result);
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7004);
- return result;
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7004); /* closed by ac_build_endif(7004) at the end */
+ }
+
+ LLVMValueRef params[7];
+ int param_count = 0;
+
+ bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
+ instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
+ const char *atomic_name;
+ char intrinsic_name[64];
+ enum ac_atomic_op atomic_subop;
+ ASSERTED int length;
+
+ enum glsl_sampler_dim dim;
+ bool is_array;
+ if (bindless) {
+ if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin ||
+ instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin ||
+ instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax ||
+ instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) {
+ ASSERTED const GLenum format = nir_intrinsic_format(instr);
+ assert(format == GL_R32UI || format == GL_R32I);
+ }
+ dim = nir_intrinsic_image_dim(instr);
+ is_array = nir_intrinsic_image_array(instr);
+ } else {
+ const struct glsl_type *type = get_image_deref(instr)->type;
+ dim = glsl_get_sampler_dim(type);
+ is_array = glsl_sampler_type_is_array(type);
+ }
+
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_bindless_image_atomic_add:
+ case nir_intrinsic_image_deref_atomic_add:
+ atomic_name = "add";
+ atomic_subop = ac_atomic_add;
+ break;
+ case nir_intrinsic_bindless_image_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_imin:
+ atomic_name = "smin";
+ atomic_subop = ac_atomic_smin;
+ break;
+ case nir_intrinsic_bindless_image_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ atomic_name = "umin";
+ atomic_subop = ac_atomic_umin;
+ break;
+ case nir_intrinsic_bindless_image_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_imax:
+ atomic_name = "smax";
+ atomic_subop = ac_atomic_smax;
+ break;
+ case nir_intrinsic_bindless_image_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ atomic_name = "umax";
+ atomic_subop = ac_atomic_umax;
+ break;
+ case nir_intrinsic_bindless_image_atomic_and:
+ case nir_intrinsic_image_deref_atomic_and:
+ atomic_name = "and";
+ atomic_subop = ac_atomic_and;
+ break;
+ case nir_intrinsic_bindless_image_atomic_or:
+ case nir_intrinsic_image_deref_atomic_or:
+ atomic_name = "or";
+ atomic_subop = ac_atomic_or;
+ break;
+ case nir_intrinsic_bindless_image_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_xor:
+ atomic_name = "xor";
+ atomic_subop = ac_atomic_xor;
+ break;
+ case nir_intrinsic_bindless_image_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ atomic_name = "swap";
+ atomic_subop = ac_atomic_swap;
+ break;
+ case nir_intrinsic_bindless_image_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ atomic_name = "cmpswap";
+ atomic_subop = 0; /* not used */
+ break;
+ case nir_intrinsic_bindless_image_atomic_inc_wrap:
+ case nir_intrinsic_image_deref_atomic_inc_wrap: {
+ atomic_name = "inc";
+ atomic_subop = ac_atomic_inc_wrap;
+ break;
+ }
+ case nir_intrinsic_bindless_image_atomic_dec_wrap:
+ case nir_intrinsic_image_deref_atomic_dec_wrap:
+ atomic_name = "dec";
+ atomic_subop = ac_atomic_dec_wrap;
+ break;
+ default:
+ abort(); /* intrinsic is not one of the image atomics listed above */
+ }
+
+ if (cmpswap)
+ params[param_count++] = get_src(ctx, instr->src[4]); /* cmpswap only: goes ahead of src[3] */
+ params[param_count++] = get_src(ctx, instr->src[3]);
+
+ LLVMValueRef result;
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
+ params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
+ ctx->ac.i32_0, ""); /* vindex */
+ params[param_count++] = ctx->ac.i32_0; /* voffset */
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* XXX: The new raw/struct atomic intrinsics are buggy
+ * with LLVM 8, see r358579.
+ */
+ params[param_count++] = ctx->ac.i32_0; /* soffset */
+ params[param_count++] = ctx->ac.i32_0; /* slc */
+
+ length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
+ } else {
+ params[param_count++] = ctx->ac.i1false; /* slc */
+
+ length = snprintf(intrinsic_name, sizeof(intrinsic_name), "llvm.amdgcn.buffer.atomic.%s",
+ atomic_name);
+ }
+
+ assert(length < sizeof(intrinsic_name)); /* the formatted name must fit the buffer */
+ result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
+ } else {
+ struct ac_image_args args = {};
+ args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
+ args.atomic = atomic_subop;
+ args.data[0] = params[0]; /* src[4] for cmpswap, src[3] otherwise */
+ if (cmpswap)
+ args.data[1] = params[1]; /* src[3] */
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
+ args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+
+ result = ac_build_image_opcode(&ctx->ac, &args);
+ }
+
+ result = exit_waterfall(ctx, &wctx, result);
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7004);
+ return result;
}
/* Emit LLVM IR that queries the sample count of an image.
 *
 * Fetches the AC_DESC_IMAGE descriptor for instr inside a waterfall section
 * and returns the value built by ac_build_image_get_sample_count(), passed
 * through exit_waterfall().
 */
-static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- struct waterfall_context wctx;
- LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+ LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
- LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
+ LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
- return exit_waterfall(ctx, &wctx, ret);
+ return exit_waterfall(ctx, &wctx, ret);
}
/* Emit LLVM IR for an image size query and return the size value.
 *
 * bindless selects where the image dimension and array-ness come from: the
 * intrinsic's own indices when true, the image deref's GLSL type otherwise.
 *
 * - GLSL_SAMPLER_DIM_BUF: the result is get_buffer_size() applied to the
 *   AC_DESC_BUFFER descriptor.
 * - Any other dimension: an ac_image_get_resinfo query with LOD 0 and dmask
 *   0xf (src[1] is asserted to be 0), followed by two fix-ups of the result
 *   vector:
 *     - cube arrays: element 2 is divided by 6 (signed division);
 *     - 1D arrays on GFX9: element 2 (named layers below) is copied into
 *       element 1.
 *
 * The whole sequence is bracketed by enter_waterfall_image() and
 * exit_waterfall(), whose result is returned.
 */
-static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
+static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
+ bool bindless)
{
- LLVMValueRef res;
-
- enum glsl_sampler_dim dim;
- bool is_array;
- if (bindless) {
- dim = nir_intrinsic_image_dim(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const struct glsl_type *type = get_image_deref(instr)->type;
- dim = glsl_get_sampler_dim(type);
- is_array = glsl_sampler_type_is_array(type);
- }
-
- struct waterfall_context wctx;
- LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
-
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- res = get_buffer_size(ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
- } else {
-
- struct ac_image_args args = { 0 };
-
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- args.dmask = 0xf;
- args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
- args.opcode = ac_image_get_resinfo;
- assert(nir_src_as_uint(instr->src[1]) == 0);
- args.lod = ctx->ac.i32_0;
- args.attributes = AC_FUNC_ATTR_READNONE;
-
- res = ac_build_image_opcode(&ctx->ac, &args);
-
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
-
- if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
- LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
- }
-
- if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
- ctx->ac.i32_1, "");
- }
- }
- return exit_waterfall(ctx, &wctx, res);
+ LLVMValueRef res;
+
+ enum glsl_sampler_dim dim;
+ bool is_array;
+ if (bindless) {
+ dim = nir_intrinsic_image_dim(instr);
+ is_array = nir_intrinsic_image_array(instr);
+ } else {
+ const struct glsl_type *type = get_image_deref(instr)->type;
+ dim = glsl_get_sampler_dim(type);
+ is_array = glsl_sampler_type_is_array(type);
+ }
+
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ res = get_buffer_size(
+ ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
+ } else {
+
+ struct ac_image_args args = {0};
+
+ args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ args.dmask = 0xf;
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ args.opcode = ac_image_get_resinfo;
+ assert(nir_src_as_uint(instr->src[1]) == 0); /* only src[1] == 0 is handled; LOD is fixed to 0 */
+ args.lod = ctx->ac.i32_0;
+ args.attributes = AC_FUNC_ATTR_READNONE;
+
+ res = ac_build_image_opcode(&ctx->ac, &args);
+
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+
+ if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); /* element 2 divided by 6 for cube arrays */
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
+ }
+
+ if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, ""); /* element 2 -> element 1 */
+ }
+ }
+ return exit_waterfall(ctx, &wctx, res);
}
/* Emit the wait for a NIR memory-barrier intrinsic.
 *
 * Maps instr->intrinsic to a set of AC_WAIT_* flags and hands it to
 * ac_build_waitcnt():
 *   memory_barrier, group_memory_barrier  -> LGKM | VLOAD | VSTORE
 *   memory_barrier_buffer, _image         -> VLOAD | VSTORE
 *   memory_barrier_shared                 -> LGKM
 * Any other intrinsic leaves the flags at 0; ac_build_waitcnt() is still
 * called in that case.
 */
-static void emit_membar(struct ac_llvm_context *ac,
- const nir_intrinsic_instr *instr)
+static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr)
{
- unsigned wait_flags = 0;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_memory_barrier:
- case nir_intrinsic_group_memory_barrier:
- wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
- break;
- case nir_intrinsic_memory_barrier_buffer:
- case nir_intrinsic_memory_barrier_image:
- wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
- break;
- case nir_intrinsic_memory_barrier_shared:
- wait_flags = AC_WAIT_LGKM;
- break;
- default:
- break;
- }
-
- ac_build_waitcnt(ac, wait_flags);
+ unsigned wait_flags = 0;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ break;
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ break;
+ case nir_intrinsic_memory_barrier_shared:
+ wait_flags = AC_WAIT_LGKM;
+ break;
+ default:
+ break;
+ }
+
+ ac_build_waitcnt(ac, wait_flags); /* wait_flags is still 0 for intrinsics not listed above */
}
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
{
- /* GFX6 only (thanks to a hw bug workaround):
- * The real barrier instruction isn’t needed, because an entire patch
- * always fits into a single wave.
- */
- if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
- ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
- return;
- }
- ac_build_s_barrier(ac);
+ /* GFX6 only (thanks to a hw bug workaround):
+ * The real barrier instruction isn’t needed, because an entire patch
+ * always fits into a single wave.
+ */
+ if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
+ ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
+ return;
+ }
+ ac_build_s_barrier(ac);
}
-static void emit_discard(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
- LLVMValueRef cond;
-
- if (instr->intrinsic == nir_intrinsic_discard_if) {
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- get_src(ctx, instr->src[0]),
- ctx->ac.i32_0, "");
- } else {
- assert(instr->intrinsic == nir_intrinsic_discard);
- cond = ctx->ac.i1false;
- }
-
- ac_build_kill_if_false(&ctx->ac, cond);
+ LLVMValueRef cond;
+
+ if (instr->intrinsic == nir_intrinsic_discard_if) {
+ cond =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, get_src(ctx, instr->src[0]), ctx->ac.i32_0, "");
+ } else {
+ assert(instr->intrinsic == nir_intrinsic_discard);
+ cond = ctx->ac.i1false;
+ }
+
+ ac_build_kill_if_false(&ctx->ac, cond);
}
-static void emit_demote(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
- LLVMValueRef cond;
-
- if (instr->intrinsic == nir_intrinsic_demote_if) {
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- get_src(ctx, instr->src[0]),
- ctx->ac.i32_0, "");
- } else {
- assert(instr->intrinsic == nir_intrinsic_demote);
- cond = ctx->ac.i1false;
- }
-
- /* Kill immediately while maintaining WQM. */
- ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
-
- LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
- mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
- LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
- return;
+ LLVMValueRef cond;
+
+ if (instr->intrinsic == nir_intrinsic_demote_if) {
+ cond =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, get_src(ctx, instr->src[0]), ctx->ac.i32_0, "");
+ } else {
+ assert(instr->intrinsic == nir_intrinsic_demote);
+ cond = ctx->ac.i1false;
+ }
+
+ /* Kill immediately while maintaining WQM. */
+ ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
+
+ LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
+ LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
+ return;
}
-static LLVMValueRef
-visit_load_local_invocation_index(struct ac_nir_context *ctx)
+static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)
{
- LLVMValueRef result;
- LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
- result = LLVMBuildAnd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->tg_size),
- LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
+ LLVMValueRef result;
+ LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
+ result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
+ LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
- if (ctx->ac.wave_size == 32)
- result = LLVMBuildLShr(ctx->ac.builder, result,
- LLVMConstInt(ctx->ac.i32, 1, false), "");
+ if (ctx->ac.wave_size == 32)
+ result = LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 1, false), "");
- return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
+ return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
}
-static LLVMValueRef
-visit_load_subgroup_id(struct ac_nir_context *ctx)
+static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)
{
- if (ctx->stage == MESA_SHADER_COMPUTE) {
- LLVMValueRef result;
- result = LLVMBuildAnd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->tg_size),
- LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
- return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
- } else {
- return LLVMConstInt(ctx->ac.i32, 0, false);
- }
+ if (ctx->stage == MESA_SHADER_COMPUTE) {
+ LLVMValueRef result;
+ result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
+ LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
+ return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
+ } else {
+ return LLVMConstInt(ctx->ac.i32, 0, false);
+ }
}
-static LLVMValueRef
-visit_load_num_subgroups(struct ac_nir_context *ctx)
+static LLVMValueRef visit_load_num_subgroups(struct ac_nir_context *ctx)
{
- if (ctx->stage == MESA_SHADER_COMPUTE) {
- return LLVMBuildAnd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->tg_size),
- LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
- } else {
- return LLVMConstInt(ctx->ac.i32, 1, false);
- }
+ if (ctx->stage == MESA_SHADER_COMPUTE) {
+ return LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
+ LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
+ } else {
+ return LLVMConstInt(ctx->ac.i32, 1, false);
+ }
}
-static LLVMValueRef
-visit_first_invocation(struct ac_nir_context *ctx)
+static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx)
{
- LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
- const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
+ LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
+ const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
- /* The second argument is whether cttz(0) should be defined, but we do not care. */
- LLVMValueRef args[] = {active_set, ctx->ac.i1false};
- LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr,
- ctx->ac.iN_wavemask, args, 2,
- AC_FUNC_ATTR_NOUNWIND |
- AC_FUNC_ATTR_READNONE);
+ /* The second argument is whether cttz(0) should be defined, but we do not care. */
+ LLVMValueRef args[] = {active_set, ctx->ac.i1false};
+ LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, ctx->ac.iN_wavemask, args, 2,
+ AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE);
- return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
+ return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
}
-static LLVMValueRef
-visit_load_shared(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+static LLVMValueRef visit_load_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
- LLVMValueRef values[4], derived_ptr, index, ret;
+ LLVMValueRef values[4], derived_ptr, index, ret;
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
- instr->dest.ssa.bit_size);
+ LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size);
- for (int chan = 0; chan < instr->num_components; chan++) {
- index = LLVMConstInt(ctx->ac.i32, chan, 0);
- derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
- values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
- }
+ for (int chan = 0; chan < instr->num_components; chan++) {
+ index = LLVMConstInt(ctx->ac.i32, chan, 0);
+ derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
+ values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
+ }
- ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
- return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+ ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
+ return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}
-static void
-visit_store_shared(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+static void visit_store_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
- LLVMValueRef derived_ptr, data,index;
- LLVMBuilderRef builder = ctx->ac.builder;
-
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1],
- instr->src[0].ssa->bit_size);
- LLVMValueRef src = get_src(ctx, instr->src[0]);
-
- int writemask = nir_intrinsic_write_mask(instr);
- for (int chan = 0; chan < 4; chan++) {
- if (!(writemask & (1 << chan))) {
- continue;
- }
- data = ac_llvm_extract_elem(&ctx->ac, src, chan);
- index = LLVMConstInt(ctx->ac.i32, chan, 0);
- derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
- LLVMBuildStore(builder, data, derived_ptr);
- }
+ LLVMValueRef derived_ptr, data, index;
+ LLVMBuilderRef builder = ctx->ac.builder;
+
+ LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size);
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+
+ int writemask = nir_intrinsic_write_mask(instr);
+ for (int chan = 0; chan < 4; chan++) {
+ if (!(writemask & (1 << chan))) {
+ continue;
+ }
+ data = ac_llvm_extract_elem(&ctx->ac, src, chan);
+ index = LLVMConstInt(ctx->ac.i32, chan, 0);
+ derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+ LLVMBuildStore(builder, data, derived_ptr);
+ }
}
-static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- LLVMValueRef ptr, int src_idx)
+static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
+ LLVMValueRef ptr, int src_idx)
{
- if (ctx->ac.postponed_kill) {
- LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
- ctx->ac.postponed_kill, "");
- ac_build_ifcc(&ctx->ac, cond, 7005);
- }
-
- LLVMValueRef result;
- LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
-
- const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
-
- if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) {
- nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
- if (deref->mode == nir_var_mem_global) {
- /* use "singlethread" sync scope to implement relaxed ordering */
- sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread";
-
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type , "");
- }
- }
-
- if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
- instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
- LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
- result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
- result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
- } else {
- LLVMAtomicRMWBinOp op;
- switch (instr->intrinsic) {
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_deref_atomic_add:
- op = LLVMAtomicRMWBinOpAdd;
- break;
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_deref_atomic_umin:
- op = LLVMAtomicRMWBinOpUMin;
- break;
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_deref_atomic_umax:
- op = LLVMAtomicRMWBinOpUMax;
- break;
- case nir_intrinsic_shared_atomic_imin:
- case nir_intrinsic_deref_atomic_imin:
- op = LLVMAtomicRMWBinOpMin;
- break;
- case nir_intrinsic_shared_atomic_imax:
- case nir_intrinsic_deref_atomic_imax:
- op = LLVMAtomicRMWBinOpMax;
- break;
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_deref_atomic_and:
- op = LLVMAtomicRMWBinOpAnd;
- break;
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_deref_atomic_or:
- op = LLVMAtomicRMWBinOpOr;
- break;
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_deref_atomic_xor:
- op = LLVMAtomicRMWBinOpXor;
- break;
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_deref_atomic_exchange:
- op = LLVMAtomicRMWBinOpXchg;
- break;
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7005);
+ }
+
+ LLVMValueRef result;
+ LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
+
+ const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
+
+ if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) {
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ if (deref->mode == nir_var_mem_global) {
+ /* use "singlethread" sync scope to implement relaxed ordering */
+ sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread";
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
+ }
+ }
+
+ if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
+ instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
+ LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
+ result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
+ result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
+ } else {
+ LLVMAtomicRMWBinOp op;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_deref_atomic_add:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_deref_atomic_umin:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_deref_atomic_umax:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_deref_atomic_imin:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_deref_atomic_imax:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_deref_atomic_and:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_deref_atomic_or:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_deref_atomic_xor:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_deref_atomic_exchange:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
#if LLVM_VERSION_MAJOR >= 10
- case nir_intrinsic_shared_atomic_fadd:
- case nir_intrinsic_deref_atomic_fadd:
- op = LLVMAtomicRMWBinOpFAdd;
- break;
+ case nir_intrinsic_shared_atomic_fadd:
+ case nir_intrinsic_deref_atomic_fadd:
+ op = LLVMAtomicRMWBinOpFAdd;
+ break;
#endif
- default:
- return NULL;
- }
+ default:
+ return NULL;
+ }
- LLVMValueRef val;
+ LLVMValueRef val;
- if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
- instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
- val = ac_to_float(&ctx->ac, src);
- } else {
- val = ac_to_integer(&ctx->ac, src);
- }
+ if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
+ instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
+ val = ac_to_float(&ctx->ac, src);
+ } else {
+ val = ac_to_integer(&ctx->ac, src);
+ }
- result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
- }
+ result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
+ }
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7005);
- return result;
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7005);
+ return result;
}
static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
{
- LLVMValueRef values[2];
- LLVMValueRef pos[2];
+ LLVMValueRef values[2];
+ LLVMValueRef pos[2];
- pos[0] = ac_to_float(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
- pos[1] = ac_to_float(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));
+ pos[0] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
+ pos[1] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));
- values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
- values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
- return ac_build_gather_values(&ctx->ac, values, 2);
+ values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
+ values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
+ return ac_build_gather_values(&ctx->ac, values, 2);
}
-static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx,
- enum glsl_interp_mode interp, unsigned location)
+static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, enum glsl_interp_mode interp,
+ unsigned location)
{
- switch (interp) {
- case INTERP_MODE_FLAT:
- default:
- return NULL;
- case INTERP_MODE_SMOOTH:
- case INTERP_MODE_NONE:
- if (location == INTERP_CENTER)
- return ac_get_arg(&ctx->ac, ctx->args->persp_center);
- else if (location == INTERP_CENTROID)
- return ctx->abi->persp_centroid;
- else if (location == INTERP_SAMPLE)
- return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
- break;
- case INTERP_MODE_NOPERSPECTIVE:
- if (location == INTERP_CENTER)
- return ac_get_arg(&ctx->ac, ctx->args->linear_center);
- else if (location == INTERP_CENTROID)
- return ctx->abi->linear_centroid;
- else if (location == INTERP_SAMPLE)
- return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
- break;
- }
- return NULL;
+ switch (interp) {
+ case INTERP_MODE_FLAT:
+ default:
+ return NULL;
+ case INTERP_MODE_SMOOTH:
+ case INTERP_MODE_NONE:
+ if (location == INTERP_CENTER)
+ return ac_get_arg(&ctx->ac, ctx->args->persp_center);
+ else if (location == INTERP_CENTROID)
+ return ctx->abi->persp_centroid;
+ else if (location == INTERP_SAMPLE)
+ return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
+ break;
+ case INTERP_MODE_NOPERSPECTIVE:
+ if (location == INTERP_CENTER)
+ return ac_get_arg(&ctx->ac, ctx->args->linear_center);
+ else if (location == INTERP_CENTROID)
+ return ctx->abi->linear_centroid;
+ else if (location == INTERP_SAMPLE)
+ return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
+ break;
+ }
+ return NULL;
}
-static LLVMValueRef barycentric_center(struct ac_nir_context *ctx,
- unsigned mode)
+static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, unsigned mode)
{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
+ LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
+ return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
}
-static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx,
- unsigned mode,
- LLVMValueRef offset)
+static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, unsigned mode,
+ LLVMValueRef offset)
{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
- LLVMValueRef src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
- LLVMValueRef src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
-
- LLVMValueRef ij_out[2];
- LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
-
- /*
- * take the I then J parameters, and the DDX/Y for it, and
- * calculate the IJ inputs for the interpolator.
- * temp1 = ddx * offset/sample.x + I;
- * interp_param.I = ddy * offset/sample.y + temp1;
- * temp1 = ddx * offset/sample.x + J;
- * interp_param.J = ddy * offset/sample.y + temp1;
- */
- for (unsigned i = 0; i < 2; i++) {
- LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
- LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
- LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, ix_ll, "");
- LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, iy_ll, "");
- LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
- interp_param, ix_ll, "");
- LLVMValueRef temp1, temp2;
-
- interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
- ctx->ac.f32, "");
-
- temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
- temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
-
- ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
- temp2, ctx->ac.i32, "");
- }
- interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
+ LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
+ LLVMValueRef src_c0 =
+ ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
+ LLVMValueRef src_c1 =
+ ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
+
+ LLVMValueRef ij_out[2];
+ LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
+
+ /*
+ * take the I then J parameters, and the DDX/Y for it, and
+ * calculate the IJ inputs for the interpolator.
+ * temp1 = ddx * offset/sample.x + I;
+ * interp_param.I = ddy * offset/sample.y + temp1;
+ * temp1 = ddx * offset/sample.x + J;
+ * interp_param.J = ddy * offset/sample.y + temp1;
+ */
+ for (unsigned i = 0; i < 2; i++) {
+ LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
+ LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
+ LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, "");
+ LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, "");
+ LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, "");
+ LLVMValueRef temp1, temp2;
+
+ interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, "");
+
+ temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
+ temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
+
+ ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, "");
+ }
+ interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
+ return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
}
-static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx,
- unsigned mode)
+static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, unsigned mode)
{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
+ LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
+ return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
}
-static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx,
- unsigned mode,
- LLVMValueRef sample_id)
+static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, unsigned mode,
+ LLVMValueRef sample_id)
{
- if (ctx->abi->interp_at_sample_force_center)
- return barycentric_center(ctx, mode);
+ if (ctx->abi->interp_at_sample_force_center)
+ return barycentric_center(ctx, mode);
- LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
+ LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
- /* fetch sample ID */
- LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
+ /* fetch sample ID */
+ LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
- LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
- src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
- LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
- src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
- LLVMValueRef coords[] = { src_c0, src_c1 };
- LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
+ LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
+ src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
+ LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
+ src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
+ LLVMValueRef coords[] = {src_c0, src_c1};
+ LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
- return barycentric_offset(ctx, mode, offset);
+ return barycentric_offset(ctx, mode, offset);
}
-
-static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx,
- unsigned mode)
+static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, unsigned mode)
{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
+ LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
+ return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
}
static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
{
- return LLVMBuildBitCast(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->pull_model),
- ctx->ac.v3i32, "");
+ return LLVMBuildBitCast(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->pull_model),
+ ctx->ac.v3i32, "");
}
-static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
- LLVMValueRef interp_param,
- unsigned index, unsigned comp_start,
- unsigned num_components,
- unsigned bitsize)
+static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValueRef interp_param,
+ unsigned index, unsigned comp_start,
+ unsigned num_components, unsigned bitsize)
{
- LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
- LLVMValueRef interp_param_f;
-
- interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
- interp_param, ctx->ac.v2f32, "");
- LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
- LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
-
- /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
- if (ctx->verified_interp &&
- !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
- LLVMValueRef args[2];
- args[0] = i;
- args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
- LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
- args, 2, AC_FUNC_ATTR_READNONE);
- ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
- _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
- }
-
- LLVMValueRef values[4];
- assert(bitsize == 16 || bitsize == 32);
- for (unsigned comp = 0; comp < num_components; comp++) {
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
- if (bitsize == 16) {
- values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
- ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
- } else {
- values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
- ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
- }
- }
-
- return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
+ LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ LLVMValueRef interp_param_f;
+
+ interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
+ LLVMValueRef i = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
+ LLVMValueRef j = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
+
+ /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
+ if (ctx->verified_interp && !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
+ LLVMValueRef args[2];
+ args[0] = i;
+ args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
+ LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, args, 2,
+ AC_FUNC_ATTR_READNONE);
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
+ _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
+ }
+
+ LLVMValueRef values[4];
+ assert(bitsize == 16 || bitsize == 32);
+ for (unsigned comp = 0; comp < num_components; comp++) {
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
+ if (bitsize == 16) {
+ values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
+ ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
+ } else {
+ values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
+ ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
+ }
+ }
+
+ return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
}
-static LLVMValueRef visit_load(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr, bool is_output)
+static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
+ bool is_output)
{
- LLVMValueRef values[8];
- LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
- LLVMTypeRef component_type;
- unsigned base = nir_intrinsic_base(instr);
- unsigned component = nir_intrinsic_component(instr);
- unsigned count = instr->dest.ssa.num_components *
- (instr->dest.ssa.bit_size == 64 ? 2 : 1);
- nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
- LLVMValueRef vertex_index =
- vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
- nir_src offset = *nir_get_io_offset_src(instr);
- LLVMValueRef indir_index = NULL;
-
- if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
- component_type = LLVMGetElementType(dest_type);
- else
- component_type = dest_type;
-
- if (nir_src_is_const(offset))
- assert(nir_src_as_uint(offset) == 0);
- else
- indir_index = get_src(ctx, offset);
-
- if (ctx->stage == MESA_SHADER_TESS_CTRL ||
- (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
- LLVMValueRef result =
- ctx->abi->load_tess_varyings(ctx->abi, component_type,
- vertex_index, indir_index,
- 0, 0, base * 4,
- component,
- instr->num_components,
- false, false, !is_output);
- if (instr->dest.ssa.bit_size == 16) {
- result = ac_to_integer(&ctx->ac, result);
- result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
- }
- return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
- }
-
- /* No indirect indexing is allowed after this point. */
- assert(!indir_index);
-
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- assert(nir_src_is_const(*vertex_index_src));
-
- return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component,
- instr->num_components,
- nir_src_as_uint(*vertex_index_src),
- 0, type);
- }
-
- if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
- nir_intrinsic_io_semantics(instr).fb_fetch_output)
- return ctx->abi->emit_fbfetch(ctx->abi);
-
- /* Other non-fragment cases have inputs and outputs in temporaries. */
- if (ctx->stage != MESA_SHADER_FRAGMENT) {
- for (unsigned chan = component; chan < count + component; chan++) {
- if (is_output) {
- values[chan] = LLVMBuildLoad(ctx->ac.builder,
- ctx->abi->outputs[base * 4 + chan], "");
- } else {
- values[chan] = ctx->abi->inputs[base * 4 + chan];
- if (!values[chan])
- values[chan] = LLVMGetUndef(ctx->ac.i32);
- }
- }
- LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
- return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
- }
-
- /* Fragment shader inputs. */
- unsigned vertex_id = 2; /* P0 */
-
- if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
- nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
-
- switch (src0[0].i32) {
- case 0:
- vertex_id = 2;
- break;
- case 1:
- vertex_id = 0;
- break;
- case 2:
- vertex_id = 1;
- break;
- default:
- unreachable("Invalid vertex index");
- }
- }
-
- LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
-
- for (unsigned chan = 0; chan < count; chan++) {
- if (component + chan > 4)
- attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false);
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
- values[chan] = ac_build_fs_interp_mov(&ctx->ac,
- LLVMConstInt(ctx->ac.i32, vertex_id, false),
- llvm_chan,
- attr_number,
- ac_get_arg(&ctx->ac, ctx->args->prim_mask));
- values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
- values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
- instr->dest.ssa.bit_size == 16 ? ctx->ac.i16
- : ctx->ac.i32, "");
- }
-
- LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
- return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+ LLVMValueRef values[8];
+ LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMTypeRef component_type;
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned count = instr->dest.ssa.num_components * (instr->dest.ssa.bit_size == 64 ? 2 : 1);
+ nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
+ LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
+ nir_src offset = *nir_get_io_offset_src(instr);
+ LLVMValueRef indir_index = NULL;
+
+ if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
+ component_type = LLVMGetElementType(dest_type);
+ else
+ component_type = dest_type;
+
+ if (nir_src_is_const(offset))
+ assert(nir_src_as_uint(offset) == 0);
+ else
+ indir_index = get_src(ctx, offset);
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL || (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
+ LLVMValueRef result = ctx->abi->load_tess_varyings(
+ ctx->abi, component_type, vertex_index, indir_index, 0, 0, base * 4, component,
+ instr->num_components, false, false, !is_output);
+ if (instr->dest.ssa.bit_size == 16) {
+ result = ac_to_integer(&ctx->ac, result);
+ result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
+ }
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+ }
+
+ /* No indirect indexing is allowed after this point. */
+ assert(!indir_index);
+
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ assert(nir_src_is_const(*vertex_index_src));
+
+ return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, instr->num_components,
+ nir_src_as_uint(*vertex_index_src), 0, type);
+ }
+
+ if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
+ nir_intrinsic_io_semantics(instr).fb_fetch_output)
+ return ctx->abi->emit_fbfetch(ctx->abi);
+
+ /* Other non-fragment cases have inputs and outputs in temporaries. */
+ if (ctx->stage != MESA_SHADER_FRAGMENT) {
+ for (unsigned chan = component; chan < count + component; chan++) {
+ if (is_output) {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->abi->outputs[base * 4 + chan], "");
+ } else {
+ values[chan] = ctx->abi->inputs[base * 4 + chan];
+ if (!values[chan])
+ values[chan] = LLVMGetUndef(ctx->ac.i32);
+ }
+ }
+ LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+ }
+
+ /* Fragment shader inputs. */
+ unsigned vertex_id = 2; /* P0 */
+
+ if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
+ nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
+
+ switch (src0[0].i32) {
+ case 0:
+ vertex_id = 2;
+ break;
+ case 1:
+ vertex_id = 0;
+ break;
+ case 2:
+ vertex_id = 1;
+ break;
+ default:
+ unreachable("Invalid vertex index");
+ }
+ }
+
+ LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
+
+ for (unsigned chan = 0; chan < count; chan++) {
+ if (component + chan > 4)
+ attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false);
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
+ values[chan] =
+ ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,
+ attr_number, ac_get_arg(&ctx->ac, ctx->args->prim_mask));
+ values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
+ values[chan] =
+ LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
+ instr->dest.ssa.bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
+ }
+
+ LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
-static void visit_intrinsic(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
- LLVMValueRef result = NULL;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_ballot:
- result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
- if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
- result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
- break;
- case nir_intrinsic_read_invocation:
- result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
- break;
- case nir_intrinsic_read_first_invocation:
- result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
- break;
- case nir_intrinsic_load_subgroup_invocation:
- result = ac_get_thread_id(&ctx->ac);
- break;
- case nir_intrinsic_load_work_group_id: {
- LLVMValueRef values[3];
-
- for (int i = 0; i < 3; i++) {
- values[i] = ctx->args->workgroup_ids[i].used ?
- ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) : ctx->ac.i32_0;
- }
-
- result = ac_build_gather_values(&ctx->ac, values, 3);
- break;
- }
- case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_first_vertex:
- result = ctx->abi->load_base_vertex(ctx->abi);
- break;
- case nir_intrinsic_load_local_group_size:
- result = ctx->abi->load_local_group_size(ctx->abi);
- break;
- case nir_intrinsic_load_vertex_id:
- result = LLVMBuildAdd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->vertex_id),
- ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
- break;
- case nir_intrinsic_load_vertex_id_zero_base: {
- result = ctx->abi->vertex_id;
- break;
- }
- case nir_intrinsic_load_local_invocation_id: {
- result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
- break;
- }
- case nir_intrinsic_load_base_instance:
- result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
- break;
- case nir_intrinsic_load_draw_id:
- result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
- break;
- case nir_intrinsic_load_view_index:
- result = ac_get_arg(&ctx->ac, ctx->args->view_index);
- break;
- case nir_intrinsic_load_invocation_id:
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- result = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids),
- 8, 5);
- } else {
- if (ctx->ac.chip_class >= GFX10) {
- result = LLVMBuildAnd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
- LLVMConstInt(ctx->ac.i32, 127, 0), "");
- } else {
- result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
- }
- }
- break;
- case nir_intrinsic_load_primitive_id:
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
- } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
- } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
- } else
- fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
- break;
- case nir_intrinsic_load_sample_id:
- result = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ancillary),
- 8, 4);
- break;
- case nir_intrinsic_load_sample_pos:
- result = load_sample_pos(ctx);
- break;
- case nir_intrinsic_load_sample_mask_in:
- result = ctx->abi->load_sample_mask_in(ctx->abi);
- break;
- case nir_intrinsic_load_frag_coord: {
- LLVMValueRef values[4] = {
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]),
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
- ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))
- };
- result = ac_to_integer(&ctx->ac,
- ac_build_gather_values(&ctx->ac, values, 4));
- break;
- }
- case nir_intrinsic_load_layer_id:
- result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
- break;
- case nir_intrinsic_load_front_face:
- result = ac_get_arg(&ctx->ac, ctx->args->front_face);
- break;
- case nir_intrinsic_load_helper_invocation:
- result = ac_build_load_helper_invocation(&ctx->ac);
- break;
- case nir_intrinsic_is_helper_invocation:
- result = ac_build_is_helper_invocation(&ctx->ac);
- break;
- case nir_intrinsic_load_color0:
- result = ctx->abi->color0;
- break;
- case nir_intrinsic_load_color1:
- result = ctx->abi->color1;
- break;
- case nir_intrinsic_load_user_data_amd:
- assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
- result = ctx->abi->user_data;
- break;
- case nir_intrinsic_load_instance_id:
- result = ctx->abi->instance_id;
- break;
- case nir_intrinsic_load_num_work_groups:
- result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
- break;
- case nir_intrinsic_load_local_invocation_index:
- result = visit_load_local_invocation_index(ctx);
- break;
- case nir_intrinsic_load_subgroup_id:
- result = visit_load_subgroup_id(ctx);
- break;
- case nir_intrinsic_load_num_subgroups:
- result = visit_load_num_subgroups(ctx);
- break;
- case nir_intrinsic_first_invocation:
- result = visit_first_invocation(ctx);
- break;
- case nir_intrinsic_load_push_constant:
- result = visit_load_push_constant(ctx, instr);
- break;
- case nir_intrinsic_vulkan_resource_index: {
- LLVMValueRef index = get_src(ctx, instr->src[0]);
- unsigned desc_set = nir_intrinsic_desc_set(instr);
- unsigned binding = nir_intrinsic_binding(instr);
-
- result = ctx->abi->load_resource(ctx->abi, index, desc_set,
- binding);
- break;
- }
- case nir_intrinsic_vulkan_resource_reindex:
- result = visit_vulkan_resource_reindex(ctx, instr);
- break;
- case nir_intrinsic_store_ssbo:
- visit_store_ssbo(ctx, instr);
- break;
- case nir_intrinsic_load_ssbo:
- result = visit_load_buffer(ctx, instr);
- break;
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- result = visit_atomic_ssbo(ctx, instr);
- break;
- case nir_intrinsic_load_ubo:
- result = visit_load_ubo_buffer(ctx, instr);
- break;
- case nir_intrinsic_get_buffer_size:
- result = visit_get_buffer_size(ctx, instr);
- break;
- case nir_intrinsic_load_deref:
- result = visit_load_var(ctx, instr);
- break;
- case nir_intrinsic_store_deref:
- visit_store_var(ctx, instr);
- break;
- case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_vertex:
- case nir_intrinsic_load_per_vertex_input:
- result = visit_load(ctx, instr, false);
- break;
- case nir_intrinsic_load_output:
- case nir_intrinsic_load_per_vertex_output:
- result = visit_load(ctx, instr, true);
- break;
- case nir_intrinsic_store_output:
- case nir_intrinsic_store_per_vertex_output:
- visit_store_output(ctx, instr);
- break;
- case nir_intrinsic_load_shared:
- result = visit_load_shared(ctx, instr);
- break;
- case nir_intrinsic_store_shared:
- visit_store_shared(ctx, instr);
- break;
- case nir_intrinsic_bindless_image_samples:
- case nir_intrinsic_image_deref_samples:
- result = visit_image_samples(ctx, instr);
- break;
- case nir_intrinsic_bindless_image_load:
- result = visit_image_load(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_load:
- result = visit_image_load(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_store:
- visit_image_store(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_store:
- visit_image_store(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_atomic_add:
- case nir_intrinsic_bindless_image_atomic_imin:
- case nir_intrinsic_bindless_image_atomic_umin:
- case nir_intrinsic_bindless_image_atomic_imax:
- case nir_intrinsic_bindless_image_atomic_umax:
- case nir_intrinsic_bindless_image_atomic_and:
- case nir_intrinsic_bindless_image_atomic_or:
- case nir_intrinsic_bindless_image_atomic_xor:
- case nir_intrinsic_bindless_image_atomic_exchange:
- case nir_intrinsic_bindless_image_atomic_comp_swap:
- case nir_intrinsic_bindless_image_atomic_inc_wrap:
- case nir_intrinsic_bindless_image_atomic_dec_wrap:
- result = visit_image_atomic(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- case nir_intrinsic_image_deref_atomic_inc_wrap:
- case nir_intrinsic_image_deref_atomic_dec_wrap:
- result = visit_image_atomic(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_size:
- result = visit_image_size(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_size:
- result = visit_image_size(ctx, instr, false);
- break;
- case nir_intrinsic_shader_clock:
- result = ac_build_shader_clock(&ctx->ac,
- nir_intrinsic_memory_scope(instr));
- break;
- case nir_intrinsic_discard:
- case nir_intrinsic_discard_if:
- emit_discard(ctx, instr);
- break;
- case nir_intrinsic_demote:
- case nir_intrinsic_demote_if:
- emit_demote(ctx, instr);
- break;
- case nir_intrinsic_memory_barrier:
- case nir_intrinsic_group_memory_barrier:
- case nir_intrinsic_memory_barrier_buffer:
- case nir_intrinsic_memory_barrier_image:
- case nir_intrinsic_memory_barrier_shared:
- emit_membar(&ctx->ac, instr);
- break;
- case nir_intrinsic_scoped_barrier: {
- assert(!(nir_intrinsic_memory_semantics(instr) &
- (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
-
- nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
-
- unsigned wait_flags = 0;
- if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
- wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
- if (modes & nir_var_mem_shared)
- wait_flags |= AC_WAIT_LGKM;
-
- if (wait_flags)
- ac_build_waitcnt(&ctx->ac, wait_flags);
-
- if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
- ac_emit_barrier(&ctx->ac, ctx->stage);
- break;
- }
- case nir_intrinsic_memory_barrier_tcs_patch:
- break;
- case nir_intrinsic_control_barrier:
- ac_emit_barrier(&ctx->ac, ctx->stage);
- break;
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_shared_atomic_imin:
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_shared_atomic_imax:
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_shared_atomic_comp_swap:
- case nir_intrinsic_shared_atomic_fadd: {
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
- instr->src[1].ssa->bit_size);
- result = visit_var_atomic(ctx, instr, ptr, 1);
- break;
- }
- case nir_intrinsic_deref_atomic_add:
- case nir_intrinsic_deref_atomic_imin:
- case nir_intrinsic_deref_atomic_umin:
- case nir_intrinsic_deref_atomic_imax:
- case nir_intrinsic_deref_atomic_umax:
- case nir_intrinsic_deref_atomic_and:
- case nir_intrinsic_deref_atomic_or:
- case nir_intrinsic_deref_atomic_xor:
- case nir_intrinsic_deref_atomic_exchange:
- case nir_intrinsic_deref_atomic_comp_swap:
- case nir_intrinsic_deref_atomic_fadd: {
- LLVMValueRef ptr = get_src(ctx, instr->src[0]);
- result = visit_var_atomic(ctx, instr, ptr, 1);
- break;
- }
- case nir_intrinsic_load_barycentric_pixel:
- result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_centroid:
- result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_sample:
- result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_model:
- result = barycentric_model(ctx);
- break;
- case nir_intrinsic_load_barycentric_at_offset: {
- LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
- result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
- break;
- }
- case nir_intrinsic_load_barycentric_at_sample: {
- LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
- result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
- break;
- }
- case nir_intrinsic_load_interpolated_input: {
- /* We assume any indirect loads have been lowered away */
- ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
- assert(offset);
- assert(offset[0].i32 == 0);
-
- LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
- unsigned index = nir_intrinsic_base(instr);
- unsigned component = nir_intrinsic_component(instr);
- result = load_interpolated_input(ctx, interp_param, index,
- component,
- instr->dest.ssa.num_components,
- instr->dest.ssa.bit_size);
- break;
- }
- case nir_intrinsic_emit_vertex:
- ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
- break;
- case nir_intrinsic_emit_vertex_with_counter: {
- unsigned stream = nir_intrinsic_stream_id(instr);
- LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
- ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
- next_vertex,
- ctx->abi->outputs);
- break;
- }
- case nir_intrinsic_end_primitive:
- case nir_intrinsic_end_primitive_with_counter:
- ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
- break;
- case nir_intrinsic_load_tess_coord:
- result = ctx->abi->load_tess_coord(ctx->abi);
- break;
- case nir_intrinsic_load_tess_level_outer:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
- break;
- case nir_intrinsic_load_tess_level_inner:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
- break;
- case nir_intrinsic_load_tess_level_outer_default:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
- break;
- case nir_intrinsic_load_tess_level_inner_default:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
- break;
- case nir_intrinsic_load_patch_vertices_in:
- result = ctx->abi->load_patch_vertices_in(ctx->abi);
- break;
- case nir_intrinsic_vote_all: {
- LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
- result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
- case nir_intrinsic_vote_any: {
- LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
- result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
- case nir_intrinsic_shuffle:
- if (ctx->ac.chip_class == GFX8 ||
- ctx->ac.chip_class == GFX9 ||
- (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
- result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
- } else {
- LLVMValueRef src = get_src(ctx, instr->src[0]);
- LLVMValueRef index = get_src(ctx, instr->src[1]);
- LLVMTypeRef type = LLVMTypeOf(src);
- struct waterfall_context wctx;
- LLVMValueRef index_val;
-
- index_val = enter_waterfall(ctx, &wctx, index, true);
-
- src = LLVMBuildZExt(ctx->ac.builder, src,
- ctx->ac.i32, "");
-
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
- ctx->ac.i32,
- (LLVMValueRef []) { src, index_val }, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-
- result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
-
- result = exit_waterfall(ctx, &wctx, result);
- }
- break;
- case nir_intrinsic_reduce:
- result = ac_build_reduce(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0],
- instr->const_index[1]);
- break;
- case nir_intrinsic_inclusive_scan:
- result = ac_build_inclusive_scan(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0]);
- break;
- case nir_intrinsic_exclusive_scan:
- result = ac_build_exclusive_scan(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0]);
- break;
- case nir_intrinsic_quad_broadcast: {
- unsigned lane = nir_src_as_uint(instr->src[1]);
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
- lane, lane, lane, lane);
- break;
- }
- case nir_intrinsic_quad_swap_horizontal:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3 ,2);
- break;
- case nir_intrinsic_quad_swap_vertical:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0 ,1);
- break;
- case nir_intrinsic_quad_swap_diagonal:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1 ,0);
- break;
- case nir_intrinsic_quad_swizzle_amd: {
- uint32_t mask = nir_intrinsic_swizzle_mask(instr);
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
- mask & 0x3, (mask >> 2) & 0x3,
- (mask >> 4) & 0x3, (mask >> 6) & 0x3);
- break;
- }
- case nir_intrinsic_masked_swizzle_amd: {
- uint32_t mask = nir_intrinsic_swizzle_mask(instr);
- result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
- break;
- }
- case nir_intrinsic_write_invocation_amd:
- result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]));
- break;
- case nir_intrinsic_mbcnt_amd:
- result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
- break;
- case nir_intrinsic_load_scratch: {
- LLVMValueRef offset = get_src(ctx, instr->src[0]);
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMTypeRef vec_type =
- instr->dest.ssa.num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->dest.ssa.num_components);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- break;
- }
- case nir_intrinsic_store_scratch: {
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(comp_type, addr_space), "");
- LLVMValueRef src = get_src(ctx, instr->src[0]);
- unsigned wrmask = nir_intrinsic_write_mask(instr);
- while (wrmask) {
- int start, count;
- u_bit_scan_consecutive_range(&wrmask, &start, &count);
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
- LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
- LLVMTypeRef vec_type =
- count == 1 ? comp_type : LLVMVectorType(comp_type, count);
- offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
- offset_ptr,
- LLVMPointerType(vec_type, addr_space),
- "");
- LLVMValueRef offset_src =
- ac_extract_components(&ctx->ac, src, start, count);
- LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
- }
- break;
- }
- case nir_intrinsic_load_constant: {
- unsigned base = nir_intrinsic_base(instr);
- unsigned range = nir_intrinsic_range(instr);
-
- LLVMValueRef offset = get_src(ctx, instr->src[0]);
- offset = LLVMBuildAdd(ctx->ac.builder, offset,
- LLVMConstInt(ctx->ac.i32, base, false), "");
-
- /* Clamp the offset to avoid out-of-bound access because global
- * instructions can't handle them.
- */
- LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
- offset, size, "");
- offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
-
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMTypeRef vec_type =
- instr->dest.ssa.num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->dest.ssa.num_components);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- break;
- }
- default:
- fprintf(stderr, "Unknown intrinsic: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- break;
- }
- if (result) {
- ctx->ssa_defs[instr->dest.ssa.index] = result;
- }
+ LLVMValueRef result = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_ballot:
+ result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+ if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
+ result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
+ break;
+ case nir_intrinsic_read_invocation:
+ result =
+ ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
+ break;
+ case nir_intrinsic_read_first_invocation:
+ result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
+ break;
+ case nir_intrinsic_load_subgroup_invocation:
+ result = ac_get_thread_id(&ctx->ac);
+ break;
+ case nir_intrinsic_load_work_group_id: {
+ LLVMValueRef values[3];
+
+ for (int i = 0; i < 3; i++) {
+ values[i] = ctx->args->workgroup_ids[i].used
+ ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
+ : ctx->ac.i32_0;
+ }
+
+ result = ac_build_gather_values(&ctx->ac, values, 3);
+ break;
+ }
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_first_vertex:
+ result = ctx->abi->load_base_vertex(ctx->abi);
+ break;
+ case nir_intrinsic_load_local_group_size:
+ result = ctx->abi->load_local_group_size(ctx->abi);
+ break;
+ case nir_intrinsic_load_vertex_id:
+ result = LLVMBuildAdd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->vertex_id),
+ ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
+ break;
+ case nir_intrinsic_load_vertex_id_zero_base: {
+ result = ctx->abi->vertex_id;
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_id: {
+ result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
+ break;
+ }
+ case nir_intrinsic_load_base_instance:
+ result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
+ break;
+ case nir_intrinsic_load_draw_id:
+ result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
+ break;
+ case nir_intrinsic_load_view_index:
+ result = ac_get_arg(&ctx->ac, ctx->args->view_index);
+ break;
+ case nir_intrinsic_load_invocation_id:
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
+ } else {
+ if (ctx->ac.chip_class >= GFX10) {
+ result =
+ LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
+ LLVMConstInt(ctx->ac.i32, 127, 0), "");
+ } else {
+ result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
+ }
+ }
+ break;
+ case nir_intrinsic_load_primitive_id:
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
+ } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
+ } else
+ fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
+ break;
+ case nir_intrinsic_load_sample_id:
+ result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ result = load_sample_pos(ctx);
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ result = ctx->abi->load_sample_mask_in(ctx->abi);
+ break;
+ case nir_intrinsic_load_frag_coord: {
+ LLVMValueRef values[4] = {
+ ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
+ ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
+ ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
+ result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
+ break;
+ }
+ case nir_intrinsic_load_layer_id:
+ result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ break;
+ case nir_intrinsic_load_front_face:
+ result = ac_get_arg(&ctx->ac, ctx->args->front_face);
+ break;
+ case nir_intrinsic_load_helper_invocation:
+ result = ac_build_load_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_is_helper_invocation:
+ result = ac_build_is_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_load_color0:
+ result = ctx->abi->color0;
+ break;
+ case nir_intrinsic_load_color1:
+ result = ctx->abi->color1;
+ break;
+ case nir_intrinsic_load_user_data_amd:
+ assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
+ result = ctx->abi->user_data;
+ break;
+ case nir_intrinsic_load_instance_id:
+ result = ctx->abi->instance_id;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
+ break;
+ case nir_intrinsic_load_local_invocation_index:
+ result = visit_load_local_invocation_index(ctx);
+ break;
+ case nir_intrinsic_load_subgroup_id:
+ result = visit_load_subgroup_id(ctx);
+ break;
+ case nir_intrinsic_load_num_subgroups:
+ result = visit_load_num_subgroups(ctx);
+ break;
+ case nir_intrinsic_first_invocation:
+ result = visit_first_invocation(ctx);
+ break;
+ case nir_intrinsic_load_push_constant:
+ result = visit_load_push_constant(ctx, instr);
+ break;
+ case nir_intrinsic_vulkan_resource_index: {
+ LLVMValueRef index = get_src(ctx, instr->src[0]);
+ unsigned desc_set = nir_intrinsic_desc_set(instr);
+ unsigned binding = nir_intrinsic_binding(instr);
+
+ result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
+ break;
+ }
+ case nir_intrinsic_vulkan_resource_reindex:
+ result = visit_vulkan_resource_reindex(ctx, instr);
+ break;
+ case nir_intrinsic_store_ssbo:
+ visit_store_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ssbo:
+ result = visit_load_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ result = visit_atomic_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ubo:
+ result = visit_load_ubo_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_get_buffer_size:
+ result = visit_get_buffer_size(ctx, instr);
+ break;
+ case nir_intrinsic_load_deref:
+ result = visit_load_var(ctx, instr);
+ break;
+ case nir_intrinsic_store_deref:
+ visit_store_var(ctx, instr);
+ break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_vertex:
+ case nir_intrinsic_load_per_vertex_input:
+ result = visit_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output:
+ result = visit_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ visit_store_output(ctx, instr);
+ break;
+ case nir_intrinsic_load_shared:
+ result = visit_load_shared(ctx, instr);
+ break;
+ case nir_intrinsic_store_shared:
+ visit_store_shared(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_samples:
+ case nir_intrinsic_image_deref_samples:
+ result = visit_image_samples(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_load:
+ result = visit_image_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_load:
+ result = visit_image_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_store:
+ visit_image_store(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_store:
+ visit_image_store(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_atomic_add:
+ case nir_intrinsic_bindless_image_atomic_imin:
+ case nir_intrinsic_bindless_image_atomic_umin:
+ case nir_intrinsic_bindless_image_atomic_imax:
+ case nir_intrinsic_bindless_image_atomic_umax:
+ case nir_intrinsic_bindless_image_atomic_and:
+ case nir_intrinsic_bindless_image_atomic_or:
+ case nir_intrinsic_bindless_image_atomic_xor:
+ case nir_intrinsic_bindless_image_atomic_exchange:
+ case nir_intrinsic_bindless_image_atomic_comp_swap:
+ case nir_intrinsic_bindless_image_atomic_inc_wrap:
+ case nir_intrinsic_bindless_image_atomic_dec_wrap:
+ result = visit_image_atomic(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic_inc_wrap:
+ case nir_intrinsic_image_deref_atomic_dec_wrap:
+ result = visit_image_atomic(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_size:
+ result = visit_image_size(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_size:
+ result = visit_image_size(ctx, instr, false);
+ break;
+ case nir_intrinsic_shader_clock:
+ result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
+ break;
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ emit_discard(ctx, instr);
+ break;
+ case nir_intrinsic_demote:
+ case nir_intrinsic_demote_if:
+ emit_demote(ctx, instr);
+ break;
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ emit_membar(&ctx->ac, instr);
+ break;
+ case nir_intrinsic_scoped_barrier: {
+ assert(!(nir_intrinsic_memory_semantics(instr) &
+ (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+ nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
+
+ unsigned wait_flags = 0;
+ if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
+ wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ if (modes & nir_var_mem_shared)
+ wait_flags |= AC_WAIT_LGKM;
+
+ if (wait_flags)
+ ac_build_waitcnt(&ctx->ac, wait_flags);
+
+ if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ }
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ break;
+ case nir_intrinsic_control_barrier:
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_shared_atomic_fadd: {
+ LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
+ result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_deref_atomic_add:
+ case nir_intrinsic_deref_atomic_imin:
+ case nir_intrinsic_deref_atomic_umin:
+ case nir_intrinsic_deref_atomic_imax:
+ case nir_intrinsic_deref_atomic_umax:
+ case nir_intrinsic_deref_atomic_and:
+ case nir_intrinsic_deref_atomic_or:
+ case nir_intrinsic_deref_atomic_xor:
+ case nir_intrinsic_deref_atomic_exchange:
+ case nir_intrinsic_deref_atomic_comp_swap:
+ case nir_intrinsic_deref_atomic_fadd: {
+ LLVMValueRef ptr = get_src(ctx, instr->src[0]);
+ result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_pixel:
+ result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_centroid:
+ result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_sample:
+ result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_model:
+ result = barycentric_model(ctx);
+ break;
+ case nir_intrinsic_load_barycentric_at_offset: {
+ LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_at_sample: {
+ LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
+ result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
+ break;
+ }
+ case nir_intrinsic_load_interpolated_input: {
+ /* We assume any indirect loads have been lowered away */
+ ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
+ assert(offset);
+ assert(offset[0].i32 == 0);
+
+ LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
+ unsigned index = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ result = load_interpolated_input(ctx, interp_param, index, component,
+ instr->dest.ssa.num_components, instr->dest.ssa.bit_size);
+ break;
+ }
+ case nir_intrinsic_emit_vertex:
+ ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
+ break;
+ case nir_intrinsic_emit_vertex_with_counter: {
+ unsigned stream = nir_intrinsic_stream_id(instr);
+ LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
+ break;
+ }
+ case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
+ break;
+ case nir_intrinsic_load_tess_coord:
+ result = ctx->abi->load_tess_coord(ctx->abi);
+ break;
+ case nir_intrinsic_load_tess_level_outer:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
+ break;
+ case nir_intrinsic_load_tess_level_inner:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
+ break;
+ case nir_intrinsic_load_tess_level_outer_default:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
+ break;
+ case nir_intrinsic_load_tess_level_inner_default:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ result = ctx->abi->load_patch_vertices_in(ctx->abi);
+ break;
+ case nir_intrinsic_vote_all: {
+ LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+ case nir_intrinsic_vote_any: {
+ LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+ case nir_intrinsic_shuffle:
+ if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
+ (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
+ result =
+ ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
+ } else {
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[1]);
+ LLVMTypeRef type = LLVMTypeOf(src);
+ struct waterfall_context wctx;
+ LLVMValueRef index_val;
+
+ index_val = enter_waterfall(ctx, &wctx, index, true);
+
+ src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
+
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
+ (LLVMValueRef[]){src, index_val}, 2,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+ result = exit_waterfall(ctx, &wctx, result);
+ }
+ break;
+ case nir_intrinsic_reduce:
+ result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
+ instr->const_index[1]);
+ break;
+ case nir_intrinsic_inclusive_scan:
+ result =
+ ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
+ break;
+ case nir_intrinsic_exclusive_scan:
+ result =
+ ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
+ break;
+ case nir_intrinsic_quad_broadcast: {
+ unsigned lane = nir_src_as_uint(instr->src[1]);
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
+ break;
+ }
+ case nir_intrinsic_quad_swap_horizontal:
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
+ break;
+ case nir_intrinsic_quad_swap_vertical:
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
+ break;
+ case nir_intrinsic_quad_swap_diagonal:
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
+ break;
+ case nir_intrinsic_quad_swizzle_amd: {
+ uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
+ (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
+ break;
+ }
+ case nir_intrinsic_masked_swizzle_amd: {
+ uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
+ break;
+ }
+ case nir_intrinsic_write_invocation_amd:
+ result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
+ break;
+ case nir_intrinsic_mbcnt_amd:
+ result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
+ break;
+ case nir_intrinsic_load_scratch: {
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
+ LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
+ ? comp_type
+ : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ case nir_intrinsic_store_scratch: {
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
+ LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ unsigned wrmask = nir_intrinsic_write_mask(instr);
+ while (wrmask) {
+ int start, count;
+ u_bit_scan_consecutive_range(&wrmask, &start, &count);
+
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
+ LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+ LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
+ offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count);
+ LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+ }
+ break;
+ }
+ case nir_intrinsic_load_constant: {
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned range = nir_intrinsic_range(instr);
+
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");
+
+ /* Clamp the offset to avoid out-of-bound access because global
+ * instructions can't handle them.
+ */
+ LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+ offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
+
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
+ LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
+ ? comp_type
+ : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ default:
+ fprintf(stderr, "Unknown intrinsic: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ break;
+ }
+ if (result) {
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ }
}
-static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
- unsigned base_index,
- unsigned constant_index,
- LLVMValueRef dynamic_index)
+static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, unsigned base_index,
+ unsigned constant_index,
+ LLVMValueRef dynamic_index)
{
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
- LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
- LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
+ LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
+ LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
- /* Bindless uniforms are 64bit so multiple index by 8 */
- index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
- offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
+ /* Bindless uniforms are 64bit so multiply the index by 8 */
+ index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
- LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
+ LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
- LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset,
- NULL, 0, 0, true, true);
+ LLVMValueRef ret =
+ ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, NULL, 0, 0, true, true);
- return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
+ return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
}
struct sampler_desc_address {
- unsigned descriptor_set;
- unsigned base_index; /* binding in vulkan */
- unsigned constant_index;
- LLVMValueRef dynamic_index;
- bool image;
- bool bindless;
+ unsigned descriptor_set;
+ unsigned base_index; /* binding in vulkan */
+ unsigned constant_index;
+ LLVMValueRef dynamic_index;
+ bool image;
+ bool bindless;
};
-static struct sampler_desc_address
-get_sampler_desc_internal(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- const nir_instr *instr,
- bool image)
+static struct sampler_desc_address get_sampler_desc_internal(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr, bool image)
{
- LLVMValueRef index = NULL;
- unsigned constant_index = 0;
- unsigned descriptor_set;
- unsigned base_index;
- bool bindless = false;
-
- if (!deref_instr) {
- descriptor_set = 0;
- if (image) {
- nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
- base_index = 0;
- bindless = true;
- index = get_src(ctx, img_instr->src[0]);
- } else {
- nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
- int sampSrcIdx = nir_tex_instr_src_index(tex_instr,
- nir_tex_src_sampler_handle);
- if (sampSrcIdx != -1) {
- base_index = 0;
- bindless = true;
- index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
- } else {
- assert(tex_instr && !image);
- base_index = tex_instr->sampler_index;
- }
- }
- } else {
- while(deref_instr->deref_type != nir_deref_type_var) {
- if (deref_instr->deref_type == nir_deref_type_array) {
- unsigned array_size = glsl_get_aoa_size(deref_instr->type);
- if (!array_size)
- array_size = 1;
-
- if (nir_src_is_const(deref_instr->arr.index)) {
- constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
- } else {
- LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
-
- indirect = LLVMBuildMul(ctx->ac.builder, indirect,
- LLVMConstInt(ctx->ac.i32, array_size, false), "");
-
- if (!index)
- index = indirect;
- else
- index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
- }
-
- deref_instr = nir_src_as_deref(deref_instr->parent);
- } else if (deref_instr->deref_type == nir_deref_type_struct) {
- unsigned sidx = deref_instr->strct.index;
- deref_instr = nir_src_as_deref(deref_instr->parent);
- constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
- } else {
- unreachable("Unsupported deref type");
- }
- }
- descriptor_set = deref_instr->var->data.descriptor_set;
-
- if (deref_instr->var->data.bindless) {
- /* For now just assert on unhandled variable types */
- assert(deref_instr->var->data.mode == nir_var_uniform);
-
- base_index = deref_instr->var->data.driver_location;
- bindless = true;
-
- index = index ? index : ctx->ac.i32_0;
- index = get_bindless_index_from_uniform(ctx, base_index,
- constant_index, index);
- } else
- base_index = deref_instr->var->data.binding;
- }
- return (struct sampler_desc_address) {
- .descriptor_set = descriptor_set,
- .base_index = base_index,
- .constant_index = constant_index,
- .dynamic_index = index,
- .image = image,
- .bindless = bindless,
- };
+ LLVMValueRef index = NULL;
+ unsigned constant_index = 0;
+ unsigned descriptor_set;
+ unsigned base_index;
+ bool bindless = false;
+
+ if (!deref_instr) {
+ descriptor_set = 0;
+ if (image) {
+ nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
+ base_index = 0;
+ bindless = true;
+ index = get_src(ctx, img_instr->src[0]);
+ } else {
+ nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+ int sampSrcIdx = nir_tex_instr_src_index(tex_instr, nir_tex_src_sampler_handle);
+ if (sampSrcIdx != -1) {
+ base_index = 0;
+ bindless = true;
+ index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
+ } else {
+ assert(tex_instr && !image);
+ base_index = tex_instr->sampler_index;
+ }
+ }
+ } else {
+ while (deref_instr->deref_type != nir_deref_type_var) {
+ if (deref_instr->deref_type == nir_deref_type_array) {
+ unsigned array_size = glsl_get_aoa_size(deref_instr->type);
+ if (!array_size)
+ array_size = 1;
+
+ if (nir_src_is_const(deref_instr->arr.index)) {
+ constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
+ } else {
+ LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
+
+ indirect = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, array_size, false), "");
+
+ if (!index)
+ index = indirect;
+ else
+ index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
+ }
+
+ deref_instr = nir_src_as_deref(deref_instr->parent);
+ } else if (deref_instr->deref_type == nir_deref_type_struct) {
+ unsigned sidx = deref_instr->strct.index;
+ deref_instr = nir_src_as_deref(deref_instr->parent);
+ constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
+ } else {
+ unreachable("Unsupported deref type");
+ }
+ }
+ descriptor_set = deref_instr->var->data.descriptor_set;
+
+ if (deref_instr->var->data.bindless) {
+ /* For now just assert on unhandled variable types */
+ assert(deref_instr->var->data.mode == nir_var_uniform);
+
+ base_index = deref_instr->var->data.driver_location;
+ bindless = true;
+
+ index = index ? index : ctx->ac.i32_0;
+ index = get_bindless_index_from_uniform(ctx, base_index, constant_index, index);
+ } else
+ base_index = deref_instr->var->data.binding;
+ }
+ return (struct sampler_desc_address){
+ .descriptor_set = descriptor_set,
+ .base_index = base_index,
+ .constant_index = constant_index,
+ .dynamic_index = index,
+ .image = image,
+ .bindless = bindless,
+ };
}
/* Extract any possibly divergent index into a separate value that can be fed
* into get_sampler_desc with the same arguments. */
-static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- const nir_instr *instr,
- bool image)
+static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
+ const nir_instr *instr, bool image)
{
- struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
- return addr.dynamic_index;
+ struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
+ return addr.dynamic_index;
}
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- enum ac_descriptor_type desc_type,
- const nir_instr *instr,
- LLVMValueRef index,
- bool image, bool write)
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
+ enum ac_descriptor_type desc_type, const nir_instr *instr,
+ LLVMValueRef index, bool image, bool write)
{
- struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
- return ctx->abi->load_sampler_desc(ctx->abi,
- addr.descriptor_set,
- addr.base_index,
- addr.constant_index, index,
- desc_type, addr.image, write, addr.bindless);
+ struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
+ return ctx->abi->load_sampler_desc(ctx->abi, addr.descriptor_set, addr.base_index,
+ addr.constant_index, index, desc_type, addr.image, write,
+ addr.bindless);
}
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
* GFX8:
* The ANISO_OVERRIDE sampler field enables this fix in TA.
*/
-static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
- LLVMValueRef res, LLVMValueRef samp)
+static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res,
+ LLVMValueRef samp)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef img7, samp0;
-
- if (ctx->ac.chip_class >= GFX8)
- return samp;
-
- img7 = LLVMBuildExtractElement(builder, res,
- LLVMConstInt(ctx->ac.i32, 7, 0), "");
- samp0 = LLVMBuildExtractElement(builder, samp,
- LLVMConstInt(ctx->ac.i32, 0, 0), "");
- samp0 = LLVMBuildAnd(builder, samp0, img7, "");
- return LLVMBuildInsertElement(builder, samp, samp0,
- LLVMConstInt(ctx->ac.i32, 0, 0), "");
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef img7, samp0;
+
+ if (ctx->ac.chip_class >= GFX8)
+ return samp;
+
+ img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
+ samp0 = LLVMBuildExtractElement(builder, samp, LLVMConstInt(ctx->ac.i32, 0, 0), "");
+ samp0 = LLVMBuildAnd(builder, samp0, img7, "");
+ return LLVMBuildInsertElement(builder, samp, samp0, LLVMConstInt(ctx->ac.i32, 0, 0), "");
}
-static void tex_fetch_ptrs(struct ac_nir_context *ctx,
- nir_tex_instr *instr,
- struct waterfall_context *wctx,
- LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
- LLVMValueRef *fmask_ptr)
+static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
+ struct waterfall_context *wctx, LLVMValueRef *res_ptr,
+ LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
- nir_deref_instr *texture_deref_instr = NULL;
- nir_deref_instr *sampler_deref_instr = NULL;
- int plane = -1;
-
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_texture_deref:
- texture_deref_instr = nir_src_as_deref(instr->src[i].src);
- break;
- case nir_tex_src_sampler_deref:
- sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
- break;
- case nir_tex_src_plane:
- plane = nir_src_as_int(instr->src[i].src);
- break;
- default:
- break;
- }
- }
-
- LLVMValueRef texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr,
- &instr->instr, false);
- if (!sampler_deref_instr)
- sampler_deref_instr = texture_deref_instr;
-
- LLVMValueRef sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr,
- &instr->instr, false);
- if (instr->texture_non_uniform)
- texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
-
- if (instr->sampler_non_uniform)
- sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
-
- enum ac_descriptor_type main_descriptor = instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
-
- if (plane >= 0) {
- assert(instr->op != nir_texop_txf_ms &&
- instr->op != nir_texop_samples_identical);
- assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
-
- main_descriptor = AC_DESC_PLANE_0 + plane;
- }
-
- if (instr->op == nir_texop_fragment_mask_fetch) {
- /* The fragment mask is fetched from the compressed
- * multisampled surface.
- */
- main_descriptor = AC_DESC_FMASK;
- }
-
- *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
- texture_dynamic_index, false, false);
-
- if (samp_ptr) {
- *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
- sampler_dynamic_index, false, false);
- if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
- *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
- }
- if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
- instr->op == nir_texop_samples_identical))
- *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK,
- &instr->instr, texture_dynamic_index, false, false);
+ nir_deref_instr *texture_deref_instr = NULL;
+ nir_deref_instr *sampler_deref_instr = NULL;
+ int plane = -1;
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_texture_deref:
+ texture_deref_instr = nir_src_as_deref(instr->src[i].src);
+ break;
+ case nir_tex_src_sampler_deref:
+ sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
+ break;
+ case nir_tex_src_plane:
+ plane = nir_src_as_int(instr->src[i].src);
+ break;
+ default:
+ break;
+ }
+ }
+
+ LLVMValueRef texture_dynamic_index =
+ get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);
+ if (!sampler_deref_instr)
+ sampler_deref_instr = texture_deref_instr;
+
+ LLVMValueRef sampler_dynamic_index =
+ get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);
+ if (instr->texture_non_uniform)
+ texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
+
+ if (instr->sampler_non_uniform)
+ sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
+
+ enum ac_descriptor_type main_descriptor =
+ instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
+
+ if (plane >= 0) {
+ assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical);
+ assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
+
+ main_descriptor = AC_DESC_PLANE_0 + plane;
+ }
+
+ if (instr->op == nir_texop_fragment_mask_fetch) {
+ /* The fragment mask is fetched from the compressed
+ * multisampled surface.
+ */
+ main_descriptor = AC_DESC_FMASK;
+ }
+
+ *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
+ texture_dynamic_index, false, false);
+
+ if (samp_ptr) {
+ *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
+ sampler_dynamic_index, false, false);
+ if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
+ *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
+ }
+ if (fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical))
+ *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr,
+ texture_dynamic_index, false, false);
}
-static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
- LLVMValueRef coord)
+static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord)
{
- coord = ac_to_float(ctx, coord);
- coord = ac_build_round(ctx, coord);
- coord = ac_to_integer(ctx, coord);
- return coord;
+ coord = ac_to_float(ctx, coord);
+ coord = ac_build_round(ctx, coord);
+ coord = ac_to_integer(ctx, coord);
+ return coord;
}
/* Translate one NIR texture instruction into LLVM IR.
 *
 * Steps, in the order they appear in the body:
 *  - tex_fetch_ptrs() fills args.resource, args.sampler and fmask_ptr and
 *    receives the two waterfall contexts in wctx.
 *  - Every NIR source is copied into the matching ac_image_args field
 *    (coords, compare, offset, bias, lod/level_zero, min_lod) or into a
 *    local (sample_index, ddx, ddy).
 *  - txs on buffer textures and texture_samples are answered directly from
 *    the descriptor and jump to write_result.
 *  - Offsets, the shadow reference, derivatives, cube coordinates, array
 *    slices, GFX9 1D coordinates and the MSAA sample index are fixed up.
 *  - build_tex_intrinsic() emits the operation and the result is
 *    post-processed per opcode (query_levels, shadow, txs, vector trimming).
 *
 * At write_result a non-NULL result is converted with ac_to_integer, passed
 * through exit_waterfall for both contexts (last one first) and stored in
 * ctx->ssa_defs[instr->dest.ssa.index].
 */
static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
{
- LLVMValueRef result = NULL;
- struct ac_image_args args = { 0 };
- LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
- LLVMValueRef ddx = NULL, ddy = NULL;
- unsigned offset_src = 0;
- struct waterfall_context wctx[2] = {{{0}}};
-
- tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
-
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_coord: {
- LLVMValueRef coord = get_src(ctx, instr->src[i].src);
- for (unsigned chan = 0; chan < instr->coord_components; ++chan)
- args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
- break;
- }
- case nir_tex_src_projector:
- break;
- case nir_tex_src_comparator:
- if (instr->is_shadow) {
- args.compare = get_src(ctx, instr->src[i].src);
- args.compare = ac_to_float(&ctx->ac, args.compare);
- }
- break;
- case nir_tex_src_offset:
- args.offset = get_src(ctx, instr->src[i].src);
- offset_src = i;
- break;
- case nir_tex_src_bias:
- args.bias = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_lod: {
- if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
- args.level_zero = true;
- else
- args.lod = get_src(ctx, instr->src[i].src);
- break;
- }
- case nir_tex_src_ms_index:
- sample_index = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_ms_mcs:
- break;
- case nir_tex_src_ddx:
- ddx = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_ddy:
- ddy = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_min_lod:
- args.min_lod = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_texture_offset:
- case nir_tex_src_sampler_offset:
- case nir_tex_src_plane:
- default:
- break;
- }
- }
-
- if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
- result = get_buffer_size(ctx, args.resource, true);
- goto write_result;
- }
-
- if (instr->op == nir_texop_texture_samples) {
- LLVMValueRef res, samples, is_msaa;
- LLVMValueRef default_sample;
-
- res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
- samples = LLVMBuildExtractElement(ctx->ac.builder, res,
- LLVMConstInt(ctx->ac.i32, 3, false), "");
- is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
- LLVMConstInt(ctx->ac.i32, 28, false), "");
- is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
- LLVMConstInt(ctx->ac.i32, 0xe, false), "");
- is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
- LLVMConstInt(ctx->ac.i32, 0xe, false), "");
-
- samples = LLVMBuildLShr(ctx->ac.builder, samples,
- LLVMConstInt(ctx->ac.i32, 16, false), "");
- samples = LLVMBuildAnd(ctx->ac.builder, samples,
- LLVMConstInt(ctx->ac.i32, 0xf, false), "");
- samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
- samples, "");
-
- if (ctx->abi->robust_buffer_access) {
- LLVMValueRef dword1, is_null_descriptor;
-
- /* Extract the second dword of the descriptor, if it's
- * all zero, then it's a null descriptor.
- */
- dword1 = LLVMBuildExtractElement(ctx->ac.builder, res,
- LLVMConstInt(ctx->ac.i32, 1, false), "");
- is_null_descriptor =
- LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
- LLVMConstInt(ctx->ac.i32, 0, false), "");
- default_sample =
- LLVMBuildSelect(ctx->ac.builder, is_null_descriptor,
- ctx->ac.i32_0, ctx->ac.i32_1, "");
- } else {
- default_sample = ctx->ac.i32_1;
- }
-
- samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
- default_sample, "");
- result = samples;
- goto write_result;
- }
-
- if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
- LLVMValueRef offset[3], pack;
- for (unsigned chan = 0; chan < 3; ++chan)
- offset[chan] = ctx->ac.i32_0;
-
- unsigned num_components = ac_get_llvm_num_components(args.offset);
- for (unsigned chan = 0; chan < num_components; chan++) {
- offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
- offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
- LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
- if (chan)
- offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
- LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
- }
- pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
- pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
- args.offset = pack;
- }
-
- /* Section 8.23.1 (Depth Texture Comparison Mode) of the
- * OpenGL 4.5 spec says:
- *
- * "If the texture’s internal format indicates a fixed-point
- * depth texture, then D_t and D_ref are clamped to the
- * range [0, 1]; otherwise no clamping is performed."
- *
- * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
- * so the depth comparison value isn't clamped for Z16 and
- * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
- * an explicitly clamped 32-bit float format.
- */
- if (args.compare &&
- ctx->ac.chip_class >= GFX8 &&
- ctx->ac.chip_class <= GFX9 &&
- ctx->abi->clamp_shadow_reference) {
- LLVMValueRef upgraded, clamped;
-
- upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
- LLVMConstInt(ctx->ac.i32, 3, false), "");
- upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
- LLVMConstInt(ctx->ac.i32, 29, false), "");
- upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
- clamped = ac_build_clamp(&ctx->ac, args.compare);
- args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped,
- args.compare, "");
- }
-
- /* pack derivatives */
- if (ddx || ddy) {
- int num_src_deriv_channels, num_dest_deriv_channels;
- switch (instr->sampler_dim) {
- case GLSL_SAMPLER_DIM_3D:
- case GLSL_SAMPLER_DIM_CUBE:
- num_src_deriv_channels = 3;
- num_dest_deriv_channels = 3;
- break;
- case GLSL_SAMPLER_DIM_2D:
- default:
- num_src_deriv_channels = 2;
- num_dest_deriv_channels = 2;
- break;
- case GLSL_SAMPLER_DIM_1D:
- num_src_deriv_channels = 1;
- if (ctx->ac.chip_class == GFX9) {
- num_dest_deriv_channels = 2;
- } else {
- num_dest_deriv_channels = 1;
- }
- break;
- }
-
- for (unsigned i = 0; i < num_src_deriv_channels; i++) {
- args.derivs[i] = ac_to_float(&ctx->ac,
- ac_llvm_extract_elem(&ctx->ac, ddx, i));
- args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
- ac_llvm_extract_elem(&ctx->ac, ddy, i));
- }
- for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
- args.derivs[i] = ctx->ac.f32_0;
- args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
- }
- }
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
- for (unsigned chan = 0; chan < instr->coord_components; chan++)
- args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
- if (instr->coord_components == 3)
- args.coords[3] = LLVMGetUndef(ctx->ac.f32);
- ac_prepare_cube_coords(&ctx->ac,
- instr->op == nir_texop_txd, instr->is_array,
- instr->op == nir_texop_lod, args.coords, args.derivs);
- }
-
- /* Texture coordinates fixups */
- if (instr->coord_components > 1 &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->is_array &&
- instr->op != nir_texop_txf) {
- args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
- }
-
- if (instr->coord_components > 2 &&
- (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
- instr->is_array &&
- instr->op != nir_texop_txf &&
- instr->op != nir_texop_txf_ms &&
- instr->op != nir_texop_fragment_fetch &&
- instr->op != nir_texop_fragment_mask_fetch) {
- args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
- }
-
- if (ctx->ac.chip_class == GFX9 &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->op != nir_texop_lod) {
- LLVMValueRef filler;
- if (instr->op == nir_texop_txf)
- filler = ctx->ac.i32_0;
- else
- filler = LLVMConstReal(ctx->ac.f32, 0.5);
-
- if (instr->is_array)
- args.coords[2] = args.coords[1];
- args.coords[1] = filler;
- }
-
- /* Pack sample index */
- if (sample_index && (instr->op == nir_texop_txf_ms ||
- instr->op == nir_texop_fragment_fetch))
- args.coords[instr->coord_components] = sample_index;
-
- if (instr->op == nir_texop_samples_identical) {
- struct ac_image_args txf_args = { 0 };
- memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
-
- txf_args.dmask = 0xf;
- txf_args.resource = fmask_ptr;
- txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
- result = build_tex_intrinsic(ctx, instr, &txf_args);
-
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
- result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
- goto write_result;
- }
-
- if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
- instr->op != nir_texop_txs &&
- instr->op != nir_texop_fragment_fetch &&
- instr->op != nir_texop_fragment_mask_fetch) {
- unsigned sample_chan = instr->is_array ? 3 : 2;
- args.coords[sample_chan] = adjust_sample_index_using_fmask(
- &ctx->ac, args.coords[0], args.coords[1],
- instr->is_array ? args.coords[2] : NULL,
- args.coords[sample_chan], fmask_ptr);
- }
-
- if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
- int num_offsets = instr->src[offset_src].src.ssa->num_components;
- num_offsets = MIN2(num_offsets, instr->coord_components);
- for (unsigned i = 0; i < num_offsets; ++i) {
- args.coords[i] = LLVMBuildAdd(
- ctx->ac.builder, args.coords[i],
- LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false), "");
- }
- args.offset = NULL;
- }
-
- /* DMASK was repurposed for GATHER4. 4 components are always
- * returned and DMASK works like a swizzle - it selects
- * the component to fetch. The only valid DMASK values are
- * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
- * (red,red,red,red) etc.) The ISA document doesn't mention
- * this.
- */
- args.dmask = 0xf;
- if (instr->op == nir_texop_tg4) {
- if (instr->is_shadow)
- args.dmask = 1;
- else
- args.dmask = 1 << instr->component;
- }
-
- if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
- args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
- args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
- }
-
- /* Adjust the number of coordinates because we only need (x,y) for 2D
- * multisampled images and (x,y,layer) for 2D multisampled layered
- * images or for multisampled input attachments.
- */
- if (instr->op == nir_texop_fragment_mask_fetch) {
- if (args.dim == ac_image_2dmsaa) {
- args.dim = ac_image_2d;
- } else {
- assert(args.dim == ac_image_2darraymsaa);
- args.dim = ac_image_2darray;
- }
- }
-
- assert(instr->dest.is_ssa);
- args.d16 = instr->dest.ssa.bit_size == 16;
-
- result = build_tex_intrinsic(ctx, instr, &args);
-
- if (instr->op == nir_texop_query_levels)
- result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
- else if (instr->is_shadow && instr->is_new_style_shadow &&
- instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
- instr->op != nir_texop_tg4)
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
- else if (instr->op == nir_texop_txs &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
- instr->is_array) {
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
- LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
- z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
- result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
- } else if (ctx->ac.chip_class == GFX9 &&
- instr->op == nir_texop_txs &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->is_array) {
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
- result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
- ctx->ac.i32_1, "");
- } else if (instr->dest.ssa.num_components != 4)
- result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
+ LLVMValueRef result = NULL;
+ struct ac_image_args args = {0};
+ LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
+ LLVMValueRef ddx = NULL, ddy = NULL;
+ unsigned offset_src = 0;
+ struct waterfall_context wctx[2] = {{{0}}};
+
+ tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_coord: {
+ LLVMValueRef coord = get_src(ctx, instr->src[i].src);
+ for (unsigned chan = 0; chan < instr->coord_components; ++chan)
+ args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
+ break;
+ }
+ case nir_tex_src_projector:
+ break;
+ case nir_tex_src_comparator:
+ if (instr->is_shadow) {
+ args.compare = get_src(ctx, instr->src[i].src);
+ args.compare = ac_to_float(&ctx->ac, args.compare);
+ }
+ break;
+ case nir_tex_src_offset:
+ args.offset = get_src(ctx, instr->src[i].src);
+ offset_src = i;
+ break;
+ case nir_tex_src_bias:
+ args.bias = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_lod: {
+ if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
+ args.level_zero = true;
+ else
+ args.lod = get_src(ctx, instr->src[i].src);
+ break;
+ }
+ case nir_tex_src_ms_index:
+ sample_index = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_ms_mcs:
+ break;
+ case nir_tex_src_ddx:
+ ddx = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_ddy:
+ ddy = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_min_lod:
+ args.min_lod = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_texture_offset:
+ case nir_tex_src_sampler_offset:
+ case nir_tex_src_plane:
+ default:
+ break;
+ }
+ }
+
+ if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ result = get_buffer_size(ctx, args.resource, true);
+ goto write_result;
+ }
+
+ if (instr->op == nir_texop_texture_samples) {
+ LLVMValueRef res, samples, is_msaa;
+ LLVMValueRef default_sample;
+
+ res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
+ samples =
+ LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), "");
+ is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), "");
+ is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), "");
+ is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
+ LLVMConstInt(ctx->ac.i32, 0xe, false), "");
+
+ samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 16, false), "");
+ samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), "");
+ samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, "");
+
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef dword1, is_null_descriptor;
+
+ /* Extract the second dword of the descriptor, if it's
+ * all zero, then it's a null descriptor.
+ */
+ dword1 =
+ LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 1, false), "");
+ is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ default_sample =
+ LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ctx->ac.i32_1, "");
+ } else {
+ default_sample = ctx->ac.i32_1;
+ }
+
+ samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, default_sample, "");
+ result = samples;
+ goto write_result;
+ }
+
+ if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
+ LLVMValueRef offset[3], pack;
+ for (unsigned chan = 0; chan < 3; ++chan)
+ offset[chan] = ctx->ac.i32_0;
+
+ unsigned num_components = ac_get_llvm_num_components(args.offset);
+ for (unsigned chan = 0; chan < num_components; chan++) {
+ offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
+ offset[chan] =
+ LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
+ if (chan)
+ offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
+ LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
+ }
+ pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
+ pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
+ args.offset = pack;
+ }
+
+ /* Section 8.23.1 (Depth Texture Comparison Mode) of the
+ * OpenGL 4.5 spec says:
+ *
+ * "If the texture’s internal format indicates a fixed-point
+ * depth texture, then D_t and D_ref are clamped to the
+ * range [0, 1]; otherwise no clamping is performed."
+ *
+ * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
+ * an explicitly clamped 32-bit float format.
+ */
+ if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 &&
+ ctx->abi->clamp_shadow_reference) {
+ LLVMValueRef upgraded, clamped;
+
+ upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
+ LLVMConstInt(ctx->ac.i32, 3, false), "");
+ upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->ac.i32, 29, false), "");
+ upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
+ clamped = ac_build_clamp(&ctx->ac, args.compare);
+ args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, args.compare, "");
+ }
+
+ /* pack derivatives */
+ if (ddx || ddy) {
+ int num_src_deriv_channels, num_dest_deriv_channels;
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ num_src_deriv_channels = 3;
+ num_dest_deriv_channels = 3;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ default:
+ num_src_deriv_channels = 2;
+ num_dest_deriv_channels = 2;
+ break;
+ case GLSL_SAMPLER_DIM_1D:
+ num_src_deriv_channels = 1;
+ if (ctx->ac.chip_class == GFX9) {
+ num_dest_deriv_channels = 2;
+ } else {
+ num_dest_deriv_channels = 1;
+ }
+ break;
+ }
+
+ for (unsigned i = 0; i < num_src_deriv_channels; i++) {
+ args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
+ args.derivs[num_dest_deriv_channels + i] =
+ ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
+ }
+ for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
+ args.derivs[i] = ctx->ac.f32_0;
+ args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
+ }
+ }
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
+ for (unsigned chan = 0; chan < instr->coord_components; chan++)
+ args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
+ if (instr->coord_components == 3)
+ args.coords[3] = LLVMGetUndef(ctx->ac.f32);
+ ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
+ instr->op == nir_texop_lod, args.coords, args.derivs);
+ }
+
+ /* Texture coordinates fixups */
+ if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->is_array && instr->op != nir_texop_txf) {
+ args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
+ }
+
+ if (instr->coord_components > 2 &&
+ (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
+ instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms &&
+ instr->op != nir_texop_fragment_fetch && instr->op != nir_texop_fragment_mask_fetch) {
+ args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
+ }
+
+ if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->op != nir_texop_lod) {
+ LLVMValueRef filler;
+ if (instr->op == nir_texop_txf)
+ filler = ctx->ac.i32_0;
+ else
+ filler = LLVMConstReal(ctx->ac.f32, 0.5);
+
+ if (instr->is_array)
+ args.coords[2] = args.coords[1];
+ args.coords[1] = filler;
+ }
+
+ /* Pack sample index */
+ if (sample_index && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_fragment_fetch))
+ args.coords[instr->coord_components] = sample_index;
+
+ if (instr->op == nir_texop_samples_identical) {
+ struct ac_image_args txf_args = {0};
+ memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
+
+ txf_args.dmask = 0xf;
+ txf_args.resource = fmask_ptr;
+ txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
+ result = build_tex_intrinsic(ctx, instr, &txf_args);
+
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
+ goto write_result;
+ }
+
+ if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
+ instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch &&
+ instr->op != nir_texop_fragment_mask_fetch) {
+ unsigned sample_chan = instr->is_array ? 3 : 2;
+ args.coords[sample_chan] = adjust_sample_index_using_fmask(
+ &ctx->ac, args.coords[0], args.coords[1], instr->is_array ? args.coords[2] : NULL,
+ args.coords[sample_chan], fmask_ptr);
+ }
+
+ if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
+ int num_offsets = instr->src[offset_src].src.ssa->num_components;
+ num_offsets = MIN2(num_offsets, instr->coord_components);
+ for (unsigned i = 0; i < num_offsets; ++i) {
+ args.coords[i] = LLVMBuildAdd(
+ ctx->ac.builder, args.coords[i],
+ LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false),
+ "");
+ }
+ args.offset = NULL;
+ }
+
+ /* DMASK was repurposed for GATHER4. 4 components are always
+ * returned and DMASK works like a swizzle - it selects
+ * the component to fetch. The only valid DMASK values are
+ * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ * (red,red,red,red) etc.) The ISA document doesn't mention
+ * this.
+ */
+ args.dmask = 0xf;
+ if (instr->op == nir_texop_tg4) {
+ if (instr->is_shadow)
+ args.dmask = 1;
+ else
+ args.dmask = 1 << instr->component;
+ }
+
+ if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
+ args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
+ args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
+ }
+
+ /* Adjust the number of coordinates because we only need (x,y) for 2D
+ * multisampled images and (x,y,layer) for 2D multisampled layered
+ * images or for multisampled input attachments.
+ */
+ if (instr->op == nir_texop_fragment_mask_fetch) {
+ if (args.dim == ac_image_2dmsaa) {
+ args.dim = ac_image_2d;
+ } else {
+ assert(args.dim == ac_image_2darraymsaa);
+ args.dim = ac_image_2darray;
+ }
+ }
+
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
+ result = build_tex_intrinsic(ctx, instr, &args);
+
+ if (instr->op == nir_texop_query_levels)
+ result =
+ LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
+ else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&
+ instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+ instr->is_array) {
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
+ } else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, ctx->ac.i32_1, "");
+ } else if (instr->dest.ssa.num_components != 4)
+ result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
write_result:
- if (result) {
- assert(instr->dest.is_ssa);
- result = ac_to_integer(&ctx->ac, result);
+ if (result) {
+ assert(instr->dest.is_ssa);
+ result = ac_to_integer(&ctx->ac, result);
- for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
- result = exit_waterfall(ctx, wctx + i, result);
- }
+ for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
+ result = exit_waterfall(ctx, wctx + i, result);
+ }
- ctx->ssa_defs[instr->dest.ssa.index] = result;
- }
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ }
}
/* Create the LLVM phi for a NIR phi instruction.
 *
 * The phi is built with the type returned by get_def_type() and no incoming
 * values. It is stored as the SSA definition and also inserted into
 * ctx->phis (keyed by the NIR instruction), which phi_post_pass() walks to
 * add the incoming values.
 */
static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
{
- LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
- LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
+ LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
- ctx->ssa_defs[instr->dest.ssa.index] = result;
- _mesa_hash_table_insert(ctx->phis, instr, result);
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ _mesa_hash_table_insert(ctx->phis, instr, result);
}
/* Fill in the incoming edges of an already created LLVM phi.
 *
 * For every source of the NIR phi, the predecessor block is looked up with
 * get_block() and the source value with get_src(); the pair is then added to
 * llvm_phi one at a time with LLVMAddIncoming().
 */
-static void visit_post_phi(struct ac_nir_context *ctx,
- nir_phi_instr *instr,
- LLVMValueRef llvm_phi)
+static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi)
{
- nir_foreach_phi_src(src, instr) {
- LLVMBasicBlockRef block = get_block(ctx, src->pred);
- LLVMValueRef llvm_src = get_src(ctx, src->src);
+ nir_foreach_phi_src (src, instr) {
+ LLVMBasicBlockRef block = get_block(ctx, src->pred);
+ LLVMValueRef llvm_src = get_src(ctx, src->src);
- LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
- }
+ LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
+ }
}
/* Walk every entry of ctx->phis and call visit_post_phi() on it.
 *
 * Each entry's key is the NIR phi instruction and its data is the LLVM phi
 * value that visit_phi() inserted.
 */
static void phi_post_pass(struct ac_nir_context *ctx)
{
- hash_table_foreach(ctx->phis, entry) {
- visit_post_phi(ctx, (nir_phi_instr*)entry->key,
- (LLVMValueRef)entry->data);
- }
+ hash_table_foreach(ctx->phis, entry)
+ {
+ visit_post_phi(ctx, (nir_phi_instr *)entry->key, (LLVMValueRef)entry->data);
+ }
}
-
/* Return true if `def` reaches a store_deref intrinsic.
 *
 * A use counts when its parent instruction is a store_deref intrinsic, or
 * when it is a vec4 ALU instruction whose own destination (checked
 * recursively) reaches one. Any other kind of use is ignored; false is
 * returned when no use qualifies.
 */
-static bool is_def_used_in_an_export(const nir_ssa_def* def) {
- nir_foreach_use(use_src, def) {
- if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
- nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
- if (instr->intrinsic == nir_intrinsic_store_deref)
- return true;
- } else if (use_src->parent_instr->type == nir_instr_type_alu) {
- nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
- if (instr->op == nir_op_vec4 &&
- is_def_used_in_an_export(&instr->dest.dest.ssa)) {
- return true;
- }
- }
- }
- return false;
+static bool is_def_used_in_an_export(const nir_ssa_def *def)
+{
+ nir_foreach_use (use_src, def) {
+ if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
+ if (instr->intrinsic == nir_intrinsic_store_deref)
+ return true;
+ } else if (use_src->parent_instr->type == nir_instr_type_alu) {
+ nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
+ if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
+ return true;
+ }
+ }
+ }
+ return false;
}
/* Define the LLVM value of a NIR undef instruction.
 *
 * The element type is an integer of instr->def.bit_size bits. If the ABI does
 * not ask for zeroing (convert_undef_to_zero is unset) or the definition is
 * used in an export (is_def_used_in_an_export), an LLVM undef scalar or
 * vector is stored in ctx->ssa_defs. Otherwise a zero constant of the same
 * component count is stored.
 */
-static void visit_ssa_undef(struct ac_nir_context *ctx,
- const nir_ssa_undef_instr *instr)
+static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)
{
- unsigned num_components = instr->def.num_components;
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
-
- if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
- LLVMValueRef undef;
-
- if (num_components == 1)
- undef = LLVMGetUndef(type);
- else {
- undef = LLVMGetUndef(LLVMVectorType(type, num_components));
- }
- ctx->ssa_defs[instr->def.index] = undef;
- } else {
- LLVMValueRef zero = LLVMConstInt(type, 0, false);
- if (num_components > 1) {
- zero = ac_build_gather_values_extended(
- &ctx->ac, &zero, 4, 0, false, false);
- }
- ctx->ssa_defs[instr->def.index] = zero;
- }
+ unsigned num_components = instr->def.num_components;
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
+
+ if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else {
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ /* Replicate the single zero (stride 0) num_components times so the
+ * vector has the same width as the NIR definition; a fixed count of
+ * 4 would give vec2/vec3 and wider undefs the wrong size.
+ */
+ zero = ac_build_gather_values_extended(&ctx->ac, &zero, num_components, 0, false, false);
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
+ }
}
/* Emit the control flow for a NIR jump instruction.
 *
 * nir_jump_break maps to ac_build_break() and nir_jump_continue to
 * ac_build_continue(). Any other jump type is printed to stderr with
 * nir_print_instr() and the process is aborted.
 */
-static void visit_jump(struct ac_llvm_context *ctx,
- const nir_jump_instr *instr)
+static void visit_jump(struct ac_llvm_context *ctx, const nir_jump_instr *instr)
{
- switch (instr->type) {
- case nir_jump_break:
- ac_build_break(ctx);
- break;
- case nir_jump_continue:
- ac_build_continue(ctx);
- break;
- default:
- fprintf(stderr, "Unknown NIR jump instr: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- abort();
- }
+ switch (instr->type) {
+ case nir_jump_break:
+ ac_build_break(ctx);
+ break;
+ case nir_jump_continue:
+ ac_build_continue(ctx);
+ break;
+ default:
+ fprintf(stderr, "Unknown NIR jump instr: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
}
/* Map a GLSL base type to the matching scalar LLVM type held in `ac`.
 *
 * INT, UINT, BOOL and SUBROUTINE all map to i32; the 8-, 16- and 64-bit
 * integer types map to i8, i16 and i64; FLOAT16, FLOAT and DOUBLE map to
 * f16, f32 and f64. Any other base type reaches unreachable().
 */
-static LLVMTypeRef
-glsl_base_to_llvm_type(struct ac_llvm_context *ac,
- enum glsl_base_type type)
+static LLVMTypeRef glsl_base_to_llvm_type(struct ac_llvm_context *ac, enum glsl_base_type type)
{
- switch (type) {
- case GLSL_TYPE_INT:
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_BOOL:
- case GLSL_TYPE_SUBROUTINE:
- return ac->i32;
- case GLSL_TYPE_INT8:
- case GLSL_TYPE_UINT8:
- return ac->i8;
- case GLSL_TYPE_INT16:
- case GLSL_TYPE_UINT16:
- return ac->i16;
- case GLSL_TYPE_FLOAT:
- return ac->f32;
- case GLSL_TYPE_FLOAT16:
- return ac->f16;
- case GLSL_TYPE_INT64:
- case GLSL_TYPE_UINT64:
- return ac->i64;
- case GLSL_TYPE_DOUBLE:
- return ac->f64;
- default:
- unreachable("unknown GLSL type");
- }
+ switch (type) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_SUBROUTINE:
+ return ac->i32;
+ case GLSL_TYPE_INT8:
+ case GLSL_TYPE_UINT8:
+ return ac->i8;
+ case GLSL_TYPE_INT16:
+ case GLSL_TYPE_UINT16:
+ return ac->i16;
+ case GLSL_TYPE_FLOAT:
+ return ac->f32;
+ case GLSL_TYPE_FLOAT16:
+ return ac->f16;
+ case GLSL_TYPE_INT64:
+ case GLSL_TYPE_UINT64:
+ return ac->i64;
+ case GLSL_TYPE_DOUBLE:
+ return ac->f64;
+ default:
+ unreachable("unknown GLSL type");
+ }
}
/* Recursively convert a GLSL type into an LLVM type.
 *
 *  - scalar: the base type from glsl_base_to_llvm_type()
 *  - vector: an LLVM vector of the base type
 *  - matrix: an LLVM array of the column type, one element per column
 *  - array:  an LLVM array of the converted element type
 *  - struct or interface (asserted): an LLVM struct whose members are the
 *    converted field types, created with the last ("packed") argument false
 *
 * The struct case collects the member types in a variable-length array
 * sized by glsl_get_length(type).
 */
-static LLVMTypeRef
-glsl_to_llvm_type(struct ac_llvm_context *ac,
- const struct glsl_type *type)
+static LLVMTypeRef glsl_to_llvm_type(struct ac_llvm_context *ac, const struct glsl_type *type)
{
- if (glsl_type_is_scalar(type)) {
- return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
- }
-
- if (glsl_type_is_vector(type)) {
- return LLVMVectorType(
- glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
- glsl_get_vector_elements(type));
- }
-
- if (glsl_type_is_matrix(type)) {
- return LLVMArrayType(
- glsl_to_llvm_type(ac, glsl_get_column_type(type)),
- glsl_get_matrix_columns(type));
- }
-
- if (glsl_type_is_array(type)) {
- return LLVMArrayType(
- glsl_to_llvm_type(ac, glsl_get_array_element(type)),
- glsl_get_length(type));
- }
-
- assert(glsl_type_is_struct_or_ifc(type));
-
- LLVMTypeRef member_types[glsl_get_length(type)];
-
- for (unsigned i = 0; i < glsl_get_length(type); i++) {
- member_types[i] =
- glsl_to_llvm_type(ac,
- glsl_get_struct_field(type, i));
- }
-
- return LLVMStructTypeInContext(ac->context, member_types,
- glsl_get_length(type), false);
+ if (glsl_type_is_scalar(type)) {
+ return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
+ }
+
+ if (glsl_type_is_vector(type)) {
+ return LLVMVectorType(glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
+ glsl_get_vector_elements(type));
+ }
+
+ if (glsl_type_is_matrix(type)) {
+ return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_column_type(type)),
+ glsl_get_matrix_columns(type));
+ }
+
+ if (glsl_type_is_array(type)) {
+ return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_array_element(type)),
+ glsl_get_length(type));
+ }
+
+ assert(glsl_type_is_struct_or_ifc(type));
+
+ LLVMTypeRef member_types[glsl_get_length(type)];
+
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ member_types[i] = glsl_to_llvm_type(ac, glsl_get_struct_field(type, i));
+ }
+
+ return LLVMStructTypeInContext(ac->context, member_types, glsl_get_length(type), false);
}
-static void visit_deref(struct ac_nir_context *ctx,
- nir_deref_instr *instr)
+static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr)
{
- if (instr->mode != nir_var_mem_shared &&
- instr->mode != nir_var_mem_global)
- return;
-
- LLVMValueRef result = NULL;
- switch(instr->deref_type) {
- case nir_deref_type_var: {
- struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
- result = entry->data;
- break;
- }
- case nir_deref_type_struct:
- if (instr->mode == nir_var_mem_global) {
- nir_deref_instr *parent = nir_deref_instr_parent(instr);
- uint64_t offset = glsl_get_struct_field_offset(parent->type,
- instr->strct.index);
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
- LLVMConstInt(ctx->ac.i32, offset, 0));
- } else {
- result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
- LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
- }
- break;
- case nir_deref_type_array:
- if (instr->mode == nir_var_mem_global) {
- nir_deref_instr *parent = nir_deref_instr_parent(instr);
- unsigned stride = glsl_get_explicit_stride(parent->type);
-
- if ((glsl_type_is_matrix(parent->type) &&
- glsl_matrix_type_is_row_major(parent->type)) ||
- (glsl_type_is_vector(parent->type) && stride == 0))
- stride = type_scalar_size_bytes(parent->type);
-
- assert(stride > 0);
- LLVMValueRef index = get_src(ctx, instr->arr.index);
- if (LLVMTypeOf(index) != ctx->ac.i64)
- index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
-
- LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
-
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
- } else {
- result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
- get_src(ctx, instr->arr.index));
- }
- break;
- case nir_deref_type_ptr_as_array:
- if (instr->mode == nir_var_mem_global) {
- unsigned stride = nir_deref_instr_array_stride(instr);
-
- LLVMValueRef index = get_src(ctx, instr->arr.index);
- if (LLVMTypeOf(index) != ctx->ac.i64)
- index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
-
- LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
-
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
- } else {
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
- get_src(ctx, instr->arr.index));
- }
- break;
- case nir_deref_type_cast: {
- result = get_src(ctx, instr->parent);
-
- /* We can't use the structs from LLVM because the shader
- * specifies its own offsets. */
- LLVMTypeRef pointee_type = ctx->ac.i8;
- if (instr->mode == nir_var_mem_shared)
- pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
-
- unsigned address_space;
-
- switch(instr->mode) {
- case nir_var_mem_shared:
- address_space = AC_ADDR_SPACE_LDS;
- break;
- case nir_var_mem_global:
- address_space = AC_ADDR_SPACE_GLOBAL;
- break;
- default:
- unreachable("Unhandled address space");
- }
-
- LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
-
- if (LLVMTypeOf(result) != type) {
- if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
- result = LLVMBuildBitCast(ctx->ac.builder, result,
- type, "");
- } else {
- result = LLVMBuildIntToPtr(ctx->ac.builder, result,
- type, "");
- }
- }
- break;
- }
- default:
- unreachable("Unhandled deref_instr deref type");
- }
-
- ctx->ssa_defs[instr->dest.ssa.index] = result;
+ /* Build the LLVM pointer for a NIR deref instruction and record it in
+  * ctx->ssa_defs. Only shared and global memory derefs are handled here;
+  * every other mode returns without producing a value. */
+ if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global)
+ return;
+
+ LLVMValueRef result = NULL;
+ switch (instr->deref_type) {
+ case nir_deref_type_var: {
+ /* NOTE(review): entry is dereferenced without a NULL check; this assumes
+  * the variable was inserted into ctx->vars earlier - confirm. */
+ struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
+ result = entry->data;
+ break;
+ }
+ case nir_deref_type_struct:
+ if (instr->mode == nir_var_mem_global) {
+ /* Global memory: step by the field offset reported by the GLSL type. */
+ nir_deref_instr *parent = nir_deref_instr_parent(instr);
+ uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
+ LLVMConstInt(ctx->ac.i32, offset, 0));
+ } else {
+ /* Shared memory: index the LLVM struct by field index. */
+ result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
+ LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
+ }
+ break;
+ case nir_deref_type_array:
+ if (instr->mode == nir_var_mem_global) {
+ nir_deref_instr *parent = nir_deref_instr_parent(instr);
+ unsigned stride = glsl_get_explicit_stride(parent->type);
+
+ /* Row-major matrices, and vectors without an explicit stride, are
+  * stepped by the size of one scalar component. */
+ if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
+ (glsl_type_is_vector(parent->type) && stride == 0))
+ stride = type_scalar_size_bytes(parent->type);
+
+ assert(stride > 0);
+ /* Widen the index to 64 bits before scaling it by the stride. */
+ LLVMValueRef index = get_src(ctx, instr->arr.index);
+ if (LLVMTypeOf(index) != ctx->ac.i64)
+ index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
+
+ LLVMValueRef offset =
+ LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
+
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
+ } else {
+ result =
+ ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
+ }
+ break;
+ case nir_deref_type_ptr_as_array:
+ if (instr->mode == nir_var_mem_global) {
+ /* Same index * stride computation as the array case, with the stride
+  * taken from the deref instruction itself. */
+ unsigned stride = nir_deref_instr_array_stride(instr);
+
+ LLVMValueRef index = get_src(ctx, instr->arr.index);
+ if (LLVMTypeOf(index) != ctx->ac.i64)
+ index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
+
+ LLVMValueRef offset =
+ LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
+
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
+ } else {
+ result =
+ ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
+ }
+ break;
+ case nir_deref_type_cast: {
+ result = get_src(ctx, instr->parent);
+
+ /* We can't use the structs from LLVM because the shader
+ * specifies its own offsets. */
+ LLVMTypeRef pointee_type = ctx->ac.i8;
+ if (instr->mode == nir_var_mem_shared)
+ pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
+
+ unsigned address_space;
+
+ switch (instr->mode) {
+ case nir_var_mem_shared:
+ address_space = AC_ADDR_SPACE_LDS;
+ break;
+ case nir_var_mem_global:
+ address_space = AC_ADDR_SPACE_GLOBAL;
+ break;
+ default:
+ unreachable("Unhandled address space");
+ }
+
+ LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
+
+ /* Convert only when the source is not already of the target pointer
+  * type: vector-typed sources are bitcast, everything else goes through
+  * inttoptr. */
+ if (LLVMTypeOf(result) != type) {
+ if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
+ result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
+ } else {
+ result = LLVMBuildIntToPtr(ctx->ac.builder, result, type, "");
+ }
+ }
+ break;
+ }
+ default:
+ unreachable("Unhandled deref_instr deref type");
+ }
+
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
}
-static void visit_cf_list(struct ac_nir_context *ctx,
- struct exec_list *list);
+static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list);
static void visit_block(struct ac_nir_context *ctx, nir_block *block)
{
- nir_foreach_instr(instr, block)
- {
- switch (instr->type) {
- case nir_instr_type_alu:
- visit_alu(ctx, nir_instr_as_alu(instr));
- break;
- case nir_instr_type_load_const:
- visit_load_const(ctx, nir_instr_as_load_const(instr));
- break;
- case nir_instr_type_intrinsic:
- visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
- break;
- case nir_instr_type_tex:
- visit_tex(ctx, nir_instr_as_tex(instr));
- break;
- case nir_instr_type_phi:
- visit_phi(ctx, nir_instr_as_phi(instr));
- break;
- case nir_instr_type_ssa_undef:
- visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
- break;
- case nir_instr_type_jump:
- visit_jump(&ctx->ac, nir_instr_as_jump(instr));
- break;
- case nir_instr_type_deref:
- visit_deref(ctx, nir_instr_as_deref(instr));
- break;
- default:
- fprintf(stderr, "Unknown NIR instr type: ");
- nir_print_instr(instr, stderr);
- fprintf(stderr, "\n");
- abort();
- }
- }
-
- _mesa_hash_table_insert(ctx->defs, block,
- LLVMGetInsertBlock(ctx->ac.builder));
+ /* Translate every instruction of the block by dispatching on its type.
+  * An unrecognised instruction type is printed to stderr and aborts. */
+ nir_foreach_instr (instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ visit_alu(ctx, nir_instr_as_alu(instr));
+ break;
+ case nir_instr_type_load_const:
+ visit_load_const(ctx, nir_instr_as_load_const(instr));
+ break;
+ case nir_instr_type_intrinsic:
+ visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_tex:
+ visit_tex(ctx, nir_instr_as_tex(instr));
+ break;
+ case nir_instr_type_phi:
+ visit_phi(ctx, nir_instr_as_phi(instr));
+ break;
+ case nir_instr_type_ssa_undef:
+ visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
+ break;
+ case nir_instr_type_jump:
+ visit_jump(&ctx->ac, nir_instr_as_jump(instr));
+ break;
+ case nir_instr_type_deref:
+ visit_deref(ctx, nir_instr_as_deref(instr));
+ break;
+ default:
+ fprintf(stderr, "Unknown NIR instr type: ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+ }
+
+ /* Record which LLVM basic block the builder is positioned in once this NIR
+  * block has been translated (presumably consumed by the phi post pass -
+  * confirm). */
+ _mesa_hash_table_insert(ctx->defs, block, LLVMGetInsertBlock(ctx->ac.builder));
}
static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
{
- LLVMValueRef value = get_src(ctx, if_stmt->condition);
+ /* Emit an if / else / endif construct for a NIR if statement. */
+ LLVMValueRef value = get_src(ctx, if_stmt->condition);
- nir_block *then_block =
- (nir_block *) exec_list_get_head(&if_stmt->then_list);
+ nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);
- ac_build_uif(&ctx->ac, value, then_block->index);
+ /* The index of the first then-block is passed to both ac_build_uif and
+  * ac_build_endif (presumably as the label id of the construct - confirm). */
+ ac_build_uif(&ctx->ac, value, then_block->index);
- visit_cf_list(ctx, &if_stmt->then_list);
+ visit_cf_list(ctx, &if_stmt->then_list);
- if (!exec_list_is_empty(&if_stmt->else_list)) {
- nir_block *else_block =
- (nir_block *) exec_list_get_head(&if_stmt->else_list);
+ /* The else branch is only emitted when the else list is not empty. */
+ if (!exec_list_is_empty(&if_stmt->else_list)) {
+ nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);
- ac_build_else(&ctx->ac, else_block->index);
- visit_cf_list(ctx, &if_stmt->else_list);
- }
+ ac_build_else(&ctx->ac, else_block->index);
+ visit_cf_list(ctx, &if_stmt->else_list);
+ }
- ac_build_endif(&ctx->ac, then_block->index);
+ ac_build_endif(&ctx->ac, then_block->index);
}
static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
{
- nir_block *first_loop_block =
- (nir_block *) exec_list_get_head(&loop->body);
+ /* Emit a bgnloop / endloop pair around the translated loop body; both calls
+  * receive the index of the first block of the body. */
+ nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body);
- ac_build_bgnloop(&ctx->ac, first_loop_block->index);
+ ac_build_bgnloop(&ctx->ac, first_loop_block->index);
- visit_cf_list(ctx, &loop->body);
+ visit_cf_list(ctx, &loop->body);
- ac_build_endloop(&ctx->ac, first_loop_block->index);
+ ac_build_endloop(&ctx->ac, first_loop_block->index);
}
-static void visit_cf_list(struct ac_nir_context *ctx,
- struct exec_list *list)
+static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list)
{
- foreach_list_typed(nir_cf_node, node, node, list)
- {
- switch (node->type) {
- case nir_cf_node_block:
- visit_block(ctx, nir_cf_node_as_block(node));
- break;
-
- case nir_cf_node_if:
- visit_if(ctx, nir_cf_node_as_if(node));
- break;
-
- case nir_cf_node_loop:
- visit_loop(ctx, nir_cf_node_as_loop(node));
- break;
-
- default:
- assert(0);
- }
- }
+ /* Walk a list of control-flow nodes in order and translate each one with
+  * the visitor that matches its kind (block, if or loop). */
+ foreach_list_typed(nir_cf_node, node, node, list)
+ {
+ switch (node->type) {
+ case nir_cf_node_block:
+ visit_block(ctx, nir_cf_node_as_block(node));
+ break;
+
+ case nir_cf_node_if:
+ visit_if(ctx, nir_cf_node_as_if(node));
+ break;
+
+ case nir_cf_node_loop:
+ visit_loop(ctx, nir_cf_node_as_loop(node));
+ break;
+
+ default:
+ /* Any other node kind is treated as a programming error. */
+ assert(0);
+ }
+ }
}
-void
-ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
- struct ac_shader_abi *abi,
- struct nir_shader *nir,
- struct nir_variable *variable,
- gl_shader_stage stage)
+void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
+ struct nir_shader *nir, struct nir_variable *variable,
+ gl_shader_stage stage)
{
- unsigned output_loc = variable->data.driver_location / 4;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
-
- /* tess ctrl has it's own load/store paths for outputs */
- if (stage == MESA_SHADER_TESS_CTRL)
- return;
-
- if (stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_TESS_EVAL ||
- stage == MESA_SHADER_GEOMETRY) {
- int idx = variable->data.location + variable->data.index;
- if (idx == VARYING_SLOT_CLIP_DIST0) {
- int length = nir->info.clip_distance_array_size +
- nir->info.cull_distance_array_size;
-
- if (length > 4)
- attrib_count = 2;
- else
- attrib_count = 1;
- }
- }
-
- bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
- LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
- for (unsigned i = 0; i < attrib_count; ++i) {
- for (unsigned chan = 0; chan < 4; chan++) {
- abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
- ac_build_alloca_undef(ctx, type, "");
- }
- }
+ /* Create one alloca per channel (4 per attribute slot) for a shader output
+  * variable and store them in abi->outputs. */
+ unsigned output_loc = variable->data.driver_location / 4;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+
+ /* tess ctrl has its own load/store paths for outputs */
+ if (stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY) {
+ int idx = variable->data.location + variable->data.index;
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ /* Clip and cull distances are counted together: more than four
+  * values take two slots, otherwise one. */
+ int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;
+
+ if (length > 4)
+ attrib_count = 2;
+ else
+ attrib_count = 1;
+ }
+ }
+
+ /* 16-bit variables get f16 allocas, everything else f32. */
+ bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
+ LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ for (unsigned chan = 0; chan < 4; chan++) {
+ abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
+ ac_build_alloca_undef(ctx, type, "");
+ }
+ }
}
-static void
-setup_locals(struct ac_nir_context *ctx,
- struct nir_function *func)
+static void setup_locals(struct ac_nir_context *ctx, struct nir_function *func)
{
- int i, j;
- ctx->num_locals = 0;
- nir_foreach_function_temp_variable(variable, func->impl) {
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
- variable->data.driver_location = ctx->num_locals * 4;
- variable->data.location_frac = 0;
- ctx->num_locals += attrib_count;
- }
- ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
- if (!ctx->locals)
- return;
-
- for (i = 0; i < ctx->num_locals; i++) {
- for (j = 0; j < 4; j++) {
- ctx->locals[i * 4 + j] =
- ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
- }
- }
+ /* Assign a driver location to every function-temp variable and allocate
+  * four f32 allocas (one per channel) for each attribute slot they occupy. */
+ int i, j;
+ ctx->num_locals = 0;
+ nir_foreach_function_temp_variable(variable, func->impl)
+ {
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ /* driver_location is expressed in channels: slot index * 4. */
+ variable->data.driver_location = ctx->num_locals * 4;
+ variable->data.location_frac = 0;
+ ctx->num_locals += attrib_count;
+ }
+ ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
+ /* On allocation failure ctx->locals stays NULL and no allocas are built. */
+ if (!ctx->locals)
+ return;
+
+ for (i = 0; i < ctx->num_locals; i++) {
+ for (j = 0; j < 4; j++) {
+ ctx->locals[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
+ }
+ }
}
-static void
-setup_scratch(struct ac_nir_context *ctx,
- struct nir_shader *shader)
+static void setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader)
{
- if (shader->scratch_size == 0)
- return;
+ /* Nothing to allocate when the shader uses no scratch space. */
+ if (shader->scratch_size == 0)
+ return;
- ctx->scratch = ac_build_alloca_undef(&ctx->ac,
- LLVMArrayType(ctx->ac.i8, shader->scratch_size),
- "scratch");
+ /* Scratch is an alloca of scratch_size i8 elements named "scratch". */
+ ctx->scratch =
+ ac_build_alloca_undef(&ctx->ac, LLVMArrayType(ctx->ac.i8, shader->scratch_size), "scratch");
}
-static void
-setup_constant_data(struct ac_nir_context *ctx,
- struct nir_shader *shader)
+static void setup_constant_data(struct ac_nir_context *ctx, struct nir_shader *shader)
{
- if (!shader->constant_data)
- return;
-
- LLVMValueRef data =
- LLVMConstStringInContext(ctx->ac.context,
- shader->constant_data,
- shader->constant_data_size,
- true);
- LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
-
- /* We want to put the constant data in the CONST address space so that
- * we can use scalar loads. However, LLVM versions before 10 put these
- * variables in the same section as the code, which is unacceptable
- * for RadeonSI as it needs to relocate all the data sections after
- * the code sections. See https://reviews.llvm.org/D65813.
- */
- unsigned address_space =
- LLVM_VERSION_MAJOR < 10 ? AC_ADDR_SPACE_GLOBAL : AC_ADDR_SPACE_CONST;
-
- LLVMValueRef global =
- LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
- "const_data",
- address_space);
-
- LLVMSetInitializer(global, data);
- LLVMSetGlobalConstant(global, true);
- LLVMSetVisibility(global, LLVMHiddenVisibility);
- ctx->constant_data = global;
+ /* Emit the shader's constant data blob as a hidden, constant LLVM global
+  * named "const_data" and remember it in ctx->constant_data. */
+ if (!shader->constant_data)
+ return;
+
+ /* The last argument (true) asks LLVM not to append a NUL terminator. */
+ LLVMValueRef data = LLVMConstStringInContext(ctx->ac.context, shader->constant_data,
+ shader->constant_data_size, true);
+ LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
+
+ /* We want to put the constant data in the CONST address space so that
+ * we can use scalar loads. However, LLVM versions before 10 put these
+ * variables in the same section as the code, which is unacceptable
+ * for RadeonSI as it needs to relocate all the data sections after
+ * the code sections. See https://reviews.llvm.org/D65813.
+ */
+ unsigned address_space = LLVM_VERSION_MAJOR < 10 ? AC_ADDR_SPACE_GLOBAL : AC_ADDR_SPACE_CONST;
+
+ LLVMValueRef global =
+ LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "const_data", address_space);
+
+ LLVMSetInitializer(global, data);
+ LLVMSetGlobalConstant(global, true);
+ LLVMSetVisibility(global, LLVMHiddenVisibility);
+ ctx->constant_data = global;
}
-static void
-setup_shared(struct ac_nir_context *ctx,
- struct nir_shader *nir)
+static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
{
- if (ctx->ac.lds)
- return;
+ /* Keep an LDS pointer that was already set up before this call. */
+ if (ctx->ac.lds)
+ return;
- LLVMTypeRef type = LLVMArrayType(ctx->ac.i8,
- nir->info.cs.shared_size);
+ LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.cs.shared_size);
- LLVMValueRef lds =
- LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
- "compute_lds",
- AC_ADDR_SPACE_LDS);
- LLVMSetAlignment(lds, 64 * 1024);
+ /* Declare a byte array of shared_size bytes in the LDS address space,
+  * aligned to 64 KiB. */
+ LLVMValueRef lds =
+ LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);
+ LLVMSetAlignment(lds, 64 * 1024);
- ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, lds,
- LLVMPointerType(ctx->ac.i8,
- AC_ADDR_SPACE_LDS), "");
+ /* ctx->ac.lds holds the global viewed as a plain i8 pointer. */
+ ctx->ac.lds =
+ LLVMBuildBitCast(ctx->ac.builder, lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");
}
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
- const struct ac_shader_args *args, struct nir_shader *nir)
+ const struct ac_shader_args *args, struct nir_shader *nir)
{
- struct ac_nir_context ctx = {};
- struct nir_function *func;
-
- ctx.ac = *ac;
- ctx.abi = abi;
- ctx.args = args;
-
- ctx.stage = nir->info.stage;
- ctx.info = &nir->info;
-
- ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
-
- /* TODO: remove this after RADV switches to lowered IO */
- if (!nir->info.io_lowered) {
- nir_foreach_shader_out_variable(variable, nir) {
- ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
- ctx.stage);
- }
- }
-
- ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
-
- if (ctx.abi->kill_ps_if_inf_interp)
- ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
-
- func = (struct nir_function *)exec_list_get_head(&nir->functions);
-
- nir_index_ssa_defs(func->impl);
- ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
-
- setup_locals(&ctx, func);
- setup_scratch(&ctx, nir);
- setup_constant_data(&ctx, nir);
-
- if (gl_shader_stage_is_compute(nir->info.stage))
- setup_shared(&ctx, nir);
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) {
- ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, "");
- /* true = don't kill. */
- LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill);
- }
-
- visit_cf_list(&ctx, &func->impl->body);
- phi_post_pass(&ctx);
-
- if (ctx.ac.postponed_kill)
- ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder,
- ctx.ac.postponed_kill, ""));
-
- if (!gl_shader_stage_is_compute(nir->info.stage))
- ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
- ctx.abi->outputs);
-
- free(ctx.locals);
- free(ctx.ssa_defs);
- ralloc_free(ctx.defs);
- ralloc_free(ctx.phis);
- ralloc_free(ctx.vars);
- if (ctx.abi->kill_ps_if_inf_interp)
- ralloc_free(ctx.verified_interp);
+ /* Translate the first function of a NIR shader into LLVM IR, emitting at
+  * the builder's current insert position. */
+ struct ac_nir_context ctx = {};
+ struct nir_function *func;
+
+ /* The LLVM context struct is copied by value into the NIR context. */
+ ctx.ac = *ac;
+ ctx.abi = abi;
+ ctx.args = args;
+
+ ctx.stage = nir->info.stage;
+ ctx.info = &nir->info;
+
+ ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+
+ /* TODO: remove this after RADV switches to lowered IO */
+ if (!nir->info.io_lowered) {
+ nir_foreach_shader_out_variable(variable, nir)
+ {
+ ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage);
+ }
+ }
+
+ /* Pointer-keyed hash tables used while translating. */
+ ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ctx.verified_interp =
+ _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+ /* Only the head of nir->functions is translated. */
+ func = (struct nir_function *)exec_list_get_head(&nir->functions);
+
+ nir_index_ssa_defs(func->impl);
+ /* One slot per SSA def, indexed by ssa.index.
+  * NOTE(review): the calloc result is not checked - confirm this is intended. */
+ ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
+
+ setup_locals(&ctx, func);
+ setup_scratch(&ctx, nir);
+ setup_constant_data(&ctx, nir);
+
+ if (gl_shader_stage_is_compute(nir->info.stage))
+ setup_shared(&ctx, nir);
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) {
+ ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, "");
+ /* true = don't kill. */
+ LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill);
+ }
+
+ visit_cf_list(&ctx, &func->impl->body);
+ phi_post_pass(&ctx);
+
+ /* Apply the postponed kill once the whole body has been emitted. */
+ if (ctx.ac.postponed_kill)
+ ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, ctx.ac.postponed_kill, ""));
+
+ /* Compute stages have no outputs to emit. */
+ if (!gl_shader_stage_is_compute(nir->info.stage))
+ ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs);
+
+ /* Release everything allocated for this translation. */
+ free(ctx.locals);
+ free(ctx.ssa_defs);
+ ralloc_free(ctx.defs);
+ ralloc_free(ctx.phis);
+ ralloc_free(ctx.vars);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ralloc_free(ctx.verified_interp);
}
-bool
-ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
+bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
{
- bool progress = false;
-
- /* Lower large variables to scratch first so that we won't bloat the
- * shader by generating large if ladders for them. We later lower
- * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
- */
- NIR_PASS(progress, nir, nir_lower_vars_to_scratch,
- nir_var_function_temp,
- 256,
- glsl_get_natural_size_align_bytes);
-
- /* While it would be nice not to have this flag, we are constrained
- * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
- */
- bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
-
- /* TODO: Indirect indexing of GS inputs is unimplemented.
- *
- * TCS and TES load inputs directly from LDS or offchip memory, so
- * indirect indexing is trivial.
- */
- nir_variable_mode indirect_mask = 0;
- if (nir->info.stage == MESA_SHADER_GEOMETRY ||
- (nir->info.stage != MESA_SHADER_TESS_CTRL &&
- nir->info.stage != MESA_SHADER_TESS_EVAL &&
- !llvm_has_working_vgpr_indexing)) {
- indirect_mask |= nir_var_shader_in;
- }
- if (!llvm_has_working_vgpr_indexing &&
- nir->info.stage != MESA_SHADER_TESS_CTRL)
- indirect_mask |= nir_var_shader_out;
-
- /* TODO: We shouldn't need to do this, however LLVM isn't currently
- * smart enough to handle indirects without causing excess spilling
- * causing the gpu to hang.
- *
- * See the following thread for more details of the problem:
- * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
- */
- indirect_mask |= nir_var_function_temp;
-
- progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
- return progress;
+ /* Lower large temporaries to scratch, then lower indirect derefs for the
+  * variable modes collected in indirect_mask. Returns true when either step
+  * reported progress. */
+ bool progress = false;
+
+ /* Lower large variables to scratch first so that we won't bloat the
+ * shader by generating large if ladders for them. We later lower
+ * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+ */
+ NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
+ glsl_get_natural_size_align_bytes);
+
+ /* While it would be nice not to have this flag, we are constrained
+ * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
+ */
+ bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
+
+ /* TODO: Indirect indexing of GS inputs is unimplemented.
+ *
+ * TCS and TES load inputs directly from LDS or offchip memory, so
+ * indirect indexing is trivial.
+ */
+ nir_variable_mode indirect_mask = 0;
+ if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+ (nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL &&
+ !llvm_has_working_vgpr_indexing)) {
+ indirect_mask |= nir_var_shader_in;
+ }
+ /* Outputs are lowered for every stage except TCS when VGPR indexing is
+  * not usable. */
+ if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL)
+ indirect_mask |= nir_var_shader_out;
+
+ /* TODO: We shouldn't need to do this, however LLVM isn't currently
+ * smart enough to handle indirects without causing excess spilling
+ * causing the gpu to hang.
+ *
+ * See the following thread for more details of the problem:
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+ */
+ indirect_mask |= nir_var_function_temp;
+
+ progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
+ return progress;
}
-static unsigned
-get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
+static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
- if (intrin->intrinsic != nir_intrinsic_store_output)
- return 0;
+ /* Only store_output intrinsics can write tess factors. */
+ if (intrin->intrinsic != nir_intrinsic_store_output)
+ return 0;
- unsigned writemask = nir_intrinsic_write_mask(intrin) <<
- nir_intrinsic_component(intrin);
- unsigned location = nir_intrinsic_io_semantics(intrin).location;
+ /* Shift the write mask by the first written component. */
+ unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);
+ unsigned location = nir_intrinsic_io_semantics(intrin).location;
- if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
- return writemask << 4;
- else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
- return writemask;
+ /* Outer levels are reported shifted left by 4 bits, inner levels are
+  * reported unshifted. */
+ if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ return writemask << 4;
+ else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+ return writemask;
- return 0;
+ /* Stores to any other location do not touch tess factors. */
+ return 0;
}
-static void
-scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
- unsigned *cond_block_tf_writemask,
- bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
+static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
+ unsigned *cond_block_tf_writemask,
+ bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
{
- switch (cf_node->type) {
- case nir_cf_node_block: {
- nir_block *block = nir_cf_node_as_block(cf_node);
- nir_foreach_instr(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (intrin->intrinsic == nir_intrinsic_control_barrier) {
-
- /* If we find a barrier in nested control flow put this in the
- * too hard basket. In GLSL this is not possible but it is in
- * SPIR-V.
- */
- if (is_nested_cf) {
- *tessfactors_are_def_in_all_invocs = false;
- return;
- }
-
- /* The following case must be prevented:
- * gl_TessLevelInner = ...;
- * barrier();
- * if (gl_InvocationID == 1)
- * gl_TessLevelInner = ...;
- *
- * If you consider disjoint code segments separated by barriers, each
- * such segment that writes tess factor channels should write the same
- * channels in all codepaths within that segment.
- */
- if (upper_block_tf_writemask || cond_block_tf_writemask) {
- /* Accumulate the result: */
- *tessfactors_are_def_in_all_invocs &=
- !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
-
- /* Analyze the next code segment from scratch. */
- *upper_block_tf_writemask = 0;
- *cond_block_tf_writemask = 0;
- }
- } else
- *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
- }
-
- break;
- }
- case nir_cf_node_if: {
- unsigned then_tessfactor_writemask = 0;
- unsigned else_tessfactor_writemask = 0;
-
- nir_if *if_stmt = nir_cf_node_as_if(cf_node);
- foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) {
- scan_tess_ctrl(nested_node, &then_tessfactor_writemask,
- cond_block_tf_writemask,
- tessfactors_are_def_in_all_invocs, true);
- }
-
- foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) {
- scan_tess_ctrl(nested_node, &else_tessfactor_writemask,
- cond_block_tf_writemask,
- tessfactors_are_def_in_all_invocs, true);
- }
-
- if (then_tessfactor_writemask || else_tessfactor_writemask) {
- /* If both statements write the same tess factor channels,
- * we can say that the upper block writes them too.
- */
- *upper_block_tf_writemask |= then_tessfactor_writemask &
- else_tessfactor_writemask;
- *cond_block_tf_writemask |= then_tessfactor_writemask |
- else_tessfactor_writemask;
- }
-
- break;
- }
- case nir_cf_node_loop: {
- nir_loop *loop = nir_cf_node_as_loop(cf_node);
- foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
- scan_tess_ctrl(nested_node, cond_block_tf_writemask,
- cond_block_tf_writemask,
- tessfactors_are_def_in_all_invocs, true);
- }
-
- break;
- }
- default:
- unreachable("unknown cf node type");
- }
+ /* Recursively scan one control-flow node and accumulate tess factor write
+  * masks: *upper_block_tf_writemask collects writes made directly in blocks
+  * at this level, *cond_block_tf_writemask collects writes found under
+  * if statements and loops. */
+ switch (cf_node->type) {
+ case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(cf_node);
+ nir_foreach_instr (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic == nir_intrinsic_control_barrier) {
+
+ /* If we find a barrier in nested control flow put this in the
+ * too hard basket. In GLSL this is not possible but it is in
+ * SPIR-V.
+ */
+ if (is_nested_cf) {
+ *tessfactors_are_def_in_all_invocs = false;
+ return;
+ }
+
+ /* The following case must be prevented:
+ * gl_TessLevelInner = ...;
+ * barrier();
+ * if (gl_InvocationID == 1)
+ * gl_TessLevelInner = ...;
+ *
+ * If you consider disjoint code segments separated by barriers, each
+ * such segment that writes tess factor channels should write the same
+ * channels in all codepaths within that segment.
+ */
+ /* NOTE(review): this tests the two pointers, not the masks they point
+  * to, so it looks always true here - confirm whether the dereferenced
+  * masks were intended. */
+ if (upper_block_tf_writemask || cond_block_tf_writemask) {
+ /* Accumulate the result: */
+ *tessfactors_are_def_in_all_invocs &=
+ !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
+
+ /* Analyze the next code segment from scratch. */
+ *upper_block_tf_writemask = 0;
+ *cond_block_tf_writemask = 0;
+ }
+ } else
+ *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
+ }
+
+ break;
+ }
+ case nir_cf_node_if: {
+ /* Each branch gets its own "upper" mask; the conditional mask is shared. */
+ unsigned then_tessfactor_writemask = 0;
+ unsigned else_tessfactor_writemask = 0;
+
+ nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+ foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
+ {
+ scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
+ tessfactors_are_def_in_all_invocs, true);
+ }
+
+ foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
+ {
+ scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
+ tessfactors_are_def_in_all_invocs, true);
+ }
+
+ if (then_tessfactor_writemask || else_tessfactor_writemask) {
+ /* If both statements write the same tess factor channels,
+ * we can say that the upper block writes them too.
+ */
+ *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
+ *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
+ }
+
+ break;
+ }
+ case nir_cf_node_loop: {
+ /* The conditional mask is passed for both parameters, so every write
+  * inside a loop body is recorded only as a conditional write. */
+ nir_loop *loop = nir_cf_node_as_loop(cf_node);
+ foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
+ {
+ scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
+ tessfactors_are_def_in_all_invocs, true);
+ }
+
+ break;
+ }
+ default:
+ unreachable("unknown cf node type");
+ }
}
-bool
-ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
+bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
{
- assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
-
- /* The pass works as follows:
- * If all codepaths write tess factors, we can say that all
- * invocations define tess factors.
- *
- * Each tess factor channel is tracked separately.
- */
- unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
- unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
-
- /* Initial value = true. Here the pass will accumulate results from
- * multiple segments surrounded by barriers. If tess factors aren't
- * written at all, it's a shader bug and we don't care if this will be
- * true.
- */
- bool tessfactors_are_def_in_all_invocs = true;
-
- nir_foreach_function(function, nir) {
- if (function->impl) {
- foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
- scan_tess_ctrl(node, &main_block_tf_writemask,
- &cond_block_tf_writemask,
- &tessfactors_are_def_in_all_invocs,
- false);
- }
- }
- }
-
- /* Accumulate the result for the last code segment separated by a
- * barrier.
- */
- if (main_block_tf_writemask || cond_block_tf_writemask) {
- tessfactors_are_def_in_all_invocs &=
- !(cond_block_tf_writemask & ~main_block_tf_writemask);
- }
-
- return tessfactors_are_def_in_all_invocs;
+ /* Must only be called on tessellation control shaders. */
+ assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
+
+ /* The pass works as follows:
+ * If all codepaths write tess factors, we can say that all
+ * invocations define tess factors.
+ *
+ * Each tess factor channel is tracked separately.
+ */
+ unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
+ unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
+
+ /* Initial value = true. Here the pass will accumulate results from
+ * multiple segments surrounded by barriers. If tess factors aren't
+ * written at all, it's a shader bug and we don't care if this will be
+ * true.
+ */
+ bool tessfactors_are_def_in_all_invocs = true;
+
+ /* Scan the top-level control-flow nodes of every function that has an
+  * implementation; is_nested_cf is false at this level. */
+ nir_foreach_function (function, nir) {
+ if (function->impl) {
+ foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
+ {
+ scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
+ &tessfactors_are_def_in_all_invocs, false);
+ }
+ }
+ }
+
+ /* Accumulate the result for the last code segment separated by a
+ * barrier.
+ */
+ if (main_block_tf_writemask || cond_block_tf_writemask) {
+ tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
+ }
+
+ return tessfactors_are_def_in_all_invocs;
}
#ifndef AC_NIR_TO_LLVM_H
#define AC_NIR_TO_LLVM_H
-#include <stdbool.h>
-#include "llvm-c/Core.h"
-#include "llvm-c/TargetMachine.h"
#include "amd_family.h"
#include "compiler/shader_enums.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/TargetMachine.h"
+
+#include <stdbool.h>
struct nir_shader;
struct nir_variable;
struct ac_shader_args;
/* Interpolation locations */
-#define INTERP_CENTER 0
+#define INTERP_CENTER 0
#define INTERP_CENTROID 1
-#define INTERP_SAMPLE 2
+#define INTERP_SAMPLE 2
/* Returns index * 4 + chan: the flat position of channel `chan` when every
 * `index` owns four consecutive slots.
 * NOTE(review): callers presumably pass chan in 0..3; nothing here checks it.
 */
static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
{
- return (index * 4) + chan;
+ return (index * 4) + chan;
}
bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
/* Scans a tessellation control shader (the implementation asserts the stage
 * is MESA_SHADER_TESS_CTRL) and returns true when tess factors are written on
 * all code paths, i.e. every invocation defines them. The result is
 * accumulated over the code segments separated by barriers. A shader that
 * never writes tess factors is treated as a shader bug and its result is not
 * meaningful.
 */
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
- const struct ac_shader_args *args, struct nir_shader *nir);
+ const struct ac_shader_args *args, struct nir_shader *nir);
-void
-ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
- struct ac_shader_abi *abi,
- struct nir_shader *nir,
- struct nir_variable *variable,
- gl_shader_stage stage);
+void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
+ struct nir_shader *nir, struct nir_variable *variable,
+ gl_shader_stage stage);
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage);
#ifndef AC_SHADER_ABI_H
#define AC_SHADER_ABI_H
-#include <llvm-c/Core.h>
-#include <assert.h>
#include "ac_shader_args.h"
-
#include "compiler/shader_enums.h"
+#include <llvm-c/Core.h>
+
+#include <assert.h>
struct nir_variable;
#define AC_MAX_INLINE_PUSH_CONSTS 8
/* Kinds of descriptor. A value of this type is passed as `desc_type` to
 * ac_shader_abi::load_sampler_desc to select which descriptor to load.
 */
-enum ac_descriptor_type {
- AC_DESC_IMAGE,
- AC_DESC_FMASK,
- AC_DESC_SAMPLER,
- AC_DESC_BUFFER,
- AC_DESC_PLANE_0,
- AC_DESC_PLANE_1,
- AC_DESC_PLANE_2,
+enum ac_descriptor_type
+{
+ AC_DESC_IMAGE,
+ AC_DESC_FMASK,
+ AC_DESC_SAMPLER,
+ AC_DESC_BUFFER,
+ AC_DESC_PLANE_0,
+ AC_DESC_PLANE_1,
+ AC_DESC_PLANE_2,
};
/* Document the shader ABI during compilation. This is what allows radeonsi and
 * radv to share a compiler backend.
 *
 * The struct holds LLVM values for shader inputs and outputs, a table of
 * function-pointer hooks (each hook receives the ABI struct itself as its
 * first argument), and a set of boolean behavior flags.
 */
struct ac_shader_abi {
- LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
-
- /* These input registers sometimes need to be fixed up. */
- LLVMValueRef vertex_id;
- LLVMValueRef instance_id;
- LLVMValueRef persp_centroid, linear_centroid;
- LLVMValueRef color0, color1;
- LLVMValueRef user_data;
-
- /* For VS and PS: pre-loaded shader inputs.
- *
- * Currently only used for NIR shaders; indexed by variables'
- * driver_location.
- */
- LLVMValueRef *inputs;
-
- /* Varying -> attribute number mapping. Also NIR-only */
- unsigned fs_input_attr_indices[MAX_VARYING];
-
- void (*emit_outputs)(struct ac_shader_abi *abi,
- unsigned max_outputs,
- LLVMValueRef *addrs);
-
- void (*emit_vertex)(struct ac_shader_abi *abi,
- unsigned stream,
- LLVMValueRef *addrs);
-
- void (*emit_primitive)(struct ac_shader_abi *abi,
- unsigned stream);
-
- void (*emit_vertex_with_counter)(struct ac_shader_abi *abi,
- unsigned stream,
- LLVMValueRef vertexidx,
- LLVMValueRef *addrs);
-
- LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
- unsigned location,
- unsigned driver_location,
- unsigned component,
- unsigned num_components,
- unsigned vertex_index,
- unsigned const_index,
- LLVMTypeRef type);
-
- LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,
- LLVMTypeRef type,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- unsigned location,
- unsigned driver_location,
- unsigned component,
- unsigned num_components,
- bool is_patch,
- bool is_compact,
- bool load_inputs);
-
- void (*store_tcs_outputs)(struct ac_shader_abi *abi,
- const struct nir_variable *var,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- LLVMValueRef src,
- unsigned writemask,
- unsigned component,
- unsigned driver_location);
-
- LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
- unsigned varying_id,
- bool load_default_state);
-
-
- LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
-
- /**
- * Load the descriptor for the given buffer.
- *
- * \param buffer the buffer as presented in NIR: this is the descriptor
- * in Vulkan, and the buffer index in OpenGL/Gallium
- * \param write whether buffer contents will be written
- */
- LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
- LLVMValueRef buffer, bool write);
-
- /**
- * Load a descriptor associated to a sampler.
- *
- * \param descriptor_set the descriptor set index (only for Vulkan)
- * \param base_index the base index of the sampler variable
- * \param constant_index constant part of an array index (or 0, if the
- * sampler variable is not an array)
- * \param index non-constant part of an array index (may be NULL)
- * \param desc_type the type of descriptor to load
- * \param image whether the descriptor is loaded for an image operation
- */
- LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
- unsigned descriptor_set,
- unsigned base_index,
- unsigned constant_index,
- LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool image, bool write,
- bool bindless);
-
- /**
- * Load a Vulkan-specific resource.
- *
- * \param index resource index
- * \param desc_set descriptor set
- * \param binding descriptor set binding
- */
- LLVMValueRef (*load_resource)(struct ac_shader_abi *abi,
- LLVMValueRef index,
- unsigned desc_set,
- unsigned binding);
-
- LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi,
- LLVMValueRef sample_id);
-
- LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
-
- /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently
- * uses it due to promoting D16 to D32, but radv needs it off. */
- bool clamp_shadow_reference;
- bool interp_at_sample_force_center;
-
- /* Whether bounds checks are required */
- bool robust_buffer_access;
-
- /* Check for Inf interpolation coeff */
- bool kill_ps_if_inf_interp;
-
- /* Whether undef values must be converted to zero */
- bool convert_undef_to_zero;
-
- /* Clamp div by 0 (so it won't produce NaN) */
- bool clamp_div_by_zero;
+ LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
+
+ /* These input registers sometimes need to be fixed up. */
+ LLVMValueRef vertex_id;
+ LLVMValueRef instance_id;
+ LLVMValueRef persp_centroid, linear_centroid;
+ LLVMValueRef color0, color1;
+ LLVMValueRef user_data;
+
+ /* For VS and PS: pre-loaded shader inputs.
+ *
+ * Currently only used for NIR shaders; indexed by variables'
+ * driver_location.
+ */
+ LLVMValueRef *inputs;
+
+ /* Varying -> attribute number mapping. Also NIR-only */
+ unsigned fs_input_attr_indices[MAX_VARYING];
+
+ void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
+
+ void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs);
+
+ void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
+
+ void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
+ LLVMValueRef vertexidx, LLVMValueRef *addrs);
+
+ LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, unsigned location,
+ unsigned driver_location, unsigned component,
+ unsigned num_components, unsigned vertex_index, unsigned const_index,
+ LLVMTypeRef type);
+
+ LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
+ LLVMValueRef vertex_index, LLVMValueRef param_index,
+ unsigned const_index, unsigned location,
+ unsigned driver_location, unsigned component,
+ unsigned num_components, bool is_patch, bool is_compact,
+ bool load_inputs);
+
+ void (*store_tcs_outputs)(struct ac_shader_abi *abi, const struct nir_variable *var,
+ LLVMValueRef vertex_index, LLVMValueRef param_index,
+ unsigned const_index, LLVMValueRef src, unsigned writemask,
+ unsigned component, unsigned driver_location);
+
+ LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, unsigned varying_id,
+ bool load_default_state);
+
+ LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
+
+ /**
+ * Load the descriptor for the given buffer.
+ *
+ * \param buffer the buffer as presented in NIR: this is the descriptor
+ * in Vulkan, and the buffer index in OpenGL/Gallium
+ * \param write whether buffer contents will be written
+ */
+ LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, LLVMValueRef buffer, bool write);
+
+ /**
+ * Load a descriptor associated with a sampler.
+ *
+ * \param descriptor_set the descriptor set index (only for Vulkan)
+ * \param base_index the base index of the sampler variable
+ * \param constant_index constant part of an array index (or 0, if the
+ * sampler variable is not an array)
+ * \param index non-constant part of an array index (may be NULL)
+ * \param desc_type the type of descriptor to load
+ * \param image whether the descriptor is loaded for an image operation
+ * \param write NOTE(review): undocumented; presumably whether the resource
+ * will be written, as for load_ssbo - confirm
+ * \param bindless NOTE(review): undocumented; presumably selects bindless
+ * access - confirm against the driver implementations
+ */
+ LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, unsigned descriptor_set,
+ unsigned base_index, unsigned constant_index,
+ LLVMValueRef index, enum ac_descriptor_type desc_type,
+ bool image, bool write, bool bindless);
+
+ /**
+ * Load a Vulkan-specific resource.
+ *
+ * \param index resource index
+ * \param desc_set descriptor set
+ * \param binding descriptor set binding
+ */
+ LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
+ unsigned binding);
+
+ LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id);
+
+ LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
+
+ /* Whether to clamp the shadow reference value to [0,1] on GFX8. Radeonsi currently
+ * uses it due to promoting D16 to D32, but radv needs it off. */
+ bool clamp_shadow_reference;
+ bool interp_at_sample_force_center;
+
+ /* Whether bounds checks are required */
+ bool robust_buffer_access;
+
+ /* Check for Inf interpolation coeff */
+ bool kill_ps_if_inf_interp;
+
+ /* Whether undef values must be converted to zero */
+ bool convert_undef_to_zero;
+
+ /* Clamp div by 0 (so it won't produce NaN) */
+ bool clamp_div_by_zero;
};
#endif /* AC_SHADER_ABI_H */