From: Christoph Bumiller Date: Tue, 19 Feb 2013 21:12:01 +0000 (+0100) Subject: nvc0/ir: add formatted surface load lib code, move to extra header X-Git-Tag: mesa-9.2.1~2332 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8ac68b071d5c746b7f0ff175a09647e7dbfc29d1;p=platform%2Fupstream%2Fmesa.git nvc0/ir: add formatted surface load lib code, move to extra header OpenGL is nice and makes the user specify a format with an image unit. OpenCL is evil and doesn't, and what's better than adding a huge load of functions that we call indirectly to handle the conversion ? --- diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 802bd25..0463322 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -39,165 +39,40 @@ TargetNVC0::TargetNVC0(unsigned int card) : Target(false, card >= 0xe4) // lazyness -> will just hardcode everything for the time being -// Will probably make this nicer once we support subroutines properly, -// i.e. when we have an input IR that provides function declarations. - -// TODO: separate version for nve4+ which doesn't like the 4-byte insn formats -static const uint32_t nvc0_builtin_code[] = -{ -// DIV U32: slow unsigned integer division -// -// UNR recurrence (q = a / b): -// look for z such that 2^32 - b <= b * z < 2^32 -// then q - 1 <= (a * z) / 2^32 <= q -// -// INPUT: $r0: dividend, $r1: divisor -// OUTPUT: $r0: result, $r1: modulus -// CLOBBER: $r2 - $r3, $p0 - $p1 -// SIZE: 22 / 14 * 8 bytes -// -#if 1 - 0x04009c03, 0x78000000, - 0x7c209c82, 0x38000000, // 0x7c209cdd, - 0x0400dde2, 0x18000000, // 0x0010dd18, - 0x08309c03, 0x60000000, - 0x05205d04, 0x1c000000, // 0x05605c18, - 0x0810dc03, 0x50000000, // 0x0810dc2a, - 0x0c209c43, 0x20040000, - 0x0810dc03, 0x50000000, - 0x0c209c43, 0x20040000, - 0x0810dc03, 0x50000000, - 0x0c209c43, 0x20040000, - 0x0810dc03, 0x50000000, - 0x0c209c43, 0x20040000, - 0x0810dc03, 0x50000000, - 0x0c209c43, 0x20040000, - 0x0000dde4, 0x28000000, - 0x08001c43, 0x50000000, - 0x05209d04, 0x1c000000, // 0x05609c18, - 0x00105c03, 0x20060000, // 0x0010430d, - 0x0811dc03, 0x1b0e0000, - 0x08104103, 0x48000000, - 0x04000002, 0x08000000, - 0x0811c003, 0x1b0e0000, - 0x08104103, 0x48000000, - 0x04000002, 0x08000000, // 0x040000ac, - 0x00001de7, 0x90000000, // 0x90001dff, -#else - 0x0401dc03, 0x1b0e0000, - 0x00008003, 0x78000000, - 0x0400c003, 0x78000000, - 0x0c20c103, 0x48000000, - 0x0c108003, 0x60000000, - 0x00005c28, - 0x00001d18, - 0x0031c023, 0x1b0ec000, - 0xb000a1e7, 0x40000000, - 0x04000003, 0x6000c000, - 0x0813dc03, 0x1b000000, - 0x0420446c, - 0x040004bd, - 0x04208003, 0x5800c000, - 0x0430c103, 0x4800c000, - 0x0ffc5dff, - 0x90001dff, -#endif - -// DIV S32: slow signed integer division -// -// INPUT: $r0: dividend, $r1: divisor -// OUTPUT: $r0: result, $r1: modulus -// CLOBBER: $r2 - $r3, $p0 - $p3 -// SIZE: 18 * 8 bytes -// - 0xfc05dc23, 0x188e0000, - 0xfc17dc23, 0x18c40000, - 0x01201ec4, 0x1c000000, // 0x03301e18, - 0x05205ec4, 0x1c000000, // 0x07305e18, - 0x0401dc03, 0x1b0e0000, - 0x00008003, 0x78000000, - 0x0400c003, 0x78000000, - 0x0c20c103, 0x48000000, - 0x0c108003, 0x60000000, - 0x00005de4, 0x28000000, // 0x00005c28, - 0x00001de2, 0x18000000, // 0x00001d18, - 0x0031c023, 0x1b0ec000, - 0xe000a1e7, 0x40000000, // 0xb000a1e7, 0x40000000, - 0x04000003, 0x6000c000, - 0x0813dc03, 0x1b000000, - 0x04204603, 0x48000000, // 0x0420446c, - 0x04000442, 0x38000000, // 0x040004bd, - 0x04208003, 0x5800c000, - 0x0430c103, 0x4800c000, - 0xe0001de7, 0x4003fffe, // 0x0ffc5dff, - 0x01200f84, 0x1c000000, // 0x01700e18, - 0x05204b84, 0x1c000000, // 0x05704a18, - 0x00001de7, 0x90000000, // 0x90001dff, - -// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) -// -// INPUT: $r0d (x) -// OUTPUT: $r0d (rcp(x)) -// CLOBBER: $r2 - $r7 -// SIZE: 9 * 8 bytes -// - 0x9810dc08, - 0x00009c28, - 0x4001df18, - 0x00019d18, - 0x08011e01, 0x200c0000, - 0x10209c01, 0x50000000, - 0x08011e01, 0x200c0000, - 0x10209c01, 0x50000000, - 0x08011e01, 0x200c0000, - 0x10201c01, 0x50000000, - 0x00001de7, 0x90000000, - -// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) -// -// INPUT: $r0d (x) -// OUTPUT: $r0d (rsqrt(x)) -// CLOBBER: $r2 - $r7 -// SIZE: 14 * 8 bytes -// - 0x9c10dc08, - 0x00009c28, - 0x00019d18, - 0x3fe1df18, - 0x18001c01, 0x50000000, - 0x0001dde2, 0x18ffe000, - 0x08211c01, 0x50000000, - 0x10011e01, 0x200c0000, - 0x10209c01, 0x50000000, - 0x08211c01, 0x50000000, - 0x10011e01, 0x200c0000, - 0x10209c01, 0x50000000, - 0x08211c01, 0x50000000, - 0x10011e01, 0x200c0000, - 0x10201c01, 0x50000000, - 0x00001de7, 0x90000000, -}; - -static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] = -{ - 0, - 8 * (26), - 8 * (26 + 23), - 8 * (26 + 23 + 9) -}; +#include "target_lib_nvc0.asm.h" +#include "target_lib_nve4.asm.h" +#include "target_lib_nvf0.asm.h" void TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const { - *code = &nvc0_builtin_code[0]; - *size = sizeof(nvc0_builtin_code); + switch (chipset & 0xf0) { + case 0xe0: + *code = (const uint32_t *)&nve4_builtin_code[0]; + *size = sizeof(nve4_builtin_code); + break; + case 0xf0: + *code = (const uint32_t *)&nvf0_builtin_code[0]; + *size = sizeof(nvf0_builtin_code); + break; + default: + *code = (const uint32_t *)&nvc0_builtin_code[0]; + *size = sizeof(nvc0_builtin_code); + break; + } } uint32_t TargetNVC0::getBuiltinOffset(int builtin) const { assert(builtin < NVC0_BUILTIN_COUNT); - return nvc0_builtin_offsets[builtin]; + + switch (chipset & 0xf0) { + case 0xe0: return nve4_builtin_offsets[builtin]; + case 0xf0: return nvf0_builtin_offsets[builtin]; + default: + return nvc0_builtin_offsets[builtin]; + } } struct opProperties diff --git a/src/gallium/drivers/nvc0/codegen/target_lib_nvc0.asm b/src/gallium/drivers/nvc0/codegen/target_lib_nvc0.asm new file mode 100644 index 0000000..f40becc --- /dev/null +++ b/src/gallium/drivers/nvc0/codegen/target_lib_nvc0.asm @@ -0,0 +1,96 @@ +// +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 22 / 14 * 8 bytes +// +bfind u32 $r2 $r1 +xor b32 $r2 $r2 0x1f +mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +cvt u32 $r1 neg u32 $r1 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +cvt u32 $r2 neg u32 $r1 +add $r1 (mul u32 $r1 u32 $r0) $r3 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +ret +// +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +set $p2 0x1 lt s32 $r0 0x0 +set $p3 0x1 lt s32 $r1 0x0 xor $p2 +cvt s32 $r0 abs s32 $r0 +cvt s32 $r1 abs s32 $r1 +bfind u32 $r2 $r1 +xor b32 $r2 $r2 0x1f +mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +cvt u32 $r1 neg u32 $r1 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +cvt u32 $r2 neg u32 $r1 +add $r1 (mul u32 $r1 u32 $r0) $r3 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p3 cvt s32 $r0 neg s32 $r0 +$p2 cvt s32 $r1 neg s32 $r1 +ret +// +// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rcp(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 9 * 8 bytes +// +nop +ret +// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rsqrt(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 14 * 8 bytes +// +nop +ret diff --git a/src/gallium/drivers/nvc0/codegen/target_lib_nvc0.asm.h b/src/gallium/drivers/nvc0/codegen/target_lib_nvc0.asm.h new file mode 100644 index 0000000..e5e7475 --- /dev/null +++ b/src/gallium/drivers/nvc0/codegen/target_lib_nvc0.asm.h @@ -0,0 +1,113 @@ + +static const uint32_t nvc0_builtin_code[] = +{ + 0x04009c03, + 0x78000000, + 0x7c209cdd, + 0x0010dd18, + 0x08309c03, + 0x60000000, + 0x05605c18, + 0x0810dc2a, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0000dde4, + 0x28000000, + 0x08001c43, + 0x50000000, + 0x05609c18, + 0x0010430d, + 0x0811dc03, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x04000002, + 0x08000000, + 0x0811c003, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x040000ac, + 0x90001dff, + 0xfc05dc23, + 0x188e0000, + 0xfc17dc23, + 0x18c40000, + 0x03301e18, + 0x07305e18, + 0x04009c03, + 0x78000000, + 0x7c209cdd, + 0x0010dd18, + 0x08309c03, + 0x60000000, + 0x05605c18, + 0x0810dc2a, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0000dde4, + 0x28000000, + 0x08001c43, + 0x50000000, + 0x05609c18, + 0x0010430d, + 0x0811dc03, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x04000002, + 0x08000000, + 0x0811c003, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x040000ac, + 0x01700e18, + 0x05704a18, + 0x90001dff, + 0x00001c08, + 0x90001dff, + 0x00001c08, + 0x90001dff, +}; + +static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] = +{ + 0x0000, + 0x00b0, + 0x0180, + 0x0188 +}; + diff --git a/src/gallium/drivers/nvc0/codegen/target_lib_nve4.asm b/src/gallium/drivers/nvc0/codegen/target_lib_nve4.asm new file mode 100644 index 0000000..2ed8c0a --- /dev/null +++ b/src/gallium/drivers/nvc0/codegen/target_lib_nve4.asm @@ -0,0 +1,563 @@ +// +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 22 / 14 * 8 bytes +// +sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28 +bfind u32 $r2 $r1 +long xor b32 $r2 $r2 0x1f +long mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +long cvt u32 $r1 neg u32 $r1 +long mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +long cvt u32 $r2 neg u32 $r1 +long add $r1 (mul u32 $r1 u32 $r0) $r3 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +long ret +// +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +set $p2 0x1 lt s32 $r0 0x0 +set $p3 0x1 lt s32 $r1 0x0 xor $p2 +sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28 +long cvt s32 $r0 abs s32 $r0 +long cvt s32 $r1 abs s32 $r1 +bfind u32 $r2 $r1 +long xor b32 $r2 $r2 0x1f +long mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +cvt u32 $r1 neg u32 $r1 +sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +long cvt u32 $r2 neg u32 $r1 +long add $r1 (mul u32 $r1 u32 $r0) $r3 +sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +long $p0 add b32 $r0 $r0 0x1 +long $p3 cvt s32 $r0 neg s32 $r0 +sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c +$p2 cvt s32 $r1 neg s32 $r1 +long ret +// +// SULDP [for each format] +// $r4d: address +// $r2: surface info (format) +// $p0: access predicate +// $p1, $p2: caching predicate (00: cv, 01: ca, 10: cg) +// +// RGBA32 +$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0 +long ret +// RGBA16_UNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 u16 1 $r1 +cvt rn f32 $r2 u16 0 $r1 +mul f32 $r3 $r3 0x37800074 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt rn f32 $r1 u16 1 $r0 +mul f32 $r2 $r2 0x37800074 +cvt rn f32 $r0 u16 0 $r0 +mul f32 $r1 $r1 0x37800074 +mul f32 $r0 $r0 0x37800074 +long ret +// RGBA16_SNORM +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 s16 1 $r1 +cvt rn f32 $r2 s16 0 $r1 +mul f32 $r3 $r3 0x38000187 +cvt rn f32 $r1 s16 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x38000187 +cvt rn f32 $r0 s16 0 $r0 +mul f32 $r1 $r1 0x38000187 +mul f32 $r0 $r0 0x38000187 +long ret +// RGBA16_SINT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt s32 $r3 s16 1 $r1 +cvt s32 $r2 s16 0 $r1 +cvt s32 $r1 s16 1 $r0 +cvt s32 $r0 s16 0 $r0 +long ret +// RGBA16_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt u32 $r3 u16 1 $r1 +cvt u32 $r2 u16 0 $r1 +cvt u32 $r1 u16 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u16 0 $r0 +long ret +// RGBA16_FLOAT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt f32 $r3 f16 $r1 1 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt f32 $r2 f16 $r1 0 +cvt f32 $r1 f16 $r0 1 +cvt f32 $r0 f16 $r0 0 +long ret +// RG32_FLOAT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r2 0x00000000 +long mov b32 $r3 0x3f800000 +long ret +// RG32_xINT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r2 0x00000000 +long mov b32 $r3 0x00000001 +long ret +// RGB10A2_UNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +ext u32 $r1 $r0 0x0a0a +long mov b32 $r3 0x3f800000 +ext u32 $r2 $r0 0x0a14 +long and b32 $r0 $r0 0x3ff +cvt rn f32 $r2 u16 0 $r2 +cvt rn f32 $r1 u16 0 $r1 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x3a802007 +cvt rn f32 $r0 u16 0 $r0 +mul f32 $r1 $r1 0x3a802007 +mul f32 $r0 $r0 0x3a802007 +long ret +// RGB10A2_UINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +ext u32 $r1 $r0 0x0a0a +long mov b32 $r3 0x00000001 +ext u32 $r2 $r0 0x0a14 +long and b32 $r0 $r0 0x3ff +long ret +// RGBA8_UNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 u8 3 $r0 +cvt rn f32 $r2 u8 2 $r0 +mul f32 $r3 $r3 0x3b808081 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt rn f32 $r1 u8 1 $r0 +mul f32 $r2 $r2 0x3b808081 +cvt rn f32 $r0 u8 0 $r0 +mul f32 $r1 $r1 0x3b808081 +mul f32 $r0 $r0 0x3b808081 +long ret +// RGBA8_SNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 s8 3 $r0 +cvt rn f32 $r2 s8 2 $r0 +mul f32 $r3 $r3 0x3c010204 +cvt rn f32 $r1 s8 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x3c010204 +cvt rn f32 $r0 s8 0 $r0 +mul f32 $r1 $r1 0x3c010204 +mul f32 $r0 $r0 0x3c010204 +long ret +// RGBA8_SINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt s32 $r3 s8 3 $r0 +cvt s32 $r2 s8 2 $r0 +cvt s32 $r1 s8 1 $r0 +cvt s32 $r0 s8 0 $r0 +long ret +// RGBA8_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt u32 $r3 u8 3 $r0 +cvt u32 $r2 u8 2 $r0 +cvt u32 $r1 u8 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u8 0 $r0 +long ret +// R5G6B5_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +ext u32 $r1 $r0 0x0605 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r3 0x3f800000 +ext u32 $r2 $r0 0x050b +long and b32 $r0 $r0 0x1f +cvt rn f32 $r2 u8 0 $r2 +cvt rn f32 $r1 u8 0 $r1 +mul f32 $r2 $r2 0x3d042108 +cvt rn f32 $r0 u8 0 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r1 $r1 0x3c820821 +mul f32 $r0 $r0 0x3d042108 +long ret +// R5G5B5X1_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +ext u32 $r1 $r0 0x0505 +ext u32 $r2 $r0 0x050a +long and b32 $r0 $r0 0x1f +long mov b32 $r3 0x3f800000 +cvt rn f32 $r2 u8 0 $r2 +cvt rn f32 $r1 u8 0 $r1 +cvt rn f32 $r0 u8 0 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x3d042108 +mul f32 $r1 $r1 0x3d042108 +mul f32 $r0 $r0 0x3d042108 +long ret +// RG16_UNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r1 u16 1 $r0 +cvt rn f32 $r0 u16 0 $r0 +mul f32 $r1 $r1 0x37800074 +mul f32 $r0 $r0 0x37800074 +long mov b32 $r2 0x00000000 +long mov b32 $r3 0x3f800000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long ret +// RG16_SNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r1 s16 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mov b32 $r2 0x00000000 +cvt rn f32 $r0 s16 0 $r0 +mul f32 $r1 $r1 0x38000187 +mul f32 $r0 $r0 0x38000187 +long ret +// RG16_SINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x00000001 +cvt s32 $r1 s16 1 $r0 +mov b32 $r2 0x00000000 +cvt s32 $r0 s16 0 $r0 +long ret +// RG16_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x00000001 +cvt u32 $r1 u16 1 $r0 +mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u16 0 $r0 +long ret +// RG16_FLOAT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt f32 $r1 f16 $r0 1 +mov b32 $r2 0x00000000 +cvt f32 $r0 f16 $r0 0 +long ret +// R32_FLOAT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R32_xINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// RG8_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r1 u8 1 $r0 +mov b32 $r2 0x00000000 +cvt rn f32 $r0 u8 0 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r1 $r1 0x3b808081 +mul f32 $r0 $r0 0x3b808081 +long ret +// RG8_SNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r3 0x3f800000 +cvt rn f32 $r1 s8 1 $r0 +long mov b32 $r2 0x00000000 +cvt rn f32 $r0 s8 0 $r0 +mul f32 $r1 $r1 0x3c010204 +mul f32 $r0 $r0 0x3c010204 +long ret +// RG8_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +cvt u32 $r1 u8 1 $r0 +long mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u8 0 $r0 +long ret +// RG8_SINT +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt s32 $r1 s8 1 $r0 +long mov b32 $r2 0x00000000 +cvt s32 $r0 s8 0 $r0 +long ret +// R16_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +cvt rn f32 $r0 u16 0 $r0 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +mul f32 $r0 $r0 0x37800074 +long ret +// R16_SNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r0 s16 0 $r0 +long mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r1 0x00000000 +mul f32 $r0 $r0 0x38000187 +long ret +// R16_SINT +$p1 suldgb s16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb s16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb s16 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R16_UINT +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R16_FLOAT +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +long mov b32 $r2 0x00000000 +cvt f32 $r0 f16 $r0 0 +mov b32 $r1 0x00000000 +long ret +// R8_UNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r0 u8 0 $r0 +mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r0 $r0 0x3b808081 +mov b32 $r1 0x00000000 +long ret +// R8_SNORM +$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mov b32 $r3 0x3f800000 +cvt rn f32 $r0 s8 0 $r0 +mov b32 $r2 0x00000000 +mul f32 $r0 $r0 0x3c010204 +mov b32 $r1 0x00000000 +long ret +// R8_SINT +$p1 suldgb s8 $r0 ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb s8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb s8 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R8_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long ret +// R11G11B10_FLOAT TODO +$p1 suldgb b32 $r3 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r3 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +long nop +long ret +// +// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rcp(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 9 * 8 bytes +// +long nop +long ret +// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rsqrt(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 14 * 8 bytes +// +long nop +long ret diff --git a/src/gallium/drivers/nvc0/codegen/target_lib_nve4.asm.h b/src/gallium/drivers/nvc0/codegen/target_lib_nve4.asm.h new file mode 100644 index 0000000..526e759 --- /dev/null +++ b/src/gallium/drivers/nvc0/codegen/target_lib_nve4.asm.h @@ -0,0 +1,500 @@ + +// Assembled from target_lib_nve4.asm by envyas -m nvc0 -V nve4 -W. + +static const uint64_t nve4_builtin_code[] = +{ + 0x2282828042804287ULL, + 0x7800000004009c03ULL, + 0x380000007c209c82ULL, + 0x180000000400dde2ULL, + 0x6000000008309c03ULL, + 0x1c00000005205d04ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x2282828282828287ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x2042c28280428047ULL, + 0x200400000c209c43ULL, + 0x280000000000dde4ULL, + 0x5000000008001c43ULL, + 0x1c00000005209d04ULL, + 0x2006000000105c03ULL, + 0x1b0e00000811dc03ULL, + 0x4800000008104103ULL, + 0x220282e20042c287ULL, + 0x0800000004000002ULL, + 0x1b0e00000811c003ULL, + 0x4800000008104103ULL, + 0x0800000004000002ULL, + 0x9000000000001de7ULL, + 0x188e0000fc05dc23ULL, + 0x18c40000fc17dc23ULL, + 0x2280428042828207ULL, + 0x1c00000001201ec4ULL, + 0x1c00000005205ec4ULL, + 0x7800000004009c03ULL, + 0x380000007c209c82ULL, + 0x180000000400dde2ULL, + 0x6000000008309c03ULL, + 0x1c00000005205d04ULL, + 0x2282828282828287ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x2282804280428287ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x280000000000dde4ULL, + 0x5000000008001c43ULL, + 0x1c00000005209d04ULL, + 0x2006000000105c03ULL, + 0x22028042c28042c7ULL, + 0x1b0e00000811dc03ULL, + 0x4800000008104103ULL, + 0x0800000004000002ULL, + 0x1b0e00000811c003ULL, + 0x4800000008104103ULL, + 0x0800000004000002ULL, + 0x1c00000001200f84ULL, + 0x22c200428042e047ULL, + 0x1c00000005204b84ULL, + 0x9000000000001de7ULL, + 0xd4004000084004c5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009c5ULL, + 0xd4004000084007c5ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd4004000084004c5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009c5ULL, + 0xd4004000084007c5ULL, + 0x1900000004a0dc04ULL, + 0x1800000004a09c04ULL, + 0x30de0001d030dc02ULL, + 0x2000000000000007ULL, + 0x1900000000a05c04ULL, + 0x30de0001d0209c02ULL, + 0x1800000000a01c04ULL, + 0x30de0001d0105c02ULL, + 0x30de0001d0001c02ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1900000004a0de04ULL, + 0x1800000004a09e04ULL, + 0x30e000061c30dc02ULL, + 0x1900000000a05e04ULL, + 0x2000000000000007ULL, + 0x30e000061c209c02ULL, + 0x1800000000a01e04ULL, + 0x30e000061c105c02ULL, + 0x30e000061c001c02ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1d00000004a0de84ULL, + 0x1c00000004a09e84ULL, + 0x1d00000000a05e84ULL, + 0x1c00000000a01e84ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1d00000004a0dc04ULL, + 0x1c00000004a09c04ULL, + 0x1d00000000a05c04ULL, + 0x2000000000000007ULL, + 0x1c00000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1100000004a0dc04ULL, + 0x2000000000000007ULL, + 0x1000000004a09c04ULL, + 0x1100000000a05c04ULL, + 0x1000000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0x2000000000000007ULL, + 0xd4004000084007a5ULL, + 0x1800000000009de2ULL, + 0x18fe00000000dde2ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0x2000000000000007ULL, + 0xd4004000084007a5ULL, + 0x1800000000009de2ULL, + 0x180000000400dde2ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0x2000000000000007ULL, + 0xd400400008400785ULL, + 0x7000c02828005c03ULL, + 0x18fe00000000dde2ULL, + 0x7000c02850009c03ULL, + 0x3800000ffc001c02ULL, + 0x1800000008a09c04ULL, + 0x1800000004a05c04ULL, + 0x2000000000000007ULL, + 0x30ea00801c209c02ULL, + 0x1800000000a01c04ULL, + 0x30ea00801c105c02ULL, + 0x30ea00801c001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x7000c02828005c03ULL, + 0x180000000400dde2ULL, + 0x7000c02850009c03ULL, + 0x3800000ffc001c02ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x198000000020dc04ULL, + 0x1900000000209c04ULL, + 0x30ee02020430dc02ULL, + 0x2000000000000007ULL, + 0x1880000000205c04ULL, + 0x30ee020204209c02ULL, + 0x1800000000201c04ULL, + 0x30ee020204105c02ULL, + 0x30ee020204001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x198000000020de04ULL, + 0x1900000000209e04ULL, + 0x30f004081030dc02ULL, + 0x1880000000205e04ULL, + 0x2000000000000007ULL, + 0x30f0040810209c02ULL, + 0x1800000000201e04ULL, + 0x30f0040810105c02ULL, + 0x30f0040810001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x1d8000000020de84ULL, + 0x1d00000000209e84ULL, + 0x1c80000000205e84ULL, + 0x1c00000000201e84ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x1d8000000020dc04ULL, + 0x1d00000000209c04ULL, + 0x1c80000000205c04ULL, + 0x2000000000000007ULL, + 0x1c00000000201c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x7000c01814005c03ULL, + 0x2000000000000007ULL, + 0x18fe00000000dde2ULL, + 0x7000c0142c009c03ULL, + 0x380000007c001c02ULL, + 0x1800000008209c04ULL, + 0x1800000004205c04ULL, + 0x30f4108420209c02ULL, + 0x1800000000201c04ULL, + 0x2000000000000007ULL, + 0x30f2082084105c02ULL, + 0x30f4108420001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x2000000000000007ULL, + 0x7000c01414005c03ULL, + 0x7000c01428009c03ULL, + 0x380000007c001c02ULL, + 0x18fe00000000dde2ULL, + 0x1800000008209c04ULL, + 0x1800000004205c04ULL, + 0x1800000000201c04ULL, + 0x2000000000000007ULL, + 0x30f4108420209c02ULL, + 0x30f4108420105c02ULL, + 0x30f4108420001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0x2000000000000007ULL, + 0xd400400008400785ULL, + 0x1900000000a05c04ULL, + 0x1800000000a01c04ULL, + 0x30de0001d0105c02ULL, + 0x30de0001d0001c02ULL, + 0x1800000000009de2ULL, + 0x18fe00000000dde2ULL, + 0x2000000000000007ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x18fe00000000dde2ULL, + 0x1900000000a05e04ULL, + 0x2000000000000007ULL, + 0x1800000000009de2ULL, + 0x1800000000a01e04ULL, + 0x30e000061c105c02ULL, + 0x30e000061c001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x180000000400dde2ULL, + 0x1d00000000a05e84ULL, + 0x1800000000009de2ULL, + 0x1c00000000a01e84ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x180000000400dde2ULL, + 0x1d00000000a05c04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x1c00000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x18fe00000000dde2ULL, + 0x2000000000000007ULL, + 0x1100000000a05c04ULL, + 0x1800000000009de2ULL, + 0x1000000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0x2000000000000007ULL, + 0xd400400008400785ULL, + 0x18fe00000000dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1880000000205c04ULL, + 0x1800000000009de2ULL, + 0x1800000000201c04ULL, + 0x2000000000000007ULL, + 0x30ee020204105c02ULL, + 0x30ee020204001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x2000000000000007ULL, + 0x18fe00000000dde2ULL, + 0x1880000000205e04ULL, + 0x1800000000009de2ULL, + 0x1800000000201e04ULL, + 0x30f0040810105c02ULL, + 0x30f0040810001c02ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x180000000400dde2ULL, + 0x1c80000000205c04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x1c00000000201c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x180000000400dde2ULL, + 0x2000000000000007ULL, + 0x1c80000000205e84ULL, + 0x1800000000009de2ULL, + 0x1c00000000201e84ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0x2000000000000007ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1800000000a01c04ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x30de0001d0001c02ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1800000000a01e04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x1800000000005de2ULL, + 0x30e000061c001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400465ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400965ULL, + 0xd400400008400765ULL, + 0x2000000000000007ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0x2000000000000007ULL, + 0xd400400008400745ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1800000000009de2ULL, + 0x1000000000a01c04ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400405ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400905ULL, + 0xd400400008400705ULL, + 0x18fe00000000dde2ULL, + 0x1800000000201c04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x30ee020204001c02ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400405ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400905ULL, + 0xd400400008400705ULL, + 0x2000000000000007ULL, + 0x18fe00000000dde2ULL, + 0x1800000000201e04ULL, + 0x1800000000009de2ULL, + 0x30f0040810001c02ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400425ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400925ULL, + 0xd400400008400725ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400405ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400905ULL, + 0xd400400008400705ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x2000000000000007ULL, + 0x9000000000001de7ULL, + 0xd40040000840c485ULL, + 0x0c5400000013dc04ULL, + 0xd40040000840c985ULL, + 0xd40040000840c785ULL, + 0x18fe00000000dde2ULL, + 0x4000000000001de4ULL, + 0x9000000000001de7ULL, + 0x4000000000001de4ULL, + 0x9000000000001de7ULL, + 0x4000000000001de4ULL, + 0x9000000000001de7ULL, +}; + +static const uint16_t nve4_builtin_offsets[NVC0_BUILTIN_COUNT] = +{ + 0x0000, + 0x00f0, + 0x0f08, + 0x0f18, +}; + diff --git a/src/gallium/drivers/nvc0/codegen/target_lib_nvf0.asm.h b/src/gallium/drivers/nvc0/codegen/target_lib_nvf0.asm.h new file mode 100644 index 0000000..d10b6b0 --- /dev/null +++ b/src/gallium/drivers/nvc0/codegen/target_lib_nvf0.asm.h @@ -0,0 +1,13 @@ + +static const uint64_t nvf0_builtin_code[] = +{ + 0x19000000001c003cULL, +}; + +static const uint16_t nvf0_builtin_offsets[NVC0_BUILTIN_COUNT] = +{ + 0, + 0, + 0, + 0 +};