From 77ad3883ecc37c1360130546b9546a5dbbde265f Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Mon, 8 Oct 2012 15:20:37 +0000 Subject: [PATCH] Removed utest kernels used for the (removed) internal tests --- backend/kernels/Makefile | 34 - backend/kernels/add.cl | 7 - backend/kernels/add.cl.ll | 17 - backend/kernels/add2.cl | 12 - backend/kernels/add2.cl.ll | 24 - backend/kernels/cmp.cl | 7 - backend/kernels/cmp.cl.ll | 22 - backend/kernels/cmp_cvt.cl | 7 - backend/kernels/cmp_cvt.cl.ll | 22 - backend/kernels/compile.sh | 6 - backend/kernels/complex_struct.cl | 13 - backend/kernels/complex_struct.cl.ll | 22 - backend/kernels/cycle.cl | 16 - backend/kernels/cycle.cl.ll | 15 - backend/kernels/dummy.ll | 74 -- backend/kernels/extract.cl | 8 - backend/kernels/extract.cl.ll | 21 - backend/kernels/function.cl | 13 - backend/kernels/function.cl.ll | 24 - backend/kernels/function_param.cl | 15 - backend/kernels/function_param.cl.ll | 33 - backend/kernels/g.cl | 13 - backend/kernels/g.cl.ll | 91 -- backend/kernels/get_global_id.cl | 10 - backend/kernels/get_global_id.cl.ll | 32 - backend/kernels/insert.cl | 10 - backend/kernels/insert.cl.ll | 18 - backend/kernels/load_store.cl | 6 - backend/kernels/load_store.cl.ll | 17 - backend/kernels/loop.cl | 9 - backend/kernels/loop.cl.ll | 31 - backend/kernels/loop2.cl | 14 - backend/kernels/loop2.cl.ll | 39 - backend/kernels/loop3.cl | 9 - backend/kernels/loop3.cl.ll | 38 - backend/kernels/loop3.ll | 38 - backend/kernels/loop4.cl | 12 - backend/kernels/loop4.cl.ll | 67 -- backend/kernels/loop4.ll | 67 -- backend/kernels/loop5.cl | 17 - backend/kernels/loop5.cl.ll | 69 -- backend/kernels/loop5.ll | 131 --- backend/kernels/mad.cl.ll | 113 --- backend/kernels/select.cl | 10 - backend/kernels/select.cl.ll | 38 - backend/kernels/short.cl | 7 - backend/kernels/short.cl.ll | 17 - backend/kernels/shuffle.cl | 8 - backend/kernels/shuffle.cl.ll | 17 - backend/kernels/simple_float4.cl | 8 - backend/kernels/simple_float4.cl.ll | 29 - backend/kernels/simple_float4_2.cl | 8 - backend/kernels/simple_float4_2.cl.ll | 30 - backend/kernels/simple_float4_3.cl | 9 - backend/kernels/simple_float4_3.cl.ll | 38 - backend/kernels/stdlib.h | 114 --- backend/kernels/store.cl | 7 - backend/kernels/store.cl.ll | 16 - backend/kernels/struct.cl | 26 - backend/kernels/struct.cl.ll | 66 -- backend/kernels/struct2.cl | 22 - backend/kernels/struct2.cl.ll | 38 - backend/kernels/test_select.cl | 11 - backend/kernels/test_select.cl.ll | 32 - backend/kernels/undefined.cl | 11 - backend/kernels/undefined.cl.ll | 16 - backend/kernels/vector_constant.cl | 7 - backend/kernels/vector_constant.cl.ll | 30 - backend/kernels/vector_constant.ll | 84 -- backend/kernels/void.cl | 4 - backend/kernels/void.cl.ll | 12 - backend/src/backend/gen/gen_mesa_defines.h | 1525 ---------------------------- backend/src/backend/gen/gen_mesa_structs.h | 1513 --------------------------- 73 files changed, 5016 deletions(-) delete mode 100644 backend/kernels/Makefile delete mode 100644 backend/kernels/add.cl delete mode 100644 backend/kernels/add.cl.ll delete mode 100644 backend/kernels/add2.cl delete mode 100644 backend/kernels/add2.cl.ll delete mode 100644 backend/kernels/cmp.cl delete mode 100644 backend/kernels/cmp.cl.ll delete mode 100644 backend/kernels/cmp_cvt.cl delete mode 100644 backend/kernels/cmp_cvt.cl.ll delete mode 100755 backend/kernels/compile.sh delete mode 100644 backend/kernels/complex_struct.cl delete mode 100644 backend/kernels/complex_struct.cl.ll delete mode 100644 backend/kernels/cycle.cl delete mode 100644 backend/kernels/cycle.cl.ll delete mode 100644 backend/kernels/dummy.ll delete mode 100644 backend/kernels/extract.cl delete mode 100644 backend/kernels/extract.cl.ll delete mode 100644 backend/kernels/function.cl delete mode 100644 backend/kernels/function.cl.ll delete mode 100644 backend/kernels/function_param.cl delete mode 100644 backend/kernels/function_param.cl.ll delete mode 100644 backend/kernels/g.cl delete mode 100644 backend/kernels/g.cl.ll delete mode 100644 backend/kernels/get_global_id.cl delete mode 100644 backend/kernels/get_global_id.cl.ll delete mode 100644 backend/kernels/insert.cl delete mode 100644 backend/kernels/insert.cl.ll delete mode 100644 backend/kernels/load_store.cl delete mode 100644 backend/kernels/load_store.cl.ll delete mode 100644 backend/kernels/loop.cl delete mode 100644 backend/kernels/loop.cl.ll delete mode 100644 backend/kernels/loop2.cl delete mode 100644 backend/kernels/loop2.cl.ll delete mode 100644 backend/kernels/loop3.cl delete mode 100644 backend/kernels/loop3.cl.ll delete mode 100644 backend/kernels/loop3.ll delete mode 100644 backend/kernels/loop4.cl delete mode 100644 backend/kernels/loop4.cl.ll delete mode 100644 backend/kernels/loop4.ll delete mode 100644 backend/kernels/loop5.cl delete mode 100644 backend/kernels/loop5.cl.ll delete mode 100644 backend/kernels/loop5.ll delete mode 100644 backend/kernels/mad.cl.ll delete mode 100644 backend/kernels/select.cl delete mode 100644 backend/kernels/select.cl.ll delete mode 100644 backend/kernels/short.cl delete mode 100644 backend/kernels/short.cl.ll delete mode 100644 backend/kernels/shuffle.cl delete mode 100644 backend/kernels/shuffle.cl.ll delete mode 100644 backend/kernels/simple_float4.cl delete mode 100644 backend/kernels/simple_float4.cl.ll delete mode 100644 backend/kernels/simple_float4_2.cl delete mode 100644 backend/kernels/simple_float4_2.cl.ll delete mode 100644 backend/kernels/simple_float4_3.cl delete mode 100644 backend/kernels/simple_float4_3.cl.ll delete mode 100644 backend/kernels/stdlib.h delete mode 100644 backend/kernels/store.cl delete mode 100644 backend/kernels/store.cl.ll delete mode 100644 backend/kernels/struct.cl delete mode 100644 backend/kernels/struct.cl.ll delete mode 100644 backend/kernels/struct2.cl delete mode 100644 backend/kernels/struct2.cl.ll delete mode 100644 backend/kernels/test_select.cl delete mode 100644 backend/kernels/test_select.cl.ll delete mode 100644 backend/kernels/undefined.cl delete mode 100644 backend/kernels/undefined.cl.ll delete mode 100644 backend/kernels/vector_constant.cl delete mode 100644 backend/kernels/vector_constant.cl.ll delete mode 100644 backend/kernels/vector_constant.ll delete mode 100644 backend/kernels/void.cl delete mode 100644 backend/kernels/void.cl.ll delete mode 100644 backend/src/backend/gen/gen_mesa_defines.h delete mode 100644 backend/src/backend/gen/gen_mesa_structs.h diff --git a/backend/kernels/Makefile b/backend/kernels/Makefile deleted file mode 100644 index c5a0ebd..0000000 --- a/backend/kernels/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -%.ll : %.cl Makefile stdlib.h - ./compile.sh $< - -all: add.ll\ - add2.ll\ - cmp.ll\ - cmp_cvt.ll\ - complex_struct.ll\ - cycle.ll\ - extract.ll\ - function.ll\ - function_param.ll\ - get_global_id.ll\ - insert.ll\ - load_store.ll\ - loop.ll\ - loop2.ll\ - loop3.ll\ - loop4.ll\ - loop5.ll\ - select.ll\ - short.ll\ - shuffle.ll\ - simple_float4.ll\ - simple_float4_2.ll\ - simple_float4_3.ll\ - store.ll\ - struct.ll\ - struct2.ll\ - test_select.ll\ - undefined.ll\ - vector_constant.ll\ - void.ll - diff --git a/backend/kernels/add.cl b/backend/kernels/add.cl deleted file mode 100644 index b34d1e2..0000000 --- a/backend/kernels/add.cl +++ /dev/null @@ -1,7 +0,0 @@ -#include "stdlib.h" -__kernel void add(__global unsigned int *dst, unsigned int x, unsigned int y) -{ - dst[0] = x + y; -} - - diff --git a/backend/kernels/add.cl.ll b/backend/kernels/add.cl.ll deleted file mode 100644 index 80f45c9..0000000 --- a/backend/kernels/add.cl.ll +++ /dev/null @@ -1,17 +0,0 @@ -; ModuleID = 'add.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, i32 %y) nounwind noinline { -entry: - %add = add i32 %y, %x - store i32 %add, i32 addrspace(1)* %dst, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, i32)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/add2.cl b/backend/kernels/add2.cl deleted file mode 100644 index d309fa3..0000000 --- a/backend/kernels/add2.cl +++ /dev/null @@ -1,12 +0,0 @@ -#include "stdlib.h" -struct big{ - unsigned int a, b; -}; - -__kernel void add(__global struct big *b, unsigned int x, unsigned int y) -{ - b->a = x + y; - b->b = x - y + 10; -} - - diff --git a/backend/kernels/add2.cl.ll b/backend/kernels/add2.cl.ll deleted file mode 100644 index dae1c24..0000000 --- a/backend/kernels/add2.cl.ll +++ /dev/null @@ -1,24 +0,0 @@ -; ModuleID = 'add2.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { i32, i32 } - -define ptx_kernel void @add(%struct.big addrspace(1)* nocapture %b, i32 %x, i32 %y) nounwind noinline { -entry: - %add = add i32 %y, %x - %a = getelementptr inbounds %struct.big addrspace(1)* %b, i32 0, i32 0 - store i32 %add, i32 addrspace(1)* %a, align 4, !tbaa !1 - %sub = add i32 %x, 10 - %add1 = sub i32 %sub, %y - %b2 = getelementptr inbounds %struct.big addrspace(1)* %b, i32 0, i32 1 - store i32 %add1, i32 addrspace(1)* %b2, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (%struct.big addrspace(1)*, i32, i32)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/cmp.cl b/backend/kernels/cmp.cl deleted file mode 100644 index fc5bd67..0000000 --- a/backend/kernels/cmp.cl +++ /dev/null @@ -1,7 +0,0 @@ -#include "stdlib.h" -__kernel void test_cmp(__global bool *dst, int x, int y, float z, float w) -{ - dst[0] = (x < y) + (z > w); -} - - diff --git a/backend/kernels/cmp.cl.ll b/backend/kernels/cmp.cl.ll deleted file mode 100644 index 759c184..0000000 --- a/backend/kernels/cmp.cl.ll +++ /dev/null @@ -1,22 +0,0 @@ -; ModuleID = 'cmp.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @test_cmp(i8 addrspace(1)* nocapture %dst, i32 %x, i32 %y, float %z, float %w) nounwind noinline { -entry: - %cmp = icmp slt i32 %x, %y - %conv = zext i1 %cmp to i32 - %cmp1 = fcmp ogt float %z, %w - %add = sext i1 %cmp1 to i32 - %tobool = icmp ne i32 %conv, %add - %frombool = zext i1 %tobool to i8 - store i8 %frombool, i8 addrspace(1)* %dst, align 1, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i8 addrspace(1)*, i32, i32, float, float)* @test_cmp} -!1 = metadata !{metadata !"bool", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/cmp_cvt.cl b/backend/kernels/cmp_cvt.cl deleted file mode 100644 index 266eae7..0000000 --- a/backend/kernels/cmp_cvt.cl +++ /dev/null @@ -1,7 +0,0 @@ -#include "stdlib.h" - -__kernel void cmp_cvt(__global int *dst, int x, int y) -{ - dst[0] = x + y < get_local_id(0) ; -} - diff --git a/backend/kernels/cmp_cvt.cl.ll b/backend/kernels/cmp_cvt.cl.ll deleted file mode 100644 index 37945e4..0000000 --- a/backend/kernels/cmp_cvt.cl.ll +++ /dev/null @@ -1,22 +0,0 @@ -; ModuleID = 'cmp_cvt.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @cmp_cvt(i32 addrspace(1)* nocapture %dst, i32 %x, i32 %y) nounwind noinline { -get_local_id.exit: - %add = add nsw i32 %y, %x - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %cmp = icmp ult i32 %add, %call.i - %conv = zext i1 %cmp to i32 - store i32 %conv, i32 addrspace(1)* %dst, align 4, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, i32)* @cmp_cvt} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/compile.sh b/backend/kernels/compile.sh deleted file mode 100755 index e1177a7..0000000 --- a/backend/kernels/compile.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -clang -emit-llvm -O3 -ccc-host-triple ptx32 -c $1 -o $1.o -llvm-dis $1.o -rm $1.o -mv $1.o.ll $1.ll - diff --git a/backend/kernels/complex_struct.cl b/backend/kernels/complex_struct.cl deleted file mode 100644 index 666ddcc..0000000 --- a/backend/kernels/complex_struct.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include "stdlib.h" -struct hop { float x, y; }; -struct my_struct { - int a; - struct hop b[5]; -}; - -__kernel void struct_cl (__global struct my_struct *dst, - __global struct my_struct *src) -{ - dst[0].b[2].y = src[1].b[3].x; -} - diff --git a/backend/kernels/complex_struct.cl.ll b/backend/kernels/complex_struct.cl.ll deleted file mode 100644 index ce370b5..0000000 --- a/backend/kernels/complex_struct.cl.ll +++ /dev/null @@ -1,22 +0,0 @@ -; ModuleID = 'complex_struct.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.my_struct = type { i32, [5 x %struct.hop] } -%struct.hop = type { float, float } - -define ptx_kernel void @struct_cl(%struct.my_struct addrspace(1)* nocapture %dst, %struct.my_struct addrspace(1)* nocapture %src) nounwind noinline { -entry: - %x = getelementptr inbounds %struct.my_struct addrspace(1)* %src, i32 1, i32 1, i32 3, i32 0 - %0 = load float addrspace(1)* %x, align 4, !tbaa !1 - %y = getelementptr inbounds %struct.my_struct addrspace(1)* %dst, i32 0, i32 1, i32 2, i32 1 - store float %0, float addrspace(1)* %y, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (%struct.my_struct addrspace(1)*, %struct.my_struct addrspace(1)*)* @struct_cl} -!1 = metadata !{metadata !"float", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/cycle.cl b/backend/kernels/cycle.cl deleted file mode 100644 index 3797bfd..0000000 --- a/backend/kernels/cycle.cl +++ /dev/null @@ -1,16 +0,0 @@ -#include "stdlib.h" -__kernel void cycle(global int *dst) -{ - int x, y; - -hop0: - x = y; - -hop1: - y = x; - goto hop0; - - dst[0] = x; -} - - diff --git a/backend/kernels/cycle.cl.ll b/backend/kernels/cycle.cl.ll deleted file mode 100644 index 2c409ff..0000000 --- a/backend/kernels/cycle.cl.ll +++ /dev/null @@ -1,15 +0,0 @@ -; ModuleID = 'cycle.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @cycle(i32 addrspace(1)* nocapture %dst) noreturn nounwind readnone noinline { -entry: - br label %hop0 - -hop0: ; preds = %hop0, %entry - br label %hop0 -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*)* @cycle} diff --git a/backend/kernels/dummy.ll b/backend/kernels/dummy.ll deleted file mode 100644 index 151fc1c..0000000 --- a/backend/kernels/dummy.ll +++ /dev/null @@ -1,74 +0,0 @@ -; ModuleID = 'void.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_device <2 x float> @_Z3madDv2_fS_S_(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone { -entry: - %0 = extractelement <2 x float> %a, i32 0 - %1 = extractelement <2 x float> %b, i32 0 - %2 = extractelement <2 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <2 x float> undef, float %call, i32 0 - %3 = extractelement <2 x float> %a, i32 1 - %4 = extractelement <2 x float> %b, i32 1 - %5 = extractelement <2 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <2 x float> %vecinit, float %call1, i32 1 - ret <2 x float> %vecinit2 -} - -declare ptx_device float @_Z3madfff(float, float, float) nounwind readnone - -define ptx_device <3 x float> @_Z3madDv3_fS_S_(<3 x float> %a, <3 x float> %b, <3 x float> %c) nounwind readnone { -entry: - %0 = extractelement <3 x float> %a, i32 0 - %1 = extractelement <3 x float> %b, i32 0 - %2 = extractelement <3 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <3 x float> undef, float %call, i32 0 - %3 = extractelement <3 x float> %a, i32 1 - %4 = extractelement <3 x float> %b, i32 1 - %5 = extractelement <3 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <3 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <3 x float> %a, i32 2 - %7 = extractelement <3 x float> %b, i32 2 - %8 = extractelement <3 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <3 x float> %vecinit2, float %call3, i32 2 - ret <3 x float> %vecinit4 -} - -define ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone { -entry: - %0 = extractelement <4 x float> %a, i32 0 - %1 = extractelement <4 x float> %b, i32 0 - %2 = extractelement <4 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <4 x float> undef, float %call, i32 0 - %3 = extractelement <4 x float> %a, i32 1 - %4 = extractelement <4 x float> %b, i32 1 - %5 = extractelement <4 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <4 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <4 x float> %a, i32 2 - %7 = extractelement <4 x float> %b, i32 2 - %8 = extractelement <4 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <4 x float> %vecinit2, float %call3, i32 2 - %9 = extractelement <4 x float> %a, i32 3 - %10 = extractelement <4 x float> %b, i32 3 - %11 = extractelement <4 x float> %c, i32 3 - %call5 = tail call ptx_device float @_Z3madfff(float %9, float %10, float %11) nounwind readnone - %vecinit6 = insertelement <4 x float> %vecinit4, float %call5, i32 3 - ret <4 x float> %vecinit6 -} - -define ptx_kernel void @hop() nounwind readnone noinline { -entry: - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void ()* @hop} diff --git a/backend/kernels/extract.cl b/backend/kernels/extract.cl deleted file mode 100644 index a350575..0000000 --- a/backend/kernels/extract.cl +++ /dev/null @@ -1,8 +0,0 @@ -#include "stdlib.h" -__kernel void extract(__global int4 *dst, __global int4 *src, int c) -{ - const int4 from = src[0]; - dst[0] = (int4)(from.x, 1, 2, 3); -} - - diff --git a/backend/kernels/extract.cl.ll b/backend/kernels/extract.cl.ll deleted file mode 100644 index f9ef7b3..0000000 --- a/backend/kernels/extract.cl.ll +++ /dev/null @@ -1,21 +0,0 @@ -; ModuleID = 'extract.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @extract(<4 x i32> addrspace(1)* nocapture %dst, <4 x i32> addrspace(1)* nocapture %src, i32 %c) nounwind noinline { -entry: - %0 = load <4 x i32> addrspace(1)* %src, align 16, !tbaa !1 - %1 = extractelement <4 x i32> %0, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %1, i32 0 - %vecinit1 = insertelement <4 x i32> %vecinit, i32 1, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit1, i32 2, i32 2 - %vecinit3 = insertelement <4 x i32> %vecinit2, i32 3, i32 3 - store <4 x i32> %vecinit3, <4 x i32> addrspace(1)* %dst, align 16, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x i32> addrspace(1)*, <4 x i32> addrspace(1)*, i32)* @extract} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/function.cl b/backend/kernels/function.cl deleted file mode 100644 index 2cd6ef2..0000000 --- a/backend/kernels/function.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include "stdlib.h" - -void write(__global int *dst) -{ - dst[0] = 1; -} - -__kernel void write2(__global int *dst, int x) -{ - write(dst); - dst[x] = 1; -} - diff --git a/backend/kernels/function.cl.ll b/backend/kernels/function.cl.ll deleted file mode 100644 index 7fdeeab..0000000 --- a/backend/kernels/function.cl.ll +++ /dev/null @@ -1,24 +0,0 @@ -; ModuleID = 'function.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_device void @write(i32 addrspace(1)* nocapture %dst) nounwind { -entry: - store i32 1, i32 addrspace(1)* %dst, align 4, !tbaa !1 - ret void -} - -define ptx_kernel void @write2(i32 addrspace(1)* nocapture %dst, i32 %x) nounwind noinline { -entry: - store i32 1, i32 addrspace(1)* %dst, align 4, !tbaa !1 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %x - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32)* @write2} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/function_param.cl b/backend/kernels/function_param.cl deleted file mode 100644 index 2558f8c..0000000 --- a/backend/kernels/function_param.cl +++ /dev/null @@ -1,15 +0,0 @@ -#include "stdlib.h" -struct struct0 -{ - int hop[5]; - int x, y, z; -}; - -__kernel void param(__global struct struct0 *dst, struct struct0 s, __local int *h, int x, int y) -{ - s.hop[4] += x + h[4]; - dst[0] = s; - dst[0].y += y; -} - - diff --git a/backend/kernels/function_param.cl.ll b/backend/kernels/function_param.cl.ll deleted file mode 100644 index 0e9064f..0000000 --- a/backend/kernels/function_param.cl.ll +++ /dev/null @@ -1,33 +0,0 @@ -; ModuleID = 'function_param.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.struct0 = type { [5 x i32], i32, i32, i32 } - -define ptx_kernel void @param(%struct.struct0 addrspace(1)* nocapture %dst, %struct.struct0* nocapture byval %s, i32 addrspace(4)* nocapture %h, i32 %x, i32 %y) nounwind noinline { -entry: - %arrayidx = getelementptr inbounds i32 addrspace(4)* %h, i32 4 - %0 = load i32 addrspace(4)* %arrayidx, align 4, !tbaa !1 - %arrayidx1 = getelementptr inbounds %struct.struct0* %s, i32 0, i32 0, i32 4 - %1 = load i32* %arrayidx1, align 4, !tbaa !1 - %add = add i32 %0, %x - %add2 = add i32 %add, %1 - store i32 %add2, i32* %arrayidx1, align 4, !tbaa !1 - %2 = bitcast %struct.struct0 addrspace(1)* %dst to i8 addrspace(1)* - %3 = bitcast %struct.struct0* %s to i8* - tail call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %2, i8* %3, i32 32, i32 4, i1 false) - %y5 = getelementptr inbounds %struct.struct0 addrspace(1)* %dst, i32 0, i32 2 - %4 = load i32 addrspace(1)* %y5, align 4, !tbaa !1 - %add6 = add nsw i32 %4, %y - store i32 %add6, i32 addrspace(1)* %y5, align 4, !tbaa !1 - ret void -} - -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind - -!opencl.kernels = !{!0} - -!0 = metadata !{void (%struct.struct0 addrspace(1)*, %struct.struct0*, i32 addrspace(4)*, i32, i32)* @param} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/g.cl b/backend/kernels/g.cl deleted file mode 100644 index b4ac4c5..0000000 --- a/backend/kernels/g.cl +++ /dev/null @@ -1,13 +0,0 @@ -#include "stdlib.h" -struct big{ - unsigned int a, b; -}; - -__kernel void add(__global struct big *b, unsigned int x, unsigned int y) -{ - __private int d[3] = {0,1,2}; - b->a = x + y + d[y]; - b->b = x - y + 10; -} - - diff --git a/backend/kernels/g.cl.ll b/backend/kernels/g.cl.ll deleted file mode 100644 index 8993e6c..0000000 --- a/backend/kernels/g.cl.ll +++ /dev/null @@ -1,91 +0,0 @@ -; ModuleID = 'g.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { i32, i32 } - -@add.d = private unnamed_addr constant [3 x i32] [i32 0, i32 1, i32 2], align 4 - -define ptx_device <2 x float> @_Z3madDv2_fS_S_(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone { -entry: - %0 = extractelement <2 x float> %a, i32 0 - %1 = extractelement <2 x float> %b, i32 0 - %2 = extractelement <2 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <2 x float> undef, float %call, i32 0 - %3 = extractelement <2 x float> %a, i32 1 - %4 = extractelement <2 x float> %b, i32 1 - %5 = extractelement <2 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <2 x float> %vecinit, float %call1, i32 1 - ret <2 x float> %vecinit2 -} - -declare ptx_device float @_Z3madfff(float, float, float) nounwind readnone - -define ptx_device <3 x float> @_Z3madDv3_fS_S_(<3 x float> %a, <3 x float> %b, <3 x float> %c) nounwind readnone { -entry: - %0 = extractelement <3 x float> %a, i32 0 - %1 = extractelement <3 x float> %b, i32 0 - %2 = extractelement <3 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <3 x float> undef, float %call, i32 0 - %3 = extractelement <3 x float> %a, i32 1 - %4 = extractelement <3 x float> %b, i32 1 - %5 = extractelement <3 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <3 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <3 x float> %a, i32 2 - %7 = extractelement <3 x float> %b, i32 2 - %8 = extractelement <3 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <3 x float> %vecinit2, float %call3, i32 2 - ret <3 x float> %vecinit4 -} - -define ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone { -entry: - %0 = extractelement <4 x float> %a, i32 0 - %1 = extractelement <4 x float> %b, i32 0 - %2 = extractelement <4 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <4 x float> undef, float %call, i32 0 - %3 = extractelement <4 x float> %a, i32 1 - %4 = extractelement <4 x float> %b, i32 1 - %5 = extractelement <4 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <4 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <4 x float> %a, i32 2 - %7 = extractelement <4 x float> %b, i32 2 - %8 = extractelement <4 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <4 x float> %vecinit2, float %call3, i32 2 - %9 = extractelement <4 x float> %a, i32 3 - %10 = extractelement <4 x float> %b, i32 3 - %11 = extractelement <4 x float> %c, i32 3 - %call5 = tail call ptx_device float @_Z3madfff(float %9, float %10, float %11) nounwind readnone - %vecinit6 = insertelement <4 x float> %vecinit4, float %call5, i32 3 - ret <4 x float> %vecinit6 -} - -define ptx_kernel void @add(%struct.big addrspace(1)* nocapture %b, i32 %x, i32 %y) nounwind noinline { -entry: - %arrayidx = getelementptr inbounds [3 x i32]* @add.d, i32 0, i32 %y - %0 = load i32* %arrayidx, align 4, !tbaa !1 - %add = add i32 %y, %x - %add1 = add i32 %add, %0 - %a = getelementptr inbounds %struct.big addrspace(1)* %b, i32 0, i32 0 - store i32 %add1, i32 addrspace(1)* %a, align 4, !tbaa !1 - %sub = add i32 %x, 10 - %add2 = sub i32 %sub, %y - %b3 = getelementptr inbounds %struct.big addrspace(1)* %b, i32 0, i32 1 - store i32 %add2, i32 addrspace(1)* %b3, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (%struct.big addrspace(1)*, i32, i32)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/get_global_id.cl b/backend/kernels/get_global_id.cl deleted file mode 100644 index 9053763..0000000 --- a/backend/kernels/get_global_id.cl +++ /dev/null @@ -1,10 +0,0 @@ -#include "stdlib.h" - -__kernel void test_global_id(__global int *dst, __global int *p) -{ - short hop = get_local_id(0); - dst[get_global_id(0)] = hop; - p[get_global_id(0)] = get_local_id(0); -} - - diff --git a/backend/kernels/get_global_id.cl.ll b/backend/kernels/get_global_id.cl.ll deleted file mode 100644 index 8a6aaa4..0000000 --- a/backend/kernels/get_global_id.cl.ll +++ /dev/null @@ -1,32 +0,0 @@ -; ModuleID = 'get_global_id.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @test_global_id(i32 addrspace(1)* nocapture %dst, i32 addrspace(1)* nocapture %p) nounwind noinline { -get_global_id.exit17: - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %sext = shl i32 %call.i, 16 - %conv1 = ashr exact i32 %sext, 16 - %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - %mul.i = mul i32 %call.i10.i, %call.i3.i - %add.i = add i32 %mul.i, %call.i - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add.i - store i32 %conv1, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %arrayidx5 = getelementptr inbounds i32 addrspace(1)* %p, i32 %add.i - store i32 %call.i, i32 addrspace(1)* %arrayidx5, align 4, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*)* @test_global_id} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/insert.cl b/backend/kernels/insert.cl deleted file mode 100644 index 429b54f..0000000 --- a/backend/kernels/insert.cl +++ /dev/null @@ -1,10 +0,0 @@ -#include "stdlib.h" - -__kernel void insert(__global int4 *dst, __global int4 *src, int c) -{ - int4 x = src[0]; - src[0].z = 1.f; - dst[0] = src[0]; -} - - diff --git a/backend/kernels/insert.cl.ll b/backend/kernels/insert.cl.ll deleted file mode 100644 index e9f3d6b..0000000 --- a/backend/kernels/insert.cl.ll +++ /dev/null @@ -1,18 +0,0 @@ -; ModuleID = 'insert.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @insert(<4 x i32> addrspace(1)* nocapture %dst, <4 x i32> addrspace(1)* nocapture %src, i32 %c) nounwind noinline { -entry: - %0 = load <4 x i32> addrspace(1)* %src, align 16 - %1 = insertelement <4 x i32> %0, i32 1, i32 2 - store <4 x i32> %1, <4 x i32> addrspace(1)* %src, align 16 - store <4 x i32> %1, <4 x i32> addrspace(1)* %dst, align 16, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x i32> addrspace(1)*, <4 x i32> addrspace(1)*, i32)* @insert} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/load_store.cl b/backend/kernels/load_store.cl deleted file mode 100644 index fdff43b..0000000 --- a/backend/kernels/load_store.cl +++ /dev/null @@ -1,6 +0,0 @@ -__kernel void load_store(__local int *dst, __local int *src) -{ - dst[0] = src[0]; -} - - diff --git a/backend/kernels/load_store.cl.ll b/backend/kernels/load_store.cl.ll deleted file mode 100644 index 84bc047..0000000 --- a/backend/kernels/load_store.cl.ll +++ /dev/null @@ -1,17 +0,0 @@ -; ModuleID = 'load_store.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @load_store(i32 addrspace(4)* nocapture %dst, i32 addrspace(4)* nocapture %src) nounwind noinline { -entry: - %0 = load i32 addrspace(4)* %src, align 4, !tbaa !1 - store i32 %0, i32 addrspace(4)* %dst, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(4)*, i32 addrspace(4)*)* @load_store} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop.cl b/backend/kernels/loop.cl deleted file mode 100644 index bba0705..0000000 --- a/backend/kernels/loop.cl +++ /dev/null @@ -1,9 +0,0 @@ -#include "stdlib.h" - -struct big { int x[10]; }; - -__kernel void add(__global int *dst, unsigned int x, struct big b) -{ - for (int i = 0; i < x; ++i) dst[i]++; -} - diff --git a/backend/kernels/loop.cl.ll b/backend/kernels/loop.cl.ll deleted file mode 100644 index f5f6085..0000000 --- a/backend/kernels/loop.cl.ll +++ /dev/null @@ -1,31 +0,0 @@ -; ModuleID = 'loop.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { -entry: - %cmp2 = icmp eq i32 %x, 0 - br i1 %cmp2, label %for.end, label %for.body - -for.body: ; preds = %for.body, %entry - %i.03 = phi i32 [ %inc1, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %i.03 - %0 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc1 = add nsw i32 %i.03, 1 - %exitcond = icmp eq i32 %inc1, %x - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body, %entry - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop2.cl b/backend/kernels/loop2.cl deleted file mode 100644 index 04997ae..0000000 --- a/backend/kernels/loop2.cl +++ /dev/null @@ -1,14 +0,0 @@ -#include "stdlib.h" - -struct big { int x[10]; }; - -__kernel void add(__global int *dst, unsigned int x, struct big b) -{ - for (int i = 0; i < x; ++i) { - if (dst[i+1] > 0) - dst[i]++; - else - dst[i] += 2; - } -} - diff --git a/backend/kernels/loop2.cl.ll b/backend/kernels/loop2.cl.ll deleted file mode 100644 index 2d710c0..0000000 --- a/backend/kernels/loop2.cl.ll +++ /dev/null @@ -1,39 +0,0 @@ -; ModuleID = 'loop2.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { -entry: - %cmp6 = icmp eq i32 %x, 0 - br i1 %cmp6, label %for.end, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %entry - %.pre = load i32 addrspace(1)* %dst, align 4, !tbaa !1 - br label %for.body - -for.body: ; preds = %for.body, %for.body.lr.ph - %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %1, %for.body ] - %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] - %add = add nsw i32 %i.07, 1 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add - %1 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %cmp1 = icmp sgt i32 %1, 0 - %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %dst, i32 %i.07 - %storemerge.v = select i1 %cmp1, i32 1, i32 2 - %storemerge = add i32 %storemerge.v, %0 - store i32 %storemerge, i32 addrspace(1)* %arrayidx2, align 4 - %exitcond = icmp eq i32 %add, %x - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body, %entry - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop3.cl b/backend/kernels/loop3.cl deleted file mode 100644 index acb5536..0000000 --- a/backend/kernels/loop3.cl +++ /dev/null @@ -1,9 +0,0 @@ -#include "stdlib.h" - -struct big { int x[10]; }; - -__kernel void add(__global int *dst, unsigned int x, struct big b) -{ - for (int i = 0; i < x; ++i) dst[get_local_id(0) + i]++; -} - diff --git a/backend/kernels/loop3.cl.ll b/backend/kernels/loop3.cl.ll deleted file mode 100644 index 984165e..0000000 --- a/backend/kernels/loop3.cl.ll +++ /dev/null @@ -1,38 +0,0 @@ -; ModuleID = 'loop3.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { -entry: - %cmp2 = icmp eq i32 %x, 0 - br i1 %cmp2, label %for.end, label %get_local_id.exit.lr.ph - -get_local_id.exit.lr.ph: ; preds = %entry - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - br label %get_local_id.exit - -get_local_id.exit: ; preds = %get_local_id.exit, %get_local_id.exit.lr.ph - %i.03 = phi i32 [ 0, %get_local_id.exit.lr.ph ], [ %inc1, %get_local_id.exit ] - %add = add i32 %call.i, %i.03 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add - %0 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc1 = add nsw i32 %i.03, 1 - %exitcond = icmp eq i32 %inc1, %x - br i1 %exitcond, label %for.end, label %get_local_id.exit - -for.end: ; preds = %get_local_id.exit, %entry - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop3.ll b/backend/kernels/loop3.ll deleted file mode 100644 index c25a6d3..0000000 --- a/backend/kernels/loop3.ll +++ /dev/null @@ -1,38 +0,0 @@ -; ModuleID = 'loop3.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { -entry: - %cmp2 = icmp eq i32 %x, 0 - br i1 %cmp2, label %for.end, label %get_local_id.exit.lr.ph - -get_local_id.exit.lr.ph: ; preds = %entry - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - br label %get_local_id.exit - -get_local_id.exit: ; preds = %get_local_id.exit, %get_local_id.exit.lr.ph - %i.03 = phi i32 [ 0, %get_local_id.exit.lr.ph ], [ %inc1, %get_local_id.exit ] - %add = add i32 %call.i, %i.03 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add - %0 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc1 = add nsw i32 %i.03, 1 - %exitcond = icmp eq i32 %inc1, %x - br i1 %exitcond, label %for.end, label %get_local_id.exit - -for.end: ; preds = %get_local_id.exit, %entry - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop4.cl b/backend/kernels/loop4.cl deleted file mode 100644 index 56fa72a..0000000 --- a/backend/kernels/loop4.cl +++ /dev/null @@ -1,12 +0,0 @@ -#include "stdlib.h" - -struct big { int x[10]; }; - -__kernel void add(__global int *dst, unsigned int x, struct big b) -{ - if (get_local_id(1) > 4) - for (int i = 0; i < x; ++i) dst[get_local_id(0) + i]++; - else - for (int i = 0; i < 2*x; ++i) dst[get_local_id(0) + i + x]++; -} - diff --git a/backend/kernels/loop4.cl.ll b/backend/kernels/loop4.cl.ll deleted file mode 100644 index 08ed4b0..0000000 --- a/backend/kernels/loop4.cl.ll +++ /dev/null @@ -1,67 +0,0 @@ -; ModuleID = 'loop4.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { -get_local_id.exit: - %call3.i = tail call ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - %cmp = icmp ugt i32 %call3.i, 4 - br i1 %cmp, label %for.cond.preheader, label %for.cond5.preheader - -for.cond.preheader: ; preds = %get_local_id.exit - %cmp124 = icmp eq i32 %x, 0 - br i1 %cmp124, label %if.end, label %get_local_id.exit17.lr.ph - -get_local_id.exit17.lr.ph: ; preds = %for.cond.preheader - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - br label %get_local_id.exit17 - -for.cond5.preheader: ; preds = %get_local_id.exit - %mul.mask = and i32 %x, 2147483647 - %cmp621 = icmp eq i32 %mul.mask, 0 - br i1 %cmp621, label %if.end, label %get_local_id.exit20.lr.ph - -get_local_id.exit20.lr.ph: ; preds = %for.cond5.preheader - %call.i18 = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %0 = shl i32 %x, 1 - br label %get_local_id.exit20 - -get_local_id.exit17: ; preds = %get_local_id.exit17, %get_local_id.exit17.lr.ph - %i.025 = phi i32 [ 0, %get_local_id.exit17.lr.ph ], [ %inc3, %get_local_id.exit17 ] - %add = add i32 %call.i, %i.025 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add - %1 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %1, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc3 = add nsw i32 %i.025, 1 - %exitcond26 = icmp eq i32 %inc3, %x - br i1 %exitcond26, label %if.end, label %get_local_id.exit17 - -get_local_id.exit20: ; preds = %get_local_id.exit20, %get_local_id.exit20.lr.ph - %i4.022 = phi i32 [ 0, %get_local_id.exit20.lr.ph ], [ %inc14, %get_local_id.exit20 ] - %add9 = add i32 %i4.022, %x - %add10 = add i32 %add9, %call.i18 - %arrayidx11 = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add10 - %2 = load i32 addrspace(1)* %arrayidx11, align 4, !tbaa !1 - %inc12 = add nsw i32 %2, 1 - store i32 %inc12, i32 addrspace(1)* %arrayidx11, align 4, !tbaa !1 - %inc14 = add nsw i32 %i4.022, 1 - %exitcond = icmp eq i32 %inc14, %0 - br i1 %exitcond, label %if.end, label %get_local_id.exit20 - -if.end: ; preds = %get_local_id.exit20, %get_local_id.exit17, %for.cond5.preheader, %for.cond.preheader - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop4.ll b/backend/kernels/loop4.ll deleted file mode 100644 index 8b5a746..0000000 --- a/backend/kernels/loop4.ll +++ /dev/null @@ -1,67 +0,0 @@ -; ModuleID = 'loop4.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { -get_local_id.exit: - %call3.i = tail call ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - %cmp = icmp ugt i32 %call3.i, 4 - br i1 %cmp, label %for.cond.preheader, label %for.cond5.preheader - -for.cond.preheader: ; preds = %get_local_id.exit - %cmp124 = icmp eq i32 %x, 0 - br i1 %cmp124, label %if.end, label %get_local_id.exit17.lr.ph - -get_local_id.exit17.lr.ph: ; preds = %for.cond.preheader - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - br label %get_local_id.exit17 - -for.cond5.preheader: ; preds = %get_local_id.exit - %mul.mask = and i32 %x, 2147483647 - %cmp621 = icmp eq i32 %mul.mask, 0 - br i1 %cmp621, label %if.end, label %get_local_id.exit20.lr.ph - -get_local_id.exit20.lr.ph: ; preds = %for.cond5.preheader - %call.i18 = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %0 = shl i32 %x, 1 - br label %get_local_id.exit20 - -get_local_id.exit17: ; preds = %get_local_id.exit17, %get_local_id.exit17.lr.ph - %i.025 = phi i32 [ 0, %get_local_id.exit17.lr.ph ], [ %inc3, %get_local_id.exit17 ] - %add = add i32 %call.i, %i.025 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add - %1 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %1, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc3 = add nsw i32 %i.025, 1 - %exitcond26 = icmp eq i32 %inc3, %x - br i1 %exitcond26, label %if.end, label %get_local_id.exit17 - -get_local_id.exit20: ; preds = %get_local_id.exit20, %get_local_id.exit20.lr.ph - %i4.022 = phi i32 [ 0, %get_local_id.exit20.lr.ph ], [ %inc14, %get_local_id.exit20 ] - %add9 = add i32 %i4.022, %x - %add10 = add i32 %add9, %call.i18 - %arrayidx11 = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add10 - %2 = load i32 addrspace(1)* %arrayidx11, align 4, !tbaa !1 - %inc12 = add nsw i32 %2, 1 - store i32 %inc12, i32 addrspace(1)* %arrayidx11, align 4, !tbaa !1 - %inc14 = add nsw i32 %i4.022, 1 - %exitcond = icmp eq i32 %inc14, %0 - br i1 %exitcond, label %if.end, label %get_local_id.exit20 - -if.end: ; preds = %get_local_id.exit20, %get_local_id.exit17, %for.cond5.preheader, %for.cond.preheader - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop5.cl b/backend/kernels/loop5.cl deleted file mode 100644 index d4a3aa2..0000000 --- a/backend/kernels/loop5.cl +++ /dev/null @@ -1,17 +0,0 @@ -#include "stdlib.h" - -struct big { int x[10]; }; - -__kernel void add(__global int *dst0, __global int *dst1, unsigned int x, int y, struct big b) -{ - __global int *dst = NULL; - if (y > 0) - dst = dst0; - else - dst = dst1; - if (get_local_id(1) > 4) - for (int i = 0; i < x; ++i) dst[get_local_id(0) + i]++; - else - for (int i = 0; i < 2*x; ++i) dst[get_local_id(0) + i + x]++; -} - diff --git a/backend/kernels/loop5.cl.ll b/backend/kernels/loop5.cl.ll deleted file mode 100644 index 884ae08..0000000 --- a/backend/kernels/loop5.cl.ll +++ /dev/null @@ -1,69 +0,0 @@ -; ModuleID = 'loop5.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst0, i32 addrspace(1)* nocapture %dst1, i32 %x, i32 %y, %struct.big* nocapture byval %b) nounwind noinline { -get_local_id.exit: - %cmp = icmp sgt i32 %y, 0 - %dst0.dst1 = select i1 %cmp, i32 addrspace(1)* %dst0, i32 addrspace(1)* %dst1 - %call3.i = tail call ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - %cmp1 = icmp ugt i32 %call3.i, 4 - br i1 %cmp1, label %for.cond.preheader, label %for.cond8.preheader - -for.cond.preheader: ; preds = %get_local_id.exit - %cmp328 = icmp eq i32 %x, 0 - br i1 %cmp328, label %if.end19, label %get_local_id.exit21.lr.ph - -get_local_id.exit21.lr.ph: ; preds = %for.cond.preheader - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - br label %get_local_id.exit21 - -for.cond8.preheader: ; preds = %get_local_id.exit - %mul.mask = and i32 %x, 2147483647 - %cmp925 = icmp eq i32 %mul.mask, 0 - br i1 %cmp925, label %if.end19, label %get_local_id.exit24.lr.ph - -get_local_id.exit24.lr.ph: ; preds = %for.cond8.preheader - %call.i22 = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %0 = shl i32 %x, 1 - br label %get_local_id.exit24 - -get_local_id.exit21: ; preds = %get_local_id.exit21, %get_local_id.exit21.lr.ph - %i.029 = phi i32 [ 0, %get_local_id.exit21.lr.ph ], [ %inc5, %get_local_id.exit21 ] - %add = add i32 %call.i, %i.029 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst0.dst1, i32 %add - %1 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %1, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc5 = add nsw i32 %i.029, 1 - %exitcond30 = icmp eq i32 %inc5, %x - br i1 %exitcond30, label %if.end19, label %get_local_id.exit21 - -get_local_id.exit24: ; preds = %get_local_id.exit24, %get_local_id.exit24.lr.ph - %i7.026 = phi i32 [ 0, %get_local_id.exit24.lr.ph ], [ %inc17, %get_local_id.exit24 ] - %add12 = add i32 %i7.026, %x - %add13 = add i32 %add12, %call.i22 - %arrayidx14 = getelementptr inbounds i32 addrspace(1)* %dst0.dst1, i32 %add13 - %2 = load i32 addrspace(1)* %arrayidx14, align 4, !tbaa !1 - %inc15 = add nsw i32 %2, 1 - store i32 %inc15, i32 addrspace(1)* %arrayidx14, align 4, !tbaa !1 - %inc17 = add nsw i32 %i7.026, 1 - %exitcond = icmp eq i32 %inc17, %0 - br i1 %exitcond, label %if.end19, label %get_local_id.exit24 - -if.end19: ; preds = %get_local_id.exit24, %get_local_id.exit21, %for.cond8.preheader, %for.cond.preheader - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*, i32, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/loop5.ll b/backend/kernels/loop5.ll deleted file mode 100644 index 089beee..0000000 --- a/backend/kernels/loop5.ll +++ /dev/null @@ -1,131 +0,0 @@ -; ModuleID = 'loop5.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.big = type { [10 x i32] } - -define ptx_device <2 x float> @_Z3madDv2_fS_S_(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone { -entry: - %0 = extractelement <2 x float> %a, i32 0 - %1 = extractelement <2 x float> %b, i32 0 - %2 = extractelement <2 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <2 x float> undef, float %call, i32 0 - %3 = extractelement <2 x float> %a, i32 1 - %4 = extractelement <2 x float> %b, i32 1 - %5 = extractelement <2 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <2 x float> %vecinit, float %call1, i32 1 - ret <2 x float> %vecinit2 -} - -declare ptx_device float @_Z3madfff(float, float, float) nounwind readnone - -define ptx_device <3 x float> @_Z3madDv3_fS_S_(<3 x float> %a, <3 x float> %b, <3 x float> %c) nounwind readnone { -entry: - %0 = extractelement <3 x float> %a, i32 0 - %1 = extractelement <3 x float> %b, i32 0 - %2 = extractelement <3 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <3 x float> undef, float %call, i32 0 - %3 = extractelement <3 x float> %a, i32 1 - %4 = extractelement <3 x float> %b, i32 1 - %5 = extractelement <3 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <3 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <3 x float> %a, i32 2 - %7 = extractelement <3 x float> %b, i32 2 - %8 = extractelement <3 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <3 x float> %vecinit2, float %call3, i32 2 - ret <3 x float> %vecinit4 -} - -define ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone { -entry: - %0 = extractelement <4 x float> %a, i32 0 - %1 = extractelement <4 x float> %b, i32 0 - %2 = extractelement <4 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <4 x float> undef, float %call, i32 0 - %3 = extractelement <4 x float> %a, i32 1 - %4 = extractelement <4 x float> %b, i32 1 - %5 = extractelement <4 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <4 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <4 x float> %a, i32 2 - %7 = extractelement <4 x float> %b, i32 2 - %8 = extractelement <4 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <4 x float> %vecinit2, float %call3, i32 2 - %9 = extractelement <4 x float> %a, i32 3 - %10 = extractelement <4 x float> %b, i32 3 - %11 = extractelement <4 x float> %c, i32 3 - %call5 = tail call ptx_device float @_Z3madfff(float %9, float %10, float %11) nounwind readnone - %vecinit6 = insertelement <4 x float> %vecinit4, float %call5, i32 3 - ret <4 x float> %vecinit6 -} - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst0, i32 addrspace(1)* nocapture %dst1, i32 %x, i32 %y, %struct.big* nocapture byval %b) nounwind noinline { -get_local_id.exit: - %cmp = icmp sgt i32 %y, 0 - %dst0.dst1 = select i1 %cmp, i32 addrspace(1)* %dst0, i32 addrspace(1)* %dst1 - %call3.i = tail call ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - %cmp1 = icmp ugt i32 %call3.i, 4 - br i1 %cmp1, label %for.cond.preheader, label %for.cond8.preheader - -for.cond.preheader: ; preds = %get_local_id.exit - %cmp328 = icmp eq i32 %x, 0 - br i1 %cmp328, label %if.end19, label %get_local_id.exit21.lr.ph - -get_local_id.exit21.lr.ph: ; preds = %for.cond.preheader - %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - br label %get_local_id.exit21 - -for.cond8.preheader: ; preds = %get_local_id.exit - %mul.mask = and i32 %x, 2147483647 - %cmp925 = icmp eq i32 %mul.mask, 0 - br i1 %cmp925, label %if.end19, label %get_local_id.exit24.lr.ph - -get_local_id.exit24.lr.ph: ; preds = %for.cond8.preheader - %call.i22 = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %0 = shl i32 %x, 1 - br label %get_local_id.exit24 - -get_local_id.exit21: ; preds = %get_local_id.exit21, %get_local_id.exit21.lr.ph - %i.029 = phi i32 [ 0, %get_local_id.exit21.lr.ph ], [ %inc5, %get_local_id.exit21 ] - %add = add i32 %call.i, %i.029 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst0.dst1, i32 %add - %1 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %1, 1 - store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc5 = add nsw i32 %i.029, 1 - %exitcond30 = icmp eq i32 %inc5, %x - br i1 %exitcond30, label %if.end19, label %get_local_id.exit21 - -get_local_id.exit24: ; preds = %get_local_id.exit24, %get_local_id.exit24.lr.ph - %i7.026 = phi i32 [ 0, %get_local_id.exit24.lr.ph ], [ %inc17, %get_local_id.exit24 ] - %add12 = add i32 %i7.026, %x - %add13 = add i32 %add12, %call.i22 - %arrayidx14 = getelementptr inbounds i32 addrspace(1)* %dst0.dst1, i32 %add13 - %2 = load i32 addrspace(1)* %arrayidx14, align 4, !tbaa !1 - %inc15 = add nsw i32 %2, 1 - store i32 %inc15, i32 addrspace(1)* %arrayidx14, align 4, !tbaa !1 - %inc17 = add nsw i32 %i7.026, 1 - %exitcond = icmp eq i32 %inc17, %0 - br i1 %exitcond, label %if.end19, label %get_local_id.exit24 - -if.end19: ; preds = %get_local_id.exit24, %get_local_id.exit21, %for.cond8.preheader, %for.cond.preheader - ret void -} - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id1() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*, i32, i32, %struct.big*)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/mad.cl.ll b/backend/kernels/mad.cl.ll deleted file mode 100644 index 6a28145..0000000 --- a/backend/kernels/mad.cl.ll +++ /dev/null @@ -1,113 +0,0 @@ -; ModuleID = 'mad.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_device <2 x float> @_Z3madDv2_fS_S_(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone { -entry: - %0 = extractelement <2 x float> %a, i32 0 - %1 = extractelement <2 x float> %b, i32 0 - %2 = extractelement <2 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <2 x float> undef, float %call, i32 0 - %3 = extractelement <2 x float> %a, i32 1 - %4 = extractelement <2 x float> %b, i32 1 - %5 = extractelement <2 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <2 x float> %vecinit, float %call1, i32 1 - ret <2 x float> %vecinit2 -} - -declare ptx_device float @_Z3madfff(float, float, float) nounwind readnone - -define ptx_device <3 x float> @_Z3madDv3_fS_S_(<3 x float> %a, <3 x float> %b, <3 x float> %c) nounwind readnone { -entry: - %0 = extractelement <3 x float> %a, i32 0 - %1 = extractelement <3 x float> %b, i32 0 - %2 = extractelement <3 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <3 x float> undef, float %call, i32 0 - %3 = extractelement <3 x float> %a, i32 1 - %4 = extractelement <3 x float> %b, i32 1 - %5 = extractelement <3 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <3 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <3 x float> %a, i32 2 - %7 = extractelement <3 x float> %b, i32 2 - %8 = extractelement <3 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <3 x float> %vecinit2, float %call3, i32 2 - ret <3 x float> %vecinit4 -} - -define ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone { -entry: - %0 = extractelement <4 x float> %a, i32 0 - %1 = extractelement <4 x float> %b, i32 0 - %2 = extractelement <4 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <4 x float> undef, float %call, i32 0 - %3 = extractelement <4 x float> %a, i32 1 - %4 = extractelement <4 x float> %b, i32 1 - %5 = extractelement <4 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <4 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <4 x float> %a, i32 2 - %7 = extractelement <4 x float> %b, i32 2 - %8 = extractelement <4 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <4 x float> %vecinit2, float %call3, i32 2 - %9 = extractelement <4 x float> %a, i32 3 - %10 = extractelement <4 x float> %b, i32 3 - %11 = extractelement <4 x float> %c, i32 3 - %call5 = tail call ptx_device float @_Z3madfff(float %9, float %10, float %11) nounwind readnone - %vecinit6 = insertelement <4 x float> %vecinit4, float %call5, i32 3 - ret <4 x float> %vecinit6 -} - -define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, float %z) nounwind noinline { -entry: - %cmp16 = icmp eq i32 %x, 0 - br i1 %cmp16, label %for.end, label %for.body - -for.body: ; preds = %for.body, %entry - %i.017 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %i.017 - %0 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %call2 = tail call ptx_device i32 @_Z3madiii(i32 %0, i32 2, i32 3) nounwind readonly - %conv = sitofp i32 %0 to float - %call5 = tail call ptx_device float @_Z3madfff(float %conv, float 2.000000e+00, float 3.000000e+00) nounwind readnone - %call.i = tail call ptx_device float @_Z3madfff(float %conv, float 0.000000e+00, float 3.000000e+00) nounwind readnone - %vecinit.i = insertelement <4 x float> undef, float %call.i, i32 0 - %call1.i = tail call ptx_device float @_Z3madfff(float %conv, float 1.000000e+00, float 3.000000e+00) nounwind readnone - %vecinit2.i = insertelement <4 x float> %vecinit.i, float %call1.i, i32 1 - %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %call5, i32 2 - %call5.i = tail call ptx_device float @_Z3madfff(float %conv, float 3.000000e+00, float 3.000000e+00) nounwind readnone - %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %call5.i, i32 3 - %mul = fmul <4 x float> %vecinit6.i, - %conv9 = fptosi float %call5 to i32 - %add = add nsw i32 %conv9, %call2 - %conv10 = sitofp i32 %add to float - %1 = extractelement <4 x float> %mul, i32 0 - %add11 = fadd float %conv10, %1 - %2 = extractelement <4 x float> %mul, i32 1 - %add12 = fadd float %add11, %2 - %3 = extractelement <4 x float> %mul, i32 2 - %add13 = fadd float %add12, %3 - %conv14 = fptosi float %add13 to i32 - store i32 %conv14, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %i.017, 1 - %exitcond = icmp eq i32 %inc, %x - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body, %entry - ret void -} - -declare ptx_device i32 @_Z3madiii(i32, i32, i32) nounwind readonly - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32, float)* @add} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/select.cl b/backend/kernels/select.cl deleted file mode 100644 index 0bf8141..0000000 --- a/backend/kernels/select.cl +++ /dev/null @@ -1,10 +0,0 @@ -#include "stdlib.h" - -__kernel void test_select(__global int4 *dst, - __global int4 *src0, - __global int4 *src1) -{ - const int4 from = select(src0[0], src0[1], src0[1]); - dst[0] = from; -} - diff --git a/backend/kernels/select.cl.ll b/backend/kernels/select.cl.ll deleted file mode 100644 index ee38103..0000000 --- a/backend/kernels/select.cl.ll +++ /dev/null @@ -1,38 +0,0 @@ -; ModuleID = 'select.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @test_select(<4 x i32> addrspace(1)* nocapture %dst, <4 x i32> addrspace(1)* nocapture %src0, <4 x i32> addrspace(1)* nocapture %src1) nounwind noinline { -entry: - %0 = load <4 x i32> addrspace(1)* %src0, align 16, !tbaa !1 - %arrayidx1 = getelementptr inbounds <4 x i32> addrspace(1)* %src0, i32 1 - %1 = load <4 x i32> addrspace(1)* %arrayidx1, align 16, !tbaa !1 - %2 = extractelement <4 x i32> %0, i32 0 - %3 = extractelement <4 x i32> %1, i32 0 - %4 = extractelement <4 x i32> %0, i32 1 - %5 = extractelement <4 x i32> %1, i32 1 - %6 = extractelement <4 x i32> %0, i32 2 - %7 = extractelement <4 x i32> %1, i32 2 - %8 = extractelement <4 x i32> %0, i32 3 - %9 = extractelement <4 x i32> %1, i32 3 - %tobool.i = icmp slt i32 %3, 0 - %cond1.i = select i1 %tobool.i, i32 %3, i32 %2 - %10 = insertelement <4 x i32> undef, i32 %cond1.i, i32 0 - %tobool3.i = icmp slt i32 %5, 0 - %cond7.i = select i1 %tobool3.i, i32 %5, i32 %4 - %11 = insertelement <4 x i32> %10, i32 %cond7.i, i32 1 - %tobool9.i = icmp slt i32 %7, 0 - %cond13.i = select i1 %tobool9.i, i32 %7, i32 %6 - %12 = insertelement <4 x i32> %11, i32 %cond13.i, i32 2 - %tobool15.i = icmp slt i32 %9, 0 - %cond19.i = select i1 %tobool15.i, i32 %9, i32 %8 - %13 = insertelement <4 x i32> %12, i32 %cond19.i, i32 3 - store <4 x i32> %13, <4 x i32> addrspace(1)* %dst, align 16, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x i32> addrspace(1)*, <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)*)* @test_select} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/short.cl b/backend/kernels/short.cl deleted file mode 100644 index 246cf02..0000000 --- a/backend/kernels/short.cl +++ /dev/null @@ -1,7 +0,0 @@ -#include "stdlib.h" -__kernel void short_write(__global short *dst, short x, short y) -{ - dst[0] = x + y; -} - - diff --git a/backend/kernels/short.cl.ll b/backend/kernels/short.cl.ll deleted file mode 100644 index c56edab..0000000 --- a/backend/kernels/short.cl.ll +++ /dev/null @@ -1,17 +0,0 @@ -; ModuleID = 'short.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @short_write(i16 addrspace(1)* nocapture %dst, i16 %x, i16 %y) nounwind noinline { -entry: - %add = add i16 %y, %x - store i16 %add, i16 addrspace(1)* %dst, align 2, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i16 addrspace(1)*, i16, i16)* @short_write} -!1 = metadata !{metadata !"short", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/shuffle.cl b/backend/kernels/shuffle.cl deleted file mode 100644 index 45d144e..0000000 --- a/backend/kernels/shuffle.cl +++ /dev/null @@ -1,8 +0,0 @@ -#include "stdlib.h" -__kernel void shuffle(__global int4 *dst, __global int4 *src, int c) -{ - const int4 from = src[0]; - dst[0] = from.xywz; -} - - diff --git a/backend/kernels/shuffle.cl.ll b/backend/kernels/shuffle.cl.ll deleted file mode 100644 index 31a1e2b..0000000 --- a/backend/kernels/shuffle.cl.ll +++ /dev/null @@ -1,17 +0,0 @@ -; ModuleID = 'shuffle.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @shuffle(<4 x i32> addrspace(1)* nocapture %dst, <4 x i32> addrspace(1)* nocapture %src, i32 %c) nounwind noinline { -entry: - %0 = load <4 x i32> addrspace(1)* %src, align 16, !tbaa !1 - %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %dst, align 16, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x i32> addrspace(1)*, <4 x i32> addrspace(1)*, i32)* @shuffle} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/simple_float4.cl b/backend/kernels/simple_float4.cl deleted file mode 100644 index 743ceea..0000000 --- a/backend/kernels/simple_float4.cl +++ /dev/null @@ -1,8 +0,0 @@ -#include "stdlib.h" - -__kernel void simple_float4(__global float4 *dst, __global float4 *src) -{ - dst[get_global_id(0)] = src[get_global_id(0)]; -} - - diff --git a/backend/kernels/simple_float4.cl.ll b/backend/kernels/simple_float4.cl.ll deleted file mode 100644 index 80009cc..0000000 --- a/backend/kernels/simple_float4.cl.ll +++ /dev/null @@ -1,29 +0,0 @@ -; ModuleID = 'simple_float4.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @simple_float4(<4 x float> addrspace(1)* nocapture %dst, <4 x float> addrspace(1)* nocapture %src) nounwind noinline { -get_global_id.exit11: - %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - %mul.i = mul i32 %call.i10.i, %call.i3.i - %add.i = add i32 %mul.i, %call.i.i - %arrayidx = getelementptr inbounds <4 x float> addrspace(1)* %src, i32 %add.i - %0 = load <4 x float> addrspace(1)* %arrayidx, align 16, !tbaa !1 - %arrayidx2 = getelementptr inbounds <4 x float> addrspace(1)* %dst, i32 %add.i - store <4 x float> %0, <4 x float> addrspace(1)* %arrayidx2, align 16, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x float> addrspace(1)*, <4 x float> addrspace(1)*)* @simple_float4} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/simple_float4_2.cl b/backend/kernels/simple_float4_2.cl deleted file mode 100644 index c35d9bb..0000000 --- a/backend/kernels/simple_float4_2.cl +++ /dev/null @@ -1,8 +0,0 @@ -#include "stdlib.h" - -__kernel void simple_float4(__global float4 *dst, __global float4 *src) -{ - dst[get_global_id(0)] = src[get_global_id(0)] * src[get_global_id(0)]; -} - - diff --git a/backend/kernels/simple_float4_2.cl.ll b/backend/kernels/simple_float4_2.cl.ll deleted file mode 100644 index ebc7e2a..0000000 --- a/backend/kernels/simple_float4_2.cl.ll +++ /dev/null @@ -1,30 +0,0 @@ -; ModuleID = 'simple_float4_2.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @simple_float4(<4 x float> addrspace(1)* nocapture %dst, <4 x float> addrspace(1)* nocapture %src) nounwind noinline { -get_global_id.exit22: - %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - %mul.i = mul i32 %call.i10.i, %call.i3.i - %add.i = add i32 %mul.i, %call.i.i - %arrayidx = getelementptr inbounds <4 x float> addrspace(1)* %src, i32 %add.i - %0 = load <4 x float> addrspace(1)* %arrayidx, align 16, !tbaa !1 - %mul = fmul <4 x float> %0, %0 - %arrayidx4 = getelementptr inbounds <4 x float> addrspace(1)* %dst, i32 %add.i - store <4 x float> %mul, <4 x float> addrspace(1)* %arrayidx4, align 16, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x float> addrspace(1)*, <4 x float> addrspace(1)*)* @simple_float4} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/simple_float4_3.cl b/backend/kernels/simple_float4_3.cl deleted file mode 100644 index d908433..0000000 --- a/backend/kernels/simple_float4_3.cl +++ /dev/null @@ -1,9 +0,0 @@ -#include "stdlib.h" - -__kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b) -{ - dst[get_global_id(0)] = select(src[get_global_id(0)], src[get_global_id(1)], (int4)(b)); - dst[get_global_id(0)] += (float4) (src[2].x, 1.f, 2.f, 3.f); -} - - diff --git a/backend/kernels/simple_float4_3.cl.ll b/backend/kernels/simple_float4_3.cl.ll deleted file mode 100644 index afcafd6..0000000 --- a/backend/kernels/simple_float4_3.cl.ll +++ /dev/null @@ -1,38 +0,0 @@ -; ModuleID = 'simple_float4_3.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @simple_float4(<4 x float> addrspace(1)* nocapture %dst, <4 x float> addrspace(1)* nocapture %src, i1 %b) nounwind noinline { -get_global_id.exit35: - %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - %mul.i = mul i32 %call.i10.i, %call.i3.i - %add.i = add i32 %mul.i, %call.i.i - %arrayidx = getelementptr inbounds <4 x float> addrspace(1)* %src, i32 %add.i - %0 = load <4 x float> addrspace(1)* %arrayidx, align 16, !tbaa !1 - %arrayidx5 = getelementptr inbounds <4 x float> addrspace(1)* %dst, i32 %add.i - store <4 x float> %0, <4 x float> addrspace(1)* %arrayidx5, align 16, !tbaa !1 - %arrayidx6 = getelementptr inbounds <4 x float> addrspace(1)* %src, i32 2 - %1 = load <4 x float> addrspace(1)* %arrayidx6, align 16 - %2 = extractelement <4 x float> %1, i32 0 - %vecinit = insertelement <4 x float> undef, float %2, i32 0 - %vecinit7 = insertelement <4 x float> %vecinit, float 1.000000e+00, i32 1 - %vecinit8 = insertelement <4 x float> %vecinit7, float 2.000000e+00, i32 2 - %vecinit9 = insertelement <4 x float> %vecinit8, float 3.000000e+00, i32 3 - %add = fadd <4 x float> %0, %vecinit9 - store <4 x float> %add, <4 x float> addrspace(1)* %arrayidx5, align 16, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x float> addrspace(1)*, <4 x float> addrspace(1)*, i1)* @simple_float4} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h deleted file mode 100644 index eaf4b17..0000000 --- a/backend/kernels/stdlib.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#define DECL_INTERNAL_WORK_ITEM_FN(NAME) \ -__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##0(void); \ -__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##1(void); \ -__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##2(void); -DECL_INTERNAL_WORK_ITEM_FN(get_group_id) -DECL_INTERNAL_WORK_ITEM_FN(get_local_id) -DECL_INTERNAL_WORK_ITEM_FN(get_local_size) -DECL_INTERNAL_WORK_ITEM_FN(get_global_size) -DECL_INTERNAL_WORK_ITEM_FN(get_num_groups) -#undef DECL_INTERNAL_WORK_ITEM_FN - -#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \ -inline unsigned NAME(unsigned int dim) { \ - if (dim == 0) return __gen_ocl_##NAME##0(); \ - else if (dim == 1) return __gen_ocl_##NAME##1(); \ - else if (dim == 2) return __gen_ocl_##NAME##2(); \ - else return 0; \ -} -DECL_PUBLIC_WORK_ITEM_FN(get_group_id) -DECL_PUBLIC_WORK_ITEM_FN(get_local_id) -DECL_PUBLIC_WORK_ITEM_FN(get_local_size) -DECL_PUBLIC_WORK_ITEM_FN(get_global_size) -DECL_PUBLIC_WORK_ITEM_FN(get_num_groups) -#undef DECL_PUBLIC_WORK_ITEM_FN - -inline unsigned int get_global_id(unsigned int dim) { - return get_local_id(dim) + get_local_size(dim) * get_group_id(dim); -} - -__attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c); -__attribute__((overloadable)) inline unsigned select(unsigned src0, unsigned src1, unsigned cond) { - return cond ? src0 : src1; -} -__attribute__((overloadable)) inline int select(int src0, int src1, int cond) { - return cond ? src0 : src1; -} - -typedef float float2 __attribute__((ext_vector_type(2))); -typedef float float3 __attribute__((ext_vector_type(3))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef int int2 __attribute__((ext_vector_type(2))); -typedef int int3 __attribute__((ext_vector_type(3))); -typedef int int4 __attribute__((ext_vector_type(4))); -typedef int uint2 __attribute__((ext_vector_type(2))); -typedef unsigned uint3 __attribute__((ext_vector_type(3))); -typedef unsigned uint4 __attribute__((ext_vector_type(4))); -typedef bool bool2 __attribute__((ext_vector_type(2))); -typedef bool bool3 __attribute__((ext_vector_type(3))); -typedef bool bool4 __attribute__((ext_vector_type(4))); - -// This will be optimized out by LLVM and will output LLVM select instructions -#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \ -__attribute__((overloadable)) \ -inline TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \ - TYPE4 dst; \ - const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \ - const TYPE x1 = src1.x; \ - const TYPE y0 = src0.y; \ - const TYPE y1 = src1.y; \ - const TYPE z0 = src0.z; \ - const TYPE z1 = src1.z; \ - const TYPE w0 = src0.w; \ - const TYPE w1 = src1.w; \ - \ - dst.x = (cond.x & MASK) ? x1 : x0; \ - dst.y = (cond.y & MASK) ? y1 : y0; \ - dst.z = (cond.z & MASK) ? z1 : z0; \ - dst.w = (cond.w & MASK) ? w1 : w0; \ - return dst; \ -} -DECL_SELECT4(int4, int, int4, 0x80000000) -DECL_SELECT4(float4, float, int4, 0x80000000) -#undef DECL_SELECT4 - -__attribute__((overloadable,always_inline)) inline float2 mad(float2 a, float2 b, float2 c) { - return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y)); -} -__attribute__((overloadable,always_inline)) inline float3 mad(float3 a, float3 b, float3 c) { - return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z)); -} -__attribute__((overloadable,always_inline)) inline float4 mad(float4 a, float4 b, float4 c) { - return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), - mad(a.z,b.z,c.z), mad(a.w,b.w,c.w)); -} - -#define __private __attribute__((address_space(0))) -#define __global __attribute__((address_space(1))) -#define __constant __attribute__((address_space(2))) -//#define __local __attribute__((address_space(3))) -#define global __global -//#define local __local -#define constant __constant -#define private __private - -#define NULL ((void*)0) diff --git a/backend/kernels/store.cl b/backend/kernels/store.cl deleted file mode 100644 index 5c47378..0000000 --- a/backend/kernels/store.cl +++ /dev/null @@ -1,7 +0,0 @@ -#include "stdlib.h" -__kernel void store(__global int *dst, __local int *dst0, int x) -{ - dst[0] = 1; -} - - diff --git a/backend/kernels/store.cl.ll b/backend/kernels/store.cl.ll deleted file mode 100644 index 64a2009..0000000 --- a/backend/kernels/store.cl.ll +++ /dev/null @@ -1,16 +0,0 @@ -; ModuleID = 'store.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @store(i32 addrspace(1)* nocapture %dst, i32 addrspace(4)* nocapture %dst0, i32 %x) nounwind noinline { -entry: - store i32 1, i32 addrspace(1)* %dst, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32 addrspace(4)*, i32)* @store} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/struct.cl b/backend/kernels/struct.cl deleted file mode 100644 index 8be397d..0000000 --- a/backend/kernels/struct.cl +++ /dev/null @@ -1,26 +0,0 @@ -#include "stdlib.h" -struct my_struct { - int a; - int b[2]; -}; - -const __constant int g[4] = {0,1,2,3}; - -__kernel void struct_cl (struct my_struct s, int x, __global int *mem, int y) -{ - __local struct my_struct hop; - __local struct my_struct array[256]; - for (int i = 0; i < 256; ++i) { - array[i].a = i; - array[i].b[0] = i; - array[i].b[0] = i+1; - } - if (y == 0) - hop = array[y]; - else - hop = array[y+1]; - array[0] = hop; - mem[0] = s.a + array[x].a + array[x+1].b[0] + g[x] + g[3]; -} - - diff --git a/backend/kernels/struct.cl.ll b/backend/kernels/struct.cl.ll deleted file mode 100644 index caafa60..0000000 --- a/backend/kernels/struct.cl.ll +++ /dev/null @@ -1,66 +0,0 @@ -; ModuleID = 'struct.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.my_struct = type { i32, [2 x i32] } - -@g = addrspace(2) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4 -@struct_cl.hop = internal addrspace(4) unnamed_addr global %struct.my_struct zeroinitializer, align 4 -@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4 - -define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32 addrspace(1)* nocapture %mem, i32 %y) nounwind noinline { -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %i.023 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %a = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.023, i32 0 - store i32 %i.023, i32 addrspace(4)* %a, align 4, !tbaa !1 - %arrayidx2 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.023, i32 1, i32 0 - %add = add nsw i32 %i.023, 1 - store i32 %add, i32 addrspace(4)* %arrayidx2, align 4, !tbaa !1 - %exitcond = icmp eq i32 %add, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - %cmp6 = icmp eq i32 %y, 0 - br i1 %cmp6, label %if.then, label %if.else - -if.then: ; preds = %for.end - tail call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast (%struct.my_struct addrspace(4)* @struct_cl.hop to i8 addrspace(4)*), i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i32 12, i32 4, i1 false) - br label %if.end - -if.else: ; preds = %for.end - %add8 = add nsw i32 %y, 1 - %arrayidx9 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add8 - %0 = bitcast %struct.my_struct addrspace(4)* %arrayidx9 to i8 addrspace(4)* - tail call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast (%struct.my_struct addrspace(4)* @struct_cl.hop to i8 addrspace(4)*), i8 addrspace(4)* %0, i32 12, i32 4, i1 false) - br label %if.end - -if.end: ; preds = %if.else, %if.then - tail call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* bitcast (%struct.my_struct addrspace(4)* @struct_cl.hop to i8 addrspace(4)*), i32 12, i32 4, i1 false) - %a10 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0 - %1 = load i32* %a10, align 4, !tbaa !1 - %a12 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %x, i32 0 - %2 = load i32 addrspace(4)* %a12, align 4, !tbaa !1 - %add14 = add nsw i32 %x, 1 - %arrayidx17 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add14, i32 1, i32 0 - %3 = load i32 addrspace(4)* %arrayidx17, align 4, !tbaa !1 - %arrayidx19 = getelementptr inbounds [4 x i32] addrspace(2)* @g, i32 0, i32 %x - %4 = load i32 addrspace(2)* %arrayidx19, align 4, !tbaa !1 - %add13 = add i32 %1, 3 - %add18 = add i32 %add13, %2 - %add20 = add i32 %add18, %3 - %add21 = add i32 %add20, %4 - store i32 %add21, i32 addrspace(1)* %mem, align 4, !tbaa !1 - ret void -} - -declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind - -!opencl.kernels = !{!0} - -!0 = metadata !{void (%struct.my_struct*, i32, i32 addrspace(1)*, i32)* @struct_cl} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/struct2.cl b/backend/kernels/struct2.cl deleted file mode 100644 index 31269f4..0000000 --- a/backend/kernels/struct2.cl +++ /dev/null @@ -1,22 +0,0 @@ -#include "stdlib.h" -struct my_struct { - int a; - int b[2]; -}; - -const __constant int g[4] = {0,1,2,3}; - -__kernel void struct_cl (struct my_struct s, int x, __global struct my_struct *mem, int y) -{ - struct my_struct hop; - if (y == 0) { - hop.a = 1; - hop.b[0] = 2; - hop.b[1] = 2; - } else { - hop = s; - } - mem[0] = hop; -} - - diff --git a/backend/kernels/struct2.cl.ll b/backend/kernels/struct2.cl.ll deleted file mode 100644 index 8d102cc..0000000 --- a/backend/kernels/struct2.cl.ll +++ /dev/null @@ -1,38 +0,0 @@ -; ModuleID = 'struct2.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.my_struct = type { i32, [2 x i32] } - -@g = addrspace(2) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4 - -define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, %struct.my_struct addrspace(1)* nocapture %mem, i32 %y) nounwind noinline { -entry: - %cmp = icmp eq i32 %y, 0 - br i1 %cmp, label %if.end, label %if.else - -if.else: ; preds = %entry - %s.0 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0 - %tmp4 = load i32* %s.0, align 4 - %s.1.0 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 1, i32 0 - %tmp5 = load i32* %s.1.0, align 4 - %s.1.1 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 1, i32 1 - %tmp6 = load i32* %s.1.1, align 4 - br label %if.end - -if.end: ; preds = %if.else, %entry - %hop.1.1.0 = phi i32 [ %tmp6, %if.else ], [ 2, %entry ] - %hop.1.0.0 = phi i32 [ %tmp5, %if.else ], [ 2, %entry ] - %hop.0.0 = phi i32 [ %tmp4, %if.else ], [ 1, %entry ] - %mem.0 = getelementptr inbounds %struct.my_struct addrspace(1)* %mem, i32 0, i32 0 - store i32 %hop.0.0, i32 addrspace(1)* %mem.0, align 4 - %mem.1.0 = getelementptr inbounds %struct.my_struct addrspace(1)* %mem, i32 0, i32 1, i32 0 - store i32 %hop.1.0.0, i32 addrspace(1)* %mem.1.0, align 4 - %mem.1.1 = getelementptr inbounds %struct.my_struct addrspace(1)* %mem, i32 0, i32 1, i32 1 - store i32 %hop.1.1.0, i32 addrspace(1)* %mem.1.1, align 4 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (%struct.my_struct*, i32, %struct.my_struct addrspace(1)*, i32)* @struct_cl} diff --git a/backend/kernels/test_select.cl b/backend/kernels/test_select.cl deleted file mode 100644 index ff4284b..0000000 --- a/backend/kernels/test_select.cl +++ /dev/null @@ -1,11 +0,0 @@ -#include "stdlib.h" -__kernel void test_select(__global int *dst, __global int *src) -{ - - if (src[get_global_id(0)] > 1) - dst[get_global_id(0)] = 1; - else - dst[get_global_id(0)] = 2; -} - - diff --git a/backend/kernels/test_select.cl.ll b/backend/kernels/test_select.cl.ll deleted file mode 100644 index 589bf2d..0000000 --- a/backend/kernels/test_select.cl.ll +++ /dev/null @@ -1,32 +0,0 @@ -; ModuleID = 'test_select.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @test_select(i32 addrspace(1)* nocapture %dst, i32 addrspace(1)* nocapture %src) nounwind noinline { -get_global_id.exit13: - %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - %mul.i = mul i32 %call.i10.i, %call.i3.i - %add.i = add i32 %mul.i, %call.i.i - %arrayidx = getelementptr inbounds i32 addrspace(1)* %src, i32 %add.i - %0 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 - %cmp = icmp sgt i32 %0, 1 - %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %dst, i32 %add.i - %. = select i1 %cmp, i32 1, i32 2 - store i32 %., i32 addrspace(1)* %arrayidx2, align 4 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*)* @test_select} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/undefined.cl b/backend/kernels/undefined.cl deleted file mode 100644 index b1e5294..0000000 --- a/backend/kernels/undefined.cl +++ /dev/null @@ -1,11 +0,0 @@ -#include "stdlib.h" -__kernel void undefined(__global int *dst) -{ - int x; - if (x == 0) - dst[0] = 0; - else - dst[0] = 1; -} - - diff --git a/backend/kernels/undefined.cl.ll b/backend/kernels/undefined.cl.ll deleted file mode 100644 index 6537580..0000000 --- a/backend/kernels/undefined.cl.ll +++ /dev/null @@ -1,16 +0,0 @@ -; ModuleID = 'undefined.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @undefined(i32 addrspace(1)* nocapture %dst) nounwind noinline { -entry: - store i32 1, i32 addrspace(1)* %dst, align 4, !tbaa !1 - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i32 addrspace(1)*)* @undefined} -!1 = metadata !{metadata !"int", metadata !2} -!2 = metadata !{metadata !"omnipotent char", metadata !3} -!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/vector_constant.cl b/backend/kernels/vector_constant.cl deleted file mode 100644 index fc9d308..0000000 --- a/backend/kernels/vector_constant.cl +++ /dev/null @@ -1,7 +0,0 @@ -#include "stdlib.h" - -__kernel void simple_float4(__global float4 *dst, __global float4 *src) -{ - dst[get_global_id(0)] = src[get_global_id(0)] + (float4)(0.f,1.f,2.f,3.f); -} - diff --git a/backend/kernels/vector_constant.cl.ll b/backend/kernels/vector_constant.cl.ll deleted file mode 100644 index 71c54d1..0000000 --- a/backend/kernels/vector_constant.cl.ll +++ /dev/null @@ -1,30 +0,0 @@ -; ModuleID = 'vector_constant.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @simple_float4(<4 x float> addrspace(1)* nocapture %dst, <4 x float> addrspace(1)* nocapture %src) nounwind noinline { -get_global_id.exit11: - %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - %mul.i = mul i32 %call.i10.i, %call.i3.i - %add.i = add i32 %mul.i, %call.i.i - %arrayidx = getelementptr inbounds <4 x float> addrspace(1)* %src, i32 %add.i - %0 = load <4 x float> addrspace(1)* %arrayidx, align 16, !tbaa !1 - %add = fadd <4 x float> %0, - %arrayidx2 = getelementptr inbounds <4 x float> addrspace(1)* %dst, i32 %add.i - store <4 x float> %add, <4 x float> addrspace(1)* %arrayidx2, align 16, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone - -declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x float> addrspace(1)*, <4 x float> addrspace(1)*)* @simple_float4} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/vector_constant.ll b/backend/kernels/vector_constant.ll deleted file mode 100644 index 964e8e5..0000000 --- a/backend/kernels/vector_constant.ll +++ /dev/null @@ -1,84 +0,0 @@ -; ModuleID = 'vector_constant.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_device <2 x float> @_Z3madDv2_fS_S_(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone { -entry: - %0 = extractelement <2 x float> %a, i32 0 - %1 = extractelement <2 x float> %b, i32 0 - %2 = extractelement <2 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <2 x float> undef, float %call, i32 0 - %3 = extractelement <2 x float> %a, i32 1 - %4 = extractelement <2 x float> %b, i32 1 - %5 = extractelement <2 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <2 x float> %vecinit, float %call1, i32 1 - ret <2 x float> %vecinit2 -} - -declare ptx_device float @_Z3madfff(float, float, float) nounwind readnone - -define ptx_device <3 x float> @_Z3madDv3_fS_S_(<3 x float> %a, <3 x float> %b, <3 x float> %c) nounwind readnone { -entry: - %0 = extractelement <3 x float> %a, i32 0 - %1 = extractelement <3 x float> %b, i32 0 - %2 = extractelement <3 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <3 x float> undef, float %call, i32 0 - %3 = extractelement <3 x float> %a, i32 1 - %4 = extractelement <3 x float> %b, i32 1 - %5 = extractelement <3 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <3 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <3 x float> %a, i32 2 - %7 = extractelement <3 x float> %b, i32 2 - %8 = extractelement <3 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <3 x float> %vecinit2, float %call3, i32 2 - ret <3 x float> %vecinit4 -} - -define ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone { -entry: - %0 = extractelement <4 x float> %a, i32 0 - %1 = extractelement <4 x float> %b, i32 0 - %2 = extractelement <4 x float> %c, i32 0 - %call = tail call ptx_device float @_Z3madfff(float %0, float %1, float %2) nounwind readnone - %vecinit = insertelement <4 x float> undef, float %call, i32 0 - %3 = extractelement <4 x float> %a, i32 1 - %4 = extractelement <4 x float> %b, i32 1 - %5 = extractelement <4 x float> %c, i32 1 - %call1 = tail call ptx_device float @_Z3madfff(float %3, float %4, float %5) nounwind readnone - %vecinit2 = insertelement <4 x float> %vecinit, float %call1, i32 1 - %6 = extractelement <4 x float> %a, i32 2 - %7 = extractelement <4 x float> %b, i32 2 - %8 = extractelement <4 x float> %c, i32 2 - %call3 = tail call ptx_device float @_Z3madfff(float %6, float %7, float %8) nounwind readnone - %vecinit4 = insertelement <4 x float> %vecinit2, float %call3, i32 2 - %9 = extractelement <4 x float> %a, i32 3 - %10 = extractelement <4 x float> %b, i32 3 - %11 = extractelement <4 x float> %c, i32 3 - %call5 = tail call ptx_device float @_Z3madfff(float %9, float %10, float %11) nounwind readnone - %vecinit6 = insertelement <4 x float> %vecinit4, float %call5, i32 3 - ret <4 x float> %vecinit6 -} - -define ptx_kernel void @simple_float4(<4 x float> addrspace(1)* nocapture %dst, <4 x float> addrspace(1)* nocapture %src) nounwind noinline { -get_global_id.exit5: - %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone - %arrayidx = getelementptr inbounds <4 x float> addrspace(1)* %src, i32 %call.i - %0 = load <4 x float> addrspace(1)* %arrayidx, align 16, !tbaa !1 - %add = fadd <4 x float> %0, - %arrayidx2 = getelementptr inbounds <4 x float> addrspace(1)* %dst, i32 %call.i - store <4 x float> %add, <4 x float> addrspace(1)* %arrayidx2, align 16, !tbaa !1 - ret void -} - -declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone - -!opencl.kernels = !{!0} - -!0 = metadata !{void (<4 x float> addrspace(1)*, <4 x float> addrspace(1)*)* @simple_float4} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/void.cl b/backend/kernels/void.cl deleted file mode 100644 index fd9b4bd..0000000 --- a/backend/kernels/void.cl +++ /dev/null @@ -1,4 +0,0 @@ -#include "stdlib.h" -__kernel void hop() {} - - diff --git a/backend/kernels/void.cl.ll b/backend/kernels/void.cl.ll deleted file mode 100644 index de543dd..0000000 --- a/backend/kernels/void.cl.ll +++ /dev/null @@ -1,12 +0,0 @@ -; ModuleID = 'void.cl.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @hop() nounwind readnone noinline { -entry: - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void ()* @hop} diff --git a/backend/src/backend/gen/gen_mesa_defines.h b/backend/src/backend/gen/gen_mesa_defines.h deleted file mode 100644 index 8bfa81a..0000000 --- a/backend/src/backend/gen/gen_mesa_defines.h +++ /dev/null @@ -1,1525 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - - /* - * Authors: - * Keith Whitwell - */ - -#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low)) -#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK) -#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) - -#ifndef BRW_DEFINES_H -#define BRW_DEFINES_H - -/* 3D state: - */ -#define PIPE_CONTROL_NOWRITE 0x00 -#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 -#define PIPE_CONTROL_WRITEDEPTH 0x02 -#define PIPE_CONTROL_WRITETIMESTAMP 0x03 - -#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 -#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 - -#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ -/* DW0 */ -# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 -# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) -# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) -/* DW1 */ -# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) -# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) - -#define _3DPRIM_POINTLIST 0x01 -#define _3DPRIM_LINELIST 0x02 -#define _3DPRIM_LINESTRIP 0x03 -#define _3DPRIM_TRILIST 0x04 -#define _3DPRIM_TRISTRIP 0x05 -#define _3DPRIM_TRIFAN 0x06 -#define _3DPRIM_QUADLIST 0x07 -#define _3DPRIM_QUADSTRIP 0x08 -#define _3DPRIM_LINELIST_ADJ 0x09 -#define _3DPRIM_LINESTRIP_ADJ 0x0A -#define _3DPRIM_TRILIST_ADJ 0x0B -#define _3DPRIM_TRISTRIP_ADJ 0x0C -#define _3DPRIM_TRISTRIP_REVERSE 0x0D -#define _3DPRIM_POLYGON 0x0E -#define _3DPRIM_RECTLIST 0x0F -#define _3DPRIM_LINELOOP 0x10 -#define _3DPRIM_POINTLIST_BF 0x11 -#define _3DPRIM_LINESTRIP_CONT 0x12 -#define _3DPRIM_LINESTRIP_BF 0x13 -#define _3DPRIM_LINESTRIP_CONT_BF 0x14 -#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 - -#define BRW_ANISORATIO_2 0 -#define BRW_ANISORATIO_4 1 -#define BRW_ANISORATIO_6 2 -#define BRW_ANISORATIO_8 3 -#define BRW_ANISORATIO_10 4 -#define BRW_ANISORATIO_12 5 -#define BRW_ANISORATIO_14 6 -#define BRW_ANISORATIO_16 7 - -#define BRW_BLENDFACTOR_ONE 0x1 -#define BRW_BLENDFACTOR_SRC_COLOR 0x2 -#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 -#define BRW_BLENDFACTOR_DST_ALPHA 0x4 -#define BRW_BLENDFACTOR_DST_COLOR 0x5 -#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 -#define BRW_BLENDFACTOR_CONST_COLOR 0x7 -#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 -#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 -#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A -#define BRW_BLENDFACTOR_ZERO 0x11 -#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 -#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 -#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 -#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 -#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 -#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 -#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 -#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A - -#define BRW_BLENDFUNCTION_ADD 0 -#define BRW_BLENDFUNCTION_SUBTRACT 1 -#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BRW_BLENDFUNCTION_MIN 3 -#define BRW_BLENDFUNCTION_MAX 4 - -#define BRW_ALPHATEST_FORMAT_UNORM8 0 -#define BRW_ALPHATEST_FORMAT_FLOAT32 1 - -#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 -#define BRW_CHROMAKEY_REPLACE_BLACK 1 - -#define BRW_CLIP_API_OGL 0 -#define BRW_CLIP_API_DX 1 - -#define BRW_CLIPMODE_NORMAL 0 -#define BRW_CLIPMODE_CLIP_ALL 1 -#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 -#define BRW_CLIPMODE_REJECT_ALL 3 -#define BRW_CLIPMODE_ACCEPT_ALL 4 -#define BRW_CLIPMODE_KERNEL_CLIP 5 - -#define BRW_CLIP_NDCSPACE 0 -#define BRW_CLIP_SCREENSPACE 1 - -#define BRW_COMPAREFUNCTION_ALWAYS 0 -#define BRW_COMPAREFUNCTION_NEVER 1 -#define BRW_COMPAREFUNCTION_LESS 2 -#define BRW_COMPAREFUNCTION_EQUAL 3 -#define BRW_COMPAREFUNCTION_LEQUAL 4 -#define BRW_COMPAREFUNCTION_GREATER 5 -#define BRW_COMPAREFUNCTION_NOTEQUAL 6 -#define BRW_COMPAREFUNCTION_GEQUAL 7 - -#define BRW_COVERAGE_PIXELS_HALF 0 -#define BRW_COVERAGE_PIXELS_1 1 -#define BRW_COVERAGE_PIXELS_2 2 -#define BRW_COVERAGE_PIXELS_4 3 - -#define BRW_CULLMODE_BOTH 0 -#define BRW_CULLMODE_NONE 1 -#define BRW_CULLMODE_FRONT 2 -#define BRW_CULLMODE_BACK 3 - -#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 -#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 - -#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 -#define BRW_DEPTHFORMAT_D32_FLOAT 1 -#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 -#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */ -#define BRW_DEPTHFORMAT_D16_UNORM 5 - -#define BRW_FLOATING_POINT_IEEE_754 0 -#define BRW_FLOATING_POINT_NON_IEEE_754 1 - -#define BRW_FRONTWINDING_CW 0 -#define BRW_FRONTWINDING_CCW 1 - -#define BRW_SPRITE_POINT_ENABLE 16 - -#define BRW_CUT_INDEX_ENABLE (1 << 10) - -#define BRW_INDEX_BYTE 0 -#define BRW_INDEX_WORD 1 -#define BRW_INDEX_DWORD 2 - -#define BRW_LOGICOPFUNCTION_CLEAR 0 -#define BRW_LOGICOPFUNCTION_NOR 1 -#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 -#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 -#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 -#define BRW_LOGICOPFUNCTION_INVERT 5 -#define BRW_LOGICOPFUNCTION_XOR 6 -#define BRW_LOGICOPFUNCTION_NAND 7 -#define BRW_LOGICOPFUNCTION_AND 8 -#define BRW_LOGICOPFUNCTION_EQUIV 9 -#define BRW_LOGICOPFUNCTION_NOOP 10 -#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 -#define BRW_LOGICOPFUNCTION_COPY 12 -#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 -#define BRW_LOGICOPFUNCTION_OR 14 -#define BRW_LOGICOPFUNCTION_SET 15 - -#define BRW_MAPFILTER_NEAREST 0x0 -#define BRW_MAPFILTER_LINEAR 0x1 -#define BRW_MAPFILTER_ANISOTROPIC 0x2 - -#define BRW_MIPFILTER_NONE 0 -#define BRW_MIPFILTER_NEAREST 1 -#define BRW_MIPFILTER_LINEAR 3 - -#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 -#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 -#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 -#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 -#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 -#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 - -#define BRW_POLYGON_FRONT_FACING 0 -#define BRW_POLYGON_BACK_FACING 1 - -#define BRW_PREFILTER_ALWAYS 0x0 -#define BRW_PREFILTER_NEVER 0x1 -#define BRW_PREFILTER_LESS 0x2 -#define BRW_PREFILTER_EQUAL 0x3 -#define BRW_PREFILTER_LEQUAL 0x4 -#define BRW_PREFILTER_GREATER 0x5 -#define BRW_PREFILTER_NOTEQUAL 0x6 -#define BRW_PREFILTER_GEQUAL 0x7 - -#define BRW_PROVOKING_VERTEX_0 0 -#define BRW_PROVOKING_VERTEX_1 1 -#define BRW_PROVOKING_VERTEX_2 2 - -#define BRW_RASTRULE_UPPER_LEFT 0 -#define BRW_RASTRULE_UPPER_RIGHT 1 -/* These are listed as "Reserved, but not seen as useful" - * in Intel documentation (page 212, "Point Rasterization Rule", - * section 7.4 "SF Pipeline State Summary", of document - * "Intel® 965 Express Chipset Family and Intel® G35 Express - * Chipset Graphics Controller Programmer's Reference Manual, - * Volume 2: 3D/Media", Revision 1.0b as of January 2008, - * available at - * http://intellinuxgraphics.org/documentation.html - * at the time of this writing). - * - * These appear to be supported on at least some - * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT - * is useful when using OpenGL to render to a FBO - * (which has the pixel coordinate Y orientation inverted - * with respect to the normal OpenGL pixel coordinate system). - */ -#define BRW_RASTRULE_LOWER_LEFT 2 -#define BRW_RASTRULE_LOWER_RIGHT 3 - -#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 -#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 -#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 - -#define BRW_STENCILOP_KEEP 0 -#define BRW_STENCILOP_ZERO 1 -#define BRW_STENCILOP_REPLACE 2 -#define BRW_STENCILOP_INCRSAT 3 -#define BRW_STENCILOP_DECRSAT 4 -#define BRW_STENCILOP_INCR 5 -#define BRW_STENCILOP_DECR 6 -#define BRW_STENCILOP_INVERT 7 - -/* Surface state DW0 */ -#define BRW_SURFACE_RC_READ_WRITE (1 << 8) -#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 -#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 -#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 -#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f -#define BRW_SURFACE_BLEND_ENABLED (1 << 13) -#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 -#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 -#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 -#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 - -#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 -#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 -#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 -#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 -#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 -#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 -#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 -#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 -#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 -#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 -#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 -#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 -#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 -#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 -#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 -#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 -#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 -#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 -#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 -#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 -#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 -#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 -#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 -#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 -#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 -#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 -#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A -#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B -#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C -#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D -#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E -#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F -#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 -#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 -#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 -#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 -#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 -#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 -#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 -#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 -#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 -#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 -#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 -#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 -#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 -#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 -#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 -#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 -#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA -#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB -#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC -#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD -#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE -#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF -#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 -#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 -#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 -#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 -#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 -#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 -#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 -#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 -#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA -#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF -#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 -#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 -#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 -#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 -#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 -#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 -#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 -#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA -#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB -#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC -#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED -#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE -#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 -#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 -#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 -#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 -#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 -#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 -#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 -#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 -#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 -#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 -#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 -#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 -#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 -#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 -#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 -#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 -#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 -#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 -#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 -#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 -#define BRW_SURFACEFORMAT_R16_UNORM 0x10A -#define BRW_SURFACEFORMAT_R16_SNORM 0x10B -#define BRW_SURFACEFORMAT_R16_SINT 0x10C -#define BRW_SURFACEFORMAT_R16_UINT 0x10D -#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E -#define BRW_SURFACEFORMAT_I16_UNORM 0x111 -#define BRW_SURFACEFORMAT_L16_UNORM 0x112 -#define BRW_SURFACEFORMAT_A16_UNORM 0x113 -#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 -#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 -#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B -#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C -#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D -#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E -#define BRW_SURFACEFORMAT_R16_USCALED 0x11F -#define BRW_SURFACEFORMAT_R8_UNORM 0x140 -#define BRW_SURFACEFORMAT_R8_SNORM 0x141 -#define BRW_SURFACEFORMAT_R8_SINT 0x142 -#define BRW_SURFACEFORMAT_R8_UINT 0x143 -#define BRW_SURFACEFORMAT_A8_UNORM 0x144 -#define BRW_SURFACEFORMAT_I8_UNORM 0x145 -#define BRW_SURFACEFORMAT_L8_UNORM 0x146 -#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 -#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 -#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 -#define BRW_SURFACEFORMAT_R8_USCALED 0x14A -#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C -#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180 -#define BRW_SURFACEFORMAT_R1_UINT 0x181 -#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 -#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 -#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 -#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 -#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 -#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 -#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A -#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B -#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C -#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D -#define BRW_SURFACEFORMAT_MONO8 0x18E -#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F -#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 -#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 -#define BRW_SURFACEFORMAT_FXT1 0x192 -#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 -#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 -#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 -#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 -#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 -#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 -#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 -#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A -#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C -#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D -#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E -#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F -#define BRW_SURFACE_FORMAT_SHIFT 18 -#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) - -#define BRW_SURFACERETURNFORMAT_FLOAT32 0 -#define BRW_SURFACERETURNFORMAT_S1 1 - -#define BRW_SURFACE_TYPE_SHIFT 29 -#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) -#define BRW_SURFACE_1D 0 -#define BRW_SURFACE_2D 1 -#define BRW_SURFACE_3D 2 -#define BRW_SURFACE_CUBE 3 -#define BRW_SURFACE_BUFFER 4 -#define BRW_SURFACE_NULL 7 - -#define GEN7_SURFACE_ARYSPC_FULL 0 -#define GEN7_SURFACE_ARYSPC_LOD0 1 - -/* Surface state DW2 */ -#define BRW_SURFACE_HEIGHT_SHIFT 19 -#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) -#define BRW_SURFACE_WIDTH_SHIFT 6 -#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) -#define BRW_SURFACE_LOD_SHIFT 2 -#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) - -/* Surface state DW3 */ -#define BRW_SURFACE_DEPTH_SHIFT 21 -#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) -#define BRW_SURFACE_PITCH_SHIFT 3 -#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) -#define BRW_SURFACE_TILED (1 << 1) -#define BRW_SURFACE_TILED_Y (1 << 0) - -/* Surface state DW4 */ -#define BRW_SURFACE_MIN_LOD_SHIFT 28 -#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) -#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) -#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) -#define GEN7_SURFACE_MULTISAMPLECOUNT_1 0 -#define GEN7_SURFACE_MULTISAMPLECOUNT_4 2 -#define GEN7_SURFACE_MULTISAMPLECOUNT_8 3 -#define GEN7_SURFACE_MSFMT_MSS 0 -#define GEN7_SURFACE_MSFMT_DEPTH_STENCIL 1 - -/* Surface state DW5 */ -#define BRW_SURFACE_X_OFFSET_SHIFT 25 -#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) -#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) -#define BRW_SURFACE_Y_OFFSET_SHIFT 20 -#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) - -/* Surface state DW7 */ -#define HSW_SCS_ZERO 0 -#define HSW_SCS_ONE 1 -#define HSW_SCS_RED 4 -#define HSW_SCS_GREEN 5 -#define HSW_SCS_BLUE 6 -#define HSW_SCS_ALPHA 7 - -#define BRW_TEXCOORDMODE_WRAP 0 -#define BRW_TEXCOORDMODE_MIRROR 1 -#define BRW_TEXCOORDMODE_CLAMP 2 -#define BRW_TEXCOORDMODE_CUBE 3 -#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 -#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 - -#define BRW_THREAD_PRIORITY_NORMAL 0 -#define BRW_THREAD_PRIORITY_HIGH 1 - -#define BRW_TILEWALK_XMAJOR 0 -#define BRW_TILEWALK_YMAJOR 1 - -#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 -#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 - -/* Execution Unit (EU) defines - */ - -#define BRW_ALIGN_1 0 -#define BRW_ALIGN_16 1 - -#define BRW_ADDRESS_DIRECT 0 -#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 - -#define BRW_CHANNEL_X 0 -#define BRW_CHANNEL_Y 1 -#define BRW_CHANNEL_Z 2 -#define BRW_CHANNEL_W 3 - -enum brw_compression { - BRW_COMPRESSION_NONE = 0, - BRW_COMPRESSION_2NDHALF = 1, - BRW_COMPRESSION_COMPRESSED = 2, -}; - -#define GEN6_COMPRESSION_1Q 0 -#define GEN6_COMPRESSION_2Q 1 -#define GEN6_COMPRESSION_3Q 2 -#define GEN6_COMPRESSION_4Q 3 -#define GEN6_COMPRESSION_1H 0 -#define GEN6_COMPRESSION_2H 2 - -#define BRW_CONDITIONAL_NONE 0 -#define BRW_CONDITIONAL_Z 1 -#define BRW_CONDITIONAL_NZ 2 -#define BRW_CONDITIONAL_EQ 1 /* Z */ -#define BRW_CONDITIONAL_NEQ 2 /* NZ */ -#define BRW_CONDITIONAL_G 3 -#define BRW_CONDITIONAL_GE 4 -#define BRW_CONDITIONAL_L 5 -#define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_R 7 -#define BRW_CONDITIONAL_O 8 -#define BRW_CONDITIONAL_U 9 - -#define BRW_DEBUG_NONE 0 -#define BRW_DEBUG_BREAKPOINT 1 - -#define BRW_DEPENDENCY_NORMAL 0 -#define BRW_DEPENDENCY_NOTCLEARED 1 -#define BRW_DEPENDENCY_NOTCHECKED 2 -#define BRW_DEPENDENCY_DISABLE 3 - -#define BRW_EXECUTE_1 0 -#define BRW_EXECUTE_2 1 -#define BRW_EXECUTE_4 2 -#define BRW_EXECUTE_8 3 -#define BRW_EXECUTE_16 4 -#define BRW_EXECUTE_32 5 - -#define BRW_HORIZONTAL_STRIDE_0 0 -#define BRW_HORIZONTAL_STRIDE_1 1 -#define BRW_HORIZONTAL_STRIDE_2 2 -#define BRW_HORIZONTAL_STRIDE_4 3 - -#define BRW_INSTRUCTION_NORMAL 0 -#define BRW_INSTRUCTION_SATURATE 1 - -#define BRW_MASK_ENABLE 0 -#define BRW_MASK_DISABLE 1 - -/** @{ - * - * Gen6 has replaced "mask enable/disable" with WECtrl, which is - * effectively the same but much simpler to think about. Now, there - * are two contributors ANDed together to whether channels are - * executed: The predication on the instruction, and the channel write - * enable. - */ -/** - * This is the default value. It means that a channel's write enable is set - * if the per-channel IP is pointing at this instruction. - */ -#define BRW_WE_NORMAL 0 -/** - * This is used like BRW_MASK_DISABLE, and causes all channels to have - * their write enable set. Note that predication still contributes to - * whether the channel actually gets written. - */ -#define BRW_WE_ALL 1 -/** @} */ - -enum opcode { - /* These are the actual hardware opcodes. */ - BRW_OPCODE_MOV = 1, - BRW_OPCODE_SEL = 2, - BRW_OPCODE_NOT = 4, - BRW_OPCODE_AND = 5, - BRW_OPCODE_OR = 6, - BRW_OPCODE_XOR = 7, - BRW_OPCODE_SHR = 8, - BRW_OPCODE_SHL = 9, - BRW_OPCODE_RSR = 10, - BRW_OPCODE_RSL = 11, - BRW_OPCODE_ASR = 12, - BRW_OPCODE_CMP = 16, - BRW_OPCODE_CMPN = 17, - BRW_OPCODE_JMPI = 32, - BRW_OPCODE_IF = 34, - BRW_OPCODE_IFF = 35, - BRW_OPCODE_ELSE = 36, - BRW_OPCODE_ENDIF = 37, - BRW_OPCODE_DO = 38, - BRW_OPCODE_WHILE = 39, - BRW_OPCODE_BREAK = 40, - BRW_OPCODE_CONTINUE = 41, - BRW_OPCODE_HALT = 42, - BRW_OPCODE_MSAVE = 44, - BRW_OPCODE_MRESTORE = 45, - BRW_OPCODE_PUSH = 46, - BRW_OPCODE_POP = 47, - BRW_OPCODE_WAIT = 48, - BRW_OPCODE_SEND = 49, - BRW_OPCODE_SENDC = 50, - BRW_OPCODE_MATH = 56, - BRW_OPCODE_ADD = 64, - BRW_OPCODE_MUL = 65, - BRW_OPCODE_AVG = 66, - BRW_OPCODE_FRC = 67, - BRW_OPCODE_RNDU = 68, - BRW_OPCODE_RNDD = 69, - BRW_OPCODE_RNDE = 70, - BRW_OPCODE_RNDZ = 71, - BRW_OPCODE_MAC = 72, - BRW_OPCODE_MACH = 73, - BRW_OPCODE_LZD = 74, - BRW_OPCODE_SAD2 = 80, - BRW_OPCODE_SADA2 = 81, - BRW_OPCODE_DP4 = 84, - BRW_OPCODE_DPH = 85, - BRW_OPCODE_DP3 = 86, - BRW_OPCODE_DP2 = 87, - BRW_OPCODE_DPA2 = 88, - BRW_OPCODE_LINE = 89, - BRW_OPCODE_PLN = 90, - BRW_OPCODE_MAD = 91, - BRW_OPCODE_NOP = 126, - - /* These are compiler backend opcodes that get translated into other - * instructions. - */ - FS_OPCODE_FB_WRITE = 128, - SHADER_OPCODE_RCP, - SHADER_OPCODE_RSQ, - SHADER_OPCODE_SQRT, - SHADER_OPCODE_EXP2, - SHADER_OPCODE_LOG2, - SHADER_OPCODE_POW, - SHADER_OPCODE_INT_QUOTIENT, - SHADER_OPCODE_INT_REMAINDER, - SHADER_OPCODE_SIN, - SHADER_OPCODE_COS, - - SHADER_OPCODE_TEX, - SHADER_OPCODE_TXD, - SHADER_OPCODE_TXF, - SHADER_OPCODE_TXL, - SHADER_OPCODE_TXS, - FS_OPCODE_TXB, - - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, - - VS_OPCODE_URB_WRITE, - VS_OPCODE_SCRATCH_READ, - VS_OPCODE_SCRATCH_WRITE, - VS_OPCODE_PULL_CONSTANT_LOAD, -}; - -#define BRW_PREDICATE_NONE 0 -#define BRW_PREDICATE_NORMAL 1 -#define BRW_PREDICATE_ALIGN1_ANYV 2 -#define BRW_PREDICATE_ALIGN1_ALLV 3 -#define BRW_PREDICATE_ALIGN1_ANY2H 4 -#define BRW_PREDICATE_ALIGN1_ALL2H 5 -#define BRW_PREDICATE_ALIGN1_ANY4H 6 -#define BRW_PREDICATE_ALIGN1_ALL4H 7 -#define BRW_PREDICATE_ALIGN1_ANY8H 8 -#define BRW_PREDICATE_ALIGN1_ALL8H 9 -#define BRW_PREDICATE_ALIGN1_ANY16H 10 -#define BRW_PREDICATE_ALIGN1_ALL16H 11 -#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 -#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 -#define BRW_PREDICATE_ALIGN16_ANY4H 6 -#define BRW_PREDICATE_ALIGN16_ALL4H 7 - -#define BRW_ARCHITECTURE_REGISTER_FILE 0 -#define BRW_GENERAL_REGISTER_FILE 1 -#define BRW_MESSAGE_REGISTER_FILE 2 -#define BRW_IMMEDIATE_VALUE 3 - -#define BRW_REGISTER_TYPE_UD 0 -#define BRW_REGISTER_TYPE_D 1 -#define BRW_REGISTER_TYPE_UW 2 -#define BRW_REGISTER_TYPE_W 3 -#define BRW_REGISTER_TYPE_UB 4 -#define BRW_REGISTER_TYPE_B 5 -#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ -#define BRW_REGISTER_TYPE_HF 6 -#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ -#define BRW_REGISTER_TYPE_F 7 - -#define BRW_ARF_NULL 0x00 -#define BRW_ARF_ADDRESS 0x10 -#define BRW_ARF_ACCUMULATOR 0x20 -#define BRW_ARF_FLAG 0x30 -#define BRW_ARF_MASK 0x40 -#define BRW_ARF_MASK_STACK 0x50 -#define BRW_ARF_MASK_STACK_DEPTH 0x60 -#define BRW_ARF_STATE 0x70 -#define BRW_ARF_CONTROL 0x80 -#define BRW_ARF_NOTIFICATION_COUNT 0x90 -#define BRW_ARF_IP 0xA0 - -#define BRW_MRF_COMPR4 (1 << 7) - -#define BRW_AMASK 0 -#define BRW_IMASK 1 -#define BRW_LMASK 2 -#define BRW_CMASK 3 - -#define BRW_THREAD_NORMAL 0 -#define BRW_THREAD_ATOMIC 1 -#define BRW_THREAD_SWITCH 2 - -#define BRW_VERTICAL_STRIDE_0 0 -#define BRW_VERTICAL_STRIDE_1 1 -#define BRW_VERTICAL_STRIDE_2 2 -#define BRW_VERTICAL_STRIDE_4 3 -#define BRW_VERTICAL_STRIDE_8 4 -#define BRW_VERTICAL_STRIDE_16 5 -#define BRW_VERTICAL_STRIDE_32 6 -#define BRW_VERTICAL_STRIDE_64 7 -#define BRW_VERTICAL_STRIDE_128 8 -#define BRW_VERTICAL_STRIDE_256 9 -#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF - -#define BRW_WIDTH_1 0 -#define BRW_WIDTH_2 1 -#define BRW_WIDTH_4 2 -#define BRW_WIDTH_8 3 -#define BRW_WIDTH_16 4 - -#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 -#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 -#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 -#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 -#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 -#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 -#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 -#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 -#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 -#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 -#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 -#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 - -#define BRW_POLYGON_FACING_FRONT 0 -#define BRW_POLYGON_FACING_BACK 1 - -/** - * Message target: Shared Function ID for where to SEND a message. - * - * These are enumerated in the ISA reference under "send - Send Message". - * In particular, see the following tables: - * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" - * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" - * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / - * Overview / GPE Function IDs - */ -enum brw_message_target { - BRW_SFID_NULL = 0, - BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */ - BRW_SFID_SAMPLER = 2, - BRW_SFID_MESSAGE_GATEWAY = 3, - BRW_SFID_DATAPORT_READ = 4, - BRW_SFID_DATAPORT_WRITE = 5, - BRW_SFID_URB = 6, - BRW_SFID_THREAD_SPAWNER = 7, - - GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, - GEN6_SFID_DATAPORT_RENDER_CACHE = 5, - GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, - - GEN7_SFID_DATAPORT_DATA_CACHE = 10, -}; - -#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 - -#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 -#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 -#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 - -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 - -#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 -#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 -#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31 - -/* for GEN5 only */ -#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 -#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 -#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 -#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 - -#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 -#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 -#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 -#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 -#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 - -#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 -#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 - -#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 -#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 - -/* This one stays the same across generations. */ -#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 -/* GEN4 */ -#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 -/* G45, GEN5 */ -#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 -#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 -#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 -#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 -#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 -/* GEN6 */ -#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 -#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 -#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 -#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 -#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 - -#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 -#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 -#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 - -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 - -#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 -#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 -#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 -#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 -#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 -#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 -#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 - -/* GEN6 */ -#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 -#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 -#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 -#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 -#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 -#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 -#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 -#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 - -/* GEN7 */ -#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10 - -#define BRW_MATH_FUNCTION_INV 1 -#define BRW_MATH_FUNCTION_LOG 2 -#define BRW_MATH_FUNCTION_EXP 3 -#define BRW_MATH_FUNCTION_SQRT 4 -#define BRW_MATH_FUNCTION_RSQ 5 -#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ -#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ -#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ -#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ -#define BRW_MATH_FUNCTION_POW 10 -#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 -#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 -#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 - -#define BRW_MATH_INTEGER_UNSIGNED 0 -#define BRW_MATH_INTEGER_SIGNED 1 - -#define BRW_MATH_PRECISION_FULL 0 -#define BRW_MATH_PRECISION_PARTIAL 1 - -#define BRW_MATH_SATURATE_NONE 0 -#define BRW_MATH_SATURATE_SATURATE 1 - -#define BRW_MATH_DATA_VECTOR 0 -#define BRW_MATH_DATA_SCALAR 1 - -#define BRW_URB_OPCODE_WRITE 0 - -#define BRW_URB_SWIZZLE_NONE 0 -#define BRW_URB_SWIZZLE_INTERLEAVE 1 -#define BRW_URB_SWIZZLE_TRANSPOSE 2 - -#define BRW_SCRATCH_SPACE_SIZE_1K 0 -#define BRW_SCRATCH_SPACE_SIZE_2K 1 -#define BRW_SCRATCH_SPACE_SIZE_4K 2 -#define BRW_SCRATCH_SPACE_SIZE_8K 3 -#define BRW_SCRATCH_SPACE_SIZE_16K 4 -#define BRW_SCRATCH_SPACE_SIZE_32K 5 -#define BRW_SCRATCH_SPACE_SIZE_64K 6 -#define BRW_SCRATCH_SPACE_SIZE_128K 7 -#define BRW_SCRATCH_SPACE_SIZE_256K 8 -#define BRW_SCRATCH_SPACE_SIZE_512K 9 -#define BRW_SCRATCH_SPACE_SIZE_1M 10 -#define BRW_SCRATCH_SPACE_SIZE_2M 11 - - - - -#define CMD_URB_FENCE 0x6000 -#define CMD_CS_URB_STATE 0x6001 -#define CMD_CONST_BUFFER 0x6002 - -#define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_SIP 0x6102 -#define CMD_PIPELINE_SELECT_965 0x6104 -#define CMD_PIPELINE_SELECT_GM45 0x6904 - -#define _3DSTATE_PIPELINED_POINTERS 0x7800 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 -# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) -# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) -# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) - -#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ - -#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ -# define PS_SAMPLER_STATE_CHANGE (1 << 12) -# define GS_SAMPLER_STATE_CHANGE (1 << 9) -# define VS_SAMPLER_STATE_CHANGE (1 << 8) -/* DW1: VS */ -/* DW2: GS */ -/* DW3: PS */ - -#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */ - -#define _3DSTATE_VERTEX_BUFFERS 0x7808 -# define BRW_VB0_INDEX_SHIFT 27 -# define GEN6_VB0_INDEX_SHIFT 26 -# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) -# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) -# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) -# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) -# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) -# define BRW_VB0_PITCH_SHIFT 0 - -#define _3DSTATE_VERTEX_ELEMENTS 0x7809 -# define BRW_VE0_INDEX_SHIFT 27 -# define GEN6_VE0_INDEX_SHIFT 26 -# define BRW_VE0_FORMAT_SHIFT 16 -# define BRW_VE0_VALID (1 << 26) -# define GEN6_VE0_VALID (1 << 25) -# define BRW_VE0_SRC_OFFSET_SHIFT 0 -# define BRW_VE1_COMPONENT_NOSTORE 0 -# define BRW_VE1_COMPONENT_STORE_SRC 1 -# define BRW_VE1_COMPONENT_STORE_0 2 -# define BRW_VE1_COMPONENT_STORE_1_FLT 3 -# define BRW_VE1_COMPONENT_STORE_1_INT 4 -# define BRW_VE1_COMPONENT_STORE_VID 5 -# define BRW_VE1_COMPONENT_STORE_IID 6 -# define BRW_VE1_COMPONENT_STORE_PID 7 -# define BRW_VE1_COMPONENT_0_SHIFT 28 -# define BRW_VE1_COMPONENT_1_SHIFT 24 -# define BRW_VE1_COMPONENT_2_SHIFT 20 -# define BRW_VE1_COMPONENT_3_SHIFT 16 -# define BRW_VE1_DST_OFFSET_SHIFT 0 - -#define CMD_INDEX_BUFFER 0x780a -#define GEN4_3DSTATE_VF_STATISTICS 0x780b -#define GM45_3DSTATE_VF_STATISTICS 0x680b -#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ -#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */ -#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */ - -#define _3DSTATE_URB 0x7805 /* GEN6 */ -# define GEN6_URB_VS_SIZE_SHIFT 16 -# define GEN6_URB_VS_ENTRIES_SHIFT 0 -# define GEN6_URB_GS_ENTRIES_SHIFT 8 -# define GEN6_URB_GS_SIZE_SHIFT 0 - -#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ -#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ -#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ -#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */ -# define GEN7_URB_ENTRY_SIZE_SHIFT 16 -# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 - -#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ -# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 - -#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ -# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) -# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) -# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) - -#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */ -#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */ - -#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ - -#define _3DSTATE_VS 0x7810 /* GEN6+ */ -/* DW2 */ -# define GEN6_VS_SPF_MODE (1 << 31) -# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) -# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 -# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW4 */ -# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 -# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 -# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW5 */ -# define GEN6_VS_MAX_THREADS_SHIFT 25 -# define HSW_VS_MAX_THREADS_SHIFT 23 -# define GEN6_VS_STATISTICS_ENABLE (1 << 10) -# define GEN6_VS_CACHE_DISABLE (1 << 1) -# define GEN6_VS_ENABLE (1 << 0) - -#define _3DSTATE_GS 0x7811 /* GEN6+ */ -/* DW2 */ -# define GEN6_GS_SPF_MODE (1 << 31) -# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) -# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 -# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW4 */ -# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 -# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10) -# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 -# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 -/* DW5 */ -# define GEN6_GS_MAX_THREADS_SHIFT 25 -# define GEN6_GS_STATISTICS_ENABLE (1 << 10) -# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) -# define GEN6_GS_RENDERING_ENABLE (1 << 8) -# define GEN7_GS_ENABLE (1 << 0) -/* DW6 */ -# define GEN6_GS_REORDER (1 << 30) -# define GEN6_GS_DISCARD_ADJACENCY (1 << 29) -# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) -# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27) -# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16 -# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) -# define GEN6_GS_ENABLE (1 << 15) - -# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) -# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) - -#define _3DSTATE_HS 0x781B /* GEN7+ */ -#define _3DSTATE_TE 0x781C /* GEN7+ */ -#define _3DSTATE_DS 0x781D /* GEN7+ */ - -#define _3DSTATE_CLIP 0x7812 /* GEN6+ */ -/* DW1 */ -# define GEN7_CLIP_WINDING_CW (0 << 20) -# define GEN7_CLIP_WINDING_CCW (1 << 20) -# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) -# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) -# define GEN7_CLIP_EARLY_CULL (1 << 18) -# define GEN7_CLIP_CULLMODE_BOTH (0 << 16) -# define GEN7_CLIP_CULLMODE_NONE (1 << 16) -# define GEN7_CLIP_CULLMODE_FRONT (2 << 16) -# define GEN7_CLIP_CULLMODE_BACK (3 << 16) -# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) -/** - * Just does cheap culling based on the clip distance. Bits must be - * disjoint with USER_CLIP_CLIP_DISTANCE bits. - */ -# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 -/* DW2 */ -# define GEN6_CLIP_ENABLE (1 << 31) -# define GEN6_CLIP_API_OGL (0 << 30) -# define GEN6_CLIP_API_D3D (1 << 30) -# define GEN6_CLIP_XY_TEST (1 << 28) -# define GEN6_CLIP_Z_TEST (1 << 27) -# define GEN6_CLIP_GB_TEST (1 << 26) -/** 8-bit field of which user clip distances to clip aganist. */ -# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 -# define GEN6_CLIP_MODE_NORMAL (0 << 13) -# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) -# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) -# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) -# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8) -# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 -# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 -# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 -/* DW3 */ -# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 -# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 -# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) - -#define _3DSTATE_SF 0x7813 /* GEN6+ */ -/* DW1 (for gen6) */ -# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 -# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) -# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) -# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) -# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW2 */ -# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) -# define GEN6_SF_STATISTICS_ENABLE (1 << 10) -# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) -# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) -# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) -# define GEN6_SF_FRONT_SOLID (0 << 5) -# define GEN6_SF_FRONT_WIREFRAME (1 << 5) -# define GEN6_SF_FRONT_POINT (2 << 5) -# define GEN6_SF_BACK_SOLID (0 << 3) -# define GEN6_SF_BACK_WIREFRAME (1 << 3) -# define GEN6_SF_BACK_POINT (2 << 3) -# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) -# define GEN6_SF_WINDING_CCW (1 << 0) -/* DW3 */ -# define GEN6_SF_LINE_AA_ENABLE (1 << 31) -# define GEN6_SF_CULL_BOTH (0 << 29) -# define GEN6_SF_CULL_NONE (1 << 29) -# define GEN6_SF_CULL_FRONT (2 << 29) -# define GEN6_SF_CULL_BACK (3 << 29) -# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ -# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) -# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) -# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) -# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) -# define GEN6_SF_SCISSOR_ENABLE (1 << 11) -# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) -# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) -# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) -# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) -/* DW4 */ -# define GEN6_SF_TRI_PROVOKE_SHIFT 29 -# define GEN6_SF_LINE_PROVOKE_SHIFT 27 -# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 -# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) -# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) -# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) -# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) -# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) -# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ -/* DW5: depth offset constant */ -/* DW6: depth offset scale */ -/* DW7: depth offset clamp */ -/* DW8 */ -# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) -# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) -# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) -# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) -# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 -# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 -# define ATTRIBUTE_1_SOURCE_SHIFT 16 -# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) -# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) -# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) -# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) -# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 -# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 -# define ATTRIBUTE_0_SOURCE_SHIFT 0 - -# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 -# define ATTRIBUTE_SWIZZLE_SHIFT 6 - -/* DW16: Point sprite texture coordinate enables */ -/* DW17: Constant interpolation enables */ -/* DW18: attr 0-7 wrap shortest enables */ -/* DW19: attr 8-16 wrap shortest enables */ - -/* On GEN7, many fields of 3DSTATE_SF were split out into a new command: - * 3DSTATE_SBE. The remaining fields live in different DWords, but retain - * the same bit-offset. The only new field: - */ -/* GEN7/DW1: */ -# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 -/* GEN7/DW2: */ -# define HSW_SF_LINE_STIPPLE_ENABLE 14 - -#define _3DSTATE_SBE 0x781F /* GEN7+ */ -/* DW1 */ -# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) -# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22 -# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21) -# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) -# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */ -/* DW10: Point sprite texture coordinate enables */ -/* DW11: Constant interpolation enables */ -/* DW12: attr 0-7 wrap shortest enables */ -/* DW13: attr 8-16 wrap shortest enables */ - -enum brw_wm_barycentric_interp_mode { - BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0, - BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1, - BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2, - BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3, - BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4, - BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5, - BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6 -}; - -#define _3DSTATE_WM 0x7814 /* GEN6+ */ -/* DW1: kernel pointer */ -/* DW2 */ -# define GEN6_WM_SPF_MODE (1 << 31) -# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) -# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 -# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW3: scratch space */ -/* DW4 */ -# define GEN6_WM_STATISTICS_ENABLE (1 << 31) -# define GEN6_WM_DEPTH_CLEAR (1 << 30) -# define GEN6_WM_DEPTH_RESOLVE (1 << 28) -# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 -# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 -# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 -/* DW5 */ -# define GEN6_WM_MAX_THREADS_SHIFT 25 -# define GEN6_WM_KILL_ENABLE (1 << 22) -# define GEN6_WM_COMPUTED_DEPTH (1 << 21) -# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) -# define GEN6_WM_DISPATCH_ENABLE (1 << 19) -# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) -# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) -# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) -# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) -# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) -# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) -# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) -# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) -# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) -# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) -# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) -# define GEN6_WM_USES_SOURCE_W (1 << 8) -# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) -# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) -# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) -# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) -/* DW6 */ -# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 -# define GEN6_WM_POSOFFSET_NONE (0 << 18) -# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) -# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) -# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) -# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) -# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) -# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) -# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) -# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) -# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) -# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) -# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) -# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10 -# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) -# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) -# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) -# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) -# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) -# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0) -# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) -/* DW7: kernel 1 pointer */ -/* DW8: kernel 2 pointer */ - -#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */ -#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */ -#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */ -# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) -# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) -# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) -# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) - -#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */ -#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */ - -#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */ -/* DW1 */ -# define SO_FUNCTION_ENABLE (1 << 31) -# define SO_RENDERING_DISABLE (1 << 30) -/* This selects which incoming rendering stream goes down the pipeline. The - * rendering stream is 0 if not defined by special cases in the GS state. - */ -# define SO_RENDER_STREAM_SELECT_SHIFT 27 -# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27) -/* Controls reordering of TRISTRIP_* elements in stream output (not rendering). - */ -# define SO_REORDER_TRAILING (1 << 26) -/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */ -# define SO_STATISTICS_ENABLE (1 << 25) -# define SO_BUFFER_ENABLE(n) (1 << (8 + (n))) -/* DW2 */ -# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29 -# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29) -# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24 -# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24) -# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21 -# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21) -# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16 -# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16) -# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13 -# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13) -# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8 -# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8) -# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5 -# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5) -# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0 -# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0) - -/* 3DSTATE_WM for Gen7 */ -/* DW1 */ -# define GEN7_WM_STATISTICS_ENABLE (1 << 31) -# define GEN7_WM_DEPTH_CLEAR (1 << 30) -# define GEN7_WM_DISPATCH_ENABLE (1 << 29) -# define GEN7_WM_DEPTH_RESOLVE (1 << 28) -# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -# define GEN7_WM_KILL_ENABLE (1 << 25) -# define GEN7_WM_PSCDEPTH_OFF (0 << 23) -# define GEN7_WM_PSCDEPTH_ON (1 << 23) -# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23) -# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23) -# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) -# define GEN7_WM_USES_SOURCE_W (1 << 19) -# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) -# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) -# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17) -# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11 -# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) -# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) -# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) -# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) -# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) -# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6) -# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6) -# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6) -# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6) -# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) -# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3) -# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) -# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0) -# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0) -# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0) -# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0) -/* DW2 */ -# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31) -# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) - -#define _3DSTATE_PS 0x7820 /* GEN7+ */ -/* DW1: kernel pointer */ -/* DW2 */ -# define GEN7_PS_SPF_MODE (1 << 31) -# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) -# define GEN7_PS_SAMPLER_COUNT_SHIFT 27 -# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW3: scratch space */ -/* DW4 */ -# define IVB_PS_MAX_THREADS_SHIFT 24 -# define HSW_PS_MAX_THREADS_SHIFT 23 -# define HSW_PS_SAMPLE_MASK_SHIFT 12 -# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12) -# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) -# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) -# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) -# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) -# define GEN7_PS_POSOFFSET_NONE (0 << 3) -# define GEN7_PS_POSOFFSET_CENTROID (2 << 3) -# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) -# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2) -# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1) -# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0) -/* DW5 */ -# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16 -# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8 -# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0 -/* DW6: kernel 1 pointer */ -/* DW7: kernel 2 pointer */ - -#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */ - -#define _3DSTATE_DRAWING_RECTANGLE 0x7900 -#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 -#define _3DSTATE_CHROMA_KEY 0x7904 -#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */ -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 -#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 -#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ - -#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ -/* DW1 */ -# define SVB_INDEX_SHIFT 29 -# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ -/* DW2: SVB index */ -/* DW3: SVB maximum index */ - -#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */ -/* DW1 */ -# define MS_PIXEL_LOCATION_CENTER (0 << 4) -# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) -# define MS_NUMSAMPLES_1 (0 << 1) -# define MS_NUMSAMPLES_4 (2 << 1) -# define MS_NUMSAMPLES_8 (3 << 1) - -#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ -#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ - -#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804 -#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805 -#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806 -# define HSW_STENCIL_ENABLED (1 << 31) -#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807 - -#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */ -# define GEN5_DEPTH_CLEAR_VALID (1 << 15) -/* DW1: depth clear value */ -/* DW2 */ -# define GEN7_DEPTH_CLEAR_VALID (1 << 0) - -#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */ -/* DW1 */ -# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12 -# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12) -# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8 -# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8) -# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4 -# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4) -# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0 -# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0) -/* DW2 */ -# define SO_NUM_ENTRIES_3_SHIFT 24 -# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24) -# define SO_NUM_ENTRIES_2_SHIFT 16 -# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16) -# define SO_NUM_ENTRIES_1_SHIFT 8 -# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8) -# define SO_NUM_ENTRIES_0_SHIFT 0 -# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0) - -/* SO_DECL DW0 */ -# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12 -# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12) -# define SO_DECL_HOLE_FLAG (1 << 11) -# define SO_DECL_REGISTER_INDEX_SHIFT 4 -# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4) -# define SO_DECL_COMPONENT_MASK_SHIFT 0 -# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0) - -#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */ -/* DW1 */ -# define SO_BUFFER_INDEX_SHIFT 29 -# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29) -# define SO_BUFFER_PITCH_SHIFT 0 -# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0) -/* DW2: start address */ -/* DW3: end address. */ - -#define CMD_PIPE_CONTROL 0x7a00 - -#define CMD_MI_FLUSH 0x0200 - -/* Bitfields for the URB_WRITE message, DW2 of message header: */ -#define URB_WRITE_PRIM_END 0x1 -#define URB_WRITE_PRIM_START 0x2 -#define URB_WRITE_PRIM_TYPE_SHIFT 2 - -/* Maximum number of entries that can be addressed using a binding table - * pointer of type SURFTYPE_BUFFER - */ -#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) - -#endif diff --git a/backend/src/backend/gen/gen_mesa_structs.h b/backend/src/backend/gen/gen_mesa_structs.h deleted file mode 100644 index 2f16362..0000000 --- a/backend/src/backend/gen/gen_mesa_structs.h +++ /dev/null @@ -1,1513 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STRUCTS_H -#define BRW_STRUCTS_H - - -/** Number of general purpose registers (VS, WM, etc) */ -#define BRW_MAX_GRF 128 - -/** - * First GRF used for the MRF hack. - * - * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We - * haven't converted our compiler to be aware of this, so it asks for MRFs and - * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The - * register allocators have to be careful of this to avoid corrupting the "MRF"s - * with actual GRF allocations. - */ -#define GEN7_MRF_HACK_START 112. - -/** Number of message register file registers */ -#define BRW_MAX_MRF 16 - -/* These seem to be passed around as function args, so it works out - * better to keep them as #defines: - */ -#define BRW_FLUSH_READ_CACHE 0x1 -#define BRW_FLUSH_STATE_CACHE 0x2 -#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 -#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 - -struct brw_urb_fence -{ - struct - { - uint32_t length:8; - uint32_t vs_realloc:1; - uint32_t gs_realloc:1; - uint32_t clp_realloc:1; - uint32_t sf_realloc:1; - uint32_t vfe_realloc:1; - uint32_t cs_realloc:1; - uint32_t pad:2; - uint32_t opcode:16; - } header; - - struct - { - uint32_t vs_fence:10; - uint32_t gs_fence:10; - uint32_t clp_fence:10; - uint32_t pad:2; - } bits0; - - struct - { - uint32_t sf_fence:10; - uint32_t vf_fence:10; - uint32_t cs_fence:11; - uint32_t pad:1; - } bits1; -}; - -/* State structs for the various fixed function units: - */ - - -struct thread0 -{ - uint32_t pad0:1; - uint32_t grf_reg_count:3; - uint32_t pad1:2; - uint32_t kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ -}; - -struct thread1 -{ - uint32_t ext_halt_exception_enable:1; - uint32_t sw_exception_enable:1; - uint32_t mask_stack_exception_enable:1; - uint32_t timeout_exception_enable:1; - uint32_t illegal_op_exception_enable:1; - uint32_t pad0:3; - uint32_t depth_coef_urb_read_offset:6; /* WM only */ - uint32_t pad1:2; - uint32_t floating_point_mode:1; - uint32_t thread_priority:1; - uint32_t binding_table_entry_count:8; - uint32_t pad3:5; - uint32_t single_program_flow:1; -}; - -struct thread2 -{ - uint32_t per_thread_scratch_space:4; - uint32_t pad0:6; - uint32_t scratch_space_base_pointer:22; -}; - - -struct thread3 -{ - uint32_t dispatch_grf_start_reg:4; - uint32_t urb_entry_read_offset:6; - uint32_t pad0:1; - uint32_t urb_entry_read_length:6; - uint32_t pad1:1; - uint32_t const_urb_entry_read_offset:6; - uint32_t pad2:1; - uint32_t const_urb_entry_read_length:6; - uint32_t pad3:1; -}; - - - -struct brw_clip_unit_state -{ - struct thread0 thread0; - struct - { - uint32_t pad0:7; - uint32_t sw_exception_enable:1; - uint32_t pad1:3; - uint32_t mask_stack_exception_enable:1; - uint32_t pad2:1; - uint32_t illegal_op_exception_enable:1; - uint32_t pad3:2; - uint32_t floating_point_mode:1; - uint32_t thread_priority:1; - uint32_t binding_table_entry_count:8; - uint32_t pad4:5; - uint32_t single_program_flow:1; - } thread1; - - struct thread2 thread2; - struct thread3 thread3; - - struct - { - uint32_t pad0:9; - uint32_t gs_output_stats:1; /* not always */ - uint32_t stats_enable:1; - uint32_t nr_urb_entries:7; - uint32_t pad1:1; - uint32_t urb_entry_allocation_size:5; - uint32_t pad2:1; - uint32_t max_threads:5; /* may be less */ - uint32_t pad3:2; - } thread4; - - struct - { - uint32_t pad0:13; - uint32_t clip_mode:3; - uint32_t userclip_enable_flags:8; - uint32_t userclip_must_clip:1; - uint32_t negative_w_clip_test:1; - uint32_t guard_band_enable:1; - uint32_t viewport_z_clip_enable:1; - uint32_t viewport_xy_clip_enable:1; - uint32_t vertex_position_space:1; - uint32_t api_mode:1; - uint32_t pad2:1; - } clip5; - - struct - { - uint32_t pad0:5; - uint32_t clipper_viewport_state_ptr:27; - } clip6; - - - float viewport_xmin; - float viewport_xmax; - float viewport_ymin; - float viewport_ymax; -}; - -struct gen6_blend_state -{ - struct { - uint32_t dest_blend_factor:5; - uint32_t source_blend_factor:5; - uint32_t pad3:1; - uint32_t blend_func:3; - uint32_t pad2:1; - uint32_t ia_dest_blend_factor:5; - uint32_t ia_source_blend_factor:5; - uint32_t pad1:1; - uint32_t ia_blend_func:3; - uint32_t pad0:1; - uint32_t ia_blend_enable:1; - uint32_t blend_enable:1; - } blend0; - - struct { - uint32_t post_blend_clamp_enable:1; - uint32_t pre_blend_clamp_enable:1; - uint32_t clamp_range:2; - uint32_t pad0:4; - uint32_t x_dither_offset:2; - uint32_t y_dither_offset:2; - uint32_t dither_enable:1; - uint32_t alpha_test_func:3; - uint32_t alpha_test_enable:1; - uint32_t pad1:1; - uint32_t logic_op_func:4; - uint32_t logic_op_enable:1; - uint32_t pad2:1; - uint32_t write_disable_b:1; - uint32_t write_disable_g:1; - uint32_t write_disable_r:1; - uint32_t write_disable_a:1; - uint32_t pad3:1; - uint32_t alpha_to_coverage_dither:1; - uint32_t alpha_to_one:1; - uint32_t alpha_to_coverage:1; - } blend1; -}; - -struct gen6_color_calc_state -{ - struct { - uint32_t alpha_test_format:1; - uint32_t pad0:14; - uint32_t round_disable:1; - uint32_t bf_stencil_ref:8; - uint32_t stencil_ref:8; - } cc0; - - union { - float alpha_ref_f; - struct { - uint32_t ui:8; - uint32_t pad0:24; - } alpha_ref_fi; - } cc1; - - float constant_r; - float constant_g; - float constant_b; - float constant_a; -}; - -struct gen6_depth_stencil_state -{ - struct { - uint32_t pad0:3; - uint32_t bf_stencil_pass_depth_pass_op:3; - uint32_t bf_stencil_pass_depth_fail_op:3; - uint32_t bf_stencil_fail_op:3; - uint32_t bf_stencil_func:3; - uint32_t bf_stencil_enable:1; - uint32_t pad1:2; - uint32_t stencil_write_enable:1; - uint32_t stencil_pass_depth_pass_op:3; - uint32_t stencil_pass_depth_fail_op:3; - uint32_t stencil_fail_op:3; - uint32_t stencil_func:3; - uint32_t stencil_enable:1; - } ds0; - - struct { - uint32_t bf_stencil_write_mask:8; - uint32_t bf_stencil_test_mask:8; - uint32_t stencil_write_mask:8; - uint32_t stencil_test_mask:8; - } ds1; - - struct { - uint32_t pad0:26; - uint32_t depth_write_enable:1; - uint32_t depth_test_func:3; - uint32_t pad1:1; - uint32_t depth_test_enable:1; - } ds2; -}; - -struct brw_cc_unit_state -{ - struct - { - uint32_t pad0:3; - uint32_t bf_stencil_pass_depth_pass_op:3; - uint32_t bf_stencil_pass_depth_fail_op:3; - uint32_t bf_stencil_fail_op:3; - uint32_t bf_stencil_func:3; - uint32_t bf_stencil_enable:1; - uint32_t pad1:2; - uint32_t stencil_write_enable:1; - uint32_t stencil_pass_depth_pass_op:3; - uint32_t stencil_pass_depth_fail_op:3; - uint32_t stencil_fail_op:3; - uint32_t stencil_func:3; - uint32_t stencil_enable:1; - } cc0; - - - struct - { - uint32_t bf_stencil_ref:8; - uint32_t stencil_write_mask:8; - uint32_t stencil_test_mask:8; - uint32_t stencil_ref:8; - } cc1; - - - struct - { - uint32_t logicop_enable:1; - uint32_t pad0:10; - uint32_t depth_write_enable:1; - uint32_t depth_test_function:3; - uint32_t depth_test:1; - uint32_t bf_stencil_write_mask:8; - uint32_t bf_stencil_test_mask:8; - } cc2; - - - struct - { - uint32_t pad0:8; - uint32_t alpha_test_func:3; - uint32_t alpha_test:1; - uint32_t blend_enable:1; - uint32_t ia_blend_enable:1; - uint32_t pad1:1; - uint32_t alpha_test_format:1; - uint32_t pad2:16; - } cc3; - - struct - { - uint32_t pad0:5; - uint32_t cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ - } cc4; - - struct - { - uint32_t pad0:2; - uint32_t ia_dest_blend_factor:5; - uint32_t ia_src_blend_factor:5; - uint32_t ia_blend_function:3; - uint32_t statistics_enable:1; - uint32_t logicop_func:4; - uint32_t pad1:11; - uint32_t dither_enable:1; - } cc5; - - struct - { - uint32_t clamp_post_alpha_blend:1; - uint32_t clamp_pre_alpha_blend:1; - uint32_t clamp_range:2; - uint32_t pad0:11; - uint32_t y_dither_offset:2; - uint32_t x_dither_offset:2; - uint32_t dest_blend_factor:5; - uint32_t src_blend_factor:5; - uint32_t blend_function:3; - } cc6; - - struct { - union { - float f; - uint8_t ub[4]; - } alpha_ref; - } cc7; -}; - -struct brw_sf_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - uint32_t pad0:10; - uint32_t stats_enable:1; - uint32_t nr_urb_entries:7; - uint32_t pad1:1; - uint32_t urb_entry_allocation_size:5; - uint32_t pad2:1; - uint32_t max_threads:6; - uint32_t pad3:1; - } thread4; - - struct - { - uint32_t front_winding:1; - uint32_t viewport_transform:1; - uint32_t pad0:3; - uint32_t sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ - } sf5; - - struct - { - uint32_t pad0:9; - uint32_t dest_org_vbias:4; - uint32_t dest_org_hbias:4; - uint32_t scissor:1; - uint32_t disable_2x2_trifilter:1; - uint32_t disable_zero_pix_trifilter:1; - uint32_t point_rast_rule:2; - uint32_t line_endcap_aa_region_width:2; - uint32_t line_width:4; - uint32_t fast_scissor_disable:1; - uint32_t cull_mode:2; - uint32_t aa_enable:1; - } sf6; - - struct - { - uint32_t point_size:11; - uint32_t use_point_size_state:1; - uint32_t subpixel_precision:1; - uint32_t sprite_point:1; - uint32_t pad0:10; - uint32_t aa_line_distance_mode:1; - uint32_t trifan_pv:2; - uint32_t linestrip_pv:2; - uint32_t tristrip_pv:2; - uint32_t line_last_pixel_enable:1; - } sf7; - -}; - -struct gen6_scissor_rect -{ - uint32_t xmin:16; - uint32_t ymin:16; - uint32_t xmax:16; - uint32_t ymax:16; -}; - -struct brw_gs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - uint32_t pad0:8; - uint32_t rendering_enable:1; /* for Ironlake */ - uint32_t pad4:1; - uint32_t stats_enable:1; - uint32_t nr_urb_entries:7; - uint32_t pad1:1; - uint32_t urb_entry_allocation_size:5; - uint32_t pad2:1; - uint32_t max_threads:5; - uint32_t pad3:2; - } thread4; - - struct - { - uint32_t sampler_count:3; - uint32_t pad0:2; - uint32_t sampler_state_pointer:27; - } gs5; - - - struct - { - uint32_t max_vp_index:4; - uint32_t pad0:12; - uint32_t svbi_post_inc_value:10; - uint32_t pad1:1; - uint32_t svbi_post_inc_enable:1; - uint32_t svbi_payload:1; - uint32_t discard_adjaceny:1; - uint32_t reorder_enable:1; - uint32_t pad2:1; - } gs6; -}; - - -struct brw_vs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - uint32_t pad0:10; - uint32_t stats_enable:1; - uint32_t nr_urb_entries:7; - uint32_t pad1:1; - uint32_t urb_entry_allocation_size:5; - uint32_t pad2:1; - uint32_t max_threads:6; - uint32_t pad3:1; - } thread4; - - struct - { - uint32_t sampler_count:3; - uint32_t pad0:2; - uint32_t sampler_state_pointer:27; - } vs5; - - struct - { - uint32_t vs_enable:1; - uint32_t vert_cache_disable:1; - uint32_t pad0:30; - } vs6; -}; - - -struct brw_wm_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct { - uint32_t stats_enable:1; - uint32_t depth_buffer_clear:1; - uint32_t sampler_count:3; - uint32_t sampler_state_pointer:27; - } wm4; - - struct - { - uint32_t enable_8_pix:1; - uint32_t enable_16_pix:1; - uint32_t enable_32_pix:1; - uint32_t enable_con_32_pix:1; - uint32_t enable_con_64_pix:1; - uint32_t pad0:1; - - /* These next four bits are for Ironlake+ */ - uint32_t fast_span_coverage_enable:1; - uint32_t depth_buffer_clear:1; - uint32_t depth_buffer_resolve_enable:1; - uint32_t hierarchical_depth_buffer_resolve_enable:1; - - uint32_t legacy_global_depth_bias:1; - uint32_t line_stipple:1; - uint32_t depth_offset:1; - uint32_t polygon_stipple:1; - uint32_t line_aa_region_width:2; - uint32_t line_endcap_aa_region_width:2; - uint32_t early_depth_test:1; - uint32_t thread_dispatch_enable:1; - uint32_t program_uses_depth:1; - uint32_t program_computes_depth:1; - uint32_t program_uses_killpixel:1; - uint32_t legacy_line_rast: 1; - uint32_t transposed_urb_read_enable:1; - uint32_t max_threads:7; - } wm5; - - float global_depth_offset_constant; - float global_depth_offset_scale; - - /* for Ironlake only */ - struct { - uint32_t pad0:1; - uint32_t grf_reg_count_1:3; - uint32_t pad1:2; - uint32_t kernel_start_pointer_1:26; - } wm8; - - struct { - uint32_t pad0:1; - uint32_t grf_reg_count_2:3; - uint32_t pad1:2; - uint32_t kernel_start_pointer_2:26; - } wm9; - - struct { - uint32_t pad0:1; - uint32_t grf_reg_count_3:3; - uint32_t pad1:2; - uint32_t kernel_start_pointer_3:26; - } wm10; -}; - -struct brw_sampler_default_color { - float color[4]; -}; - -struct gen5_sampler_default_color { - uint8_t ub[4]; - float f[4]; - uint16_t hf[4]; - uint16_t us[4]; - int16_t s[4]; - uint8_t b[4]; -}; - -struct brw_sampler_state -{ - - struct - { - uint32_t shadow_function:3; - uint32_t lod_bias:11; - uint32_t min_filter:3; - uint32_t mag_filter:3; - uint32_t mip_filter:2; - uint32_t base_level:5; - uint32_t min_mag_neq:1; - uint32_t lod_preclamp:1; - uint32_t default_color_mode:1; - uint32_t pad0:1; - uint32_t disable:1; - } ss0; - - struct - { - uint32_t r_wrap_mode:3; - uint32_t t_wrap_mode:3; - uint32_t s_wrap_mode:3; - uint32_t cube_control_mode:1; - uint32_t pad:2; - uint32_t max_lod:10; - uint32_t min_lod:10; - } ss1; - - - struct - { - uint32_t pad:5; - uint32_t default_color_pointer:27; - } ss2; - - struct - { - uint32_t non_normalized_coord:1; - uint32_t pad:12; - uint32_t address_round:6; - uint32_t max_aniso:3; - uint32_t chroma_key_mode:1; - uint32_t chroma_key_index:2; - uint32_t chroma_key_enable:1; - uint32_t monochrome_filter_width:3; - uint32_t monochrome_filter_height:3; - } ss3; -}; - -struct gen7_sampler_state -{ - struct - { - uint32_t aniso_algorithm:1; - uint32_t lod_bias:13; - uint32_t min_filter:3; - uint32_t mag_filter:3; - uint32_t mip_filter:2; - uint32_t base_level:5; - uint32_t pad1:1; - uint32_t lod_preclamp:1; - uint32_t default_color_mode:1; - uint32_t pad0:1; - uint32_t disable:1; - } ss0; - - struct - { - uint32_t cube_control_mode:1; - uint32_t shadow_function:3; - uint32_t pad:4; - uint32_t max_lod:12; - uint32_t min_lod:12; - } ss1; - - struct - { - uint32_t pad:5; - uint32_t default_color_pointer:27; - } ss2; - - struct - { - uint32_t r_wrap_mode:3; - uint32_t t_wrap_mode:3; - uint32_t s_wrap_mode:3; - uint32_t pad:1; - uint32_t non_normalized_coord:1; - uint32_t trilinear_quality:2; - uint32_t address_round:6; - uint32_t max_aniso:3; - uint32_t chroma_key_mode:1; - uint32_t chroma_key_index:2; - uint32_t chroma_key_enable:1; - uint32_t pad0:6; - } ss3; -}; - -struct brw_clipper_viewport -{ - float xmin; - float xmax; - float ymin; - float ymax; -}; - -struct brw_cc_viewport -{ - float min_depth; - float max_depth; -}; - -struct brw_sf_viewport -{ - struct { - float m00; - float m11; - float m22; - float m30; - float m31; - float m32; - } viewport; - - /* scissor coordinates are inclusive */ - struct { - int16_t xmin; - int16_t ymin; - int16_t xmax; - int16_t ymax; - } scissor; -}; - -struct gen6_sf_viewport { - float m00; - float m11; - float m22; - float m30; - float m31; - float m32; -}; - -struct gen7_sf_clip_viewport { - struct { - float m00; - float m11; - float m22; - float m30; - float m31; - float m32; - } viewport; - - uint32_t pad0[2]; - - struct { - float xmin; - float xmax; - float ymin; - float ymax; - } guardband; - - float pad1[4]; -}; - -/* volume 5c Shared Functions - 1.13.4.1.2 */ -struct gen7_surface_state -{ - struct { - uint32_t cube_pos_z:1; - uint32_t cube_neg_z:1; - uint32_t cube_pos_y:1; - uint32_t cube_neg_y:1; - uint32_t cube_pos_x:1; - uint32_t cube_neg_x:1; - uint32_t pad2:2; - uint32_t render_cache_read_write:1; - uint32_t pad1:1; - uint32_t surface_array_spacing:1; - uint32_t vert_line_stride_ofs:1; - uint32_t vert_line_stride:1; - uint32_t tile_walk:1; - uint32_t tiled_surface:1; - uint32_t horizontal_alignment:1; - uint32_t vertical_alignment:2; - uint32_t surface_format:9; /**< BRW_SURFACEFORMAT_x */ - uint32_t pad0:1; - uint32_t is_array:1; - uint32_t surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ - } ss0; - - struct { - uint32_t base_addr; - } ss1; - - struct { - uint32_t width:14; - uint32_t pad1:2; - uint32_t height:14; - uint32_t pad0:2; - } ss2; - - struct { - uint32_t pitch:18; - uint32_t pad:3; - uint32_t depth:11; - } ss3; - - struct { - uint32_t multisample_position_palette_index:3; - uint32_t num_multisamples:3; - uint32_t multisampled_surface_storage_format:1; - uint32_t render_target_view_extent:11; - uint32_t min_array_elt:11; - uint32_t rotation:2; - uint32_t pad0:1; - } ss4; - - struct { - uint32_t mip_count:4; - uint32_t min_lod:4; - uint32_t pad1:12; - uint32_t y_offset:4; - uint32_t pad0:1; - uint32_t x_offset:7; - } ss5; - - struct { - uint32_t pad; /* Multisample Control Surface stuff */ - } ss6; - - struct { - uint32_t resource_min_lod:12; - - /* Only on Haswell */ - uint32_t pad0:4; - uint32_t shader_chanel_select_a:3; - uint32_t shader_chanel_select_b:3; - uint32_t shader_chanel_select_g:3; - uint32_t shader_chanel_select_r:3; - - uint32_t alpha_clear_color:1; - uint32_t blue_clear_color:1; - uint32_t green_clear_color:1; - uint32_t red_clear_color:1; - } ss7; -}; - - -struct brw_vertex_element_state -{ - struct - { - uint32_t src_offset:11; - uint32_t pad:5; - uint32_t src_format:9; - uint32_t pad0:1; - uint32_t valid:1; - uint32_t vertex_buffer_index:5; - } ve0; - - struct - { - uint32_t dst_offset:8; - uint32_t pad:8; - uint32_t vfcomponent3:4; - uint32_t vfcomponent2:4; - uint32_t vfcomponent1:4; - uint32_t vfcomponent0:4; - } ve1; -}; - -struct brw_urb_immediate { - uint32_t opcode:4; - uint32_t offset:6; - uint32_t swizzle_control:2; - uint32_t pad:1; - uint32_t allocate:1; - uint32_t used:1; - uint32_t complete:1; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; -}; - -/* Instruction format for the execution units: -*/ - -struct brw_instruction -{ - struct - { - uint32_t opcode:7; - uint32_t pad:1; - uint32_t access_mode:1; - uint32_t mask_control:1; - uint32_t dependency_control:2; - uint32_t compression_control:2; /* gen6: quater control */ - uint32_t thread_control:2; - uint32_t predicate_control:4; - uint32_t predicate_inverse:1; - uint32_t execution_size:3; - /** - * Conditional Modifier for most instructions. On Gen6+, this is also - * used for the SEND instruction's Message Target/SFID. - */ - uint32_t destreg__conditionalmod:4; - uint32_t acc_wr_control:1; - uint32_t cmpt_control:1; - uint32_t debug_control:1; - uint32_t saturate:1; - } header; - - union { - struct - { - uint32_t dest_reg_file:2; - uint32_t dest_reg_type:3; - uint32_t src0_reg_file:2; - uint32_t src0_reg_type:3; - uint32_t src1_reg_file:2; - uint32_t src1_reg_type:3; - uint32_t pad:1; - uint32_t dest_subreg_nr:5; - uint32_t dest_reg_nr:8; - uint32_t dest_horiz_stride:2; - uint32_t dest_address_mode:1; - } da1; - - struct - { - uint32_t dest_reg_file:2; - uint32_t dest_reg_type:3; - uint32_t src0_reg_file:2; - uint32_t src0_reg_type:3; - uint32_t src1_reg_file:2; /* 0x00000c00 */ - uint32_t src1_reg_type:3; /* 0x00007000 */ - uint32_t pad:1; - int32_t dest_indirect_offset:10; /* offset against the deref'd address reg */ - uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */ - uint32_t dest_horiz_stride:2; - uint32_t dest_address_mode:1; - } ia1; - - struct - { - uint32_t dest_reg_file:2; - uint32_t dest_reg_type:3; - uint32_t src0_reg_file:2; - uint32_t src0_reg_type:3; - uint32_t src1_reg_file:2; - uint32_t src1_reg_type:3; - uint32_t pad:1; - uint32_t dest_writemask:4; - uint32_t dest_subreg_nr:1; - uint32_t dest_reg_nr:8; - uint32_t dest_horiz_stride:2; - uint32_t dest_address_mode:1; - } da16; - - struct - { - uint32_t dest_reg_file:2; - uint32_t dest_reg_type:3; - uint32_t src0_reg_file:2; - uint32_t src0_reg_type:3; - uint32_t pad0:6; - uint32_t dest_writemask:4; - int32_t dest_indirect_offset:6; - uint32_t dest_subreg_nr:3; - uint32_t dest_horiz_stride:2; - uint32_t dest_address_mode:1; - } ia16; - - struct { - uint32_t dest_reg_file:2; - uint32_t dest_reg_type:3; - uint32_t src0_reg_file:2; - uint32_t src0_reg_type:3; - uint32_t src1_reg_file:2; - uint32_t src1_reg_type:3; - uint32_t pad:1; - - int32_t jump_count:16; - } branch_gen6; - - struct { - uint32_t dest_reg_file:1; - uint32_t flag_subreg_num:1; - uint32_t pad0:2; - uint32_t src0_abs:1; - uint32_t src0_negate:1; - uint32_t src1_abs:1; - uint32_t src1_negate:1; - uint32_t src2_abs:1; - uint32_t src2_negate:1; - uint32_t pad1:7; - uint32_t dest_writemask:4; - uint32_t dest_subreg_nr:3; - uint32_t dest_reg_nr:8; - } da3src; - } bits1; - - - union { - struct - { - uint32_t src0_subreg_nr:5; - uint32_t src0_reg_nr:8; - uint32_t src0_abs:1; - uint32_t src0_negate:1; - uint32_t src0_address_mode:1; - uint32_t src0_horiz_stride:2; - uint32_t src0_width:3; - uint32_t src0_vert_stride:4; - uint32_t flag_reg_nr:1; - uint32_t pad:6; - } da1; - - struct - { - int32_t src0_indirect_offset:10; - uint32_t src0_subreg_nr:3; - uint32_t src0_abs:1; - uint32_t src0_negate:1; - uint32_t src0_address_mode:1; - uint32_t src0_horiz_stride:2; - uint32_t src0_width:3; - uint32_t src0_vert_stride:4; - uint32_t flag_reg_nr:1; - uint32_t pad:6; - } ia1; - - struct - { - uint32_t src0_swz_x:2; - uint32_t src0_swz_y:2; - uint32_t src0_subreg_nr:1; - uint32_t src0_reg_nr:8; - uint32_t src0_abs:1; - uint32_t src0_negate:1; - uint32_t src0_address_mode:1; - uint32_t src0_swz_z:2; - uint32_t src0_swz_w:2; - uint32_t pad0:1; - uint32_t src0_vert_stride:4; - uint32_t flag_reg_nr:1; - uint32_t pad1:6; - } da16; - - struct - { - uint32_t src0_swz_x:2; - uint32_t src0_swz_y:2; - int32_t src0_indirect_offset:6; - uint32_t src0_subreg_nr:3; - uint32_t src0_abs:1; - uint32_t src0_negate:1; - uint32_t src0_address_mode:1; - uint32_t src0_swz_z:2; - uint32_t src0_swz_w:2; - uint32_t pad0:1; - uint32_t src0_vert_stride:4; - uint32_t flag_reg_nr:1; - uint32_t pad1:6; - } ia16; - - /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. - * - * Does not apply to Gen6+. The SFID/message target moved to bits - * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. - */ - struct - { - uint32_t pad:26; - uint32_t end_of_thread:1; - uint32_t pad1:1; - uint32_t sfid:4; - } send_gen5; /* for Ironlake only */ - - struct { - uint32_t src0_rep_ctrl:1; - uint32_t src0_swizzle:8; - uint32_t src0_subreg_nr:3; - uint32_t src0_reg_nr:8; - uint32_t pad0:1; - uint32_t src1_rep_ctrl:1; - uint32_t src1_swizzle:8; - uint32_t src1_subreg_nr_low:2; - } da3src; - } bits2; - - union - { - struct - { - uint32_t src1_subreg_nr:5; - uint32_t src1_reg_nr:8; - uint32_t src1_abs:1; - uint32_t src1_negate:1; - uint32_t src1_address_mode:1; - uint32_t src1_horiz_stride:2; - uint32_t src1_width:3; - uint32_t src1_vert_stride:4; - uint32_t pad0:7; - } da1; - - struct - { - uint32_t src1_swz_x:2; - uint32_t src1_swz_y:2; - uint32_t src1_subreg_nr:1; - uint32_t src1_reg_nr:8; - uint32_t src1_abs:1; - uint32_t src1_negate:1; - uint32_t src1_address_mode:1; - uint32_t src1_swz_z:2; - uint32_t src1_swz_w:2; - uint32_t pad1:1; - uint32_t src1_vert_stride:4; - uint32_t pad2:7; - } da16; - - struct - { - int32_t src1_indirect_offset:10; - uint32_t src1_subreg_nr:3; - uint32_t src1_abs:1; - uint32_t src1_negate:1; - uint32_t src1_address_mode:1; - uint32_t src1_horiz_stride:2; - uint32_t src1_width:3; - uint32_t src1_vert_stride:4; - uint32_t flag_reg_nr:1; - uint32_t pad1:6; - } ia1; - - struct - { - uint32_t src1_swz_x:2; - uint32_t src1_swz_y:2; - int32_t src1_indirect_offset:6; - uint32_t src1_subreg_nr:3; - uint32_t src1_abs:1; - uint32_t src1_negate:1; - uint32_t pad0:1; - uint32_t src1_swz_z:2; - uint32_t src1_swz_w:2; - uint32_t pad1:1; - uint32_t src1_vert_stride:4; - uint32_t flag_reg_nr:1; - uint32_t pad2:6; - } ia16; - - - struct - { - int32_t jump_count:16; /* note: signed */ - uint32_t pop_count:4; - uint32_t pad0:12; - } if_else; - - /* This is also used for gen7 IF/ELSE instructions */ - struct - { - /* Signed jump distance to the ip to jump to if all channels - * are disabled after the break or continue. It should point - * to the end of the innermost control flow block, as that's - * where some channel could get re-enabled. - */ - int jip:16; - - /* Signed jump distance to the location to resume execution - * of this channel if it's enabled for the break or continue. - */ - int uip:16; - } break_cont; - - /** - * \defgroup SEND instructions / Message Descriptors - * - * @{ - */ - - /** - * Generic Message Descriptor for Gen4 SEND instructions. The structs - * below expand function_control to something specific for their - * message. Due to struct packing issues, they duplicate these bits. - * - * See the G45 PRM, Volume 4, Table 14-15. - */ - struct { - uint32_t function_control:16; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } generic; - - /** - * Generic Message Descriptor for Gen5-7 SEND instructions. - * - * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most - * of the information on the SEND instruction is missing from the public - * Ironlake PRM.) - * - * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. - * According to the SEND instruction description: - * "The MSb of the message description, the EOT field, always comes from - * bit 127 of the instruction word"...which is bit 31 of this field. - */ - struct { - uint32_t function_control:19; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } generic_gen5; - - /** G45 PRM, Volume 4, Section 6.1.1.1 */ - struct { - uint32_t function:4; - uint32_t int_type:1; - uint32_t precision:1; - uint32_t saturate:1; - uint32_t data_type:1; - uint32_t pad0:8; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } math; - - /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ - struct { - uint32_t function:4; - uint32_t int_type:1; - uint32_t precision:1; - uint32_t saturate:1; - uint32_t data_type:1; - uint32_t snapshot:1; - uint32_t pad0:10; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } math_gen5; - - /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ - struct { - uint32_t binding_table_index:8; - uint32_t sampler:4; - uint32_t return_format:2; - uint32_t msg_type:2; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } sampler; - - /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ - struct { - uint32_t binding_table_index:8; - uint32_t sampler:4; - uint32_t msg_type:4; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } sampler_g4x; - - /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ - struct { - uint32_t binding_table_index:8; - uint32_t sampler:4; - uint32_t msg_type:4; - uint32_t simd_mode:2; - uint32_t pad0:1; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } sampler_gen5; - - struct { - uint32_t binding_table_index:8; - uint32_t sampler:4; - uint32_t msg_type:5; - uint32_t simd_mode:2; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } sampler_gen7; - - struct brw_urb_immediate urb; - - struct { - uint32_t opcode:4; - uint32_t offset:6; - uint32_t swizzle_control:2; - uint32_t pad:1; - uint32_t allocate:1; - uint32_t used:1; - uint32_t complete:1; - uint32_t pad0:3; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } urb_gen5; - - struct { - uint32_t opcode:3; - uint32_t offset:11; - uint32_t swizzle_control:1; - uint32_t complete:1; - uint32_t per_slot_offset:1; - uint32_t pad0:2; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } urb_gen7; - - /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:4; - uint32_t msg_type:2; - uint32_t target_cache:2; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } dp_read; - - /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:3; - uint32_t msg_type:3; - uint32_t target_cache:2; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } dp_read_g4x; - - /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:3; - uint32_t msg_type:3; - uint32_t target_cache:2; - uint32_t pad0:3; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } dp_read_gen5; - - /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:3; - uint32_t last_render_target:1; - uint32_t msg_type:3; - uint32_t send_commit_msg:1; - uint32_t response_length:4; - uint32_t msg_length:4; - uint32_t msg_target:4; - uint32_t pad1:3; - uint32_t end_of_thread:1; - } dp_write; - - /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:3; - uint32_t last_render_target:1; - uint32_t msg_type:3; - uint32_t send_commit_msg:1; - uint32_t pad0:3; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } dp_write_gen5; - - /** - * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. - * - * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. - **/ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:5; - uint32_t msg_type:3; - uint32_t pad0:3; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } gen6_dp_sampler_const_cache; - - /** - * Message for the Sandybridge Render Cache Data Port. - * - * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, - * Section 3.9.2.1.1: Message Descriptor. - * - * "Slot Group Select" and "Last Render Target" are part of the - * 5-bit message control for Render Target Write messages. See - * Section 3.9.9.2.1 of the same volume. - */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:3; - uint32_t slot_group_select:1; - uint32_t last_render_target:1; - uint32_t msg_type:4; - uint32_t send_commit_msg:1; - uint32_t pad0:1; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad1:2; - uint32_t end_of_thread:1; - } gen6_dp; - - /** - * Message for any of the Gen7 Data Port caches. - * - * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / - * Data Port Messages / Message Descriptor. Once again, "Slot Group - * Select" and "Last Render Target" are part of the 6-bit message - * control for Render Target Writes. - */ - struct { - uint32_t binding_table_index:8; - uint32_t msg_control:3; - uint32_t slot_group_select:1; - uint32_t last_render_target:1; - uint32_t msg_control_pad:1; - uint32_t msg_type:4; - uint32_t pad1:1; - uint32_t header_present:1; - uint32_t response_length:5; - uint32_t msg_length:4; - uint32_t pad2:2; - uint32_t end_of_thread:1; - } gen7_dp; - /** @} */ - - struct { - uint32_t src1_subreg_nr_high:1; - uint32_t src1_reg_nr:8; - uint32_t pad0:1; - uint32_t src2_rep_ctrl:1; - uint32_t src2_swizzle:8; - uint32_t src2_subreg_nr:3; - uint32_t src2_reg_nr:8; - uint32_t pad1:2; - } da3src; - - int32_t d; - uint32_t ud; - float f; - } bits3; -}; - - -#endif -- 2.7.4