From: Benjamin Segovia Date: Sat, 3 Mar 2012 11:07:13 +0000 (-0800) Subject: Added first support for compare instructions Added first support for convert instructions X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c18f60b676d2ab6f13a10af5a2c34139ecd42dd1;p=contrib%2Fbeignet.git Added first support for compare instructions Added first support for convert instructions --- diff --git a/backend/kernels/add.o b/backend/kernels/add.o deleted file mode 100644 index fcff924..0000000 Binary files a/backend/kernels/add.o and /dev/null differ diff --git a/backend/kernels/add2.o b/backend/kernels/add2.o deleted file mode 100644 index 8b5ebb4..0000000 Binary files a/backend/kernels/add2.o and /dev/null differ diff --git a/backend/kernels/cmp.cl b/backend/kernels/cmp.cl new file mode 100644 index 0000000..aab1615 --- /dev/null +++ b/backend/kernels/cmp.cl @@ -0,0 +1,5 @@ +__kernel void test_cmp(__global bool *dst, int x, int y, float z, float w) +{ + dst[0] = (x < y) + (z > w); +} + diff --git a/backend/kernels/cmp.ll b/backend/kernels/cmp.ll new file mode 100644 index 0000000..c2f6400 --- /dev/null +++ b/backend/kernels/cmp.ll @@ -0,0 +1,22 @@ +; ModuleID = 'cmp.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @test_cmp(i8* nocapture %dst, i32 %x, i32 %y, float %z, float %w) nounwind noinline { +entry: + %cmp = icmp slt i32 %x, %y + %conv = zext i1 %cmp to i32 + %cmp1 = fcmp ogt float %z, %w + %add = sext i1 %cmp1 to i32 + %tobool = icmp ne i32 %conv, %add + %frombool = zext i1 %tobool to i8 + store i8 %frombool, i8* %dst, align 1, !tbaa !1 + ret void +} + +!opencl.kernels = !{!0} + +!0 = metadata !{void (i8*, i32, i32, float, float)* @test_cmp} +!1 = metadata !{metadata !"bool", metadata !2} +!2 = metadata !{metadata !"omnipotent char", metadata !3} +!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/cmp_cvt.cl b/backend/kernels/cmp_cvt.cl new file mode 100644 index 0000000..266eae7 --- /dev/null +++ b/backend/kernels/cmp_cvt.cl @@ -0,0 +1,7 @@ +#include "stdlib.h" + +__kernel void cmp_cvt(__global int *dst, int x, int y) +{ + dst[0] = x + y < get_local_id(0) ; +} + diff --git a/backend/kernels/cmp_cvt.ll b/backend/kernels/cmp_cvt.ll new file mode 100644 index 0000000..ab62b6c --- /dev/null +++ b/backend/kernels/cmp_cvt.ll @@ -0,0 +1,22 @@ +; ModuleID = 'cmp_cvt.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @cmp_cvt(i32* nocapture %dst, i32 %x, i32 %y) nounwind noinline { +get_local_id.exit: + %add = add nsw i32 %y, %x + %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone + %cmp = icmp ult i32 %add, %call.i + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* %dst, align 4, !tbaa !1 + ret void +} + +declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone + +!opencl.kernels = !{!0} + +!0 = metadata !{void (i32*, i32, i32)* @cmp_cvt} +!1 = metadata !{metadata !"int", metadata !2} +!2 = metadata !{metadata !"omnipotent char", metadata !3} +!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/compile.sh b/backend/kernels/compile.sh index 7545845..380017e 100755 --- a/backend/kernels/compile.sh +++ b/backend/kernels/compile.sh @@ -1,4 +1,5 @@ clang -x cl -emit-llvm -O3 -ccc-host-triple ptx32 -c $1.cl -o $1.o llvm-dis $1.o +rm $1.o mv $1.o.ll $1.ll diff --git a/backend/kernels/cycle.o b/backend/kernels/cycle.o deleted file mode 100644 index b7157d7..0000000 Binary files a/backend/kernels/cycle.o and /dev/null differ diff --git a/backend/kernels/get_global_id.cbe.c b/backend/kernels/get_global_id.cbe.c deleted file mode 100644 index f88bd5c..0000000 --- a/backend/kernels/get_global_id.cbe.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -/* Provide Declarations */ -#include -#include -#include -/* get a declaration for alloca */ -#if defined(__CYGWIN__) || defined(__MINGW32__) -#define alloca(x) __builtin_alloca((x)) -#define _alloca(x) __builtin_alloca((x)) -#elif defined(__APPLE__) -extern void *__builtin_alloca(unsigned long); -#define alloca(x) __builtin_alloca(x) -#define longjmp _longjmp -#define setjmp _setjmp -#elif defined(__sun__) -#if defined(__sparcv9) -extern void *__builtin_alloca(unsigned long); -#else -extern void *__builtin_alloca(unsigned int); -#endif -#define alloca(x) __builtin_alloca(x) -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__) -#define alloca(x) __builtin_alloca(x) -#elif defined(_MSC_VER) -#define inline _inline -#define alloca(x) _alloca(x) -#else -#include -#endif - -#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */ -#define __attribute__(X) -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __EXTERNAL_WEAK__ __attribute__((weak_import)) -#elif defined(__GNUC__) -#define __EXTERNAL_WEAK__ __attribute__((weak)) -#else -#define __EXTERNAL_WEAK__ -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __ATTRIBUTE_WEAK__ -#elif defined(__GNUC__) -#define __ATTRIBUTE_WEAK__ __attribute__((weak)) -#else -#define __ATTRIBUTE_WEAK__ -#endif - -#if defined(__GNUC__) -#define __HIDDEN__ __attribute__((visibility("hidden"))) -#endif - -#ifdef __GNUC__ -#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */ -#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */ -#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */ -#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */ -#define LLVM_INF __builtin_inf() /* Double */ -#define LLVM_INFF __builtin_inff() /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality) -#define __ATTRIBUTE_CTOR__ __attribute__((constructor)) -#define __ATTRIBUTE_DTOR__ __attribute__((destructor)) -#define LLVM_ASM __asm__ -#else -#define LLVM_NAN(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANF(NanStr) 0.0F /* Float */ -#define LLVM_NANS(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANSF(NanStr) 0.0F /* Float */ -#define LLVM_INF ((double)0.0) /* Double */ -#define LLVM_INFF 0.0F /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */ -#define __ATTRIBUTE_CTOR__ -#define __ATTRIBUTE_DTOR__ -#define LLVM_ASM(X) -#endif - -#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ -#define __builtin_stack_save() 0 /* not implemented */ -#define __builtin_stack_restore(X) /* noop */ -#endif - -#if __GNUC__ && __LP64__ /* 128-bit integer types */ -typedef int __attribute__((mode(TI))) llvmInt128; -typedef unsigned __attribute__((mode(TI))) llvmUInt128; -#endif - -#define CODE_FOR_MAIN() /* Any target-specific code for main()*/ - -#ifndef __cplusplus -typedef unsigned char bool; -#endif - - -/* Support for floating point constants */ -typedef unsigned long long ConstantDoubleTy; -typedef unsigned int ConstantFloatTy; -typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty; -typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty; - - -/* Global Declarations */ -/* Helper union for bitcasts */ -typedef union { - unsigned int Int32; - unsigned long long Int64; - float Float; - double Double; -} llvmBitCastUnion; - -/* Function Declarations */ -double fmod(double, double); -float fmodf(float, float); -long double fmodl(long double, long double); -void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p); -unsigned int __gen_ocl_get_global_id0(void); -unsigned int __gen_ocl_get_local_id0(void); -void abort(void); - - -/* Function Bodies */ -static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; } -static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; } -static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_une(double X, double Y) { return X != Y; } -static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; } -static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); } -static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; } -static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; } -static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; } -static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; } - -void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p) { - unsigned int llvm_cbe_call_2e_i; - unsigned int llvm_cbe_call_2e_i6; - - llvm_cbe_call_2e_i = /*tail*/ __gen_ocl_get_local_id0(); - llvm_cbe_call_2e_i6 = /*tail*/ __gen_ocl_get_global_id0(); - *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i6)])) = (((signed int )(((signed int )(llvm_cbe_call_2e_i << 16u)) >> ((signed int )16u)))); - *((&llvm_cbe_p[((signed int )llvm_cbe_call_2e_i6)])) = llvm_cbe_call_2e_i; - return; -} - diff --git a/backend/kernels/get_global_id.o b/backend/kernels/get_global_id.o deleted file mode 100644 index d1ddd39..0000000 Binary files a/backend/kernels/get_global_id.o and /dev/null differ diff --git a/backend/kernels/gg.ll b/backend/kernels/gg.ll new file mode 100644 index 0000000..0f9d666 Binary files /dev/null and b/backend/kernels/gg.ll differ diff --git a/backend/kernels/gg.ll.ll b/backend/kernels/gg.ll.ll new file mode 100644 index 0000000..1c60671 --- /dev/null +++ b/backend/kernels/gg.ll.ll @@ -0,0 +1,89 @@ +; ModuleID = 'gg.ll' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +%struct.my_struct = type { i32, [2 x i32] } + +@g = addrspace(1) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4 +@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4 + +define ptx_kernel void @struct_cl(%struct.my_struct* byval %s, i32 %x, i32* %mem, i32 %y) nounwind noinline { +entry: + %x.addr = alloca i32, align 4 + %mem.addr = alloca i32*, align 4 + %y.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32* %mem, i32** %mem.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 256 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %2 = load i32* %i, align 4 + %arrayidx = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %2 + %a = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx, i32 0, i32 0 + store i32 %1, i32 addrspace(4)* %a, align 4 + %3 = load i32* %i, align 4 + %4 = load i32* %i, align 4 + %arrayidx1 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %4 + %b = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx1, i32 0, i32 1 + %arrayidx2 = getelementptr inbounds [2 x i32] addrspace(4)* %b, i32 0, i32 0 + store i32 %3, i32 addrspace(4)* %arrayidx2, align 4 + %5 = load i32* %i, align 4 + %add = add nsw i32 %5, 1 + %6 = load i32* %i, align 4 + %arrayidx3 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %6 + %b4 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx3, i32 0, i32 1 + %arrayidx5 = getelementptr inbounds [2 x i32] addrspace(4)* %b4, i32 0, i32 0 + store i32 %add, i32 addrspace(4)* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %8 = load i32* %y.addr, align 4 + %arrayidx6 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %8 + %9 = bitcast %struct.my_struct addrspace(4)* %arrayidx6 to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* %9, i32 12, i32 4, i1 false) + %a7 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0 + %10 = load i32* %a7, align 4 + %11 = load i32* %x.addr, align 4 + %arrayidx8 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %11 + %a9 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx8, i32 0, i32 0 + %12 = load i32 addrspace(4)* %a9, align 4 + %add10 = add nsw i32 %10, %12 + %13 = load i32* %x.addr, align 4 + %add11 = add nsw i32 %13, 1 + %arrayidx12 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add11 + %b13 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx12, i32 0, i32 1 + %arrayidx14 = getelementptr inbounds [2 x i32] addrspace(4)* %b13, i32 0, i32 0 + %14 = load i32 addrspace(4)* %arrayidx14, align 4 + %add15 = add nsw i32 %add10, %14 + %15 = load i32* %x.addr, align 4 + %arrayidx16 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %15 + %16 = load i32 addrspace(1)* %arrayidx16, align 4 + %add17 = add nsw i32 %add15, %16 + %17 = load i32 addrspace(1)* getelementptr inbounds ([4 x i32] addrspace(1)* @g, i32 0, i32 3), align 4 + %add18 = add nsw i32 %add17, %17 + %18 = load i32** %mem.addr, align 4 + %arrayidx19 = getelementptr inbounds i32* %18, i32 0 + store i32 %add18, i32* %arrayidx19 + ret void +} + +declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind + +!opencl.kernels = !{!0} + +!0 = metadata !{void (%struct.my_struct*, i32, i32*, i32)* @struct_cl} diff --git a/backend/kernels/k.ll b/backend/kernels/k.ll deleted file mode 100644 index dac3a71..0000000 --- a/backend/kernels/k.ll +++ /dev/null @@ -1,10 +0,0 @@ -; ModuleID = 'k' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -%struct.my_struct = type { i32, [2 x i32] } - -define ptx_device void @struct_cl(%struct.my_struct* nocapture byval %s) nounwind readnone { -entry: - ret void -} diff --git a/backend/kernels/k.s b/backend/kernels/k.s deleted file mode 100644 index 0756a2e..0000000 --- a/backend/kernels/k.s +++ /dev/null @@ -1,11 +0,0 @@ - .version 2.0 - .target compute_10, map_f64_to_f32 - - -.func () struct_cl (.reg .b32 %param0) // @struct_cl -{ - -// BB#0: // %entry - ret; -} - diff --git a/backend/kernels/load_store.cbe.c b/backend/kernels/load_store.cbe.c deleted file mode 100644 index 18768f9..0000000 --- a/backend/kernels/load_store.cbe.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -/* Provide Declarations */ -#include -#include -#include -/* get a declaration for alloca */ -#if defined(__CYGWIN__) || defined(__MINGW32__) -#define alloca(x) __builtin_alloca((x)) -#define _alloca(x) __builtin_alloca((x)) -#elif defined(__APPLE__) -extern void *__builtin_alloca(unsigned long); -#define alloca(x) __builtin_alloca(x) -#define longjmp _longjmp -#define setjmp _setjmp -#elif defined(__sun__) -#if defined(__sparcv9) -extern void *__builtin_alloca(unsigned long); -#else -extern void *__builtin_alloca(unsigned int); -#endif -#define alloca(x) __builtin_alloca(x) -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__) -#define alloca(x) __builtin_alloca(x) -#elif defined(_MSC_VER) -#define inline _inline -#define alloca(x) _alloca(x) -#else -#include -#endif - -#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */ -#define __attribute__(X) -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __EXTERNAL_WEAK__ __attribute__((weak_import)) -#elif defined(__GNUC__) -#define __EXTERNAL_WEAK__ __attribute__((weak)) -#else -#define __EXTERNAL_WEAK__ -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __ATTRIBUTE_WEAK__ -#elif defined(__GNUC__) -#define __ATTRIBUTE_WEAK__ __attribute__((weak)) -#else -#define __ATTRIBUTE_WEAK__ -#endif - -#if defined(__GNUC__) -#define __HIDDEN__ __attribute__((visibility("hidden"))) -#endif - -#ifdef __GNUC__ -#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */ -#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */ -#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */ -#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */ -#define LLVM_INF __builtin_inf() /* Double */ -#define LLVM_INFF __builtin_inff() /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality) -#define __ATTRIBUTE_CTOR__ __attribute__((constructor)) -#define __ATTRIBUTE_DTOR__ __attribute__((destructor)) -#define LLVM_ASM __asm__ -#else -#define LLVM_NAN(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANF(NanStr) 0.0F /* Float */ -#define LLVM_NANS(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANSF(NanStr) 0.0F /* Float */ -#define LLVM_INF ((double)0.0) /* Double */ -#define LLVM_INFF 0.0F /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */ -#define __ATTRIBUTE_CTOR__ -#define __ATTRIBUTE_DTOR__ -#define LLVM_ASM(X) -#endif - -#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ -#define __builtin_stack_save() 0 /* not implemented */ -#define __builtin_stack_restore(X) /* noop */ -#endif - -#if __GNUC__ && __LP64__ /* 128-bit integer types */ -typedef int __attribute__((mode(TI))) llvmInt128; -typedef unsigned __attribute__((mode(TI))) llvmUInt128; -#endif - -#define CODE_FOR_MAIN() /* Any target-specific code for main()*/ - -#ifndef __cplusplus -typedef unsigned char bool; -#endif - - -/* Support for floating point constants */ -typedef unsigned long long ConstantDoubleTy; -typedef unsigned int ConstantFloatTy; -typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty; -typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty; - - -/* Global Declarations */ -/* Helper union for bitcasts */ -typedef union { - unsigned int Int32; - unsigned long long Int64; - float Float; - double Double; -} llvmBitCastUnion; - -/* Function Declarations */ -double fmod(double, double); -float fmodf(float, float); -long double fmodl(long double, long double); -void load_store(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_src); -void abort(void); - - -/* Function Bodies */ -static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; } -static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; } -static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_une(double X, double Y) { return X != Y; } -static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; } -static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); } -static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; } -static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; } -static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; } -static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; } - -void load_store(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_src) { - unsigned int llvm_cbe_tmp__1; - - llvm_cbe_tmp__1 = *llvm_cbe_src; - *llvm_cbe_dst = llvm_cbe_tmp__1; - return; -} - diff --git a/backend/kernels/load_store.o b/backend/kernels/load_store.o deleted file mode 100644 index 1ac5988..0000000 Binary files a/backend/kernels/load_store.o and /dev/null differ diff --git a/backend/kernels/loop.o b/backend/kernels/loop.o deleted file mode 100644 index 39a6897..0000000 Binary files a/backend/kernels/loop.o and /dev/null differ diff --git a/backend/kernels/mad.o b/backend/kernels/mad.o deleted file mode 100644 index 906b7e4..0000000 Binary files a/backend/kernels/mad.o and /dev/null differ diff --git a/backend/kernels/short.o b/backend/kernels/short.o deleted file mode 100644 index bee5c62..0000000 Binary files a/backend/kernels/short.o and /dev/null differ diff --git a/backend/kernels/simple_float4.o b/backend/kernels/simple_float4.o deleted file mode 100644 index 62e522d..0000000 Binary files a/backend/kernels/simple_float4.o and /dev/null differ diff --git a/backend/kernels/simple_float4_2.cl b/backend/kernels/simple_float4_2.cl index c35d9bb..1776d7c 100644 --- a/backend/kernels/simple_float4_2.cl +++ b/backend/kernels/simple_float4_2.cl @@ -5,4 +5,3 @@ __kernel void simple_float4(__global float4 *dst, __global float4 *src) dst[get_global_id(0)] = src[get_global_id(0)] * src[get_global_id(0)]; } - diff --git a/backend/kernels/simple_float4_2.o b/backend/kernels/simple_float4_2.o deleted file mode 100644 index 8580e1c..0000000 Binary files a/backend/kernels/simple_float4_2.o and /dev/null differ diff --git a/backend/kernels/simple_float4_3.cl b/backend/kernels/simple_float4_3.cl new file mode 100644 index 0000000..57b84f2 --- /dev/null +++ b/backend/kernels/simple_float4_3.cl @@ -0,0 +1,8 @@ +#include "stdlib.h" + +__kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b) +{ + dst[get_global_id(0)] = select(b, src[get_global_id(0)], src[get_global_id(1)]); + dst[get_global_id(0)] += (float4) (src[2].x, 1.f, 2.f, 3.f); +} + diff --git a/backend/kernels/simple_float4_3.ll b/backend/kernels/simple_float4_3.ll new file mode 100644 index 0000000..c1bdd31 --- /dev/null +++ b/backend/kernels/simple_float4_3.ll @@ -0,0 +1,36 @@ +; ModuleID = 'simple_float4_3.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @simple_float4(<4 x float>* nocapture %dst, <4 x float>* nocapture %src, i1 %b) nounwind noinline { +get_global_id.exit16: + %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + %arrayidx = getelementptr inbounds <4 x float>* %src, i32 %call.i + %0 = load <4 x float>* %arrayidx, align 16, !tbaa !1 + %call3.i = tail call ptx_device i32 @__gen_ocl_get_global_id1() nounwind readnone + %arrayidx2 = getelementptr inbounds <4 x float>* %src, i32 %call3.i + %1 = load <4 x float>* %arrayidx2, align 16, !tbaa !1 + %x.y.i = select i1 %b, <4 x float> %0, <4 x float> %1 + %arrayidx5 = getelementptr inbounds <4 x float>* %dst, i32 %call.i + store <4 x float> %x.y.i, <4 x float>* %arrayidx5, align 16, !tbaa !1 + %arrayidx6 = getelementptr inbounds <4 x float>* %src, i32 2 + %2 = load <4 x float>* %arrayidx6, align 16 + %3 = extractelement <4 x float> %2, i32 0 + %vecinit = insertelement <4 x float> undef, float %3, i32 0 + %vecinit7 = insertelement <4 x float> %vecinit, float 1.000000e+00, i32 1 + %vecinit8 = insertelement <4 x float> %vecinit7, float 2.000000e+00, i32 2 + %vecinit9 = insertelement <4 x float> %vecinit8, float 3.000000e+00, i32 3 + %add = fadd <4 x float> %x.y.i, %vecinit9 + store <4 x float> %add, <4 x float>* %arrayidx5, align 16, !tbaa !1 + ret void +} + +declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + +declare ptx_device i32 @__gen_ocl_get_global_id1() nounwind readnone + +!opencl.kernels = !{!0} + +!0 = metadata !{void (<4 x float>*, <4 x float>*, i1)* @simple_float4} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h index ca3b2d9..3d83799 100644 --- a/backend/kernels/stdlib.h +++ b/backend/kernels/stdlib.h @@ -44,4 +44,22 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef int int2 __attribute__((ext_vector_type(2))); typedef int int3 __attribute__((ext_vector_type(3))); typedef int int4 __attribute__((ext_vector_type(4))); +typedef bool bool2 __attribute__((ext_vector_type(2))); +typedef bool bool3 __attribute__((ext_vector_type(3))); +typedef bool bool4 __attribute__((ext_vector_type(4))); + +#define DECL_SELECT(TYPE) \ +__attribute__((overloadable)) \ +inline TYPE select(bool b, TYPE x, TYPE y) { \ + if (b) return x; else return y; \ +} +#define DECL_SELECT_ALL(TYPE) \ + DECL_SELECT(TYPE) \ + DECL_SELECT(TYPE##2) \ + DECL_SELECT(TYPE##3) \ + DECL_SELECT(TYPE##4) +DECL_SELECT_ALL(int) +DECL_SELECT_ALL(float) +#undef DECL_SELECT_ALL +#undef DECL_SELECT diff --git a/backend/kernels/store.o b/backend/kernels/store.o deleted file mode 100644 index ea7b34a..0000000 Binary files a/backend/kernels/store.o and /dev/null differ diff --git a/backend/kernels/struct.cl b/backend/kernels/struct.cl index af3b92d..a93d1e4 100644 --- a/backend/kernels/struct.cl +++ b/backend/kernels/struct.cl @@ -3,13 +3,17 @@ struct my_struct { int b[2]; }; -__constant int g[4] = {0,1,2,3}; +const __constant int g[4] = {0,1,2,3}; -__kernel void struct_cl (struct my_struct s, int x, __global int *mem) +__kernel void struct_cl (struct my_struct s, int x, __global int *mem, int y) { - __local int array[256]; - for (int i = 0; i < 256; ++i) - array[i] = i; - mem[0] = s.a + array[x] + g[x]; + __local struct my_struct array[256]; + for (int i = 0; i < 256; ++i) { + array[i].a = i; + array[i].b[0] = i; + array[i].b[0] = i+1; + } + array[0] = array[y]; + mem[0] = s.a + array[x].a + array[x+1].b[0] + g[x] + g[3]; } diff --git a/backend/kernels/struct.cl.o b/backend/kernels/struct.cl.o deleted file mode 100644 index 424f976..0000000 Binary files a/backend/kernels/struct.cl.o and /dev/null differ diff --git a/backend/kernels/struct.cl.o.ll b/backend/kernels/struct.cl.o.ll deleted file mode 100644 index ad709aa..0000000 --- a/backend/kernels/struct.cl.o.ll +++ /dev/null @@ -1,12 +0,0 @@ -; ModuleID = 'struct.cl.o' -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @struct_cl(i64 %s.coerce0, i32 %s.coerce1) nounwind uwtable readnone { -entry: - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void (i64, i32)* @struct_cl} diff --git a/backend/kernels/struct.ll b/backend/kernels/struct.ll index acbb3fa..ac4394d 100644 --- a/backend/kernels/struct.ll +++ b/backend/kernels/struct.ll @@ -4,37 +4,49 @@ target triple = "ptx32--" %struct.my_struct = type { i32, [2 x i32] } -@g = addrspace(1) global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4 -@struct_cl.array = internal addrspace(4) unnamed_addr global [256 x i32] zeroinitializer, align 4 +@g = addrspace(1) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4 +@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4 -define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32* nocapture %mem) nounwind noinline { +define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32* nocapture %mem, i32 %y) nounwind noinline { entry: br label %for.body for.body: ; preds = %for.body, %entry - %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %i.05 - store i32 %i.05, i32 addrspace(4)* %arrayidx, align 4, !tbaa !1 - %inc = add nsw i32 %i.05, 1 - %exitcond = icmp eq i32 %inc, 256 + %i.020 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %a = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.020, i32 0 + store i32 %i.020, i32 addrspace(4)* %a, align 4, !tbaa !1 + %arrayidx2 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.020, i32 1, i32 0 + %add = add nsw i32 %i.020, 1 + store i32 %add, i32 addrspace(4)* %arrayidx2, align 4, !tbaa !1 + %exitcond = icmp eq i32 %add, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body - %a = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0 - %0 = load i32* %a, align 4, !tbaa !1 - %arrayidx1 = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %x - %1 = load i32 addrspace(4)* %arrayidx1, align 4, !tbaa !1 - %arrayidx2 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x - %2 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !1 - %add = add i32 %1, %0 - %add3 = add i32 %add, %2 - store i32 %add3, i32* %mem, align 4, !tbaa !1 + %arrayidx6 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %y + %0 = bitcast %struct.my_struct addrspace(4)* %arrayidx6 to i8 addrspace(4)* + tail call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* %0, i32 12, i32 4, i1 false) + %a7 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0 + %1 = load i32* %a7, align 4, !tbaa !1 + %a9 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %x, i32 0 + %2 = load i32 addrspace(4)* %a9, align 4, !tbaa !1 + %add11 = add nsw i32 %x, 1 + %arrayidx14 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add11, i32 1, i32 0 + %3 = load i32 addrspace(4)* %arrayidx14, align 4, !tbaa !1 + %arrayidx16 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x + %4 = load i32 addrspace(1)* %arrayidx16, align 4, !tbaa !1 + %add10 = add i32 %1, 3 + %add15 = add i32 %add10, %2 + %add17 = add i32 %add15, %3 + %add18 = add i32 %add17, %4 + store i32 %add18, i32* %mem, align 4, !tbaa !1 ret void } +declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind + !opencl.kernels = !{!0} -!0 = metadata !{void (%struct.my_struct*, i32, i32*)* @struct_cl} +!0 = metadata !{void (%struct.my_struct*, i32, i32*, i32)* @struct_cl} !1 = metadata !{metadata !"int", metadata !2} !2 = metadata !{metadata !"omnipotent char", metadata !3} !3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/struct.o b/backend/kernels/struct.o deleted file mode 100644 index 4f6af9c..0000000 Binary files a/backend/kernels/struct.o and /dev/null differ diff --git a/backend/kernels/struct.o.cbe.c b/backend/kernels/struct.o.cbe.c deleted file mode 100644 index 8d0cea3..0000000 --- a/backend/kernels/struct.o.cbe.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -/* Provide Declarations */ -#include -#include -#include -/* get a declaration for alloca */ -#if defined(__CYGWIN__) || defined(__MINGW32__) -#define alloca(x) __builtin_alloca((x)) -#define _alloca(x) __builtin_alloca((x)) -#elif defined(__APPLE__) -extern void *__builtin_alloca(unsigned long); -#define alloca(x) __builtin_alloca(x) -#define longjmp _longjmp -#define setjmp _setjmp -#elif defined(__sun__) -#if defined(__sparcv9) -extern void *__builtin_alloca(unsigned long); -#else -extern void *__builtin_alloca(unsigned int); -#endif -#define alloca(x) __builtin_alloca(x) -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__) -#define alloca(x) __builtin_alloca(x) -#elif defined(_MSC_VER) -#define inline _inline -#define alloca(x) _alloca(x) -#else -#include -#endif - -#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */ -#define __attribute__(X) -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __EXTERNAL_WEAK__ __attribute__((weak_import)) -#elif defined(__GNUC__) -#define __EXTERNAL_WEAK__ __attribute__((weak)) -#else -#define __EXTERNAL_WEAK__ -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __ATTRIBUTE_WEAK__ -#elif defined(__GNUC__) -#define __ATTRIBUTE_WEAK__ __attribute__((weak)) -#else -#define __ATTRIBUTE_WEAK__ -#endif - -#if defined(__GNUC__) -#define __HIDDEN__ __attribute__((visibility("hidden"))) -#endif - -#ifdef __GNUC__ -#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */ -#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */ -#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */ -#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */ -#define LLVM_INF __builtin_inf() /* Double */ -#define LLVM_INFF __builtin_inff() /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality) -#define __ATTRIBUTE_CTOR__ __attribute__((constructor)) -#define __ATTRIBUTE_DTOR__ __attribute__((destructor)) -#define LLVM_ASM __asm__ -#else -#define LLVM_NAN(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANF(NanStr) 0.0F /* Float */ -#define LLVM_NANS(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANSF(NanStr) 0.0F /* Float */ -#define LLVM_INF ((double)0.0) /* Double */ -#define LLVM_INFF 0.0F /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */ -#define __ATTRIBUTE_CTOR__ -#define __ATTRIBUTE_DTOR__ -#define LLVM_ASM(X) -#endif - -#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ -#define __builtin_stack_save() 0 /* not implemented */ -#define __builtin_stack_restore(X) /* noop */ -#endif - -#if __GNUC__ && __LP64__ /* 128-bit integer types */ -typedef int __attribute__((mode(TI))) llvmInt128; -typedef unsigned __attribute__((mode(TI))) llvmUInt128; -#endif - -#define CODE_FOR_MAIN() /* Any target-specific code for main()*/ - -#ifndef __cplusplus -typedef unsigned char bool; -#endif - - -/* Support for floating point constants */ -typedef unsigned long long ConstantDoubleTy; -typedef unsigned int ConstantFloatTy; -typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty; -typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty; - - -/* Global Declarations */ -/* Helper union for bitcasts */ -typedef union { - unsigned int Int32; - unsigned long long Int64; - float Float; - double Double; -} llvmBitCastUnion; -/* Structure forward decls */ -typedef struct l_struct_OC_my_struct l_struct_OC_my_struct; - -/* Structure contents */ -l_struct_OC_my_struct { - unsigned int field0; - field1 { unsigned int array[2]; }; -}; - - -/* External Global Variable Declarations */ - -/* Function Declarations */ -double fmod(double, double); -float fmodf(float, float); -long double fmodl(long double, long double); -void struct_cl(l_struct_OC_my_struct llvm_cbe_s, unsigned int llvm_cbe_x, unsigned int *llvm_cbe_mem); -void abort(void); - - -/* Global Variable Declarations */ -static struct_cl_OC_array { unsigned int array[256]; }; - - -/* Global Variable Definitions and Initialization */ -static struct_cl_OC_array { unsigned int array[256]; }; - - -/* Function Bodies */ -static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; } -static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; } -static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_une(double X, double Y) { return X != Y; } -static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; } -static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); } -static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; } -static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; } -static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; } -static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; } - -void struct_cl(l_struct_OC_my_struct llvm_cbe_s, unsigned int llvm_cbe_x, unsigned int *llvm_cbe_mem) { - unsigned int llvm_cbe_i_2e_03; - unsigned int llvm_cbe_i_2e_03__PHI_TEMPORARY; - unsigned int llvm_cbe_inc; - unsigned int llvm_cbe_tmp__1; - - llvm_cbe_i_2e_03__PHI_TEMPORARY = 0u; /* for PHI node */ - goto llvm_cbe_for_2e_body; - - do { /* Syntactic loop 'for.body' to make GCC happy */ -llvm_cbe_for_2e_body: - llvm_cbe_i_2e_03 = llvm_cbe_i_2e_03__PHI_TEMPORARY; - *((&struct_cl_OC_array.array[((signed int )llvm_cbe_i_2e_03)])) = llvm_cbe_i_2e_03; - llvm_cbe_inc = ((unsigned int )(((unsigned int )llvm_cbe_i_2e_03) + ((unsigned int )1u))); - if ((llvm_cbe_inc == 256u)) { - goto llvm_cbe_for_2e_end; - } else { - llvm_cbe_i_2e_03__PHI_TEMPORARY = llvm_cbe_inc; /* for PHI node */ - goto llvm_cbe_for_2e_body; - } - - } while (1); /* end of syntactic loop 'for.body' */ -llvm_cbe_for_2e_end: - llvm_cbe_tmp__1 = *((&struct_cl_OC_array.array[((signed int )llvm_cbe_x)])); - *llvm_cbe_mem = llvm_cbe_tmp__1; - return; -} - diff --git a/backend/kernels/test_select.cl b/backend/kernels/test_select.cl new file mode 100644 index 0000000..7973d23 --- /dev/null +++ b/backend/kernels/test_select.cl @@ -0,0 +1,10 @@ +#include "stdlib.h" +__kernel void test_select(__global int *dst, __global int *src) +{ + + if (src[get_global_id(0)] > 1) + dst[get_global_id(0)] = 1; + else + dst[get_global_id(0)] = 2; +} + diff --git a/backend/kernels/test_select.ll b/backend/kernels/test_select.ll new file mode 100644 index 0000000..302251c --- /dev/null +++ b/backend/kernels/test_select.ll @@ -0,0 +1,24 @@ +; ModuleID = 'test_select.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @test_select(i32* nocapture %dst, i32* nocapture %src) nounwind noinline { +get_global_id.exit7: + %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + %arrayidx = getelementptr inbounds i32* %src, i32 %call.i + %0 = load i32* %arrayidx, align 4, !tbaa !1 + %cmp = icmp sgt i32 %0, 1 + %arrayidx2 = getelementptr inbounds i32* %dst, i32 %call.i + %. = select i1 %cmp, i32 1, i32 2 + store i32 %., i32* %arrayidx2, align 4 + ret void +} + +declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + +!opencl.kernels = !{!0} + +!0 = metadata !{void (i32*, i32*)* @test_select} +!1 = metadata !{metadata !"int", metadata !2} +!2 = metadata !{metadata !"omnipotent char", metadata !3} +!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/undefined.o b/backend/kernels/undefined.o deleted file mode 100644 index d20bc49..0000000 Binary files a/backend/kernels/undefined.o and /dev/null differ diff --git a/backend/kernels/void.cl.o b/backend/kernels/void.cl.o deleted file mode 100644 index 36cc1e1..0000000 Binary files a/backend/kernels/void.cl.o and /dev/null differ diff --git a/backend/kernels/void.cl.o.cbe.c b/backend/kernels/void.cl.o.cbe.c deleted file mode 100644 index adb99b8..0000000 --- a/backend/kernels/void.cl.o.cbe.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -/* Provide Declarations */ -#include -#include -#include -/* get a declaration for alloca */ -#if defined(__CYGWIN__) || defined(__MINGW32__) -#define alloca(x) __builtin_alloca((x)) -#define _alloca(x) __builtin_alloca((x)) -#elif defined(__APPLE__) -extern void *__builtin_alloca(unsigned long); -#define alloca(x) __builtin_alloca(x) -#define longjmp _longjmp -#define setjmp _setjmp -#elif defined(__sun__) -#if defined(__sparcv9) -extern void *__builtin_alloca(unsigned long); -#else -extern void *__builtin_alloca(unsigned int); -#endif -#define alloca(x) __builtin_alloca(x) -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__) -#define alloca(x) __builtin_alloca(x) -#elif defined(_MSC_VER) -#define inline _inline -#define alloca(x) _alloca(x) -#else -#include -#endif - -#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */ -#define __attribute__(X) -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __EXTERNAL_WEAK__ __attribute__((weak_import)) -#elif defined(__GNUC__) -#define __EXTERNAL_WEAK__ __attribute__((weak)) -#else -#define __EXTERNAL_WEAK__ -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __ATTRIBUTE_WEAK__ -#elif defined(__GNUC__) -#define __ATTRIBUTE_WEAK__ __attribute__((weak)) -#else -#define __ATTRIBUTE_WEAK__ -#endif - -#if defined(__GNUC__) -#define __HIDDEN__ __attribute__((visibility("hidden"))) -#endif - -#ifdef __GNUC__ -#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */ -#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */ -#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */ -#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */ -#define LLVM_INF __builtin_inf() /* Double */ -#define LLVM_INFF __builtin_inff() /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality) -#define __ATTRIBUTE_CTOR__ __attribute__((constructor)) -#define __ATTRIBUTE_DTOR__ __attribute__((destructor)) -#define LLVM_ASM __asm__ -#else -#define LLVM_NAN(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANF(NanStr) 0.0F /* Float */ -#define LLVM_NANS(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANSF(NanStr) 0.0F /* Float */ -#define LLVM_INF ((double)0.0) /* Double */ -#define LLVM_INFF 0.0F /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */ -#define __ATTRIBUTE_CTOR__ -#define __ATTRIBUTE_DTOR__ -#define LLVM_ASM(X) -#endif - -#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ -#define __builtin_stack_save() 0 /* not implemented */ -#define __builtin_stack_restore(X) /* noop */ -#endif - -#if __GNUC__ && __LP64__ /* 128-bit integer types */ -typedef int __attribute__((mode(TI))) llvmInt128; -typedef unsigned __attribute__((mode(TI))) llvmUInt128; -#endif - -#define CODE_FOR_MAIN() /* Any target-specific code for main()*/ - -#ifndef __cplusplus -typedef unsigned char bool; -#endif - - -/* Support for floating point constants */ -typedef unsigned long long ConstantDoubleTy; -typedef unsigned int ConstantFloatTy; -typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty; -typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty; - - -/* Global Declarations */ -/* Helper union for bitcasts */ -typedef union { - unsigned int Int32; - unsigned long long Int64; - float Float; - double Double; -} llvmBitCastUnion; - -/* Function Declarations */ -double fmod(double, double); -float fmodf(float, float); -long double fmodl(long double, long double); -void hop(void); -void abort(void); - - -/* Function Bodies */ -static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; } -static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; } -static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_une(double X, double Y) { return X != Y; } -static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; } -static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); } -static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; } -static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; } -static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; } -static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; } - -void hop(void) { - return; -} - diff --git a/backend/kernels/void.cl.o.ll b/backend/kernels/void.cl.o.ll deleted file mode 100644 index 28fb111..0000000 --- a/backend/kernels/void.cl.o.ll +++ /dev/null @@ -1,12 +0,0 @@ -; ModuleID = 'void.cl.o' -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @hop() nounwind uwtable readnone { -entry: - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void ()* @hop} diff --git a/backend/kernels/void.o b/backend/kernels/void.o deleted file mode 100644 index 6d433f1..0000000 Binary files a/backend/kernels/void.o and /dev/null differ diff --git a/backend/kernels/void.o.ll b/backend/kernels/void.o.ll deleted file mode 100644 index 3c6c269..0000000 --- a/backend/kernels/void.o.ll +++ /dev/null @@ -1,12 +0,0 @@ -; ModuleID = 'void.o' -target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" -target triple = "ptx32--" - -define ptx_kernel void @hop() nounwind readnone noinline { -entry: - ret void -} - -!opencl.kernels = !{!0} - -!0 = metadata !{void ()* @hop} diff --git a/backend/kernels/void.s b/backend/kernels/void.s deleted file mode 100644 index e768625..0000000 --- a/backend/kernels/void.s +++ /dev/null @@ -1,138 +0,0 @@ -/* Provide Declarations */ -#include -#include -#include -/* get a declaration for alloca */ -#if defined(__CYGWIN__) || defined(__MINGW32__) -#define alloca(x) __builtin_alloca((x)) -#define _alloca(x) __builtin_alloca((x)) -#elif defined(__APPLE__) -extern void *__builtin_alloca(unsigned long); -#define alloca(x) __builtin_alloca(x) -#define longjmp _longjmp -#define setjmp _setjmp -#elif defined(__sun__) -#if defined(__sparcv9) -extern void *__builtin_alloca(unsigned long); -#else -extern void *__builtin_alloca(unsigned int); -#endif -#define alloca(x) __builtin_alloca(x) -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__) -#define alloca(x) __builtin_alloca(x) -#elif defined(_MSC_VER) -#define inline _inline -#define alloca(x) _alloca(x) -#else -#include -#endif - -#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */ -#define __attribute__(X) -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __EXTERNAL_WEAK__ __attribute__((weak_import)) -#elif defined(__GNUC__) -#define __EXTERNAL_WEAK__ __attribute__((weak)) -#else -#define __EXTERNAL_WEAK__ -#endif - -#if defined(__GNUC__) && defined(__APPLE_CC__) -#define __ATTRIBUTE_WEAK__ -#elif defined(__GNUC__) -#define __ATTRIBUTE_WEAK__ __attribute__((weak)) -#else -#define __ATTRIBUTE_WEAK__ -#endif - -#if defined(__GNUC__) -#define __HIDDEN__ __attribute__((visibility("hidden"))) -#endif - -#ifdef __GNUC__ -#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */ -#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */ -#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */ -#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */ -#define LLVM_INF __builtin_inf() /* Double */ -#define LLVM_INFF __builtin_inff() /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality) -#define __ATTRIBUTE_CTOR__ __attribute__((constructor)) -#define __ATTRIBUTE_DTOR__ __attribute__((destructor)) -#define LLVM_ASM __asm__ -#else -#define LLVM_NAN(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANF(NanStr) 0.0F /* Float */ -#define LLVM_NANS(NanStr) ((double)0.0) /* Double */ -#define LLVM_NANSF(NanStr) 0.0F /* Float */ -#define LLVM_INF ((double)0.0) /* Double */ -#define LLVM_INFF 0.0F /* Float */ -#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */ -#define __ATTRIBUTE_CTOR__ -#define __ATTRIBUTE_DTOR__ -#define LLVM_ASM(X) -#endif - -#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ -#define __builtin_stack_save() 0 /* not implemented */ -#define __builtin_stack_restore(X) /* noop */ -#endif - -#if __GNUC__ && __LP64__ /* 128-bit integer types */ -typedef int __attribute__((mode(TI))) llvmInt128; -typedef unsigned __attribute__((mode(TI))) llvmUInt128; -#endif - -#define CODE_FOR_MAIN() /* Any target-specific code for main()*/ - -#ifndef __cplusplus -typedef unsigned char bool; -#endif - - -/* Support for floating point constants */ -typedef unsigned long long ConstantDoubleTy; -typedef unsigned int ConstantFloatTy; -typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty; -typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty; - - -/* Global Declarations */ -/* Helper union for bitcasts */ -typedef union { - unsigned int Int32; - unsigned long long Int64; - float Float; - double Double; -} llvmBitCastUnion; - -/* Function Declarations */ -double fmod(double, double); -float fmodf(float, float); -long double fmodl(long double, long double); -void hop(void); -void abort(void); - - -/* Function Bodies */ -static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; } -static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; } -static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_une(double X, double Y) { return X != Y; } -static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); } -static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; } -static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); } -static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; } -static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; } -static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; } -static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; } - -void hop(void) { - return; -} - diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 4c950f0..d56cbe8 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -493,7 +493,7 @@ namespace ir { // We can convert anything to anything, but types and families must match INLINE bool ConvertInstruction::wellFormed(const Function &fn, std::string &whyNot) const { - const RegisterData::Family dstFamily = getFamily(srcType); + const RegisterData::Family dstFamily = getFamily(dstType); const RegisterData::Family srcFamily = getFamily(srcType); if (UNLIKELY(checkRegisterData(dstFamily, dst, fn, whyNot) == false)) return false; @@ -613,8 +613,7 @@ namespace ir { out << "." << this->getDstType() << "." << this->getSrcType() << " %" << this->getDstIndex(fn, 0) - << " %" << this->getSrcIndex(fn, 0) - << " %" << this->getSrcIndex(fn, 1); + << " %" << this->getSrcIndex(fn, 0); } INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const { diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index fb63a62..74580d6 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -361,7 +361,7 @@ namespace ir { /*! ge.type dst src0 src1 */ Instruction GT(Type type, Register dst, Register src0, Register src1); /*! cvt.{dstType <- srcType} dst src */ - Instruction CVT(Type dstType, Type srcType, Register dst, Register src0, Register src1); + Instruction CVT(Type dstType, Type srcType, Register dst, Register src); /*! bra labelIndex */ Instruction BRA(LabelIndex labelIndex); /*! (pred) bra labelIndex */ diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 83c6ce2..9a6fb4e 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -370,18 +370,16 @@ namespace gbe DECL_VISIT_FN(LoadInst, LoadInst); DECL_VISIT_FN(StoreInst, StoreInst); DECL_VISIT_FN(CallInst, CallInst); + DECL_VISIT_FN(ICmpInst, ICmpInst); + DECL_VISIT_FN(FCmpInst, FCmpInst); #undef DECL_VISIT_FN // Must be implemented later void visitInsertElementInst(InsertElementInst &I) {NOT_SUPPORTED;} void visitExtractElementInst(ExtractElementInst &I) {NOT_SUPPORTED;} void visitShuffleVectorInst(ShuffleVectorInst &SVI) {NOT_SUPPORTED;} - void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;} - void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;} void visitPHINode(PHINode &I) {NOT_SUPPORTED;} void visitBranchInst(BranchInst &I) {NOT_SUPPORTED;} - void visitICmpInst(ICmpInst &I) {NOT_SUPPORTED;} - void visitFCmpInst(FCmpInst &I) {NOT_SUPPORTED;} void visitSelectInst(SelectInst &I) {NOT_SUPPORTED;} // These instructions are not supported at all @@ -395,6 +393,8 @@ namespace gbe void visitUnreachableInst(UnreachableInst &I) {NOT_SUPPORTED;} void visitGetElementPtrInst(GetElementPtrInst &I) {NOT_SUPPORTED;} void visitAllocaInst(AllocaInst &I) {NOT_SUPPORTED;} + void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;} + void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;} template void visitLoadOrStore(T &I); void visitInstruction(Instruction &I) {NOT_SUPPORTED;} @@ -615,13 +615,10 @@ namespace gbe this->newRegister(&I); } - void GenWriter::emitBinaryOperator(Instruction &I) { - GBE_ASSERT(I.getType()->isPointerTy() == false); - - // Get the element type for a vector + static ir::Type + getVectorInfo(const ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum) + { ir::Type type; - uint32_t elemNum; - Type *llvmType = I.getType(); if (llvmType->isVectorTy() == true) { VectorType *vectorType = cast(llvmType); Type *elementType = vectorType->getElementType(); @@ -631,6 +628,16 @@ namespace gbe elemNum = 1; type = getType(ctx, llvmType); } + return type; + } + + void GenWriter::emitBinaryOperator(Instruction &I) { + GBE_ASSERT(I.getType()->isPointerTy() == false || + I.getType() != Type::getInt1Ty(I.getContext())); + + // Get the element type for a vector + uint32_t elemNum; + const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum); // Emit the instructions in a row for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { @@ -662,38 +669,183 @@ namespace gbe } } + void GenWriter::regAllocateICmpInst(ICmpInst &I) { + this->newRegister(&I); + } + + static ir::Type makeTypeSigned(const ir::Type &type) { + if (type == ir::TYPE_U8) return ir::TYPE_S8; + else if (type == ir::TYPE_U16) return ir::TYPE_S16; + else if (type == ir::TYPE_U32) return ir::TYPE_S32; + else if (type == ir::TYPE_U64) return ir::TYPE_S64; + return type; + } + + static ir::Type makeTypeUnsigned(const ir::Type &type) { + if (type == ir::TYPE_S8) return ir::TYPE_U8; + else if (type == ir::TYPE_S16) return ir::TYPE_U16; + else if (type == ir::TYPE_S32) return ir::TYPE_U32; + else if (type == ir::TYPE_S64) return ir::TYPE_U64; + return type; + } + + void GenWriter::emitICmpInst(ICmpInst &I) { + GBE_ASSERT(I.getOperand(0)->getType() != Type::getInt1Ty(I.getContext())); + + // Get the element type and the number of elements + uint32_t elemNum; + Type *operandType = I.getOperand(0)->getType(); + const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum); + const ir::Type signedType = makeTypeSigned(type); + const ir::Type unsignedType = makeTypeUnsigned(type); + + // Emit the instructions in a row + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + const ir::Register dst = this->getRegister(&I, elemID); + const ir::Register src0 = this->getRegister(I.getOperand(0), elemID); + const ir::Register src1 = this->getRegister(I.getOperand(1), elemID); + + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: ctx.EQ(type, dst, src0, src1); break; + case ICmpInst::ICMP_NE: ctx.NE(type, dst, src0, src1); break; + case ICmpInst::ICMP_ULE: ctx.LE((unsignedType), dst, src0, src1); break; + case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break; + case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break; + case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break; + case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break; + case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break; + case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break; + case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break; + default: NOT_SUPPORTED; + }; + } + } + + void GenWriter::regAllocateFCmpInst(FCmpInst &I) { + this->newRegister(&I); + } + + void GenWriter::emitFCmpInst(FCmpInst &I) { + GBE_ASSERT(I.getType() != Type::getInt1Ty(I.getContext())); + + // Get the element type and the number of elements + uint32_t elemNum; + Type *operandType = I.getOperand(0)->getType(); + const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum); + + // Emit the instructions in a row + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + const ir::Register dst = this->getRegister(&I, elemID); + const ir::Register src0 = this->getRegister(I.getOperand(0), elemID); + const ir::Register src1 = this->getRegister(I.getOperand(1), elemID); + + switch (I.getPredicate()) { + case ICmpInst::FCMP_OEQ: + case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break; + case ICmpInst::FCMP_ONE: + case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break; + case ICmpInst::FCMP_OLE: + case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break; + case ICmpInst::FCMP_OGE: + case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break; + case ICmpInst::FCMP_OLT: + case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break; + case ICmpInst::FCMP_OGT: + case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break; + default: NOT_SUPPORTED; + }; + } + } + void GenWriter::regAllocateCastInst(CastInst &I) { - if (I.getOpcode() == Instruction::PtrToInt || - I.getOpcode() == Instruction::IntToPtr) { - Value *dstValue = &I; - Value *srcValue = I.getOperand(0); - Constant *CPV = dyn_cast(srcValue); - if (CPV == NULL) { - Type *dstType = dstValue->getType(); - Type *srcType = srcValue->getType(); - GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType)); - regTranslator.newValueProxy(srcValue, dstValue); - } else - this->newRegister(dstValue); + Value *dstValue = &I; + Value *srcValue = I.getOperand(0); + + switch (I.getOpcode()) + { + // When casting pointer to integers, be aware with integers + case Instruction::PtrToInt: + case Instruction::IntToPtr: + { + Constant *CPV = dyn_cast(srcValue); + if (CPV == NULL) { + Type *dstType = dstValue->getType(); + Type *srcType = srcValue->getType(); + GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType)); + regTranslator.newValueProxy(srcValue, dstValue); + } else + this->newRegister(dstValue); + } + break; + // Bitcast just forward registers + case Instruction::BitCast: + { + uint32_t elemNum; + getVectorInfo(ctx, I.getType(), &I, elemNum); + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) + regTranslator.newValueProxy(srcValue, dstValue, elemID, elemID); + } + break; + // Various conversion operations -> just allocate registers for them + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::FPExt: + case Instruction::FPTrunc: + this->newRegister(&I); + break; + default: NOT_SUPPORTED; } - else - NOT_SUPPORTED; } void GenWriter::emitCastInst(CastInst &I) { - if (I.getOpcode() == Instruction::PtrToInt || - I.getOpcode() == Instruction::IntToPtr) { - Value *srcValue = &I; - Value *dstValue = I.getOperand(0); - Constant *CPV = dyn_cast(srcValue); - if (CPV != NULL) { - const ir::ImmediateIndex index = ctx.newImmediate(CPV); - const ir::Immediate imm = ctx.getImmediate(index); - const ir::Register reg = this->getRegister(dstValue); - ctx.LOADI(imm.type, reg, index); + switch (I.getOpcode()) + { + case Instruction::PtrToInt: + case Instruction::IntToPtr: + { + Value *dstValue = &I; + Value *srcValue = I.getOperand(0); + Constant *CPV = dyn_cast(srcValue); + if (CPV != NULL) { + const ir::ImmediateIndex index = ctx.newImmediate(CPV); + const ir::Immediate imm = ctx.getImmediate(index); + const ir::Register reg = this->getRegister(dstValue); + ctx.LOADI(imm.type, reg, index); + } } - } + break; + case Instruction::BitCast: break; // nothing to emit here + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::FPExt: + case Instruction::FPTrunc: + { + // Get the element type for a vector + uint32_t elemNum; + Type *llvmDstType = I.getType(); + Type *llvmSrcType = I.getOperand(0)->getType(); + const ir::Type dstType = getVectorInfo(ctx, llvmDstType, &I, elemNum); + const ir::Type srcType = getVectorInfo(ctx, llvmSrcType, &I, elemNum); + + // Emit the instructions in a row + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + const ir::Register dst = this->getRegister(&I, elemID); + const ir::Register src = this->getRegister(I.getOperand(0), elemID); + ctx.CVT(dstType, srcType, dst, src); + } + } + break; + default: NOT_SUPPORTED; + }; } #ifndef NDEBUG diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index f177411..6cf73bc 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -49,8 +49,9 @@ namespace gbe Module &mod = *M.get(); llvm::PassManager passes; + passes.add(createScalarReplAggregatesPass()); // Break up allocas passes.add(createRemoveGEPPass(unit)); - passes.add(createConstantPropagationPass()); + passes.add(createConstantPropagationPass()); passes.add(createDeadInstEliminationPass()); // remove simplified instructions passes.add(createLowerSwitchPass()); passes.add(createPromoteMemoryToRegisterPass()); diff --git a/backend/src/utest/utest_llvm.cpp b/backend/src/utest/utest_llvm.cpp index 852f07d..d9492c4 100644 --- a/backend/src/utest/utest_llvm.cpp +++ b/backend/src/utest/utest_llvm.cpp @@ -84,6 +84,7 @@ runTests: UTEST_EXPECT_SUCCESS(utestLLVM2Gen("simple_float4_2.ll")); //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll")); UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("cmp_cvt.ll")); } UTEST_REGISTER(utestLLVM)