Added first support for compare instructions Added first support for convert instructions

author Benjamin Segovia <segovia.benjamin@gmail.com>

Sat, 3 Mar 2012 11:07:13 +0000 (03:07 -0800)

committer Keith Packard <keithp@keithp.com>

Fri, 10 Aug 2012 23:15:33 +0000 (16:15 -0700)
author Benjamin Segovia <segovia.benjamin@gmail.com>
Sat, 3 Mar 2012 11:07:13 +0000 (03:07 -0800)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:15:33 +0000 (16:15 -0700)
diff --git a/backend/kernels/add.o b/backend/kernels/add.o

deleted file mode 100644 (file)

index fcff924..0000000

Binary files a/backend/kernels/add.o and /dev/null differ
diff --git a/backend/kernels/add2.o b/backend/kernels/add2.o

deleted file mode 100644 (file)

index 8b5ebb4..0000000

Binary files a/backend/kernels/add2.o and /dev/null differ
diff --git a/backend/kernels/cmp.cl b/backend/kernels/cmp.cl

new file mode 100644 (file)

index 0000000..aab1615
--- /dev/null
+++ b/backend/kernels/cmp.cl
@@ -0,0 +1,5 @@
+__kernel void test_cmp(__global bool *dst, int x, int y, float z, float w)
+{
+  dst[0] = (x < y) + (z > w);
+}
+
diff --git a/backend/kernels/cmp.ll b/backend/kernels/cmp.ll

new file mode 100644 (file)

index 0000000..c2f6400
--- /dev/null
+++ b/backend/kernels/cmp.ll
@@ -0,0 +1,22 @@
+; ModuleID = 'cmp.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_cmp(i8* nocapture %dst, i32 %x, i32 %y, float %z, float %w) nounwind noinline {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  %conv = zext i1 %cmp to i32
+  %cmp1 = fcmp ogt float %z, %w
+  %add = sext i1 %cmp1 to i32
+  %tobool = icmp ne i32 %conv, %add
+  %frombool = zext i1 %tobool to i8
+  store i8 %frombool, i8* %dst, align 1, !tbaa !1
+  ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i8*, i32, i32, float, float)* @test_cmp}
+!1 = metadata !{metadata !"bool", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/backend/kernels/cmp_cvt.cl b/backend/kernels/cmp_cvt.cl

new file mode 100644 (file)

index 0000000..266eae7
--- /dev/null
+++ b/backend/kernels/cmp_cvt.cl
@@ -0,0 +1,7 @@
+#include "stdlib.h"
+
+__kernel void cmp_cvt(__global int *dst, int x, int y)
+{
+  dst[0] = x + y < get_local_id(0) ;
+}
+
diff --git a/backend/kernels/cmp_cvt.ll b/backend/kernels/cmp_cvt.ll

new file mode 100644 (file)

index 0000000..ab62b6c
--- /dev/null
+++ b/backend/kernels/cmp_cvt.ll
@@ -0,0 +1,22 @@
+; ModuleID = 'cmp_cvt.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @cmp_cvt(i32* nocapture %dst, i32 %x, i32 %y) nounwind noinline {
+get_local_id.exit:
+  %add = add nsw i32 %y, %x
+  %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
+  %cmp = icmp ult i32 %add, %call.i
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* %dst, align 4, !tbaa !1
+  ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*, i32, i32)* @cmp_cvt}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/backend/kernels/compile.sh b/backend/kernels/compile.sh

index 7545845..380017e 100755 (executable)
--- a/backend/kernels/compile.sh
+++ b/backend/kernels/compile.sh
@@ -1,4 +1,5 @@
  clang -x cl -emit-llvm -O3 -ccc-host-triple ptx32 -c $1.cl -o $1.o
  llvm-dis $1.o
+rm $1.o
  mv $1.o.ll $1.ll
  
diff --git a/backend/kernels/cycle.o b/backend/kernels/cycle.o

deleted file mode 100644 (file)

index b7157d7..0000000

Binary files a/backend/kernels/cycle.o and /dev/null differ
diff --git a/backend/kernels/get_global_id.cbe.c b/backend/kernels/get_global_id.cbe.c

deleted file mode 100644 (file)

index f88bd5c..0000000
--- a/backend/kernels/get_global_id.cbe.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/* 
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define  alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__  /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */
-#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */
-#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF           __builtin_inf()         /* Double */
-#define LLVM_INFF          __builtin_inff()        /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM           __asm__
-#else
-#define LLVM_NAN(NanStr)   ((double)0.0)           /* Double */
-#define LLVM_NANF(NanStr)  0.0F                    /* Float */
-#define LLVM_NANS(NanStr)  ((double)0.0)           /* Double */
-#define LLVM_NANSF(NanStr) 0.0F                    /* Float */
-#define LLVM_INF           ((double)0.0)           /* Double */
-#define LLVM_INFF          0.0F                    /* Float */
-#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ 
-#define __builtin_stack_save() 0   /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int        ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
-  unsigned int Int32;
-  unsigned long long Int64;
-  float Float;
-  double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p);
-unsigned int __gen_ocl_get_global_id0(void);
-unsigned int __gen_ocl_get_local_id0(void);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X <  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X >  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X <  Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X >  Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p) {
-  unsigned int llvm_cbe_call_2e_i;
-  unsigned int llvm_cbe_call_2e_i6;
-
-  llvm_cbe_call_2e_i =  /*tail*/ __gen_ocl_get_local_id0();
-  llvm_cbe_call_2e_i6 =  /*tail*/ __gen_ocl_get_global_id0();
-  *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i6)])) = (((signed int )(((signed int )(llvm_cbe_call_2e_i << 16u)) >> ((signed int )16u))));
-  *((&llvm_cbe_p[((signed int )llvm_cbe_call_2e_i6)])) = llvm_cbe_call_2e_i;
-  return;
-}
-
diff --git a/backend/kernels/get_global_id.o b/backend/kernels/get_global_id.o

deleted file mode 100644 (file)

index d1ddd39..0000000

Binary files a/backend/kernels/get_global_id.o and /dev/null differ
diff --git a/backend/kernels/gg.ll b/backend/kernels/gg.ll

new file mode 100644 (file)

index 0000000..0f9d666

Binary files /dev/null and b/backend/kernels/gg.ll differ
diff --git a/backend/kernels/gg.ll.ll b/backend/kernels/gg.ll.ll

new file mode 100644 (file)

index 0000000..1c60671
--- /dev/null
+++ b/backend/kernels/gg.ll.ll
@@ -0,0 +1,89 @@
+; ModuleID = 'gg.ll'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+%struct.my_struct = type { i32, [2 x i32] }
+
+@g = addrspace(1) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4
+@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4
+
+define ptx_kernel void @struct_cl(%struct.my_struct* byval %s, i32 %x, i32* %mem, i32 %y) nounwind noinline {
+entry:
+  %x.addr = alloca i32, align 4
+  %mem.addr = alloca i32*, align 4
+  %y.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32* %mem, i32** %mem.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 256
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %2
+  %a = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx, i32 0, i32 0
+  store i32 %1, i32 addrspace(4)* %a, align 4
+  %3 = load i32* %i, align 4
+  %4 = load i32* %i, align 4
+  %arrayidx1 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %4
+  %b = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx1, i32 0, i32 1
+  %arrayidx2 = getelementptr inbounds [2 x i32] addrspace(4)* %b, i32 0, i32 0
+  store i32 %3, i32 addrspace(4)* %arrayidx2, align 4
+  %5 = load i32* %i, align 4
+  %add = add nsw i32 %5, 1
+  %6 = load i32* %i, align 4
+  %arrayidx3 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %6
+  %b4 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx3, i32 0, i32 1
+  %arrayidx5 = getelementptr inbounds [2 x i32] addrspace(4)* %b4, i32 0, i32 0
+  store i32 %add, i32 addrspace(4)* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %y.addr, align 4
+  %arrayidx6 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %8
+  %9 = bitcast %struct.my_struct addrspace(4)* %arrayidx6 to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* %9, i32 12, i32 4, i1 false)
+  %a7 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
+  %10 = load i32* %a7, align 4
+  %11 = load i32* %x.addr, align 4
+  %arrayidx8 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %11
+  %a9 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx8, i32 0, i32 0
+  %12 = load i32 addrspace(4)* %a9, align 4
+  %add10 = add nsw i32 %10, %12
+  %13 = load i32* %x.addr, align 4
+  %add11 = add nsw i32 %13, 1
+  %arrayidx12 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add11
+  %b13 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx12, i32 0, i32 1
+  %arrayidx14 = getelementptr inbounds [2 x i32] addrspace(4)* %b13, i32 0, i32 0
+  %14 = load i32 addrspace(4)* %arrayidx14, align 4
+  %add15 = add nsw i32 %add10, %14
+  %15 = load i32* %x.addr, align 4
+  %arrayidx16 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %15
+  %16 = load i32 addrspace(1)* %arrayidx16, align 4
+  %add17 = add nsw i32 %add15, %16
+  %17 = load i32 addrspace(1)* getelementptr inbounds ([4 x i32] addrspace(1)* @g, i32 0, i32 3), align 4
+  %add18 = add nsw i32 %add17, %17
+  %18 = load i32** %mem.addr, align 4
+  %arrayidx19 = getelementptr inbounds i32* %18, i32 0
+  store i32 %add18, i32* %arrayidx19
+  ret void
+}
+
+declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (%struct.my_struct*, i32, i32*, i32)* @struct_cl}
diff --git a/backend/kernels/k.ll b/backend/kernels/k.ll

deleted file mode 100644 (file)

index dac3a71..0000000
--- a/backend/kernels/k.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; ModuleID = 'k'
-target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
-target triple = "ptx32--"
-
-%struct.my_struct = type { i32, [2 x i32] }
-
-define ptx_device void @struct_cl(%struct.my_struct* nocapture byval %s) nounwind readnone {
-entry:
-  ret void
-}
diff --git a/backend/kernels/k.s b/backend/kernels/k.s

deleted file mode 100644 (file)

index 0756a2e..0000000
--- a/backend/kernels/k.s
+++ /dev/null
@@ -1,11 +0,0 @@
-       .version 2.0
-       .target compute_10, map_f64_to_f32
-
-
-.func () struct_cl (.reg .b32 %param0)  // @struct_cl
-{
-
-// BB#0:                                // %entry
-       ret;
-}
-
diff --git a/backend/kernels/load_store.cbe.c b/backend/kernels/load_store.cbe.c

deleted file mode 100644 (file)

index 18768f9..0000000
--- a/backend/kernels/load_store.cbe.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* 
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define  alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__  /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */
-#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */
-#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF           __builtin_inf()         /* Double */
-#define LLVM_INFF          __builtin_inff()        /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM           __asm__
-#else
-#define LLVM_NAN(NanStr)   ((double)0.0)           /* Double */
-#define LLVM_NANF(NanStr)  0.0F                    /* Float */
-#define LLVM_NANS(NanStr)  ((double)0.0)           /* Double */
-#define LLVM_NANSF(NanStr) 0.0F                    /* Float */
-#define LLVM_INF           ((double)0.0)           /* Double */
-#define LLVM_INFF          0.0F                    /* Float */
-#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ 
-#define __builtin_stack_save() 0   /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int        ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
-  unsigned int Int32;
-  unsigned long long Int64;
-  float Float;
-  double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void load_store(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_src);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X <  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X >  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X <  Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X >  Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void load_store(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_src) {
-  unsigned int llvm_cbe_tmp__1;
-
-  llvm_cbe_tmp__1 = *llvm_cbe_src;
-  *llvm_cbe_dst = llvm_cbe_tmp__1;
-  return;
-}
-
diff --git a/backend/kernels/load_store.o b/backend/kernels/load_store.o

deleted file mode 100644 (file)

index 1ac5988..0000000

Binary files a/backend/kernels/load_store.o and /dev/null differ
diff --git a/backend/kernels/loop.o b/backend/kernels/loop.o

deleted file mode 100644 (file)

index 39a6897..0000000

Binary files a/backend/kernels/loop.o and /dev/null differ
diff --git a/backend/kernels/mad.o b/backend/kernels/mad.o

deleted file mode 100644 (file)

index 906b7e4..0000000

Binary files a/backend/kernels/mad.o and /dev/null differ
diff --git a/backend/kernels/short.o b/backend/kernels/short.o

deleted file mode 100644 (file)

index bee5c62..0000000

Binary files a/backend/kernels/short.o and /dev/null differ
diff --git a/backend/kernels/simple_float4.o b/backend/kernels/simple_float4.o

deleted file mode 100644 (file)

index 62e522d..0000000

Binary files a/backend/kernels/simple_float4.o and /dev/null differ
diff --git a/backend/kernels/simple_float4_2.cl b/backend/kernels/simple_float4_2.cl

index c35d9bb..1776d7c 100644 (file)
--- a/backend/kernels/simple_float4_2.cl
+++ b/backend/kernels/simple_float4_2.cl
@@ -5,4 +5,3 @@ __kernel void simple_float4(__global float4 *dst, __global float4 *src)
    dst[get_global_id(0)] = src[get_global_id(0)] * src[get_global_id(0)];
  }
  
-
diff --git a/backend/kernels/simple_float4_2.o b/backend/kernels/simple_float4_2.o

deleted file mode 100644 (file)

index 8580e1c..0000000

Binary files a/backend/kernels/simple_float4_2.o and /dev/null differ
diff --git a/backend/kernels/simple_float4_3.cl b/backend/kernels/simple_float4_3.cl

new file mode 100644 (file)

index 0000000..57b84f2
--- /dev/null
+++ b/backend/kernels/simple_float4_3.cl
@@ -0,0 +1,8 @@
+#include "stdlib.h"
+
+__kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b)
+{
+  dst[get_global_id(0)] = select(b, src[get_global_id(0)], src[get_global_id(1)]);
+  dst[get_global_id(0)] += (float4) (src[2].x, 1.f, 2.f, 3.f);
+}
+
diff --git a/backend/kernels/simple_float4_3.ll b/backend/kernels/simple_float4_3.ll

new file mode 100644 (file)

index 0000000..c1bdd31
--- /dev/null
+++ b/backend/kernels/simple_float4_3.ll
@@ -0,0 +1,36 @@
+; ModuleID = 'simple_float4_3.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @simple_float4(<4 x float>* nocapture %dst, <4 x float>* nocapture %src, i1 %b) nounwind noinline {
+get_global_id.exit16:
+  %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+  %arrayidx = getelementptr inbounds <4 x float>* %src, i32 %call.i
+  %0 = load <4 x float>* %arrayidx, align 16, !tbaa !1
+  %call3.i = tail call ptx_device i32 @__gen_ocl_get_global_id1() nounwind readnone
+  %arrayidx2 = getelementptr inbounds <4 x float>* %src, i32 %call3.i
+  %1 = load <4 x float>* %arrayidx2, align 16, !tbaa !1
+  %x.y.i = select i1 %b, <4 x float> %0, <4 x float> %1
+  %arrayidx5 = getelementptr inbounds <4 x float>* %dst, i32 %call.i
+  store <4 x float> %x.y.i, <4 x float>* %arrayidx5, align 16, !tbaa !1
+  %arrayidx6 = getelementptr inbounds <4 x float>* %src, i32 2
+  %2 = load <4 x float>* %arrayidx6, align 16
+  %3 = extractelement <4 x float> %2, i32 0
+  %vecinit = insertelement <4 x float> undef, float %3, i32 0
+  %vecinit7 = insertelement <4 x float> %vecinit, float 1.000000e+00, i32 1
+  %vecinit8 = insertelement <4 x float> %vecinit7, float 2.000000e+00, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit8, float 3.000000e+00, i32 3
+  %add = fadd <4 x float> %x.y.i, %vecinit9
+  store <4 x float> %add, <4 x float>* %arrayidx5, align 16, !tbaa !1
+  ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+
+declare ptx_device i32 @__gen_ocl_get_global_id1() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (<4 x float>*, <4 x float>*, i1)* @simple_float4}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h

index ca3b2d9..3d83799 100644 (file)
--- a/backend/kernels/stdlib.h
+++ b/backend/kernels/stdlib.h
@@ -44,4 +44,22 @@ typedef float float4 __attribute__((ext_vector_type(4)));
  typedef int int2 __attribute__((ext_vector_type(2)));
  typedef int int3 __attribute__((ext_vector_type(3)));
  typedef int int4 __attribute__((ext_vector_type(4)));
+typedef bool bool2 __attribute__((ext_vector_type(2)));
+typedef bool bool3 __attribute__((ext_vector_type(3)));
+typedef bool bool4 __attribute__((ext_vector_type(4)));
+
+#define DECL_SELECT(TYPE)                     \
+__attribute__((overloadable))                 \
+inline TYPE select(bool b, TYPE x, TYPE y) {  \
+  if (b) return x; else return y;             \
+}
+#define DECL_SELECT_ALL(TYPE)   \
+  DECL_SELECT(TYPE)             \
+  DECL_SELECT(TYPE##2)          \
+  DECL_SELECT(TYPE##3)          \
+  DECL_SELECT(TYPE##4)
+DECL_SELECT_ALL(int)
+DECL_SELECT_ALL(float)
+#undef DECL_SELECT_ALL
+#undef DECL_SELECT
  
diff --git a/backend/kernels/store.o b/backend/kernels/store.o

deleted file mode 100644 (file)

index ea7b34a..0000000

Binary files a/backend/kernels/store.o and /dev/null differ
diff --git a/backend/kernels/struct.cl b/backend/kernels/struct.cl

index af3b92d..a93d1e4 100644 (file)
--- a/backend/kernels/struct.cl
+++ b/backend/kernels/struct.cl
@@ -3,13 +3,17 @@ struct my_struct {
    int b[2];
  };
  
-__constant int g[4] = {0,1,2,3};
+const __constant int g[4] = {0,1,2,3};
  
-__kernel void struct_cl (struct my_struct s, int x, __global int *mem)
+__kernel void struct_cl (struct my_struct s, int x, __global int *mem, int y)
  {
-  __local int array[256];
-  for (int i = 0; i < 256; ++i)
-    array[i] = i;
-  mem[0] = s.a + array[x] + g[x];
+  __local struct my_struct array[256];
+  for (int i = 0; i < 256; ++i) {
+    array[i].a = i;
+    array[i].b[0] = i;
+    array[i].b[0] = i+1;
+  }
+  array[0] = array[y];
+  mem[0] = s.a + array[x].a + array[x+1].b[0] + g[x] + g[3];
  }
  
diff --git a/backend/kernels/struct.cl.o b/backend/kernels/struct.cl.o

deleted file mode 100644 (file)

index 424f976..0000000

Binary files a/backend/kernels/struct.cl.o and /dev/null differ
diff --git a/backend/kernels/struct.cl.o.ll b/backend/kernels/struct.cl.o.ll

deleted file mode 100644 (file)

index ad709aa..0000000
--- a/backend/kernels/struct.cl.o.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; ModuleID = 'struct.cl.o'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @struct_cl(i64 %s.coerce0, i32 %s.coerce1) nounwind uwtable readnone {
-entry:
-  ret void
-}
-
-!opencl.kernels = !{!0}
-
-!0 = metadata !{void (i64, i32)* @struct_cl}
diff --git a/backend/kernels/struct.ll b/backend/kernels/struct.ll

index acbb3fa..ac4394d 100644 (file)
--- a/backend/kernels/struct.ll
+++ b/backend/kernels/struct.ll
@@ -4,37 +4,49 @@ target triple = "ptx32--"
  
  %struct.my_struct = type { i32, [2 x i32] }
  
-@g = addrspace(1) global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4
-@struct_cl.array = internal addrspace(4) unnamed_addr global [256 x i32] zeroinitializer, align 4
+@g = addrspace(1) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4
+@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4
  
-define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32* nocapture %mem) nounwind noinline {
+define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32* nocapture %mem, i32 %y) nounwind noinline {
  entry:
    br label %for.body
  
  for.body:                                         ; preds = %for.body, %entry
-  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %i.05
-  store i32 %i.05, i32 addrspace(4)* %arrayidx, align 4, !tbaa !1
-  %inc = add nsw i32 %i.05, 1
-  %exitcond = icmp eq i32 %inc, 256
+  %i.020 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %a = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.020, i32 0
+  store i32 %i.020, i32 addrspace(4)* %a, align 4, !tbaa !1
+  %arrayidx2 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.020, i32 1, i32 0
+  %add = add nsw i32 %i.020, 1
+  store i32 %add, i32 addrspace(4)* %arrayidx2, align 4, !tbaa !1
+  %exitcond = icmp eq i32 %add, 256
    br i1 %exitcond, label %for.end, label %for.body
  
  for.end:                                          ; preds = %for.body
-  %a = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
-  %0 = load i32* %a, align 4, !tbaa !1
-  %arrayidx1 = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %x
-  %1 = load i32 addrspace(4)* %arrayidx1, align 4, !tbaa !1
-  %arrayidx2 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x
-  %2 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !1
-  %add = add i32 %1, %0
-  %add3 = add i32 %add, %2
-  store i32 %add3, i32* %mem, align 4, !tbaa !1
+  %arrayidx6 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %y
+  %0 = bitcast %struct.my_struct addrspace(4)* %arrayidx6 to i8 addrspace(4)*
+  tail call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* %0, i32 12, i32 4, i1 false)
+  %a7 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
+  %1 = load i32* %a7, align 4, !tbaa !1
+  %a9 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %x, i32 0
+  %2 = load i32 addrspace(4)* %a9, align 4, !tbaa !1
+  %add11 = add nsw i32 %x, 1
+  %arrayidx14 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add11, i32 1, i32 0
+  %3 = load i32 addrspace(4)* %arrayidx14, align 4, !tbaa !1
+  %arrayidx16 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x
+  %4 = load i32 addrspace(1)* %arrayidx16, align 4, !tbaa !1
+  %add10 = add i32 %1, 3
+  %add15 = add i32 %add10, %2
+  %add17 = add i32 %add15, %3
+  %add18 = add i32 %add17, %4
+  store i32 %add18, i32* %mem, align 4, !tbaa !1
    ret void
  }
  
+declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind
+
  !opencl.kernels = !{!0}
  
-!0 = metadata !{void (%struct.my_struct*, i32, i32*)* @struct_cl}
+!0 = metadata !{void (%struct.my_struct*, i32, i32*, i32)* @struct_cl}
  !1 = metadata !{metadata !"int", metadata !2}
  !2 = metadata !{metadata !"omnipotent char", metadata !3}
  !3 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/backend/kernels/struct.o b/backend/kernels/struct.o

deleted file mode 100644 (file)

index 4f6af9c..0000000

Binary files a/backend/kernels/struct.o and /dev/null differ
diff --git a/backend/kernels/struct.o.cbe.c b/backend/kernels/struct.o.cbe.c

deleted file mode 100644 (file)

index 8d0cea3..0000000
--- a/backend/kernels/struct.o.cbe.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/* 
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define  alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__  /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */
-#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */
-#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF           __builtin_inf()         /* Double */
-#define LLVM_INFF          __builtin_inff()        /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM           __asm__
-#else
-#define LLVM_NAN(NanStr)   ((double)0.0)           /* Double */
-#define LLVM_NANF(NanStr)  0.0F                    /* Float */
-#define LLVM_NANS(NanStr)  ((double)0.0)           /* Double */
-#define LLVM_NANSF(NanStr) 0.0F                    /* Float */
-#define LLVM_INF           ((double)0.0)           /* Double */
-#define LLVM_INFF          0.0F                    /* Float */
-#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ 
-#define __builtin_stack_save() 0   /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int        ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
-  unsigned int Int32;
-  unsigned long long Int64;
-  float Float;
-  double Double;
-} llvmBitCastUnion;
-/* Structure forward decls */
-typedef struct l_struct_OC_my_struct l_struct_OC_my_struct;
-
-/* Structure contents */
-l_struct_OC_my_struct {
-  unsigned int field0;
-  field1 { unsigned int array[2]; };
-};
-
-
-/* External Global Variable Declarations */
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void struct_cl(l_struct_OC_my_struct llvm_cbe_s, unsigned int llvm_cbe_x, unsigned int *llvm_cbe_mem);
-void abort(void);
-
-
-/* Global Variable Declarations */
-static struct_cl_OC_array { unsigned int array[256]; };
-
-
-/* Global Variable Definitions and Initialization */
-static struct_cl_OC_array { unsigned int array[256]; };
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X <  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X >  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X <  Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X >  Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void struct_cl(l_struct_OC_my_struct llvm_cbe_s, unsigned int llvm_cbe_x, unsigned int *llvm_cbe_mem) {
-  unsigned int llvm_cbe_i_2e_03;
-  unsigned int llvm_cbe_i_2e_03__PHI_TEMPORARY;
-  unsigned int llvm_cbe_inc;
-  unsigned int llvm_cbe_tmp__1;
-
-  llvm_cbe_i_2e_03__PHI_TEMPORARY = 0u;   /* for PHI node */
-  goto llvm_cbe_for_2e_body;
-
-  do {     /* Syntactic loop 'for.body' to make GCC happy */
-llvm_cbe_for_2e_body:
-  llvm_cbe_i_2e_03 = llvm_cbe_i_2e_03__PHI_TEMPORARY;
-  *((&struct_cl_OC_array.array[((signed int )llvm_cbe_i_2e_03)])) = llvm_cbe_i_2e_03;
-  llvm_cbe_inc = ((unsigned int )(((unsigned int )llvm_cbe_i_2e_03) + ((unsigned int )1u)));
-  if ((llvm_cbe_inc == 256u)) {
-    goto llvm_cbe_for_2e_end;
-  } else {
-    llvm_cbe_i_2e_03__PHI_TEMPORARY = llvm_cbe_inc;   /* for PHI node */
-    goto llvm_cbe_for_2e_body;
-  }
-
-  } while (1); /* end of syntactic loop 'for.body' */
-llvm_cbe_for_2e_end:
-  llvm_cbe_tmp__1 = *((&struct_cl_OC_array.array[((signed int )llvm_cbe_x)]));
-  *llvm_cbe_mem = llvm_cbe_tmp__1;
-  return;
-}
-
diff --git a/backend/kernels/test_select.cl b/backend/kernels/test_select.cl

new file mode 100644 (file)

index 0000000..7973d23
--- /dev/null
+++ b/backend/kernels/test_select.cl
@@ -0,0 +1,10 @@
+#include "stdlib.h"
+__kernel void test_select(__global int *dst, __global int *src)
+{
+
+  if (src[get_global_id(0)] > 1)
+    dst[get_global_id(0)] = 1;
+  else
+    dst[get_global_id(0)] = 2;
+}
+
diff --git a/backend/kernels/test_select.ll b/backend/kernels/test_select.ll

new file mode 100644 (file)

index 0000000..302251c
--- /dev/null
+++ b/backend/kernels/test_select.ll
@@ -0,0 +1,24 @@
+; ModuleID = 'test_select.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_select(i32* nocapture %dst, i32* nocapture %src) nounwind noinline {
+get_global_id.exit7:
+  %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+  %arrayidx = getelementptr inbounds i32* %src, i32 %call.i
+  %0 = load i32* %arrayidx, align 4, !tbaa !1
+  %cmp = icmp sgt i32 %0, 1
+  %arrayidx2 = getelementptr inbounds i32* %dst, i32 %call.i
+  %. = select i1 %cmp, i32 1, i32 2
+  store i32 %., i32* %arrayidx2, align 4
+  ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*, i32*)* @test_select}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/backend/kernels/undefined.o b/backend/kernels/undefined.o

deleted file mode 100644 (file)

index d20bc49..0000000

Binary files a/backend/kernels/undefined.o and /dev/null differ
diff --git a/backend/kernels/void.cl.o b/backend/kernels/void.cl.o

deleted file mode 100644 (file)

index 36cc1e1..0000000

Binary files a/backend/kernels/void.cl.o and /dev/null differ
diff --git a/backend/kernels/void.cl.o.cbe.c b/backend/kernels/void.cl.o.cbe.c

deleted file mode 100644 (file)

index adb99b8..0000000
--- a/backend/kernels/void.cl.o.cbe.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/* 
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define  alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__  /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */
-#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */
-#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF           __builtin_inf()         /* Double */
-#define LLVM_INFF          __builtin_inff()        /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM           __asm__
-#else
-#define LLVM_NAN(NanStr)   ((double)0.0)           /* Double */
-#define LLVM_NANF(NanStr)  0.0F                    /* Float */
-#define LLVM_NANS(NanStr)  ((double)0.0)           /* Double */
-#define LLVM_NANSF(NanStr) 0.0F                    /* Float */
-#define LLVM_INF           ((double)0.0)           /* Double */
-#define LLVM_INFF          0.0F                    /* Float */
-#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ 
-#define __builtin_stack_save() 0   /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int        ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
-  unsigned int Int32;
-  unsigned long long Int64;
-  float Float;
-  double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void hop(void);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X <  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X >  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X <  Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X >  Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void hop(void) {
-  return;
-}
-
diff --git a/backend/kernels/void.cl.o.ll b/backend/kernels/void.cl.o.ll

deleted file mode 100644 (file)

index 28fb111..0000000
--- a/backend/kernels/void.cl.o.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; ModuleID = 'void.cl.o'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @hop() nounwind uwtable readnone {
-entry:
-  ret void
-}
-
-!opencl.kernels = !{!0}
-
-!0 = metadata !{void ()* @hop}
diff --git a/backend/kernels/void.o b/backend/kernels/void.o

deleted file mode 100644 (file)

index 6d433f1..0000000

Binary files a/backend/kernels/void.o and /dev/null differ
diff --git a/backend/kernels/void.o.ll b/backend/kernels/void.o.ll

deleted file mode 100644 (file)

index 3c6c269..0000000
--- a/backend/kernels/void.o.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; ModuleID = 'void.o'
-target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
-target triple = "ptx32--"
-
-define ptx_kernel void @hop() nounwind readnone noinline {
-entry:
-  ret void
-}
-
-!opencl.kernels = !{!0}
-
-!0 = metadata !{void ()* @hop}
diff --git a/backend/kernels/void.s b/backend/kernels/void.s

deleted file mode 100644 (file)

index e768625..0000000
--- a/backend/kernels/void.s
+++ /dev/null
@@ -1,138 +0,0 @@
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define  alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__  /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr)   __builtin_nan(NanStr)   /* Double */
-#define LLVM_NANF(NanStr)  __builtin_nanf(NanStr)  /* Float */
-#define LLVM_NANS(NanStr)  __builtin_nans(NanStr)  /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF           __builtin_inf()         /* Double */
-#define LLVM_INFF          __builtin_inff()        /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM           __asm__
-#else
-#define LLVM_NAN(NanStr)   ((double)0.0)           /* Double */
-#define LLVM_NANF(NanStr)  0.0F                    /* Float */
-#define LLVM_NANS(NanStr)  ((double)0.0)           /* Double */
-#define LLVM_NANSF(NanStr) 0.0F                    /* Float */
-#define LLVM_INF           ((double)0.0)           /* Double */
-#define LLVM_INFF          0.0F                    /* Float */
-#define LLVM_PREFETCH(addr,rw,locality)            /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ 
-#define __builtin_stack_save() 0   /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int        ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
-  unsigned int Int32;
-  unsigned long long Int64;
-  float Float;
-  double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void hop(void);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X <  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X >  Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X <  Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X >  Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void hop(void) {
-  return;
-}
-
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp

index 4c950f0..d56cbe8 100644 (file)
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -493,7 +493,7 @@ namespace ir {
      // We can convert anything to anything, but types and families must match
      INLINE bool ConvertInstruction::wellFormed(const Function &fn, std::string &whyNot) const
      {
-      const RegisterData::Family dstFamily = getFamily(srcType);
+      const RegisterData::Family dstFamily = getFamily(dstType);
        const RegisterData::Family srcFamily = getFamily(srcType);
        if (UNLIKELY(checkRegisterData(dstFamily, dst, fn, whyNot) == false))
          return false;
@@ -613,8 +613,7 @@ namespace ir {
        out << "." << this->getDstType()
            << "." << this->getSrcType()
            << " %" << this->getDstIndex(fn, 0)
-          << " %" << this->getSrcIndex(fn, 0)
-          << " %" << this->getSrcIndex(fn, 1);
+          << " %" << this->getSrcIndex(fn, 0);
      }
  
      INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const {
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp

index fb63a62..74580d6 100644 (file)
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -361,7 +361,7 @@ namespace ir {
    /*! ge.type dst src0 src1 */
    Instruction GT(Type type, Register dst, Register src0, Register src1);
    /*! cvt.{dstType <- srcType} dst src */
-  Instruction CVT(Type dstType, Type srcType, Register dst, Register src0, Register src1);
+  Instruction CVT(Type dstType, Type srcType, Register dst, Register src);
    /*! bra labelIndex */
    Instruction BRA(LabelIndex labelIndex);
    /*! (pred) bra labelIndex */
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp

index 83c6ce2..9a6fb4e 100644 (file)
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -370,18 +370,16 @@ namespace gbe
      DECL_VISIT_FN(LoadInst, LoadInst);
      DECL_VISIT_FN(StoreInst, StoreInst);
      DECL_VISIT_FN(CallInst, CallInst);
+    DECL_VISIT_FN(ICmpInst, ICmpInst);
+    DECL_VISIT_FN(FCmpInst, FCmpInst);
  #undef DECL_VISIT_FN
  
      // Must be implemented later
      void visitInsertElementInst(InsertElementInst &I) {NOT_SUPPORTED;}
      void visitExtractElementInst(ExtractElementInst &I) {NOT_SUPPORTED;}
      void visitShuffleVectorInst(ShuffleVectorInst &SVI) {NOT_SUPPORTED;}
-    void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
-    void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
      void visitPHINode(PHINode &I) {NOT_SUPPORTED;}
      void visitBranchInst(BranchInst &I) {NOT_SUPPORTED;}
-    void visitICmpInst(ICmpInst &I) {NOT_SUPPORTED;}
-    void visitFCmpInst(FCmpInst &I) {NOT_SUPPORTED;}
      void visitSelectInst(SelectInst &I) {NOT_SUPPORTED;}
  
      // These instructions are not supported at all
@@ -395,6 +393,8 @@ namespace gbe
      void visitUnreachableInst(UnreachableInst &I) {NOT_SUPPORTED;}
      void visitGetElementPtrInst(GetElementPtrInst &I) {NOT_SUPPORTED;}
      void visitAllocaInst(AllocaInst &I) {NOT_SUPPORTED;}
+    void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
+    void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
      template <bool isLoad, typename T> void visitLoadOrStore(T &I);
  
      void visitInstruction(Instruction &I) {NOT_SUPPORTED;}
@@ -615,13 +615,10 @@ namespace gbe
      this->newRegister(&I);
    }
  
-  void GenWriter::emitBinaryOperator(Instruction &I) {
-    GBE_ASSERT(I.getType()->isPointerTy() == false);
-
-    // Get the element type for a vector
+  static ir::Type
+  getVectorInfo(const ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum)
+  {
      ir::Type type;
-    uint32_t elemNum;
-    Type *llvmType = I.getType();
      if (llvmType->isVectorTy() == true) {
        VectorType *vectorType = cast<VectorType>(llvmType);
        Type *elementType = vectorType->getElementType();
@@ -631,6 +628,16 @@ namespace gbe
        elemNum = 1;
        type = getType(ctx, llvmType);
      }
+    return type;
+  }
+
+  void GenWriter::emitBinaryOperator(Instruction &I) {
+    GBE_ASSERT(I.getType()->isPointerTy() == false ||
+               I.getType() != Type::getInt1Ty(I.getContext()));
+
+    // Get the element type for a vector
+    uint32_t elemNum;
+    const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
  
      // Emit the instructions in a row
      for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
@@ -662,38 +669,183 @@ namespace gbe
      }
    }
  
+  void GenWriter::regAllocateICmpInst(ICmpInst &I) {
+    this->newRegister(&I);
+  }
+
+  static ir::Type makeTypeSigned(const ir::Type &type) {
+    if (type == ir::TYPE_U8) return ir::TYPE_S8;
+    else if (type == ir::TYPE_U16) return ir::TYPE_S16;
+    else if (type == ir::TYPE_U32) return ir::TYPE_S32;
+    else if (type == ir::TYPE_U64) return ir::TYPE_S64;
+    return type;
+  }
+
+  static ir::Type makeTypeUnsigned(const ir::Type &type) {
+    if (type == ir::TYPE_S8) return ir::TYPE_U8;
+    else if (type == ir::TYPE_S16) return ir::TYPE_U16;
+    else if (type == ir::TYPE_S32) return ir::TYPE_U32;
+    else if (type == ir::TYPE_S64) return ir::TYPE_U64;
+    return type;
+  }
+
+  void GenWriter::emitICmpInst(ICmpInst &I) {
+    GBE_ASSERT(I.getOperand(0)->getType() != Type::getInt1Ty(I.getContext()));
+
+    // Get the element type and the number of elements
+    uint32_t elemNum;
+    Type *operandType = I.getOperand(0)->getType();
+    const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
+    const ir::Type signedType = makeTypeSigned(type);
+    const ir::Type unsignedType = makeTypeUnsigned(type);
+
+    // Emit the instructions in a row
+    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+      const ir::Register dst = this->getRegister(&I, elemID);
+      const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+      const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+      switch (I.getPredicate()) {
+        case ICmpInst::ICMP_EQ:  ctx.EQ(type, dst, src0, src1); break;
+        case ICmpInst::ICMP_NE:  ctx.NE(type, dst, src0, src1); break;
+        case ICmpInst::ICMP_ULE: ctx.LE((unsignedType), dst, src0, src1); break;
+        case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break;
+        case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break;
+        case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break;
+        case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break;
+        case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break;
+        case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break;
+        case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break;
+        default: NOT_SUPPORTED;
+      };
+    }
+  }
+
+  void GenWriter::regAllocateFCmpInst(FCmpInst &I) {
+    this->newRegister(&I);
+  }
+
+  void GenWriter::emitFCmpInst(FCmpInst &I) {
+    GBE_ASSERT(I.getType() != Type::getInt1Ty(I.getContext()));
+
+    // Get the element type and the number of elements
+    uint32_t elemNum;
+    Type *operandType = I.getOperand(0)->getType();
+    const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
+
+    // Emit the instructions in a row
+    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+      const ir::Register dst = this->getRegister(&I, elemID);
+      const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+      const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+      switch (I.getPredicate()) {
+        case ICmpInst::FCMP_OEQ:
+        case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break;
+        case ICmpInst::FCMP_ONE:
+        case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break;
+        case ICmpInst::FCMP_OLE:
+        case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break;
+        case ICmpInst::FCMP_OGE:
+        case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break;
+        case ICmpInst::FCMP_OLT:
+        case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break;
+        case ICmpInst::FCMP_OGT:
+        case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
+        default: NOT_SUPPORTED;
+      };
+    }
+  }
+
    void GenWriter::regAllocateCastInst(CastInst &I)
    {
-    if (I.getOpcode() == Instruction::PtrToInt ||
-        I.getOpcode() == Instruction::IntToPtr) {
-      Value *dstValue = &I;
-      Value *srcValue = I.getOperand(0);
-      Constant *CPV = dyn_cast<Constant>(srcValue);
-      if (CPV == NULL) {
-        Type *dstType = dstValue->getType();
-        Type *srcType = srcValue->getType();
-        GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType));
-        regTranslator.newValueProxy(srcValue, dstValue);
-      } else
-        this->newRegister(dstValue);
+    Value *dstValue = &I;
+    Value *srcValue = I.getOperand(0);
+
+    switch (I.getOpcode())
+    {
+      // When casting pointer to integers, be aware with integers
+      case Instruction::PtrToInt:
+      case Instruction::IntToPtr:
+      {
+        Constant *CPV = dyn_cast<Constant>(srcValue);
+        if (CPV == NULL) {
+          Type *dstType = dstValue->getType();
+          Type *srcType = srcValue->getType();
+          GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType));
+          regTranslator.newValueProxy(srcValue, dstValue);
+        } else
+          this->newRegister(dstValue);
+      }
+      break;
+      // Bitcast just forward registers
+      case Instruction::BitCast:
+      {
+        uint32_t elemNum;
+        getVectorInfo(ctx, I.getType(), &I, elemNum);
+        for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+          regTranslator.newValueProxy(srcValue, dstValue, elemID, elemID);
+      }
+      break;
+      // Various conversion operations -> just allocate registers for them
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+      case Instruction::SExt:
+      case Instruction::ZExt:
+      case Instruction::FPExt:
+      case Instruction::FPTrunc:
+        this->newRegister(&I);
+      break;
+      default: NOT_SUPPORTED;
      }
-    else
-      NOT_SUPPORTED;
    }
  
    void GenWriter::emitCastInst(CastInst &I) {
-    if (I.getOpcode() == Instruction::PtrToInt ||
-        I.getOpcode() == Instruction::IntToPtr) {
-      Value *srcValue = &I;
-      Value *dstValue = I.getOperand(0);
-      Constant *CPV = dyn_cast<Constant>(srcValue);
-      if (CPV != NULL) {
-        const ir::ImmediateIndex index = ctx.newImmediate(CPV);
-        const ir::Immediate imm = ctx.getImmediate(index);
-        const ir::Register reg = this->getRegister(dstValue);
-        ctx.LOADI(imm.type, reg, index);
+    switch (I.getOpcode())
+    {
+      case Instruction::PtrToInt:
+      case Instruction::IntToPtr:
+      {
+        Value *dstValue = &I;
+        Value *srcValue = I.getOperand(0);
+        Constant *CPV = dyn_cast<Constant>(srcValue);
+        if (CPV != NULL) {
+          const ir::ImmediateIndex index = ctx.newImmediate(CPV);
+          const ir::Immediate imm = ctx.getImmediate(index);
+          const ir::Register reg = this->getRegister(dstValue);
+          ctx.LOADI(imm.type, reg, index);
+        }
        }
-    }
+      break;
+      case Instruction::BitCast: break; // nothing to emit here
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+      case Instruction::SExt:
+      case Instruction::ZExt:
+      case Instruction::FPExt:
+      case Instruction::FPTrunc:
+      {
+        // Get the element type for a vector
+        uint32_t elemNum;
+        Type *llvmDstType = I.getType();
+        Type *llvmSrcType = I.getOperand(0)->getType();
+        const ir::Type dstType = getVectorInfo(ctx, llvmDstType, &I, elemNum);
+        const ir::Type srcType = getVectorInfo(ctx, llvmSrcType, &I, elemNum);
+
+        // Emit the instructions in a row
+        for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+          const ir::Register dst = this->getRegister(&I, elemID);
+          const ir::Register src = this->getRegister(I.getOperand(0), elemID);
+          ctx.CVT(dstType, srcType, dst, src);
+        }
+      }
+      break;
+      default: NOT_SUPPORTED;
+    };
    }
  
  #ifndef NDEBUG
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp

index f177411..6cf73bc 100644 (file)
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -49,8 +49,9 @@ namespace gbe
      Module &mod = *M.get();
  
      llvm::PassManager passes;
+    passes.add(createScalarReplAggregatesPass()); // Break up allocas
      passes.add(createRemoveGEPPass(unit));
-    passes.add(createConstantPropagationPass()); 
+    passes.add(createConstantPropagationPass());
      passes.add(createDeadInstEliminationPass()); // remove simplified instructions
      passes.add(createLowerSwitchPass());
      passes.add(createPromoteMemoryToRegisterPass());
diff --git a/backend/src/utest/utest_llvm.cpp b/backend/src/utest/utest_llvm.cpp

index 852f07d..d9492c4 100644 (file)
--- a/backend/src/utest/utest_llvm.cpp
+++ b/backend/src/utest/utest_llvm.cpp
@@ -84,6 +84,7 @@ runTests:
    UTEST_EXPECT_SUCCESS(utestLLVM2Gen("simple_float4_2.ll"));
    //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll"));
    UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll"));
+  UTEST_EXPECT_SUCCESS(utestLLVM2Gen("cmp_cvt.ll"));
  }
  
  UTEST_REGISTER(utestLLVM)
author	Benjamin Segovia <segovia.benjamin@gmail.com>
	Sat, 3 Mar 2012 11:07:13 +0000 (03:07 -0800)
committer	Keith Packard <keithp@keithp.com>
	Fri, 10 Aug 2012 23:15:33 +0000 (16:15 -0700)
backend/kernels/add.o	[deleted file]	patch \| blob \| history
backend/kernels/add2.o	[deleted file]	patch \| blob \| history
backend/kernels/cmp.cl	[new file with mode: 0644]	patch \| blob
backend/kernels/cmp.ll	[new file with mode: 0644]	patch \| blob
backend/kernels/cmp_cvt.cl	[new file with mode: 0644]	patch \| blob
backend/kernels/cmp_cvt.ll	[new file with mode: 0644]	patch \| blob
backend/kernels/compile.sh		patch \| blob \| history
backend/kernels/cycle.o	[deleted file]	patch \| blob \| history
backend/kernels/get_global_id.cbe.c	[deleted file]	patch \| blob \| history
backend/kernels/get_global_id.o	[deleted file]	patch \| blob \| history
backend/kernels/gg.ll	[new file with mode: 0644]	patch \| blob
backend/kernels/gg.ll.ll	[new file with mode: 0644]	patch \| blob
backend/kernels/k.ll	[deleted file]	patch \| blob \| history
backend/kernels/k.s	[deleted file]	patch \| blob \| history
backend/kernels/load_store.cbe.c	[deleted file]	patch \| blob \| history
backend/kernels/load_store.o	[deleted file]	patch \| blob \| history
backend/kernels/loop.o	[deleted file]	patch \| blob \| history
backend/kernels/mad.o	[deleted file]	patch \| blob \| history
backend/kernels/short.o	[deleted file]	patch \| blob \| history
backend/kernels/simple_float4.o	[deleted file]	patch \| blob \| history
backend/kernels/simple_float4_2.cl		patch \| blob \| history
backend/kernels/simple_float4_2.o	[deleted file]	patch \| blob \| history
backend/kernels/simple_float4_3.cl	[new file with mode: 0644]	patch \| blob
backend/kernels/simple_float4_3.ll	[new file with mode: 0644]	patch \| blob
backend/kernels/stdlib.h		patch \| blob \| history
backend/kernels/store.o	[deleted file]	patch \| blob \| history
backend/kernels/struct.cl		patch \| blob \| history
backend/kernels/struct.cl.o	[deleted file]	patch \| blob \| history
backend/kernels/struct.cl.o.ll	[deleted file]	patch \| blob \| history
backend/kernels/struct.ll		patch \| blob \| history
backend/kernels/struct.o	[deleted file]	patch \| blob \| history
backend/kernels/struct.o.cbe.c	[deleted file]	patch \| blob \| history
backend/kernels/test_select.cl	[new file with mode: 0644]	patch \| blob
backend/kernels/test_select.ll	[new file with mode: 0644]	patch \| blob
backend/kernels/undefined.o	[deleted file]	patch \| blob \| history
backend/kernels/void.cl.o	[deleted file]	patch \| blob \| history
backend/kernels/void.cl.o.cbe.c	[deleted file]	patch \| blob \| history
backend/kernels/void.cl.o.ll	[deleted file]	patch \| blob \| history
backend/kernels/void.o	[deleted file]	patch \| blob \| history
backend/kernels/void.o.ll	[deleted file]	patch \| blob \| history
backend/kernels/void.s	[deleted file]	patch \| blob \| history
backend/src/ir/instruction.cpp		patch \| blob \| history
backend/src/ir/instruction.hpp		patch \| blob \| history
backend/src/llvm/llvm_gen_backend.cpp		patch \| blob \| history
backend/src/llvm/llvm_to_gen.cpp		patch \| blob \| history
backend/src/utest/utest_llvm.cpp		patch \| blob \| history