--- /dev/null
+__kernel void test_cmp(__global bool *dst, int x, int y, float z, float w)
+{
+ dst[0] = (x < y) + (z > w);
+}
+
--- /dev/null
+; ModuleID = 'cmp.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_cmp(i8* nocapture %dst, i32 %x, i32 %y, float %z, float %w) nounwind noinline {
+entry:
+ %cmp = icmp slt i32 %x, %y
+ %conv = zext i1 %cmp to i32
+ %cmp1 = fcmp ogt float %z, %w
+ %add = sext i1 %cmp1 to i32
+ %tobool = icmp ne i32 %conv, %add
+ %frombool = zext i1 %tobool to i8
+ store i8 %frombool, i8* %dst, align 1, !tbaa !1
+ ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i8*, i32, i32, float, float)* @test_cmp}
+!1 = metadata !{metadata !"bool", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
--- /dev/null
+#include "stdlib.h"
+
+__kernel void cmp_cvt(__global int *dst, int x, int y)
+{
+ dst[0] = x + y < get_local_id(0) ;
+}
+
--- /dev/null
+; ModuleID = 'cmp_cvt.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @cmp_cvt(i32* nocapture %dst, i32 %x, i32 %y) nounwind noinline {
+get_local_id.exit:
+ %add = add nsw i32 %y, %x
+ %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
+ %cmp = icmp ult i32 %add, %call.i
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* %dst, align 4, !tbaa !1
+ ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*, i32, i32)* @cmp_cvt}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
clang -x cl -emit-llvm -O3 -ccc-host-triple ptx32 -c $1.cl -o $1.o
llvm-dis $1.o
+rm $1.o
mv $1.o.ll $1.ll
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */
-#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */
-#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF __builtin_inf() /* Double */
-#define LLVM_INFF __builtin_inff() /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM __asm__
-#else
-#define LLVM_NAN(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANF(NanStr) 0.0F /* Float */
-#define LLVM_NANS(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANSF(NanStr) 0.0F /* Float */
-#define LLVM_INF ((double)0.0) /* Double */
-#define LLVM_INFF 0.0F /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */
-#define __builtin_stack_save() 0 /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
- unsigned int Int32;
- unsigned long long Int64;
- float Float;
- double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p);
-unsigned int __gen_ocl_get_global_id0(void);
-unsigned int __gen_ocl_get_local_id0(void);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p) {
- unsigned int llvm_cbe_call_2e_i;
- unsigned int llvm_cbe_call_2e_i6;
-
- llvm_cbe_call_2e_i = /*tail*/ __gen_ocl_get_local_id0();
- llvm_cbe_call_2e_i6 = /*tail*/ __gen_ocl_get_global_id0();
- *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i6)])) = (((signed int )(((signed int )(llvm_cbe_call_2e_i << 16u)) >> ((signed int )16u))));
- *((&llvm_cbe_p[((signed int )llvm_cbe_call_2e_i6)])) = llvm_cbe_call_2e_i;
- return;
-}
-
--- /dev/null
+; ModuleID = 'gg.ll'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+%struct.my_struct = type { i32, [2 x i32] }
+
+@g = addrspace(1) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4
+@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4
+
+define ptx_kernel void @struct_cl(%struct.my_struct* byval %s, i32 %x, i32* %mem, i32 %y) nounwind noinline {
+entry:
+ %x.addr = alloca i32, align 4
+ %mem.addr = alloca i32*, align 4
+ %y.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32* %mem, i32** %mem.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 256
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %2 = load i32* %i, align 4
+ %arrayidx = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %2
+ %a = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx, i32 0, i32 0
+ store i32 %1, i32 addrspace(4)* %a, align 4
+ %3 = load i32* %i, align 4
+ %4 = load i32* %i, align 4
+ %arrayidx1 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %4
+ %b = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx1, i32 0, i32 1
+ %arrayidx2 = getelementptr inbounds [2 x i32] addrspace(4)* %b, i32 0, i32 0
+ store i32 %3, i32 addrspace(4)* %arrayidx2, align 4
+ %5 = load i32* %i, align 4
+ %add = add nsw i32 %5, 1
+ %6 = load i32* %i, align 4
+ %arrayidx3 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %6
+ %b4 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx3, i32 0, i32 1
+ %arrayidx5 = getelementptr inbounds [2 x i32] addrspace(4)* %b4, i32 0, i32 0
+ store i32 %add, i32 addrspace(4)* %arrayidx5, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32* %y.addr, align 4
+ %arrayidx6 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %8
+ %9 = bitcast %struct.my_struct addrspace(4)* %arrayidx6 to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* %9, i32 12, i32 4, i1 false)
+ %a7 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
+ %10 = load i32* %a7, align 4
+ %11 = load i32* %x.addr, align 4
+ %arrayidx8 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %11
+ %a9 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx8, i32 0, i32 0
+ %12 = load i32 addrspace(4)* %a9, align 4
+ %add10 = add nsw i32 %10, %12
+ %13 = load i32* %x.addr, align 4
+ %add11 = add nsw i32 %13, 1
+ %arrayidx12 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add11
+ %b13 = getelementptr inbounds %struct.my_struct addrspace(4)* %arrayidx12, i32 0, i32 1
+ %arrayidx14 = getelementptr inbounds [2 x i32] addrspace(4)* %b13, i32 0, i32 0
+ %14 = load i32 addrspace(4)* %arrayidx14, align 4
+ %add15 = add nsw i32 %add10, %14
+ %15 = load i32* %x.addr, align 4
+ %arrayidx16 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %15
+ %16 = load i32 addrspace(1)* %arrayidx16, align 4
+ %add17 = add nsw i32 %add15, %16
+ %17 = load i32 addrspace(1)* getelementptr inbounds ([4 x i32] addrspace(1)* @g, i32 0, i32 3), align 4
+ %add18 = add nsw i32 %add17, %17
+ %18 = load i32** %mem.addr, align 4
+ %arrayidx19 = getelementptr inbounds i32* %18, i32 0
+ store i32 %add18, i32* %arrayidx19
+ ret void
+}
+
+declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (%struct.my_struct*, i32, i32*, i32)* @struct_cl}
+++ /dev/null
-; ModuleID = 'k'
-target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
-target triple = "ptx32--"
-
-%struct.my_struct = type { i32, [2 x i32] }
-
-define ptx_device void @struct_cl(%struct.my_struct* nocapture byval %s) nounwind readnone {
-entry:
- ret void
-}
+++ /dev/null
- .version 2.0
- .target compute_10, map_f64_to_f32
-
-
-.func () struct_cl (.reg .b32 %param0) // @struct_cl
-{
-
-// BB#0: // %entry
- ret;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */
-#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */
-#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF __builtin_inf() /* Double */
-#define LLVM_INFF __builtin_inff() /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM __asm__
-#else
-#define LLVM_NAN(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANF(NanStr) 0.0F /* Float */
-#define LLVM_NANS(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANSF(NanStr) 0.0F /* Float */
-#define LLVM_INF ((double)0.0) /* Double */
-#define LLVM_INFF 0.0F /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */
-#define __builtin_stack_save() 0 /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
- unsigned int Int32;
- unsigned long long Int64;
- float Float;
- double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void load_store(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_src);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void load_store(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_src) {
- unsigned int llvm_cbe_tmp__1;
-
- llvm_cbe_tmp__1 = *llvm_cbe_src;
- *llvm_cbe_dst = llvm_cbe_tmp__1;
- return;
-}
-
dst[get_global_id(0)] = src[get_global_id(0)] * src[get_global_id(0)];
}
-
--- /dev/null
+#include "stdlib.h"
+
+__kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b)
+{
+ dst[get_global_id(0)] = select(b, src[get_global_id(0)], src[get_global_id(1)]);
+ dst[get_global_id(0)] += (float4) (src[2].x, 1.f, 2.f, 3.f);
+}
+
--- /dev/null
+; ModuleID = 'simple_float4_3.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @simple_float4(<4 x float>* nocapture %dst, <4 x float>* nocapture %src, i1 %b) nounwind noinline {
+get_global_id.exit16:
+ %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+ %arrayidx = getelementptr inbounds <4 x float>* %src, i32 %call.i
+ %0 = load <4 x float>* %arrayidx, align 16, !tbaa !1
+ %call3.i = tail call ptx_device i32 @__gen_ocl_get_global_id1() nounwind readnone
+ %arrayidx2 = getelementptr inbounds <4 x float>* %src, i32 %call3.i
+ %1 = load <4 x float>* %arrayidx2, align 16, !tbaa !1
+ %x.y.i = select i1 %b, <4 x float> %0, <4 x float> %1
+ %arrayidx5 = getelementptr inbounds <4 x float>* %dst, i32 %call.i
+ store <4 x float> %x.y.i, <4 x float>* %arrayidx5, align 16, !tbaa !1
+ %arrayidx6 = getelementptr inbounds <4 x float>* %src, i32 2
+ %2 = load <4 x float>* %arrayidx6, align 16
+ %3 = extractelement <4 x float> %2, i32 0
+ %vecinit = insertelement <4 x float> undef, float %3, i32 0
+ %vecinit7 = insertelement <4 x float> %vecinit, float 1.000000e+00, i32 1
+ %vecinit8 = insertelement <4 x float> %vecinit7, float 2.000000e+00, i32 2
+ %vecinit9 = insertelement <4 x float> %vecinit8, float 3.000000e+00, i32 3
+ %add = fadd <4 x float> %x.y.i, %vecinit9
+ store <4 x float> %add, <4 x float>* %arrayidx5, align 16, !tbaa !1
+ ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+
+declare ptx_device i32 @__gen_ocl_get_global_id1() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (<4 x float>*, <4 x float>*, i1)* @simple_float4}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
+typedef bool bool2 __attribute__((ext_vector_type(2)));
+typedef bool bool3 __attribute__((ext_vector_type(3)));
+typedef bool bool4 __attribute__((ext_vector_type(4)));
+
+#define DECL_SELECT(TYPE) \
+__attribute__((overloadable)) \
+inline TYPE select(bool b, TYPE x, TYPE y) { \
+ if (b) return x; else return y; \
+}
+#define DECL_SELECT_ALL(TYPE) \
+ DECL_SELECT(TYPE) \
+ DECL_SELECT(TYPE##2) \
+ DECL_SELECT(TYPE##3) \
+ DECL_SELECT(TYPE##4)
+DECL_SELECT_ALL(int)
+DECL_SELECT_ALL(float)
+#undef DECL_SELECT_ALL
+#undef DECL_SELECT
int b[2];
};
-__constant int g[4] = {0,1,2,3};
+const __constant int g[4] = {0,1,2,3};
-__kernel void struct_cl (struct my_struct s, int x, __global int *mem)
+__kernel void struct_cl (struct my_struct s, int x, __global int *mem, int y)
{
- __local int array[256];
- for (int i = 0; i < 256; ++i)
- array[i] = i;
- mem[0] = s.a + array[x] + g[x];
+ __local struct my_struct array[256];
+ for (int i = 0; i < 256; ++i) {
+ array[i].a = i;
+ array[i].b[0] = i;
+ array[i].b[0] = i+1;
+ }
+ array[0] = array[y];
+ mem[0] = s.a + array[x].a + array[x+1].b[0] + g[x] + g[3];
}
+++ /dev/null
-; ModuleID = 'struct.cl.o'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @struct_cl(i64 %s.coerce0, i32 %s.coerce1) nounwind uwtable readnone {
-entry:
- ret void
-}
-
-!opencl.kernels = !{!0}
-
-!0 = metadata !{void (i64, i32)* @struct_cl}
%struct.my_struct = type { i32, [2 x i32] }
-@g = addrspace(1) global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4
-@struct_cl.array = internal addrspace(4) unnamed_addr global [256 x i32] zeroinitializer, align 4
+@g = addrspace(1) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 4
+@struct_cl.array = internal addrspace(4) global [256 x %struct.my_struct] zeroinitializer, align 4
-define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32* nocapture %mem) nounwind noinline {
+define ptx_kernel void @struct_cl(%struct.my_struct* nocapture byval %s, i32 %x, i32* nocapture %mem, i32 %y) nounwind noinline {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
- %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %i.05
- store i32 %i.05, i32 addrspace(4)* %arrayidx, align 4, !tbaa !1
- %inc = add nsw i32 %i.05, 1
- %exitcond = icmp eq i32 %inc, 256
+ %i.020 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %a = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.020, i32 0
+ store i32 %i.020, i32 addrspace(4)* %a, align 4, !tbaa !1
+ %arrayidx2 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %i.020, i32 1, i32 0
+ %add = add nsw i32 %i.020, 1
+ store i32 %add, i32 addrspace(4)* %arrayidx2, align 4, !tbaa !1
+ %exitcond = icmp eq i32 %add, 256
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
- %a = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
- %0 = load i32* %a, align 4, !tbaa !1
- %arrayidx1 = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %x
- %1 = load i32 addrspace(4)* %arrayidx1, align 4, !tbaa !1
- %arrayidx2 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x
- %2 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !1
- %add = add i32 %1, %0
- %add3 = add i32 %add, %2
- store i32 %add3, i32* %mem, align 4, !tbaa !1
+ %arrayidx6 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %y
+ %0 = bitcast %struct.my_struct addrspace(4)* %arrayidx6 to i8 addrspace(4)*
+ tail call void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* bitcast ([256 x %struct.my_struct] addrspace(4)* @struct_cl.array to i8 addrspace(4)*), i8 addrspace(4)* %0, i32 12, i32 4, i1 false)
+ %a7 = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
+ %1 = load i32* %a7, align 4, !tbaa !1
+ %a9 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %x, i32 0
+ %2 = load i32 addrspace(4)* %a9, align 4, !tbaa !1
+ %add11 = add nsw i32 %x, 1
+ %arrayidx14 = getelementptr inbounds [256 x %struct.my_struct] addrspace(4)* @struct_cl.array, i32 0, i32 %add11, i32 1, i32 0
+ %3 = load i32 addrspace(4)* %arrayidx14, align 4, !tbaa !1
+ %arrayidx16 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x
+ %4 = load i32 addrspace(1)* %arrayidx16, align 4, !tbaa !1
+ %add10 = add i32 %1, 3
+ %add15 = add i32 %add10, %2
+ %add17 = add i32 %add15, %3
+ %add18 = add i32 %add17, %4
+ store i32 %add18, i32* %mem, align 4, !tbaa !1
ret void
}
+declare void @llvm.memcpy.p4i8.p4i8.i32(i8 addrspace(4)* nocapture, i8 addrspace(4)* nocapture, i32, i32, i1) nounwind
+
!opencl.kernels = !{!0}
-!0 = metadata !{void (%struct.my_struct*, i32, i32*)* @struct_cl}
+!0 = metadata !{void (%struct.my_struct*, i32, i32*, i32)* @struct_cl}
!1 = metadata !{metadata !"int", metadata !2}
!2 = metadata !{metadata !"omnipotent char", metadata !3}
!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */
-#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */
-#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF __builtin_inf() /* Double */
-#define LLVM_INFF __builtin_inff() /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM __asm__
-#else
-#define LLVM_NAN(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANF(NanStr) 0.0F /* Float */
-#define LLVM_NANS(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANSF(NanStr) 0.0F /* Float */
-#define LLVM_INF ((double)0.0) /* Double */
-#define LLVM_INFF 0.0F /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */
-#define __builtin_stack_save() 0 /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
- unsigned int Int32;
- unsigned long long Int64;
- float Float;
- double Double;
-} llvmBitCastUnion;
-/* Structure forward decls */
-typedef struct l_struct_OC_my_struct l_struct_OC_my_struct;
-
-/* Structure contents */
-l_struct_OC_my_struct {
- unsigned int field0;
- field1 { unsigned int array[2]; };
-};
-
-
-/* External Global Variable Declarations */
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void struct_cl(l_struct_OC_my_struct llvm_cbe_s, unsigned int llvm_cbe_x, unsigned int *llvm_cbe_mem);
-void abort(void);
-
-
-/* Global Variable Declarations */
-static struct_cl_OC_array { unsigned int array[256]; };
-
-
-/* Global Variable Definitions and Initialization */
-static struct_cl_OC_array { unsigned int array[256]; };
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void struct_cl(l_struct_OC_my_struct llvm_cbe_s, unsigned int llvm_cbe_x, unsigned int *llvm_cbe_mem) {
- unsigned int llvm_cbe_i_2e_03;
- unsigned int llvm_cbe_i_2e_03__PHI_TEMPORARY;
- unsigned int llvm_cbe_inc;
- unsigned int llvm_cbe_tmp__1;
-
- llvm_cbe_i_2e_03__PHI_TEMPORARY = 0u; /* for PHI node */
- goto llvm_cbe_for_2e_body;
-
- do { /* Syntactic loop 'for.body' to make GCC happy */
-llvm_cbe_for_2e_body:
- llvm_cbe_i_2e_03 = llvm_cbe_i_2e_03__PHI_TEMPORARY;
- *((&struct_cl_OC_array.array[((signed int )llvm_cbe_i_2e_03)])) = llvm_cbe_i_2e_03;
- llvm_cbe_inc = ((unsigned int )(((unsigned int )llvm_cbe_i_2e_03) + ((unsigned int )1u)));
- if ((llvm_cbe_inc == 256u)) {
- goto llvm_cbe_for_2e_end;
- } else {
- llvm_cbe_i_2e_03__PHI_TEMPORARY = llvm_cbe_inc; /* for PHI node */
- goto llvm_cbe_for_2e_body;
- }
-
- } while (1); /* end of syntactic loop 'for.body' */
-llvm_cbe_for_2e_end:
- llvm_cbe_tmp__1 = *((&struct_cl_OC_array.array[((signed int )llvm_cbe_x)]));
- *llvm_cbe_mem = llvm_cbe_tmp__1;
- return;
-}
-
--- /dev/null
+#include "stdlib.h"
+__kernel void test_select(__global int *dst, __global int *src)
+{
+
+ if (src[get_global_id(0)] > 1)
+ dst[get_global_id(0)] = 1;
+ else
+ dst[get_global_id(0)] = 2;
+}
+
--- /dev/null
+; ModuleID = 'test_select.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_select(i32* nocapture %dst, i32* nocapture %src) nounwind noinline {
+get_global_id.exit7:
+ %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+ %arrayidx = getelementptr inbounds i32* %src, i32 %call.i
+ %0 = load i32* %arrayidx, align 4, !tbaa !1
+ %cmp = icmp sgt i32 %0, 1
+ %arrayidx2 = getelementptr inbounds i32* %dst, i32 %call.i
+ %. = select i1 %cmp, i32 1, i32 2
+ store i32 %., i32* %arrayidx2, align 4
+ ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*, i32*)* @test_select}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */
-#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */
-#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF __builtin_inf() /* Double */
-#define LLVM_INFF __builtin_inff() /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM __asm__
-#else
-#define LLVM_NAN(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANF(NanStr) 0.0F /* Float */
-#define LLVM_NANS(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANSF(NanStr) 0.0F /* Float */
-#define LLVM_INF ((double)0.0) /* Double */
-#define LLVM_INFF 0.0F /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */
-#define __builtin_stack_save() 0 /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
- unsigned int Int32;
- unsigned long long Int64;
- float Float;
- double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void hop(void);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void hop(void) {
- return;
-}
-
+++ /dev/null
-; ModuleID = 'void.cl.o'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @hop() nounwind uwtable readnone {
-entry:
- ret void
-}
-
-!opencl.kernels = !{!0}
-
-!0 = metadata !{void ()* @hop}
+++ /dev/null
-; ModuleID = 'void.o'
-target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
-target triple = "ptx32--"
-
-define ptx_kernel void @hop() nounwind readnone noinline {
-entry:
- ret void
-}
-
-!opencl.kernels = !{!0}
-
-!0 = metadata !{void ()* @hop}
+++ /dev/null
-/* Provide Declarations */
-#include <stdarg.h>
-#include <setjmp.h>
-#include <limits.h>
-/* get a declaration for alloca */
-#if defined(__CYGWIN__) || defined(__MINGW32__)
-#define alloca(x) __builtin_alloca((x))
-#define _alloca(x) __builtin_alloca((x))
-#elif defined(__APPLE__)
-extern void *__builtin_alloca(unsigned long);
-#define alloca(x) __builtin_alloca(x)
-#define longjmp _longjmp
-#define setjmp _setjmp
-#elif defined(__sun__)
-#if defined(__sparcv9)
-extern void *__builtin_alloca(unsigned long);
-#else
-extern void *__builtin_alloca(unsigned int);
-#endif
-#define alloca(x) __builtin_alloca(x)
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)
-#define alloca(x) __builtin_alloca(x)
-#elif defined(_MSC_VER)
-#define inline _inline
-#define alloca(x) _alloca(x)
-#else
-#include <alloca.h>
-#endif
-
-#ifndef __GNUC__ /* Can only support "linkonce" vars with GCC */
-#define __attribute__(X)
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak_import))
-#elif defined(__GNUC__)
-#define __EXTERNAL_WEAK__ __attribute__((weak))
-#else
-#define __EXTERNAL_WEAK__
-#endif
-
-#if defined(__GNUC__) && defined(__APPLE_CC__)
-#define __ATTRIBUTE_WEAK__
-#elif defined(__GNUC__)
-#define __ATTRIBUTE_WEAK__ __attribute__((weak))
-#else
-#define __ATTRIBUTE_WEAK__
-#endif
-
-#if defined(__GNUC__)
-#define __HIDDEN__ __attribute__((visibility("hidden")))
-#endif
-
-#ifdef __GNUC__
-#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */
-#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */
-#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */
-#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */
-#define LLVM_INF __builtin_inf() /* Double */
-#define LLVM_INFF __builtin_inff() /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) __builtin_prefetch(addr,rw,locality)
-#define __ATTRIBUTE_CTOR__ __attribute__((constructor))
-#define __ATTRIBUTE_DTOR__ __attribute__((destructor))
-#define LLVM_ASM __asm__
-#else
-#define LLVM_NAN(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANF(NanStr) 0.0F /* Float */
-#define LLVM_NANS(NanStr) ((double)0.0) /* Double */
-#define LLVM_NANSF(NanStr) 0.0F /* Float */
-#define LLVM_INF ((double)0.0) /* Double */
-#define LLVM_INFF 0.0F /* Float */
-#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */
-#define __ATTRIBUTE_CTOR__
-#define __ATTRIBUTE_DTOR__
-#define LLVM_ASM(X)
-#endif
-
-#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */
-#define __builtin_stack_save() 0 /* not implemented */
-#define __builtin_stack_restore(X) /* noop */
-#endif
-
-#if __GNUC__ && __LP64__ /* 128-bit integer types */
-typedef int __attribute__((mode(TI))) llvmInt128;
-typedef unsigned __attribute__((mode(TI))) llvmUInt128;
-#endif
-
-#define CODE_FOR_MAIN() /* Any target-specific code for main()*/
-
-#ifndef __cplusplus
-typedef unsigned char bool;
-#endif
-
-
-/* Support for floating point constants */
-typedef unsigned long long ConstantDoubleTy;
-typedef unsigned int ConstantFloatTy;
-typedef struct { unsigned long long f1; unsigned short f2; unsigned short pad[3]; } ConstantFP80Ty;
-typedef struct { unsigned long long f1; unsigned long long f2; } ConstantFP128Ty;
-
-
-/* Global Declarations */
-/* Helper union for bitcasts */
-typedef union {
- unsigned int Int32;
- unsigned long long Int64;
- float Float;
- double Double;
-} llvmBitCastUnion;
-
-/* Function Declarations */
-double fmod(double, double);
-float fmodf(float, float);
-long double fmodl(long double, long double);
-void hop(void);
-void abort(void);
-
-
-/* Function Bodies */
-static inline int llvm_fcmp_ord(double X, double Y) { return X == X && Y == Y; }
-static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
-static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_une(double X, double Y) { return X != Y; }
-static inline int llvm_fcmp_ult(double X, double Y) { return X < Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ugt(double X, double Y) { return X > Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_ule(double X, double Y) { return X <= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_uge(double X, double Y) { return X >= Y || llvm_fcmp_uno(X, Y); }
-static inline int llvm_fcmp_oeq(double X, double Y) { return X == Y ; }
-static inline int llvm_fcmp_one(double X, double Y) { return X != Y && llvm_fcmp_ord(X, Y); }
-static inline int llvm_fcmp_olt(double X, double Y) { return X < Y ; }
-static inline int llvm_fcmp_ogt(double X, double Y) { return X > Y ; }
-static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
-static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-
-void hop(void) {
- return;
-}
-
// We can convert anything to anything, but types and families must match
INLINE bool ConvertInstruction::wellFormed(const Function &fn, std::string &whyNot) const
{
- const RegisterData::Family dstFamily = getFamily(srcType);
+ const RegisterData::Family dstFamily = getFamily(dstType);
const RegisterData::Family srcFamily = getFamily(srcType);
if (UNLIKELY(checkRegisterData(dstFamily, dst, fn, whyNot) == false))
return false;
out << "." << this->getDstType()
<< "." << this->getSrcType()
<< " %" << this->getDstIndex(fn, 0)
- << " %" << this->getSrcIndex(fn, 0)
- << " %" << this->getSrcIndex(fn, 1);
+ << " %" << this->getSrcIndex(fn, 0);
}
INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const {
/*! ge.type dst src0 src1 */
Instruction GT(Type type, Register dst, Register src0, Register src1);
/*! cvt.{dstType <- srcType} dst src */
- Instruction CVT(Type dstType, Type srcType, Register dst, Register src0, Register src1);
+ Instruction CVT(Type dstType, Type srcType, Register dst, Register src);
/*! bra labelIndex */
Instruction BRA(LabelIndex labelIndex);
/*! (pred) bra labelIndex */
DECL_VISIT_FN(LoadInst, LoadInst);
DECL_VISIT_FN(StoreInst, StoreInst);
DECL_VISIT_FN(CallInst, CallInst);
+ DECL_VISIT_FN(ICmpInst, ICmpInst);
+ DECL_VISIT_FN(FCmpInst, FCmpInst);
#undef DECL_VISIT_FN
// Must be implemented later
void visitInsertElementInst(InsertElementInst &I) {NOT_SUPPORTED;}
void visitExtractElementInst(ExtractElementInst &I) {NOT_SUPPORTED;}
void visitShuffleVectorInst(ShuffleVectorInst &SVI) {NOT_SUPPORTED;}
- void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
- void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
void visitPHINode(PHINode &I) {NOT_SUPPORTED;}
void visitBranchInst(BranchInst &I) {NOT_SUPPORTED;}
- void visitICmpInst(ICmpInst &I) {NOT_SUPPORTED;}
- void visitFCmpInst(FCmpInst &I) {NOT_SUPPORTED;}
void visitSelectInst(SelectInst &I) {NOT_SUPPORTED;}
// These instructions are not supported at all
void visitUnreachableInst(UnreachableInst &I) {NOT_SUPPORTED;}
void visitGetElementPtrInst(GetElementPtrInst &I) {NOT_SUPPORTED;}
void visitAllocaInst(AllocaInst &I) {NOT_SUPPORTED;}
+ void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
+ void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
template <bool isLoad, typename T> void visitLoadOrStore(T &I);
void visitInstruction(Instruction &I) {NOT_SUPPORTED;}
this->newRegister(&I);
}
- void GenWriter::emitBinaryOperator(Instruction &I) {
- GBE_ASSERT(I.getType()->isPointerTy() == false);
-
- // Get the element type for a vector
+ static ir::Type
+ getVectorInfo(const ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum)
+ {
ir::Type type;
- uint32_t elemNum;
- Type *llvmType = I.getType();
if (llvmType->isVectorTy() == true) {
VectorType *vectorType = cast<VectorType>(llvmType);
Type *elementType = vectorType->getElementType();
elemNum = 1;
type = getType(ctx, llvmType);
}
+ return type;
+ }
+
+ void GenWriter::emitBinaryOperator(Instruction &I) {
+ GBE_ASSERT(I.getType()->isPointerTy() == false ||
+ I.getType() != Type::getInt1Ty(I.getContext()));
+
+ // Get the element type for a vector
+ uint32_t elemNum;
+ const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
// Emit the instructions in a row
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
}
}
+ void GenWriter::regAllocateICmpInst(ICmpInst &I) {
+ this->newRegister(&I);
+ }
+
+ static ir::Type makeTypeSigned(const ir::Type &type) {
+ if (type == ir::TYPE_U8) return ir::TYPE_S8;
+ else if (type == ir::TYPE_U16) return ir::TYPE_S16;
+ else if (type == ir::TYPE_U32) return ir::TYPE_S32;
+ else if (type == ir::TYPE_U64) return ir::TYPE_S64;
+ return type;
+ }
+
+ static ir::Type makeTypeUnsigned(const ir::Type &type) {
+ if (type == ir::TYPE_S8) return ir::TYPE_U8;
+ else if (type == ir::TYPE_S16) return ir::TYPE_U16;
+ else if (type == ir::TYPE_S32) return ir::TYPE_U32;
+ else if (type == ir::TYPE_S64) return ir::TYPE_U64;
+ return type;
+ }
+
+ void GenWriter::emitICmpInst(ICmpInst &I) {
+ GBE_ASSERT(I.getOperand(0)->getType() != Type::getInt1Ty(I.getContext()));
+
+ // Get the element type and the number of elements
+ uint32_t elemNum;
+ Type *operandType = I.getOperand(0)->getType();
+ const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
+ const ir::Type signedType = makeTypeSigned(type);
+ const ir::Type unsignedType = makeTypeUnsigned(type);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: ctx.EQ(type, dst, src0, src1); break;
+ case ICmpInst::ICMP_NE: ctx.NE(type, dst, src0, src1); break;
+ case ICmpInst::ICMP_ULE: ctx.LE((unsignedType), dst, src0, src1); break;
+ case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break;
+ case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break;
+ default: NOT_SUPPORTED;
+ };
+ }
+ }
+
+ void GenWriter::regAllocateFCmpInst(FCmpInst &I) {
+ this->newRegister(&I);
+ }
+
+ void GenWriter::emitFCmpInst(FCmpInst &I) {
+ GBE_ASSERT(I.getType() != Type::getInt1Ty(I.getContext()));
+
+ // Get the element type and the number of elements
+ uint32_t elemNum;
+ Type *operandType = I.getOperand(0)->getType();
+ const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::FCMP_OEQ:
+ case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_ONE:
+ case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OLE:
+ case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OGE:
+ case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OLT:
+ case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OGT:
+ case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
+ default: NOT_SUPPORTED;
+ };
+ }
+ }
+
void GenWriter::regAllocateCastInst(CastInst &I)
{
- if (I.getOpcode() == Instruction::PtrToInt ||
- I.getOpcode() == Instruction::IntToPtr) {
- Value *dstValue = &I;
- Value *srcValue = I.getOperand(0);
- Constant *CPV = dyn_cast<Constant>(srcValue);
- if (CPV == NULL) {
- Type *dstType = dstValue->getType();
- Type *srcType = srcValue->getType();
- GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType));
- regTranslator.newValueProxy(srcValue, dstValue);
- } else
- this->newRegister(dstValue);
+ Value *dstValue = &I;
+ Value *srcValue = I.getOperand(0);
+
+ switch (I.getOpcode())
+ {
+ // When casting pointer to integers, be aware with integers
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ {
+ Constant *CPV = dyn_cast<Constant>(srcValue);
+ if (CPV == NULL) {
+ Type *dstType = dstValue->getType();
+ Type *srcType = srcValue->getType();
+ GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType));
+ regTranslator.newValueProxy(srcValue, dstValue);
+ } else
+ this->newRegister(dstValue);
+ }
+ break;
+ // Bitcast just forward registers
+ case Instruction::BitCast:
+ {
+ uint32_t elemNum;
+ getVectorInfo(ctx, I.getType(), &I, elemNum);
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ regTranslator.newValueProxy(srcValue, dstValue, elemID, elemID);
+ }
+ break;
+ // Various conversion operations -> just allocate registers for them
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ this->newRegister(&I);
+ break;
+ default: NOT_SUPPORTED;
}
- else
- NOT_SUPPORTED;
}
void GenWriter::emitCastInst(CastInst &I) {
- if (I.getOpcode() == Instruction::PtrToInt ||
- I.getOpcode() == Instruction::IntToPtr) {
- Value *srcValue = &I;
- Value *dstValue = I.getOperand(0);
- Constant *CPV = dyn_cast<Constant>(srcValue);
- if (CPV != NULL) {
- const ir::ImmediateIndex index = ctx.newImmediate(CPV);
- const ir::Immediate imm = ctx.getImmediate(index);
- const ir::Register reg = this->getRegister(dstValue);
- ctx.LOADI(imm.type, reg, index);
+ switch (I.getOpcode())
+ {
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ {
+ Value *dstValue = &I;
+ Value *srcValue = I.getOperand(0);
+ Constant *CPV = dyn_cast<Constant>(srcValue);
+ if (CPV != NULL) {
+ const ir::ImmediateIndex index = ctx.newImmediate(CPV);
+ const ir::Immediate imm = ctx.getImmediate(index);
+ const ir::Register reg = this->getRegister(dstValue);
+ ctx.LOADI(imm.type, reg, index);
+ }
}
- }
+ break;
+ case Instruction::BitCast: break; // nothing to emit here
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ {
+ // Get the element type for a vector
+ uint32_t elemNum;
+ Type *llvmDstType = I.getType();
+ Type *llvmSrcType = I.getOperand(0)->getType();
+ const ir::Type dstType = getVectorInfo(ctx, llvmDstType, &I, elemNum);
+ const ir::Type srcType = getVectorInfo(ctx, llvmSrcType, &I, elemNum);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const ir::Register src = this->getRegister(I.getOperand(0), elemID);
+ ctx.CVT(dstType, srcType, dst, src);
+ }
+ }
+ break;
+ default: NOT_SUPPORTED;
+ };
}
#ifndef NDEBUG
Module &mod = *M.get();
llvm::PassManager passes;
+ passes.add(createScalarReplAggregatesPass()); // Break up allocas
passes.add(createRemoveGEPPass(unit));
- passes.add(createConstantPropagationPass());
+ passes.add(createConstantPropagationPass());
passes.add(createDeadInstEliminationPass()); // remove simplified instructions
passes.add(createLowerSwitchPass());
passes.add(createPromoteMemoryToRegisterPass());
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("simple_float4_2.ll"));
//UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll"));
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll"));
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("cmp_cvt.ll"));
}
UTEST_REGISTER(utestLLVM)