From c2e1de5a51d3a07cfeb1be61fad90a39f0e4dd84 Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Fri, 2 Mar 2012 20:45:26 -0800 Subject: [PATCH] Made first tests pass with vectors --- backend/kernels/cycle.cl | 14 + backend/kernels/cycle.ll | 15 + backend/kernels/cycle.o | Bin 0 -> 404 bytes backend/kernels/get_global_id.cl | 21 +- backend/kernels/mad.cl | 3 +- backend/kernels/mad.ll | 7 +- backend/kernels/mad.o | Bin 884 -> 896 bytes backend/kernels/short.cl | 5 + backend/kernels/short.ll | 17 + backend/kernels/short.o | Bin 0 -> 504 bytes backend/kernels/simple_float4.cl | 7 + backend/kernels/simple_float4.ll | 21 + backend/kernels/simple_float4.o | Bin 0 -> 596 bytes backend/kernels/simple_float4_2.cl | 8 + backend/kernels/simple_float4_2.ll | 22 + backend/kernels/simple_float4_2.o | Bin 0 -> 600 bytes backend/kernels/stdlib.h | 211 +---- backend/src/ir/context.hpp | 16 +- backend/src/ir/function.hpp | 2 +- backend/src/ir/register.cpp | 9 + backend/src/ir/register.hpp | 2 + backend/src/llvm/llvm_gen_backend.cpp | 1370 ++++++++++----------------------- backend/src/llvm/llvm_passes.cpp | 6 +- backend/src/utest/utest_llvm.cpp | 10 +- 24 files changed, 598 insertions(+), 1168 deletions(-) create mode 100644 backend/kernels/cycle.cl create mode 100644 backend/kernels/cycle.ll create mode 100644 backend/kernels/cycle.o create mode 100644 backend/kernels/short.cl create mode 100644 backend/kernels/short.ll create mode 100644 backend/kernels/short.o create mode 100644 backend/kernels/simple_float4.cl create mode 100644 backend/kernels/simple_float4.ll create mode 100644 backend/kernels/simple_float4.o create mode 100644 backend/kernels/simple_float4_2.cl create mode 100644 backend/kernels/simple_float4_2.ll create mode 100644 backend/kernels/simple_float4_2.o diff --git a/backend/kernels/cycle.cl b/backend/kernels/cycle.cl new file mode 100644 index 0000000..fe9135c --- /dev/null +++ b/backend/kernels/cycle.cl @@ -0,0 +1,14 @@ +__kernel void cycle(global int *dst) +{ + int x, y; + +hop0: + x = y; + +hop1: + y = x; + goto hop0; + + dst[0] = x; +} + diff --git a/backend/kernels/cycle.ll b/backend/kernels/cycle.ll new file mode 100644 index 0000000..6336300 --- /dev/null +++ b/backend/kernels/cycle.ll @@ -0,0 +1,15 @@ +; ModuleID = 'cycle.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @cycle(i32* nocapture %dst) noreturn nounwind readnone noinline { +entry: + br label %hop0 + +hop0: ; preds = %hop0, %entry + br label %hop0 +} + +!opencl.kernels = !{!0} + +!0 = metadata !{void (i32*)* @cycle} diff --git a/backend/kernels/cycle.o b/backend/kernels/cycle.o new file mode 100644 index 0000000000000000000000000000000000000000..b7157d7a6283d68a1a8a2781a72724fe0cb5c092 GIT binary patch literal 404 zcmZ>AK5$Qwhk+r9fq{WhfPp~>NV7L8Pjo!N;>jjpWI2hkg-4Z@LsE{VJE?)kmC0!m zw^NFMk%$F{geRAgi;4%=1V!Z|K@B{TibX2yCr$_?tyr+a;n)X8pgEor3=Awln!AI6 z#ZiEz^FI*CpNL@SRA68LTFGFfbb>=6wW)&?fq{VwNDH$Z(s4BK7HDB+WB@sgJwV{BNTRrd%sJ=i z0|p+#Kp`-w1p1#H$TH?=kU8>DCQXFVw!_(?<#3Nv1AAo!dof5+p#lTg32XrzGd2Lj G9|QnjXJdc> literal 0 HcmV?d00001 diff --git a/backend/kernels/get_global_id.cl b/backend/kernels/get_global_id.cl index 299d6c3..010beed 100644 --- a/backend/kernels/get_global_id.cl +++ b/backend/kernels/get_global_id.cl @@ -1,23 +1,4 @@ -__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id0(void); -__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id1(void); -__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id2(void); -__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id0(void); -__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id1(void); -__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id2(void); - -inline unsigned get_global_id(unsigned int dim) { - if (dim == 0) return __gen_ocl_get_global_id0(); - else if (dim == 1) return __gen_ocl_get_global_id1(); - else if (dim == 2) return __gen_ocl_get_global_id2(); - else return 0; -} - -inline unsigned get_local_id(unsigned int dim) { - if (dim == 0) return __gen_ocl_get_local_id0(); - else if (dim == 1) return __gen_ocl_get_local_id1(); - else if (dim == 2) return __gen_ocl_get_local_id2(); - else return 0; -} +#include "stdlib.h" __kernel void test_global_id(__global int *dst, __global int *p) { diff --git a/backend/kernels/mad.cl b/backend/kernels/mad.cl index 9b6e36a..9589ff6 100644 --- a/backend/kernels/mad.cl +++ b/backend/kernels/mad.cl @@ -10,7 +10,8 @@ __kernel void add(__global int *dst, unsigned int x, float z) y = mad(dst[i], 2, 3); float z = mad((float) dst[i], 2.f, 3.f); float4 z0 = mad((float4) dst[i], (float4)(0.f,1.f,2.f,3.f), (float4)3.f); - dst[i] = y + (int) z + z0.x + z0.y + z0.z; + float4 x0 = z0 * (float4) 2.f; + dst[i] = y + (int) z + x0.x + x0.y + x0.z; } } diff --git a/backend/kernels/mad.ll b/backend/kernels/mad.ll index 536fcc4..6bd19da 100644 --- a/backend/kernels/mad.ll +++ b/backend/kernels/mad.ll @@ -17,14 +17,15 @@ for.body: ; preds = %for.body, %entry %1 = insertelement <4 x float> undef, float %conv, i32 0 %splat = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer %call8 = tail call ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %splat, <4 x float> , <4 x float> ) nounwind readonly + %mul = fmul <4 x float> %call8, %conv9 = fptosi float %call5 to i32 %add = add nsw i32 %conv9, %call2 %conv10 = sitofp i32 %add to float - %2 = extractelement <4 x float> %call8, i32 0 + %2 = extractelement <4 x float> %mul, i32 0 %add11 = fadd float %conv10, %2 - %3 = extractelement <4 x float> %call8, i32 1 + %3 = extractelement <4 x float> %mul, i32 1 %add12 = fadd float %add11, %3 - %4 = extractelement <4 x float> %call8, i32 2 + %4 = extractelement <4 x float> %mul, i32 2 %add13 = fadd float %add12, %4 %conv14 = fptosi float %add13 to i32 store i32 %conv14, i32* %arrayidx, align 4, !tbaa !1 diff --git a/backend/kernels/mad.o b/backend/kernels/mad.o index 26aa240908f640f28c87040101b904ddb207b35a..906b7e46e832781dd9672fe58597629009b6e7e9 100644 GIT binary patch delta 376 zcmeyu*1*o`DwwcGtfj5LD zQ$XfG1ykTeQ#T$SMYSgGz;{w+NeLW=iYeU2$3LVTS2+C5N13fnK$w}m-u0?yQIx`O z0|6Nc9%<(feO%0j4TdcGD-9KxC7eG@&|)@jux@r}78H}>X$a-yXp_FQfT1~(O)-F( zhn-0UXt7xk!{LDI#{xahGx?|&i1wc;lQK(6;4oB7IAe7DL&9-|=5HR#Y;6L(?$8@mr@G*fM zi|q^=3|aJ68VE2;I6s&m#cb4|-R#gTI8~CTA&`@!P5RsdhUQ2%MF(acb|#RuVnGat z1Fmli^f=Gtqh285_eA(?f^6#plhAw-)<>7W=1H_XFqv8_(R4Y)G*_(+DBc^-5M(h? z-hw$zltaLkp^ay{uLx7<=PSDed74dT9%tZbjum0rptG%4trb0PpIsBYZ= diff --git a/backend/kernels/short.cl b/backend/kernels/short.cl new file mode 100644 index 0000000..83e38a0 --- /dev/null +++ b/backend/kernels/short.cl @@ -0,0 +1,5 @@ +__kernel void short_write(__global short *dst, short x, short y) +{ + dst[0] = x + y; +} + diff --git a/backend/kernels/short.ll b/backend/kernels/short.ll new file mode 100644 index 0000000..6225107 --- /dev/null +++ b/backend/kernels/short.ll @@ -0,0 +1,17 @@ +; ModuleID = 'short.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @short_write(i16* nocapture %dst, i16 %x, i16 %y) nounwind noinline { +entry: + %add = add i16 %y, %x + store i16 %add, i16* %dst, align 2, !tbaa !1 + ret void +} + +!opencl.kernels = !{!0} + +!0 = metadata !{void (i16*, i16, i16)* @short_write} +!1 = metadata !{metadata !"short", metadata !2} +!2 = metadata !{metadata !"omnipotent char", metadata !3} +!3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/short.o b/backend/kernels/short.o new file mode 100644 index 0000000000000000000000000000000000000000..bee5c62ae4673612b3e9c731edf0141757755e4e GIT binary patch literal 504 zcmZ>AK5$Qwhk>D*fq{WhfPp~>NV7L8Pjo!N;>jjpWI2hkg-4Z@LsE{VJE?)kmC0!m zw^NFMk%$F{geRAgi;4%=1V!Z|K@B{TibX2yCr$_?tyr+a;n)X8pgEor3=Awln!AI6 z#ZiEz^FI*C9|&ORQ~-(r#f_9sa44iUwJ@kTdvJ29IUnP6R@79P!6PZoadL)&fg8g! z4h1j81ubSkcM0=VEMTvh!EBk)Y<6P<+tUkd9~Q6|E3j7?uoqomdzrvqvVgtJfxQaI zxWM)dmHSSBy{dt|N`So#q6H{x;G*cq$iM`2gaSXaG>~#;VUlnXaWd4=c*ZDu&r$A0 zB1fQr>&6F_?PUe+B@ON63)+hc*sBQ1(Hi>?uYbw}-rL ziM-bscrqMCoD@aeEZ#HlKYhUW_yAwt2EL~X{O1Yy7{2U4 zmN7?z%#nvOX(Eia9nKalhkKkF*efg8i$RJC6&M(pfi&9&k9J|-{i2G(pqOD`0050( Bfg}I` literal 0 HcmV?d00001 diff --git a/backend/kernels/simple_float4.cl b/backend/kernels/simple_float4.cl new file mode 100644 index 0000000..7b47a18 --- /dev/null +++ b/backend/kernels/simple_float4.cl @@ -0,0 +1,7 @@ +#include "stdlib.h" + +__kernel void simple_float4(__global float4 *dst, __global float4 *src) +{ + dst[get_global_id(0)] = src[get_global_id(0)]; +} + diff --git a/backend/kernels/simple_float4.ll b/backend/kernels/simple_float4.ll new file mode 100644 index 0000000..0e05208 --- /dev/null +++ b/backend/kernels/simple_float4.ll @@ -0,0 +1,21 @@ +; ModuleID = 'simple_float4.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @simple_float4(<4 x float>* nocapture %dst, <4 x float>* nocapture %src) nounwind noinline { +get_global_id.exit5: + %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + %arrayidx = getelementptr inbounds <4 x float>* %src, i32 %call.i + %0 = load <4 x float>* %arrayidx, align 16, !tbaa !1 + %arrayidx2 = getelementptr inbounds <4 x float>* %dst, i32 %call.i + store <4 x float> %0, <4 x float>* %arrayidx2, align 16, !tbaa !1 + ret void +} + +declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + +!opencl.kernels = !{!0} + +!0 = metadata !{void (<4 x float>*, <4 x float>*)* @simple_float4} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/simple_float4.o b/backend/kernels/simple_float4.o new file mode 100644 index 0000000000000000000000000000000000000000..62e522df83f688d228864cf4fde651c58f9f7ab4 GIT binary patch literal 596 zcmZ>AK5$Qwhk;=d0|Nu200V;%kY;aGp6Gaj#gk3I$Z`^63y&%*hol@!cT$6ZE0fbC zZl@LjBM}P@2~RE~7Zne#35v=`f*N=v6^m5ZPjCn(tyr+a;n)X8pgEor3=AAVn!AUA z#ZiETlMzIA{s#j20|5~Ve?Tk-HwK1I1qKE_pgJQZ4o-#CrWOV@7Y|NOHJ4+Y&Wf5U zGXx~XIXGu31h_GzF|-MGG%VpUa5iygU{f{N)dGv)PReY)>z+ zeOSO=tiWDnz+QBL?PUUc$pZE=2lgr;;{w|=RPH+g_NoT4O;h-Om;<|u*I0FG9{bJho(Eq5HY*w8G!#@X^jv-K2a z>obR^0L@NRU|^62(!wkUI}R!waWR-@AP_R;BTJ)1k3|pDfdZg311}JVH82FSB%CO? zc|gIniIb<_Db5M}y3fhcam*jJ6%l7A=Q+oEq3GE7*%cstOet7&w8nSi|4_jwbt+ hb@n^@?B8g?xZ%$U2Wbao#WR})>Ni>(XaSiI0s!$qrvU%} literal 0 HcmV?d00001 diff --git a/backend/kernels/simple_float4_2.cl b/backend/kernels/simple_float4_2.cl new file mode 100644 index 0000000..c35d9bb --- /dev/null +++ b/backend/kernels/simple_float4_2.cl @@ -0,0 +1,8 @@ +#include "stdlib.h" + +__kernel void simple_float4(__global float4 *dst, __global float4 *src) +{ + dst[get_global_id(0)] = src[get_global_id(0)] * src[get_global_id(0)]; +} + + diff --git a/backend/kernels/simple_float4_2.ll b/backend/kernels/simple_float4_2.ll new file mode 100644 index 0000000..4f5e1da --- /dev/null +++ b/backend/kernels/simple_float4_2.ll @@ -0,0 +1,22 @@ +; ModuleID = 'simple_float4_2.o' +target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" +target triple = "ptx32--" + +define ptx_kernel void @simple_float4(<4 x float>* nocapture %dst, <4 x float>* nocapture %src) nounwind noinline { +get_global_id.exit10: + %call.i = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + %arrayidx = getelementptr inbounds <4 x float>* %src, i32 %call.i + %0 = load <4 x float>* %arrayidx, align 16, !tbaa !1 + %mul = fmul <4 x float> %0, %0 + %arrayidx4 = getelementptr inbounds <4 x float>* %dst, i32 %call.i + store <4 x float> %mul, <4 x float>* %arrayidx4, align 16, !tbaa !1 + ret void +} + +declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone + +!opencl.kernels = !{!0} + +!0 = metadata !{void (<4 x float>*, <4 x float>*)* @simple_float4} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/simple_float4_2.o b/backend/kernels/simple_float4_2.o new file mode 100644 index 0000000000000000000000000000000000000000..8580e1c1ee3c477382df94627bb86834124232b7 GIT binary patch literal 600 zcmZ>AK5$Qwhk;=-0|Nu200V;%kY;aGp6Gaj#gk3I$Z`^63y&%*hol@!cT$6ZE0fbC zZl@LjBM}P@2~RE~7Zne#35v=`f*N=v6^m5ZPjCn(tyr+a;n)X8pgEor3=AAVn!AUA z#ZiETlMzIA{s#j20|5~Ve?Tk-HwK1I1qKE_pgJQZ4o-#CrWOV@7Y|NOHJ4+Y&Wf5U zGXx~XIXGu31h_GzF|-MGG%VpUa5iygU{f{N)dGv)PReY)>z+ zeOSO=tiWDnz+QBL?PUUc$pZE=2lgr;;{w|=RPH+g_NoT4O;h-Om;<|u*I0FG9{bJho(Eq5HY*w8G!#@X^jv-K2a z>obR^0L@NRU|^5|(!wkUI}R!waWR-@AP_Reh)2?2pyMFN6AcC)b|z3bg*7k)vLu`+ zxOqUqwTY9b+2rQI0|DKF5=q9 literal 0 HcmV?d00001 diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h index bab425d..ca3b2d9 100644 --- a/backend/kernels/stdlib.h +++ b/backend/kernels/stdlib.h @@ -17,188 +17,31 @@ * Author: Benjamin Segovia */ -#pragma OPENCL EXTENSION cl_khr_fp64 : enable +__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id0(void); +__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id1(void); +__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id2(void); +__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id0(void); +__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id1(void); +__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id2(void); + +inline unsigned get_global_id(unsigned int dim) { + if (dim == 0) return __gen_ocl_get_global_id0(); + else if (dim == 1) return __gen_ocl_get_global_id1(); + else if (dim == 2) return __gen_ocl_get_global_id2(); + else return 0; +} + +inline unsigned get_local_id(unsigned int dim) { + if (dim == 0) return __gen_ocl_get_local_id0(); + else if (dim == 1) return __gen_ocl_get_local_id1(); + else if (dim == 2) return __gen_ocl_get_local_id2(); + else return 0; +} + +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef int int2 __attribute__((ext_vector_type(2))); +typedef int int3 __attribute__((ext_vector_type(3))); +typedef int int4 __attribute__((ext_vector_type(4))); -/* Types */ - -/* Standard types from Clang's stddef and stdint, Copyright (C) 2008 Eli Friedman */ -typedef signed __INT64_TYPE__ int64_t; -typedef unsigned __INT64_TYPE__ uint64_t; -typedef signed __INT32_TYPE__ int32_t; -typedef unsigned __INT32_TYPE__ uint32_t; -typedef signed __INT16_TYPE__ int16_t; -typedef unsigned __INT16_TYPE__ uint16_t; -typedef signed __INT8_TYPE__ int8_t; -typedef unsigned __INT8_TYPE__ uint8_t; - -#define __stdint_join3(a,b,c) a ## b ## c -#define __intn_t(n) __stdint_join3( int, n, _t) -#define __uintn_t(n) __stdint_join3(uint, n, _t) - -typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t; -typedef __typeof__(sizeof(int)) size_t; -typedef __intn_t(__INTPTR_WIDTH__) intptr_t; -typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t; - -/* OpenCL types */ -typedef uint8_t uchar; -typedef uint16_t ushort; -typedef uint32_t uint; -typedef uint64_t ulong; - -typedef unsigned int sampler_t; -typedef struct image2d *image2d_t; -typedef struct image3d *image3d_t; - -/* Vectors */ -#define COAL_VECTOR(type, len) \ - typedef type type##len __attribute__((ext_vector_type(len))) -#define COAL_VECTOR_SET(type) \ - COAL_VECTOR(type, 2); \ - COAL_VECTOR(type, 3); \ - COAL_VECTOR(type, 4); \ - COAL_VECTOR(type, 8); \ - COAL_VECTOR(type, 16); - -COAL_VECTOR_SET(char) -COAL_VECTOR_SET(uchar) - -COAL_VECTOR_SET(short) -COAL_VECTOR_SET(ushort) - -COAL_VECTOR_SET(int) -COAL_VECTOR_SET(uint) - -COAL_VECTOR_SET(long) -COAL_VECTOR_SET(ulong) - -COAL_VECTOR_SET(float) - -#undef COAL_VECTOR_SET -#undef COAL_VECTOR - -/* Address spaces */ -#define __private __attribute__((address_space(0))) -#define __global __attribute__((address_space(1))) -#define __local __attribute__((address_space(2))) -#define __constant __attribute__((address_space(3))) - -#define global __global -#define local __local -#define constant __constant -#define private __private - -#define __write_only -#define __read_only const - -#define write_only __write_only -#define read_only __read_only - -/* Defines */ -#define OVERLOAD __attribute__((overloadable)) - -#define CLK_NORMALIZED_COORDS_FALSE 0x00000000 -#define CLK_NORMALIZED_COORDS_TRUE 0x00000001 -#define CLK_ADDRESS_NONE 0x00000000 -#define CLK_ADDRESS_MIRRORED_REPEAT 0x00000010 -#define CLK_ADDRESS_REPEAT 0x00000020 -#define CLK_ADDRESS_CLAMP_TO_EDGE 0x00000030 -#define CLK_ADDRESS_CLAMP 0x00000040 -#define CLK_FILTER_NEAREST 0x00000000 -#define CLK_FILTER_LINEAR 0x00000100 - -#define CLK_LOCAL_MEM_FENCE 0x00000001 -#define CLK_GLOBAL_MEM_FENCE 0x00000002 - -#define CLK_R 0x10B0 -#define CLK_A 0x10B1 -#define CLK_RG 0x10B2 -#define CLK_RA 0x10B3 -#define CLK_RGB 0x10B4 -#define CLK_RGBA 0x10B5 -#define CLK_BGRA 0x10B6 -#define CLK_ARGB 0x10B7 -#define CLK_INTENSITY 0x10B8 -#define CLK_LUMINANCE 0x10B9 -#define CLK_Rx 0x10BA -#define CLK_RGx 0x10BB -#define CLK_RGBx 0x10BC - -#define CLK_SNORM_INT8 0x10D0 -#define CLK_SNORM_INT16 0x10D1 -#define CLK_UNORM_INT8 0x10D2 -#define CLK_UNORM_INT16 0x10D3 -#define CLK_UNORM_SHORT_565 0x10D4 -#define CLK_UNORM_SHORT_555 0x10D5 -#define CLK_UNORM_INT_101010 0x10D6 -#define CLK_SIGNED_INT8 0x10D7 -#define CLK_SIGNED_INT16 0x10D8 -#define CLK_SIGNED_INT32 0x10D9 -#define CLK_UNSIGNED_INT8 0x10DA -#define CLK_UNSIGNED_INT16 0x10DB -#define CLK_UNSIGNED_INT32 0x10DC -#define CLK_HALF_FLOAT 0x10DD -#define CLK_FLOAT 0x10DE - -#define M_E 2.7182818284590452354 /* e */ -#define M_LOG2E 1.4426950408889634074 /* log_2 e */ -#define M_LOG10E 0.43429448190325182765 /* log_10 e */ -#define M_LN2 0.69314718055994530942 /* log_e 2 */ -#define M_LN10 2.30258509299404568402 /* log_e 10 */ -#define M_PI 3.14159265358979323846 /* pi */ -#define M_PI_2 1.57079632679489661923 /* pi/2 */ -#define M_PI_4 0.78539816339744830962 /* pi/4 */ -#define M_1_PI 0.31830988618379067154 /* 1/pi */ -#define M_2_PI 0.63661977236758134308 /* 2/pi */ -#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ -#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ -#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ - -/* Typedefs */ -typedef unsigned int cl_mem_fence_flags; - -/* Management functions */ -uint get_work_dim(); -size_t get_global_size(uint dimindx); -size_t get_global_id(uint dimindx); -size_t get_local_size(uint dimindx); -size_t get_local_id(uint dimindx); -size_t get_num_groups(uint dimindx); -size_t get_group_id(uint dimindx); -size_t get_global_offset(uint dimindx); - -void barrier(cl_mem_fence_flags flags); - -/* Image functions */ -float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, int2 coord); -float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, int4 coord); -float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, float2 coord); -float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord); -int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, int2 coord); -int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, int4 coord); -int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord); -int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord); -uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord); -uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, int4 coord); -uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord); -uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord); - -void OVERLOAD write_imagef(image2d_t image, int2 coord, float4 color); -void OVERLOAD write_imagef(image3d_t image, int4 coord, float4 color); -void OVERLOAD write_imagei(image2d_t image, int2 coord, int4 color); -void OVERLOAD write_imagei(image3d_t image, int4 coord, int4 color); -void OVERLOAD write_imageui(image2d_t image, int2 coord, uint4 color); -void OVERLOAD write_imageui(image3d_t image, int4 coord, uint4 color); - -int2 OVERLOAD get_image_dim(image2d_t image); -int4 OVERLOAD get_image_dim(image3d_t image); -int OVERLOAD get_image_width(image2d_t image); -int OVERLOAD get_image_width(image3d_t image); -int OVERLOAD get_image_height(image2d_t image); -int OVERLOAD get_image_height(image3d_t image); -int OVERLOAD get_image_depth(image3d_t image); - -int OVERLOAD get_image_channel_data_type(image2d_t image); -int OVERLOAD get_image_channel_data_type(image3d_t image); -int OVERLOAD get_image_channel_order(image2d_t image); -int OVERLOAD get_image_channel_order(image3d_t image); diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 4911d55..ffa489e 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -54,6 +54,11 @@ namespace ir { void endFunction(void); /*! Create a new register with the given family for the current function */ Register reg(RegisterData::Family family); + /*! Create a new immediate value */ + template INLINE ImmediateIndex newImmediate(T value) { + const Immediate imm(value); + return fn->newImmediate(imm); + } /*! Create a new register holding the given value. A LOADI is pushed */ template INLINE Register immReg(T value) { GBE_ASSERTM(fn != NULL, "No function currently defined"); @@ -70,6 +75,10 @@ namespace ir { void input(Register reg); /*! Append a new output register for the function */ void output(Register reg); + /*! Get the immediate value */ + INLINE Immediate getImmediate(ImmediateIndex index) const { + return fn->getImmediate(index); + } /*! Get the current processed function */ Function &getFunction(void); /*! Get the current processed unit */ @@ -79,8 +88,13 @@ namespace ir { GBE_ASSERTM(fn != NULL, "No function currently defined"); return fn->file.appendTuple(args...); } + /*! Make a tuple from an array of register */ + INLINE Tuple arrayTuple(const Register *reg, uint32_t regNum) { + GBE_ASSERTM(fn != NULL, "No function currently defined"); + return fn->file.appendArrayTuple(reg, regNum); + } /*! We just use variadic templates to forward instruction functions */ -#define DECL_INSN(NAME, FAMILY) \ +#define DECL_INSN(NAME, FAMILY) \ template INLINE void NAME(Args...args); #include "ir/instruction.hxx" #undef DECL_INSN diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 8c2cc82..8ef0c0d 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -97,7 +97,7 @@ namespace ir { /*! Get the register file */ INLINE const RegisterFile &getRegisterFile(void) const { return file; } /*! Get the given value ie immediate from the function */ - INLINE Immediate getImmediate(uint32_t ID) const { + INLINE Immediate getImmediate(ImmediateIndex ID) const { GBE_ASSERT(ID < immediateNum()); return immediates[ID]; } diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp index 25e20ba..fa00f83 100644 --- a/backend/src/ir/register.cpp +++ b/backend/src/ir/register.cpp @@ -50,6 +50,15 @@ namespace ir { return out; } + Tuple RegisterFile::appendArrayTuple(const Register *reg, uint32_t regNum) { + const Tuple index = Tuple(regTuples.size()); + for (uint32_t regID = 0; regID < regNum; ++regID) { + GBE_ASSERTM(reg[regID] < this->regNum(), "Out-of-bound register"); + regTuples.push_back(reg[regID]); + } + return index; + } + } /* namespace ir */ } /* namespace gbe */ diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp index b2acfdd..94d90e3 100644 --- a/backend/src/ir/register.hpp +++ b/backend/src/ir/register.hpp @@ -87,6 +87,8 @@ namespace ir { regs.push_back(reg); return Register(index); } + /*! Make a tuple from an array of register */ + Tuple appendArrayTuple(const Register *reg, uint32_t regNum); /*! Make a tuple and return the index to the first element of the tuple */ template INLINE Tuple appendTuple(First first, Rest... rest) { diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 3cff620..84ca795 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -17,6 +17,12 @@ * Author: Benjamin Segovia */ +/** + * \file llvm_gen_backend.cpp + * \author Benjamin Segovia + * + * Transform the LLVM IR code into Gen IR code + */ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -69,6 +75,171 @@ using namespace llvm; namespace gbe { + /*! Gen IR manipulates only scalar types */ + static bool isScalarType(const Type *type) + { + return type->isFloatTy() || + type->isIntegerTy() || + type->isDoubleTy() || + type->isPointerTy(); + } + + /*! LLVM IR Type to Gen IR type translation */ + static ir::Type getType(const ir::Context &ctx, const Type *type) + { + GBE_ASSERT(isScalarType(type)); + if (type->isFloatTy() == true) + return ir::TYPE_FLOAT; + if (type->isDoubleTy() == true) + return ir::TYPE_DOUBLE; + if (type->isPointerTy() == true) { + if (ctx.getPointerSize() == ir::POINTER_32_BITS) + return ir::TYPE_U32; + else + return ir::TYPE_U64; + } + GBE_ASSERT(type->isIntegerTy() == true); + if (type == Type::getInt1Ty(type->getContext())) + return ir::TYPE_BOOL; + if (type == Type::getInt8Ty(type->getContext())) + return ir::TYPE_S8; + if (type == Type::getInt16Ty(type->getContext())) + return ir::TYPE_S16; + if (type == Type::getInt32Ty(type->getContext())) + return ir::TYPE_S32; + if (type == Type::getInt64Ty(type->getContext())) + return ir::TYPE_S64; + GBE_ASSERT(0); + return ir::TYPE_S64; + } + + /*! Type to register family translation */ + static ir::RegisterData::Family getFamily(const ir::Context &ctx, const Type *type) + { + GBE_ASSERT(isScalarType(type) == true); + if (type == Type::getInt1Ty(type->getContext())) + return ir::RegisterData::BOOL; + if (type == Type::getInt8Ty(type->getContext())) + return ir::RegisterData::BYTE; + if (type == Type::getInt16Ty(type->getContext())) + return ir::RegisterData::WORD; + if (type == Type::getInt32Ty(type->getContext()) || type->isFloatTy()) + return ir::RegisterData::DWORD; + if (type == Type::getInt64Ty(type->getContext()) || type->isDoubleTy()) + return ir::RegisterData::QWORD; + if (type->isPointerTy() && ctx.getPointerSize() == ir::POINTER_32_BITS) + return ir::RegisterData::DWORD; + if (type->isPointerTy() && ctx.getPointerSize() == ir::POINTER_64_BITS) + return ir::RegisterData::QWORD; + GBE_ASSERT(0); + return ir::RegisterData::BOOL; + } + + /*! Handle the LLVM IR Value to Gen IR register translation. This has 2 roles: + * - Split the LLVM vector into several scalar values + * - Handle the transparent copies (bitcast or use of intrincics functions + * like get_local_id / get_global_id + */ + class RegisterTranslator + { + public: + RegisterTranslator(ir::Context &ctx) : ctx(ctx) {} + + /*! Empty the maps */ + void clear(void) { + valueMap.clear(); + scalarMap.clear(); + } + /*! Some values will not be allocated. For example, a bit-cast destination + * like: %fake = bitcast %real or a vector insertion since we do not have + * vectors in Gen-IR + */ + void newValueProxy(Value *real, + Value *fake, + uint32_t realIndex = 0u, + uint32_t fakeIndex = 0u) { + const ValueIndex key(fake, fakeIndex); + const ValueIndex value(real, realIndex); + GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice + valueMap[key] = value; + } + /*! Mostly used for the preallocated registers (lids, gids) */ + void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) { + const ValueIndex key(value, index); + GBE_ASSERT(scalarMap.find(key) == scalarMap.end()); + scalarMap[key] = reg; + } + /*! Allocate a new scalar register */ + ir::Register newScalar(Value *value, uint32_t index = 0u) + { + GBE_ASSERT(dyn_cast(value) == NULL); + Type *type = value->getType(); + auto typeID = type->getTypeID(); + switch (typeID) { + case Type::IntegerTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::PointerTyID: + GBE_ASSERT(index == 0); + return this->newScalar(value, type, index); + break; + case Type::VectorTyID: + { + auto vectorType = cast(type); + auto elementType = vectorType->getElementType(); + auto elementTypeID = elementType->getTypeID(); + if (elementTypeID != Type::IntegerTyID && + elementTypeID != Type::FloatTyID && + elementTypeID != Type::DoubleTyID) + GBE_ASSERTM(false, "Vectors of elements are not supported"); + return this->newScalar(value, elementType, index); + break; + } + default: NOT_SUPPORTED; + }; + return ir::Register(); + } + /*! Get the register from the given value at given index possibly iterating + * in the value map to get the final real register + */ + ir::Register getScalar(Value *value, uint32_t index = 0u) { + auto end = valueMap.end(); + for (;;) { + auto it = valueMap.find(std::make_pair(value, index)); + if (it == end) + break; + else { + value = it->second.first; + index = it->second.second; + } + } + const auto key = std::make_pair(value, index); + GBE_ASSERT(scalarMap.find(key) != scalarMap.end()); + return scalarMap[key]; + } + + private: + /*! This maps a scalar register to a Value (index is the vector index when + * the value is a vector of scalars) + */ + ir::Register newScalar(Value *value, Type *type, uint32_t index) { + const auto key = std::make_pair(value, index); + GBE_ASSERT(scalarMap.find(key) == scalarMap.end()); + const ir::RegisterData::Family family = getFamily(ctx, type); + const ir::Register reg = ctx.reg(family); + scalarMap[key] = reg; + return reg; + } + /*! Indices will be zero for scalar values */ + typedef std::pair ValueIndex; + /*! Map value to ir::Register */ + map scalarMap; + /*! Map values to values when this is only a translation (eq bitcast) */ + map valueMap; + /*! Actually allocates the registers */ + ir::Context &ctx; + }; + class CBEMCAsmInfo : public MCAsmInfo { public: CBEMCAsmInfo() { @@ -77,30 +248,36 @@ namespace gbe } }; - /// GenWriter - This class is the main chunk of code that converts an LLVM - /// module to a C translation unit. + /*! Translate LLVM IR code to Gen IR code */ class GenWriter : public FunctionPass, public InstVisitor { + /*! Unit to compute */ ir::Unit &unit; + /*! Helper structure to compute the unit */ ir::Context ctx; + /*! Make the LLVM-to-Gen translation */ + RegisterTranslator regTranslator; + /*! Map value to ir::LabelIndex */ + map labelMap; + /*! We visit each function twice. Once to allocate the registers and once to + * emit the Gen IR instructions + */ + enum Pass { + PASS_EMIT_REGISTERS = 0, + PASS_EMIT_INSTRUCTIONS = 1 + } pass; + std::string FDOutErr; tool_output_file *FDOut; formatted_raw_ostream Out; - IntrinsicLowering *IL; Mangler *Mang; LoopInfo *LI; const Module *TheModule; + const MCObjectFileInfo *MOFI; + const TargetData* TD; const MCAsmInfo* TAsm; const MCRegisterInfo *MRI; - const MCObjectFileInfo *MOFI; MCContext *TCtx; - const TargetData* TD; - - /*! Map value to ir::Register*/ - map registerMap; - - /*! Map value to ir::LabelIndex */ - map labelMap; std::map FPConstantMap; std::set intrinsicPrototypesAlreadyGenerated; @@ -120,14 +297,16 @@ namespace gbe : FunctionPass(ID), unit(unit), ctx(unit), + regTranslator(ctx), FDOut(new llvm::tool_output_file("-", FDOutErr, 0)), Out(FDOut->os()), - IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0), + Mang(0), LI(0), + TheModule(0), MOFI(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); FPCounter = 0; + pass = PASS_EMIT_REGISTERS; } virtual const char *getPassName() const { return "Gen Back-End"; } @@ -156,12 +335,8 @@ namespace gbe virtual bool doFinalization(Module &M) { // Free memory... - delete IL; delete TD; delete Mang; - delete TCtx; - delete TAsm; - delete MRI; delete MOFI; FPConstantMap.clear(); ByValParams.clear(); @@ -203,43 +378,30 @@ namespace gbe void writeOperand(Value *Operand, bool Static = false); void writeInstComputationInline(Instruction &I); void writeOperandInternal(Value *Operand, bool Static = false); - void writeOperandWithCast(Value* Operand, unsigned Opcode); - void writeOperandWithCast(Value* Operand, const ICmpInst &I); - bool writeInstructionCast(const Instruction &I); - - private : /// Prints the definition of the intrinsic function F. Supports the /// intrinsics which need to be explicitly defined in the CBackend. void printIntrinsicDefinition(const Function &F, raw_ostream &Out); - void printModuleTypes(); void printContainedStructs(Type *Ty, SmallPtrSet &); void printFloatingPointConstants(Function &F); void printFloatingPointConstants(const Constant *C); - void emitFunctionSignature(const Function *F, bool Prototype); /*! Emit the complete function code and declaration */ void emitFunction(Function &F); /*! Handle input and output function parameters */ - void emitFunctionPrototype(const Function *F); + void emitFunctionPrototype(Function &F); /*! Emit the code for a basic block */ void emitBasicBlock(BasicBlock *BB); - /*! Get the register family from the given type */ - INLINE ir::RegisterData::Family getArgumentFamily(const Type*) const; - /*! Insert a new register when this is a scalar value */ - INLINE ir::Register newRegister(const Value *value); + /*! Alocate one or several registers (if vector) for the value */ + INLINE void newRegister(Value *value); /*! Return a valid register from an operand (can use LOADI to make one) */ - INLINE ir::Register getRegister(Value *value); - /*! Return a valid register for a constant value */ - INLINE ir::Register getConstantRegister(Constant *CPV); + INLINE ir::Register getRegister(Value *value, uint32_t index = 0); + /*! Create a new immediate from a constant */ + ir::ImmediateIndex newImmediate(Constant *CPV); /*! Insert a new label index when this is a scalar value */ INLINE void newLabelIndex(const Value *value); - /*! int / float / double / bool are scalars */ - INLINE bool isScalarType(const Type *type) const; - /*! Get the Gen IR type from the LLVM type */ - INLINE ir::Type getType(const Type *type) const; void printBasicBlock(BasicBlock *BB); @@ -312,16 +474,27 @@ namespace gbe return false; } - // Instruction visitation functions - friend class InstVisitor; + /*! Helper function to emit loads and stores */ + template void emitLoadOrStore(T &I); // Currently supported instructions - void visitBinaryOperator(Instruction &I); - void visitReturnInst(ReturnInst &I); - void visitLoadInst(LoadInst &I); - void visitStoreInst(StoreInst &I); - void visitCallInst (CallInst &I); - bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee); +#define DECL_VISIT_FN(NAME, TYPE) \ + void regAllocate##NAME(TYPE &I); \ + void emit##NAME(TYPE &I); \ + void visit##NAME(TYPE &I) { \ + if (pass == PASS_EMIT_INSTRUCTIONS) \ + emit##NAME(I); \ + else \ + regAllocate##NAME(I); \ + } + DECL_VISIT_FN(BinaryOperator, Instruction); + DECL_VISIT_FN(CastInst, CastInst); + DECL_VISIT_FN(ReturnInst, ReturnInst); + DECL_VISIT_FN(LoadInst, LoadInst); + DECL_VISIT_FN(StoreInst, StoreInst); + DECL_VISIT_FN(CallInst, CallInst); + +#undef DECL_VISIT_FN // Must be implemented later void visitInsertElementInst(InsertElementInst &I) {NOT_SUPPORTED;} @@ -333,7 +506,6 @@ namespace gbe void visitBranchInst(BranchInst &I) {NOT_SUPPORTED;} void visitICmpInst(ICmpInst &I) {NOT_SUPPORTED;} void visitFCmpInst(FCmpInst &I) {NOT_SUPPORTED;} - void visitCastInst (CastInst &I); void visitSelectInst(SelectInst &I) {NOT_SUPPORTED;} // These instructions are not supported at all @@ -1354,122 +1526,6 @@ static std::string CBEMangle(const std::string &S) { Out << ')'; } - // Some instructions need to have their result value casted back to the - // original types because their operands were casted to the expected type. - // This function takes care of detecting that case and printing the cast - // for the Instruction. - bool GenWriter::writeInstructionCast(const Instruction &I) { - Type *Ty = I.getOperand(0)->getType(); - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: - Out << "(("; - printSimpleType(Out, Ty, false); - Out << ")("; - return true; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: - Out << "(("; - printSimpleType(Out, Ty, true); - Out << ")("; - return true; - default: break; - } - return false; - } - - // Write the operand with a cast to another type based on the Opcode being used. - // This will be used in cases where an instruction has specific type - // requirements (usually signedness) for its operands. - void GenWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { - - // Extract the operand's type, we'll need it. - Type* OpTy = Operand->getType(); - - // Indicate whether to do the cast or not. - bool shouldCast = false; - - // Indicate whether the cast should be to a signed type or not. - bool castIsSigned = false; - - // Based on the Opcode for which this Operand is being written, determine - // the new type to which the operand should be casted by setting the value - // of OpTy. If we change OpTy, also set shouldCast to true. - switch (Opcode) { - default: - // for most instructions, it doesn't matter - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::UDiv: - case Instruction::URem: // Cast to unsigned first - shouldCast = true; - castIsSigned = false; - break; - case Instruction::GetElementPtr: - case Instruction::AShr: - case Instruction::SDiv: - case Instruction::SRem: // Cast to signed first - shouldCast = true; - castIsSigned = true; - break; - } - - // Write out the casted operand if we should, otherwise just write the - // operand. - if (shouldCast) { - Out << "(("; - printSimpleType(Out, OpTy, castIsSigned); - Out << ")"; - writeOperand(Operand); - Out << ")"; - } else - writeOperand(Operand); - } - - // Write the operand with a cast to another type based on the icmp predicate - // being used. - void GenWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { - // This has to do a cast to ensure the operand has the right signedness. - // Also, if the operand is a pointer, we make sure to cast to an integer when - // doing the comparison both for signedness and so that the C compiler doesn't - // optimize things like "p < NULL" to false (p may contain an integer value - // f.e.). - bool shouldCast = Cmp.isRelational(); - - // Write out the casted operand if we should, otherwise just write the - // operand. - if (!shouldCast) { - writeOperand(Operand); - return; - } - - // Should this be a signed comparison? If so, convert to signed. - bool castIsSigned = Cmp.isSigned(); - - // If the operand was a pointer, convert to a large integer type. - Type* OpTy = Operand->getType(); - if (OpTy->isPointerTy()) - OpTy = TD->getIntPtrType(Operand->getContext()); - - Out << "(("; - printSimpleType(Out, OpTy, castIsSigned); - Out << ")"; - writeOperand(Operand); - Out << ")"; - } - enum SpecialGlobalClass { NotSpecial = 0, GlobalCtors, GlobalDtors, @@ -1482,14 +1538,12 @@ static std::string CBEMangle(const std::string &S) { // Initialize TheModule = &M; - TD = new TargetData(&M); - IL = new IntrinsicLowering(*TD); - IL->AddPrototypes(M); - TAsm = new CBEMCAsmInfo(); + TD = new TargetData(&M); MRI = new MCRegisterInfo(); TCtx = new MCContext(*TAsm, *MRI, NULL); Mang = new Mangler(*TCtx, *TD); + return false; } @@ -1561,57 +1615,6 @@ static std::string CBEMangle(const std::string &S) { } } - - /// printSymbolTable - Run through symbol table looking for type names. If a - /// type name is found, emit its declaration... - /// - void GenWriter::printModuleTypes() { - Out << "/* Helper union for bitcasts */\n"; - Out << "typedef union {\n"; - Out << " unsigned int Int32;\n"; - Out << " unsigned long long Int64;\n"; - Out << " float Float;\n"; - Out << " double Double;\n"; - Out << "} llvmBitCastUnion;\n"; - - // Get all of the struct types used in the module. - std::vector StructTypes; - TheModule->findUsedStructTypes(StructTypes); - - if (StructTypes.empty()) return; - - Out << "/* Structure forward decls */\n"; - - unsigned NextTypeID = 0; - - // If any of them are missing names, add a unique ID to UnnamedStructIDs. - // Print out forward declarations for structure types. - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { - StructType *ST = StructTypes[i]; - - if (ST->isLiteral() || ST->getName().empty()) - UnnamedStructIDs[ST] = NextTypeID++; - - std::string Name = getStructName(ST); - - Out << "typedef struct " << Name << ' ' << Name << ";\n"; - } - - Out << '\n'; - - // Keep track of which structures have been printed so far. - SmallPtrSet StructPrinted; - - // Loop over all structures then push them into the stack so they are - // printed in the correct order. - // - Out << "/* Structure contents */\n"; - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) - if (StructTypes[i]->isStructTy()) - // Only print out used types! - printContainedStructs(StructTypes[i], StructPrinted); - } - // Push the struct onto the stack and recursively push all structs // this one depends on. // @@ -1638,131 +1641,90 @@ static std::string CBEMangle(const std::string &S) { } } - INLINE bool GenWriter::isScalarType(const Type *type) const - { - return type->isFloatTy() || - type->isIntegerTy() || - type->isDoubleTy() || - type->isPointerTy(); - } - - INLINE ir::Type GenWriter::getType(const Type *type) const - { - GBE_ASSERT(this->isScalarType(type)); - if (type->isFloatTy() == true) - return ir::TYPE_FLOAT; - if (type->isDoubleTy() == true) - return ir::TYPE_DOUBLE; - if (type->isPointerTy() == true) { - if (ctx.getPointerSize() == ir::POINTER_32_BITS) - return ir::TYPE_U32; - else - return ir::TYPE_U64; - } - GBE_ASSERT(type->isIntegerTy() == true); - if (type == Type::getInt1Ty(type->getContext())) - return ir::TYPE_BOOL; - if (type == Type::getInt8Ty(type->getContext())) - return ir::TYPE_S8; - if (type == Type::getInt16Ty(type->getContext())) - return ir::TYPE_S16; - if (type == Type::getInt32Ty(type->getContext())) - return ir::TYPE_S32; - if (type == Type::getInt64Ty(type->getContext())) - return ir::TYPE_S64; - GBE_ASSERT(0); - return ir::TYPE_S64; - } - - INLINE ir::RegisterData::Family GenWriter::getArgumentFamily(const Type *type) const - { - GBE_ASSERT(this->isScalarType(type) == true); - if (type == Type::getInt1Ty(type->getContext())) - return ir::RegisterData::BOOL; - if (type == Type::getInt8Ty(type->getContext())) - return ir::RegisterData::BYTE; - if (type == Type::getInt16Ty(type->getContext())) - return ir::RegisterData::WORD; - if (type == Type::getInt32Ty(type->getContext()) || type->isFloatTy()) - return ir::RegisterData::DWORD; - if (type == Type::getInt64Ty(type->getContext()) || type->isDoubleTy()) - return ir::RegisterData::QWORD; - if (type->isPointerTy() && ctx.getPointerSize() == ir::POINTER_32_BITS) - return ir::RegisterData::DWORD; - if (type->isPointerTy() && ctx.getPointerSize() == ir::POINTER_64_BITS) - return ir::RegisterData::QWORD; - GBE_ASSERT(0); - return ir::RegisterData::BOOL; - } - - ir::Register GenWriter::newRegister(const Value *value) { - auto it = registerMap.find(value); - if (it == registerMap.end()) { - const Type *type = value->getType(); - const ir::RegisterData::Family family = getArgumentFamily(type); - const ir::Register reg = ctx.reg(family); - registerMap[value] = reg; - return reg; - } else - return it->second; - } - - ir::Register GenWriter::getConstantRegister(Constant *CPV) { + ir::ImmediateIndex GenWriter::newImmediate(Constant *CPV) { if (dyn_cast(CPV)) GBE_ASSERTM(false, "Unsupported constant expression"); else if (isa(CPV) && CPV->getType()->isSingleValueType()) GBE_ASSERTM(false, "Unsupported constant expression"); + + // Integers if (ConstantInt *CI = dyn_cast(CPV)) { Type* Ty = CI->getType(); if (Ty == Type::getInt1Ty(CPV->getContext())) { const bool b = CI->getZExtValue(); - return ctx.immReg(b); + return ctx.newImmediate(b); } else if (Ty == Type::getInt8Ty(CPV->getContext())) { const uint8_t u8 = CI->getZExtValue(); - return ctx.immReg(u8); + return ctx.newImmediate(u8); } else if (Ty == Type::getInt16Ty(CPV->getContext())) { const uint16_t u16 = CI->getZExtValue(); - return ctx.immReg(u16); + return ctx.newImmediate(u16); } else if (Ty == Type::getInt32Ty(CPV->getContext())) { const uint32_t u32 = CI->getZExtValue(); - return ctx.immReg(u32); + return ctx.newImmediate(u32); } else if (Ty == Type::getInt64Ty(CPV->getContext())) { const uint64_t u64 = CI->getZExtValue(); - return ctx.immReg(u64); + return ctx.newImmediate(u64); } else { GBE_ASSERTM(false, "Unsupported integer size"); - return ctx.immReg(uint64_t(0)); + return ctx.newImmediate(uint64_t(0)); } } + // Floats and doubles switch (CPV->getType()->getTypeID()) { - case Type::FloatTyID: - case Type::DoubleTyID: - { - ConstantFP *FPC = cast(CPV); - if (FPC->getType() == Type::getFloatTy(CPV->getContext())) { - const float f32 = FPC->getValueAPF().convertToFloat(); - return ctx.immReg(f32); - } else { - const double f64 = FPC->getValueAPF().convertToDouble(); - return ctx.immReg(f64); + case Type::FloatTyID: + case Type::DoubleTyID: + { + ConstantFP *FPC = cast(CPV); + if (FPC->getType() == Type::getFloatTy(CPV->getContext())) { + const float f32 = FPC->getValueAPF().convertToFloat(); + return ctx.newImmediate(f32); + } else { + const double f64 = FPC->getValueAPF().convertToDouble(); + return ctx.newImmediate(f64); + } } + break; + default: + GBE_ASSERTM(false, "Unsupported constant type"); } - break; - default: - GBE_ASSERTM(false, "Unsupported constant type"); - } - return ctx.immReg(uint64_t(0)); + return ctx.newImmediate(uint64_t(0)); } - ir::Register GenWriter::getRegister(Value *value) { + void GenWriter::newRegister(Value *value) { + auto type = value->getType(); + auto typeID = type->getTypeID(); + switch (typeID) { + case Type::IntegerTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::PointerTyID: + regTranslator.newScalar(value); + break; + case Type::VectorTyID: + { + auto vectorType = cast(type); + const uint32_t elemNum = vectorType->getNumElements(); + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) + regTranslator.newScalar(value, elemID); + break; + } + default: NOT_SUPPORTED; + }; + } + + ir::Register GenWriter::getRegister(Value *value, uint32_t index) { Constant *CPV = dyn_cast(value); - if (CPV && !isa(CPV)) - return getConstantRegister(CPV); - else { - GBE_ASSERT(this->registerMap.find(value) != this->registerMap.end()); - return this->registerMap[value]; + if (CPV && !isa(CPV)) { + const ir::ImmediateIndex index = this->newImmediate(CPV); + const ir::Immediate imm = ctx.getImmediate(index); + const ir::Register reg = ctx.reg(getFamily(imm.type)); + ctx.LOADI(imm.type, reg, index); + return reg; } + else + return regTranslator.getScalar(value, index); } void GenWriter::newLabelIndex(const Value *value) { @@ -1787,23 +1749,23 @@ static std::string CBEMangle(const std::string &S) { } } - void GenWriter::emitFunctionPrototype(const Function *F) + void GenWriter::emitFunctionPrototype(Function &F) { - const bool returnStruct = F->hasStructRetAttr(); + const bool returnStruct = F.hasStructRetAttr(); // Loop over the arguments and output registers for them - if (!F->arg_empty()) { - Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + if (!F.arg_empty()) { + Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); // When a struct is returned, first argument is pointer to the structure - if (returnStruct) { - ir::Function &fn = ctx.getFunction(); - fn.setStructReturned(true); - } + if (returnStruct) + ctx.getFunction().setStructReturned(true); - // Insert a new register if we need to + // Insert a new register for each function argument for (; I != E; ++I) { - const ir::Register reg = this->newRegister(I); + const Type *type = I->getType(); + GBE_ASSERT(isScalarType(type) == true); + const ir::Register reg = regTranslator.newScalar(I); ctx.input(reg); } } @@ -1811,9 +1773,9 @@ static std::string CBEMangle(const std::string &S) { // When returning a structure, first input register is the pointer to the // structure if (!returnStruct) { - const Type *type = F->getReturnType(); + const Type *type = F.getReturnType(); if (type->isVoidTy() == false) { - const ir::RegisterData::Family family = getArgumentFamily(type); + const ir::RegisterData::Family family = getFamily(ctx, type); const ir::Register reg = ctx.reg(family); ctx.output(reg); } @@ -1821,120 +1783,11 @@ static std::string CBEMangle(const std::string &S) { #if GBE_DEBUG // Variable number of arguments is not supported - FunctionType *FT = cast(F->getFunctionType()); + FunctionType *FT = cast(F.getFunctionType()); GBE_ASSERT(FT->isVarArg() == false); #endif /* GBE_DEBUG */ } - void GenWriter::emitFunctionSignature(const Function *F, bool Prototype) - { - /// isStructReturn - Should this function actually return a struct by-value? - bool isStructReturn = F->hasStructRetAttr(); - - // Loop over the arguments, printing them... - FunctionType *FT = cast(F->getFunctionType()); - const AttrListPtr &PAL = F->getAttributes(); - - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - - // Print out the name... - FunctionInnards << GetValueName(F) << '('; - - bool PrintedArg = false; - if (!F->isDeclaration()) { - if (!F->arg_empty()) { - Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - unsigned Idx = 1; - - // If this is a struct-return function, don't print the hidden - // struct-return argument. - if (isStructReturn) { - assert(I != E && "Invalid struct return function!"); - ++I; - ++Idx; - } - - std::string ArgName; - for (; I != E; ++I) { - if (PrintedArg) FunctionInnards << ", "; - if (I->hasName() || !Prototype) { - ArgName = GetValueName(I); - } else { - GBE_ASSERT(0); - ArgName = ""; - } - Type *ArgTy = I->getType(); - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - ArgTy = cast(ArgTy)->getElementType(); - ByValParams.insert(I); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), - ArgName); - PrintedArg = true; - ++Idx; - } - } - } else { - GBE_ASSERT(0); - - // Loop over the arguments, printing them. - FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end(); - unsigned Idx = 1; - - // If this is a struct-return function, don't print the hidden - // struct-return argument. - if (isStructReturn) { - assert(I != E && "Invalid struct return function!"); - ++I; - ++Idx; - } - - for (; I != E; ++I) { - if (PrintedArg) FunctionInnards << ", "; - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast(ArgTy)->getElementType(); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt)); - PrintedArg = true; - ++Idx; - } - } - - if (!PrintedArg && FT->isVarArg()) { - FunctionInnards << "int vararg_dummy_arg"; - PrintedArg = true; - } - - // Finish printing arguments... if this is a vararg function, print the ..., - // unless there are no known types, in which case, we just emit (). - // - if (FT->isVarArg() && PrintedArg) { - FunctionInnards << ",..."; // Output varargs portion of signature! - } else if (!FT->isVarArg() && !PrintedArg) { - FunctionInnards << "void"; // ret() -> ret(void) in C. - } - FunctionInnards << ')'; - - // Get the return tpe for the function. - Type *RetTy; - if (!isStructReturn) - RetTy = F->getReturnType(); - else { - // If this is a struct-return function, print the struct-return type. - RetTy = cast(FT->getParamType(0))->getElementType(); - } - - // Print out the return type and the signature built above. - printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), - FunctionInnards.str()); - } - static inline bool isFPIntBitCast(const Instruction &I) { if (!isa(I)) return false; @@ -1947,20 +1800,22 @@ static std::string CBEMangle(const std::string &S) { void GenWriter::emitFunction(Function &F) { ctx.startFunction(GetValueName(&F)); - this->registerMap.clear(); + this->regTranslator.clear(); this->labelMap.clear(); - this->emitFunctionPrototype(&F); + this->emitFunctionPrototype(F); - // We create all the register variables + // Visit all the instructions and emit the IR registers or the value to + // value mapping + pass = PASS_EMIT_REGISTERS; for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) - if (I->getType() != Type::getVoidTy(F.getContext())) - this->newRegister(&*I); + visit(*I); // First create all the labels (one per block) for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) this->newLabelIndex(BB); - // ... then, emit the code for all basic blocks + // ... then, emit the instructions for all basic blocks + pass = PASS_EMIT_INSTRUCTIONS; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) emitBasicBlock(BB); ctx.endFunction(); @@ -2000,12 +1855,9 @@ static std::string CBEMangle(const std::string &S) { visit(*BB->getTerminator()); } + void GenWriter::regAllocateReturnInst(ReturnInst &I) {} - // Specific Instruction type classes... note that all of the casts are - // necessary because we use the instruction classes as opaque types... - // - void GenWriter::visitReturnInst(ReturnInst &I) { - // If this is a struct return function, return the temporary struct. + void GenWriter::emitReturnInst(ReturnInst &I) { const ir::Function &fn = ctx.getFunction(); GBE_ASSERTM(fn.outputNum() <= 1, "no more than one value can be returned"); if (fn.outputNum() == 1 && I.getNumOperands() > 0) { @@ -2015,30 +1867,8 @@ static std::string CBEMangle(const std::string &S) { ctx.MOV(ir::getType(family), dst, src); } ctx.RET(); - - bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr(); - if (isStructReturn) { - Out << " return StructReturn;\n"; - return; - } - - // Don't output a void return if this is the last basic block in the function - if (I.getNumOperands() == 0 && - &*--I.getParent()->getParent()->end() == I.getParent() && - !I.getParent()->size() == 1) { - return; - } -#if 0 - Out << " return"; - if (I.getNumOperands()) { - Out << ' '; - writeOperand(I.getOperand(0)); - } - Out << ";\n"; -#endif } - bool GenWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) { /// FIXME: This should be reenabled, but loop reordering safe!! return true; @@ -2052,195 +1882,59 @@ static std::string CBEMangle(const std::string &S) { return true; return false; } -#if 0 - void GenWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock, - BasicBlock *Successor, - unsigned Indent) { - for (BasicBlock::iterator I = Successor->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - // Now we have to do the printing. - Value *IV = PN->getIncomingValueForBlock(CurBlock); - if (!isa(IV)) { - Out << std::string(Indent, ' '); - Out << " " << GetValueName(I) << "__PHI_TEMPORARY = "; - writeOperand(IV); - Out << "; /* for PHI node */\n"; - } - } - } - void GenWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ, - unsigned Indent) { - if (isGotoCodeNecessary(CurBB, Succ)) { - Out << std::string(Indent, ' ') << " goto "; - writeOperand(Succ); - Out << ";\n"; - } - } - - // Branch instruction printing - Avoid printing out a branch to a basic block - // that immediately succeeds the current one. - // - void GenWriter::visitBranchInst(BranchInst &I) + void GenWriter::regAllocateBinaryOperator(Instruction &I) { - - if (I.isConditional()) { - if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) { - Out << " if ("; - writeOperand(I.getCondition()); - Out << ") {\n"; - - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2); - printBranchToBlock(I.getParent(), I.getSuccessor(0), 2); - - if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) { - Out << " } else {\n"; - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2); - printBranchToBlock(I.getParent(), I.getSuccessor(1), 2); - } - } else { - // First goto not necessary, assume second one is... - Out << " if (!"; - writeOperand(I.getCondition()); - Out << ") {\n"; - - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2); - printBranchToBlock(I.getParent(), I.getSuccessor(1), 2); - } - - Out << " }\n"; - } else { - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0); - printBranchToBlock(I.getParent(), I.getSuccessor(0), 0); - } - Out << "\n"; + this->newRegister(&I); } - // PHI nodes get copied into temporary values at the end of predecessor basic - // blocks. We now need to copy these temporary values into the REAL value for - // the PHI. - - void GenWriter::visitPHINode(PHINode &I) { - NOT_SUPPORTED; - writeOperand(&I); - Out << "__PHI_TEMPORARY"; - } -#endif - - void GenWriter::visitBinaryOperator(Instruction &I) + void GenWriter::emitBinaryOperator(Instruction &I) { - GBE_ASSERT(!I.getType()->isPointerTy()); - GBE_ASSERT(this->registerMap.find(&I) != this->registerMap.end()); - const ir::Register dst = this->registerMap[&I]; - const ir::Register src0 = this->getRegister(I.getOperand(0)); - const ir::Register src1 = this->getRegister(I.getOperand(1)); - const ir::Type type = this->getType(I.getType()); - - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break; - case Instruction::Sub: - case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break; - case Instruction::Mul: - case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: ctx.REM(type, dst, src0, src1); break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break; - case Instruction::And: ctx.AND(type, dst, src0, src1); break; - case Instruction::Or: ctx.OR(type, dst, src0, src1); break; - case Instruction::Xor: ctx.XOR(type, dst, src0, src1); break; - case Instruction::Shl : ctx.SHL(type, dst, src0, src1); break; - case Instruction::LShr: ctx.SHR(type, dst, src0, src1); break; - case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break; - default: - GBE_ASSERT(0); - }; - -#if 0 - // binary instructions, shift instructions, setCond instructions. - assert(!I.getType()->isPointerTy()); - // We must cast the results of binary operations which might be promoted. - bool needsCast = false; - if ((I.getType() == Type::getInt8Ty(I.getContext())) || - (I.getType() == Type::getInt16Ty(I.getContext())) - || (I.getType() == Type::getFloatTy(I.getContext()))) { - needsCast = true; - Out << "(("; - printType(Out, I.getType(), false); - Out << ")("; - } - - // If this is a negation operation, print it out as such. For FP, we don't - // want to print "-0.0 - X". - if (BinaryOperator::isNeg(&I)) { - Out << "-("; - writeOperand(BinaryOperator::getNegArgument(cast(&I))); - Out << ")"; - } else if (BinaryOperator::isFNeg(&I)) { - Out << "-("; - writeOperand(BinaryOperator::getFNegArgument(cast(&I))); - Out << ")"; - } else if (I.getOpcode() == Instruction::FRem) { - // Output a call to fmod/fmodf instead of emitting a%b - if (I.getType() == Type::getFloatTy(I.getContext())) - Out << "fmodf("; - else if (I.getType() == Type::getDoubleTy(I.getContext())) - Out << "fmod("; - else // all 3 flavors of long double - Out << "fmodl("; - writeOperand(I.getOperand(0)); - Out << ", "; - writeOperand(I.getOperand(1)); - Out << ")"; + GBE_ASSERT(I.getType()->isPointerTy() == false); + + // Get the element type for a vector + ir::Type type; + uint32_t elemNum; + Type *llvmType = I.getType(); + if (llvmType->isVectorTy() == true) { + VectorType *vectorType = cast(llvmType); + Type *elementType = vectorType->getElementType(); + elemNum = vectorType->getNumElements(); + type = getType(ctx, elementType); } else { + elemNum = 1; + type = getType(ctx, llvmType); + } - // Write out the cast of the instruction's value back to the proper type - // if necessary. - bool NeedsClosingParens = writeInstructionCast(I); - - // Certain instructions require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I.getOpcode()); + // Emit the instructions in a row + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + const ir::Register dst = this->getRegister(&I, elemID); + const ir::Register src0 = this->getRegister(I.getOperand(0), elemID); + const ir::Register src1 = this->getRegister(I.getOperand(1), elemID); switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl : Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: -#ifndef NDEBUG - errs() << "Invalid operator type!" << I; -#endif - llvm_unreachable(0); - } - - writeOperandWithCast(I.getOperand(1), I.getOpcode()); - if (NeedsClosingParens) - Out << "))"; - } - - if (needsCast) { - Out << "))"; + case Instruction::Add: + case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break; + case Instruction::Sub: + case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break; + case Instruction::Mul: + case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break; + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: ctx.REM(type, dst, src0, src1); break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break; + case Instruction::And: ctx.AND(type, dst, src0, src1); break; + case Instruction::Or: ctx.OR(type, dst, src0, src1); break; + case Instruction::Xor: ctx.XOR(type, dst, src0, src1); break; + case Instruction::Shl : ctx.SHL(type, dst, src0, src1); break; + case Instruction::LShr: ctx.SHR(type, dst, src0, src1); break; + case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break; + default: + GBE_ASSERT(0); + }; } -#endif } #if 0 @@ -2322,85 +2016,41 @@ static std::string CBEMangle(const std::string &S) { Out << ")"; } #endif -#if 0 - static const char * getFloatBitCastField(Type *Ty) { - switch (Ty->getTypeID()) { - default: llvm_unreachable("Invalid Type"); - case Type::FloatTyID: return "Float"; - case Type::DoubleTyID: return "Double"; - case Type::IntegerTyID: { - unsigned NumBits = cast(Ty)->getBitWidth(); - if (NumBits <= 32) - return "Int32"; - else - return "Int64"; - } - } - } -#endif -#if 1 - void GenWriter::visitCastInst(CastInst &I) { + void GenWriter::regAllocateCastInst(CastInst &I) + { if (I.getOpcode() == Instruction::PtrToInt || I.getOpcode() == Instruction::IntToPtr) { - Value *srcValue = &I, *dstValue = I.getOperand(0); - Type *dstType = dstValue->getType(); - Type *srcType = srcValue->getType(); - const ir::Unit &unit = ctx.getUnit(); - GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType)); - GBE_ASSERT(registerMap.find(dstValue) != registerMap.end()); - registerMap[dstValue] = registerMap[srcValue]; - } else - NOT_SUPPORTED; -#if 0 - Type *DstTy = I.getType(); - Type *SrcTy = I.getOperand(0)->getType(); - if (isFPIntBitCast(I)) { - Out << '('; - // These int<->float and long<->double casts need to be handled specially - Out << GetValueName(&I) << "__BITCAST_TEMPORARY." - << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; - writeOperand(I.getOperand(0)); - Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." - << getFloatBitCastField(I.getType()); - Out << ')'; - return; - } - - Out << '('; - printCast(I.getOpcode(), SrcTy, DstTy); - - // Make a sext from i1 work by subtracting the i1 from 0 (an int). - if (SrcTy == Type::getInt1Ty(I.getContext()) && - I.getOpcode() == Instruction::SExt) - Out << "0-"; - - writeOperand(I.getOperand(0)); - - if (DstTy == Type::getInt1Ty(I.getContext()) && - (I.getOpcode() == Instruction::Trunc || - I.getOpcode() == Instruction::FPToUI || - I.getOpcode() == Instruction::FPToSI || - I.getOpcode() == Instruction::PtrToInt)) { - // Make sure we really get a trunc to bool by anding the operand with 1 - Out << "&1u"; + Value *dstValue = &I; + Value *srcValue = I.getOperand(0); + Constant *CPV = dyn_cast(srcValue); + if (CPV == NULL) { + Type *dstType = dstValue->getType(); + Type *srcType = srcValue->getType(); + GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType)); + regTranslator.newValueProxy(srcValue, dstValue); + } else + this->newRegister(dstValue); } - Out << ')'; -#endif + else + NOT_SUPPORTED; } -#endif -#if 0 - void GenWriter::visitSelectInst(SelectInst &I) { - Out << "(("; - writeOperand(I.getCondition()); - Out << ") ? ("; - writeOperand(I.getTrueValue()); - Out << ") : ("; - writeOperand(I.getFalseValue()); - Out << "))"; + void GenWriter::emitCastInst(CastInst &I) + { + if (I.getOpcode() == Instruction::PtrToInt || + I.getOpcode() == Instruction::IntToPtr) { + Value *srcValue = &I; + Value *dstValue = I.getOperand(0); + Constant *CPV = dyn_cast(srcValue); + if (CPV != NULL) { + const ir::ImmediateIndex index = ctx.newImmediate(CPV); + const ir::Immediate imm = ctx.getImmediate(index); + const ir::Register reg = this->getRegister(dstValue); + ctx.LOADI(imm.type, reg, index); + } + } } -#endif #ifndef NDEBUG static bool isSupportedIntegerSize(IntegerType &T) { @@ -2427,8 +2077,8 @@ static std::string CBEMangle(const std::string &S) { #endif } - void GenWriter::visitCallInst(CallInst &I) - { + void GenWriter::emitCallInst(CallInst &I) {} + void GenWriter::regAllocateCallInst(CallInst &I) { Value *dst = &I; Value *Callee = I.getCalledValue(); GBE_ASSERT(ctx.getFunction().getProfile() == ir::PROFILE_OCL); @@ -2441,136 +2091,23 @@ static std::string CBEMangle(const std::string &S) { // With OCL there is no side effect for any called functions. So do nothing // when there is no returned value if (I.getType() == Type::getVoidTy(I.getContext())) - return; + NOT_SUPPORTED; // Get the name of the called function and handle it. We should use a hash // map later const std::string fnName = Callee->getName(); if (fnName == "__gen_ocl_get_global_id0") - this->registerMap[dst] = ir::ocl::gid0; + regTranslator.newScalarProxy(ir::ocl::gid0, dst); else if (fnName == "__gen_ocl_get_global_id1") - this->registerMap[dst] = ir::ocl::gid1; + regTranslator.newScalarProxy(ir::ocl::gid1, dst); else if (fnName == "__gen_ocl_get_global_id2") - this->registerMap[dst] = ir::ocl::gid2; + regTranslator.newScalarProxy(ir::ocl::gid2, dst); else if (fnName == "__gen_ocl_get_local_id0") - this->registerMap[dst] = ir::ocl::lid0; + regTranslator.newScalarProxy(ir::ocl::lid0, dst); else if (fnName == "__gen_ocl_get_local_id1") - this->registerMap[dst] = ir::ocl::lid1; + regTranslator.newScalarProxy(ir::ocl::lid1, dst); else if (fnName == "__gen_ocl_get_local_id2") - this->registerMap[dst] = ir::ocl::lid2; - -#if 0 - return visitInlineAsm(I); - - bool WroteCallee = false; - - // Handle intrinsic function calls first... - if (Function *F = I.getCalledFunction()) - if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) - if (visitBuiltinCall(I, ID, WroteCallee)) - return; - - Value *Callee = I.getCalledValue(); - Out << (Callee->getName()); - PointerType *PTy = cast(Callee->getType()); - FunctionType *FTy = cast(PTy->getElementType()); - - // If this is a call to a struct-return function, assign to the first - // parameter instead of passing it to the call. - const AttrListPtr &PAL = I.getAttributes(); - bool hasByVal = I.hasByValArgument(); - bool isStructRet = I.hasStructRetAttr(); - if (isStructRet) { - writeOperandDeref(I.getArgOperand(0)); - Out << " = "; - } - - if (I.isTailCall()) Out << " /*tail*/ "; - - if (!WroteCallee) { - // If this is an indirect call to a struct return function, we need to cast - // the pointer. Ditto for indirect calls with byval arguments. - bool NeedsCast = (hasByVal || isStructRet) && !isa(Callee); - - // GCC is a real PITA. It does not permit codegening casts of functions to - // function pointers if they are in a call (it generates a trap instruction - // instead!). We work around this by inserting a cast to void* in between - // the function and the function pointer cast. Unfortunately, we can't just - // form the constant expression here, because the folder will immediately - // nuke it. - // - // Note finally, that this is completely unsafe. ANSI C does not guarantee - // that void* and function pointers have the same size. :( To deal with this - // in the common case, we handle casts where the number of arguments passed - // match exactly. - // - if (ConstantExpr *CE = dyn_cast(Callee)) - if (CE->isCast()) - if (Function *RF = dyn_cast(CE->getOperand(0))) { - NeedsCast = true; - Callee = RF; - } - - if (NeedsCast) { - // Ok, just cast the pointer type. - Out << "(("; - if (isStructRet) - printStructReturnPointerFunctionType(Out, PAL, - cast(I.getCalledValue()->getType())); - else if (hasByVal) - printType(Out, I.getCalledValue()->getType(), false, "", true, PAL); - else - printType(Out, I.getCalledValue()->getType()); - Out << ")(void*)"; - } - writeOperand(Callee); - if (NeedsCast) Out << ')'; - } - - Out << '('; - - bool PrintedArg = false; - if(FTy->isVarArg() && !FTy->getNumParams()) { - Out << "0 /*dummy arg*/"; - PrintedArg = true; - } - - unsigned NumDeclaredParams = FTy->getNumParams(); - CallSite CS(&I); - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - unsigned ArgNo = 0; - if (isStructRet) { // Skip struct return argument. - ++AI; - ++ArgNo; - } - - - for (; AI != AE; ++AI, ++ArgNo) { - if (PrintedArg) Out << ", "; - if (ArgNo < NumDeclaredParams && - (*AI)->getType() != FTy->getParamType(ArgNo)) { - Out << '('; - printType(Out, FTy->getParamType(ArgNo), - /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt)); - Out << ')'; - } - // Check if the argument is expected to be passed by value. - if (I.paramHasAttr(ArgNo+1, Attribute::ByVal)) - writeOperandDeref(*AI); - else - writeOperand(*AI); - PrintedArg = true; - } - Out << ')'; -#endif - } - - /// visitBuiltinCall - Handle the call to the specified builtin. Returns true - /// if the entire call is handled, return false if it wasn't handled, and - /// optionally set 'WroteCallee' if the callee has already been printed out. - bool GenWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee) { - GBE_ASSERTM(false, "builtin call is not supported"); - return false; + regTranslator.newScalarProxy(ir::ocl::lid2, dst); } void GenWriter::visitAllocaInst(AllocaInst &I) { @@ -2601,9 +2138,13 @@ static std::string CBEMangle(const std::string &S) { static INLINE Value *getLoadOrStoreValue(StoreInst &I) { return I.getValueOperand(); } + void GenWriter::regAllocateLoadInst(LoadInst &I) { + this->newRegister(&I); + } + void GenWriter::regAllocateStoreInst(StoreInst &I) {} template - INLINE void GenWriter::visitLoadOrStore(T &I) + INLINE void GenWriter::emitLoadOrStore(T &I) { GBE_ASSERTM(I.isVolatile() == false, "Volatile pointer is not supported"); unsigned int llvmSpace = I.getPointerAddressSpace(); @@ -2612,124 +2153,47 @@ static std::string CBEMangle(const std::string &S) { Type *llvmType = llvmValues->getType(); const bool dwAligned = (I.getAlignment() % 4) == 0; const ir::MemorySpace memSpace = addressSpaceLLVMToGen(llvmSpace); - const ir::Type type = getType(llvmType); - const ir::Register values = getRegister(llvmValues); - const ir::Register ptr = getRegister(llvmPtr); - if (isLoad) - ctx.LOAD(type, ptr, memSpace, dwAligned, values); - else - ctx.STORE(type, ptr, memSpace, dwAligned, values); - } - - void GenWriter::visitLoadInst(LoadInst &I) { - this->visitLoadOrStore(I); - } - - void GenWriter::visitStoreInst(StoreInst &I) { - this->visitLoadOrStore(I); - } - -#if 0 - void GenWriter::visitInsertElementInst(InsertElementInst &I) { - Type *EltTy = I.getType()->getElementType(); - writeOperand(I.getOperand(0)); - Out << ";\n "; - Out << "(("; - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(&I) << "))["; - writeOperand(I.getOperand(2)); - Out << "] = ("; - writeOperand(I.getOperand(1)); - Out << ")"; - } - - void GenWriter::visitExtractElementInst(ExtractElementInst &I) { - // We know that our operand is not inlined. - Out << "(("; - Type *EltTy = cast(I.getOperand(0)->getType())->getElementType(); - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(I.getOperand(0)) << "))["; - writeOperand(I.getOperand(1)); - Out << "]"; - } - - void GenWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - Out << "("; - printType(Out, SVI.getType()); - Out << "){ "; - VectorType *VT = SVI.getType(); - unsigned NumElts = VT->getNumElements(); - Type *EltTy = VT->getElementType(); - - for (unsigned i = 0; i != NumElts; ++i) { - if (i) Out << ", "; - int SrcVal = SVI.getMaskValue(i); - if ((unsigned)SrcVal >= NumElts*2) { - Out << " 0/*undef*/ "; - } else { - Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts); - if (isa(Op)) { - // Do an extractelement of this value from the appropriate i. So do - // nothing when there is no returned valuenput. - Out << "(("; - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(Op) - << "))[" << (SrcVal & (NumElts-1)) << "]"; - } else if (isa(Op) || isa(Op)) { - Out << "0"; - } else { - printConstant(cast(Op)->getOperand(SrcVal & - (NumElts-1)), - false); - } - } + const ir::Register ptr = this->getRegister(llvmPtr); + + // Scalar is easy. We neednot build register tuples + if (isScalarType(llvmType) == true) { + const ir::Type type = getType(ctx, llvmType); + const ir::Register values = this->getRegister(llvmValues); + if (isLoad) + ctx.LOAD(type, ptr, memSpace, dwAligned, values); + else + ctx.STORE(type, ptr, memSpace, dwAligned, values); } - Out << "}"; - } + // A vector type requires to build a tuple + else { + VectorType *vectorType = cast(llvmType); + Type *elemType = vectorType->getElementType(); + + // Build the tuple data in the vector + vector tupleData; // put registers here + const uint32_t elemNum = vectorType->getNumElements(); + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + const ir::Register reg = this->getRegister(llvmValues, elemID); + tupleData.push_back(reg); + } + const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum); - void GenWriter::visitInsertValueInst(InsertValueInst &IVI) { - // Start by copying the entire aggregate value into the result variable. - writeOperand(IVI.getOperand(0)); - Out << ";\n "; - - // Then do the insert to update the field. - Out << GetValueName(&IVI); - for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), - makeArrayRef(b, i+1)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; + // Emit the instruction + const ir::Type type = getType(ctx, elemType); + if (isLoad) + ctx.LOAD(type, tuple, ptr, memSpace, elemNum, dwAligned); else - Out << ".field" << *i; + ctx.STORE(type, tuple, ptr, memSpace, elemNum, dwAligned); } - Out << " = "; - writeOperand(IVI.getOperand(1)); } - void GenWriter::visitExtractValueInst(ExtractValueInst &EVI) { - Out << "("; - if (isa(EVI.getOperand(0))) { - Out << "("; - printType(Out, EVI.getType()); - Out << ") 0/*UNDEF*/"; - } else { - Out << GetValueName(EVI.getOperand(0)); - for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), - makeArrayRef(b, i+1)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - } - Out << ")"; + void GenWriter::emitLoadInst(LoadInst &I) { + this->emitLoadOrStore(I); + } + + void GenWriter::emitStoreInst(StoreInst &I) { + this->emitLoadOrStore(I); } -#endif llvm::FunctionPass *createGenPass(ir::Unit &unit) { return new GenWriter(unit); diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp index bc30c1b..b2d6391 100644 --- a/backend/src/llvm/llvm_passes.cpp +++ b/backend/src/llvm/llvm_passes.cpp @@ -82,7 +82,11 @@ namespace gbe switch (Ty->getTypeID()) { case Type::VoidTyID: NOT_SUPPORTED; - case Type::VectorTyID: NOT_SUPPORTED; + case Type::VectorTyID: + { + const VectorType* VecTy = cast(Ty); + return VecTy->getNumElements() * getTypeByteSize(unit, VecTy->getElementType()); + } case Type::PointerTyID: case Type::IntegerTyID: case Type::FloatTyID: diff --git a/backend/src/utest/utest_llvm.cpp b/backend/src/utest/utest_llvm.cpp index ff36d14..852f07d 100644 --- a/backend/src/utest/utest_llvm.cpp +++ b/backend/src/utest/utest_llvm.cpp @@ -76,12 +76,14 @@ runTests: GBE_ASSERT(dummyKernel != NULL); fclose(dummyKernel); - //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add.ll")); - //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("load_store.ll")); - //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add2.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("load_store.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add2.ll")); UTEST_EXPECT_SUCCESS(utestLLVM2Gen("get_global_id.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("simple_float4.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("simple_float4_2.ll")); //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll")); - //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll")); + UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll")); } UTEST_REGISTER(utestLLVM) -- 2.7.4