if (dyn_cast<ConstantVector>(CPV))
CPV = extractConstantElem(CPV, index);
+  // Use isa<> (not dyn_cast<>) since the casted value is never used.
+  if (isa<ConstantAggregateZero>(CPV))
+ return doIt(uint32_t(0));
+
// Integers
if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
Type* Ty = CI->getType();
}
// Floats and doubles
- switch (CPV->getType()->getTypeID()) {
+ const Type::TypeID typeID = CPV->getType()->getTypeID();
+ switch (typeID) {
case Type::FloatTyID:
case Type::DoubleTyID:
{
break;
default:
GBE_ASSERTM(false, "Unsupported constant type");
+ break;
}
const uint64_t imm(8);
return doIt(imm);
INLINE OVERLOADABLE float native_tan(float x) {
return native_sin(x) / native_cos(x);
}
+#define E 2.71828182845904523536f
+INLINE OVERLOADABLE float native_exp(float x) { return native_powr(E, x); }
+#undef E
// TODO make them actually compliant precision-wise
-#define cos native_cos // XXX work-around ptx profile: cos already defined
-#define sin native_sin // XXX work-around ptr profile: sin already defined
#define sqrt native_sqrt // XXX work-around ptr profile: sin already defined
-
INLINE OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
INLINE OVERLOADABLE float fabs(float x) { return __gen_ocl_fabs(x); }
INLINE OVERLOADABLE float trunc(float x) { return __gen_ocl_rndz(x); }
INLINE OVERLOADABLE float floor(float x) { return __gen_ocl_rndd(x); }
INLINE OVERLOADABLE float ceil(float x) { return __gen_ocl_rndu(x); }
INLINE OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }
+// exp(x) = e^x; per OpenCL spec this takes a single argument (the previous
+// two-argument form ignored y and would not match one-argument call sites).
+INLINE OVERLOADABLE float exp(float x) { return native_exp(x); }
INLINE OVERLOADABLE float fmod(float x, float y) { return x-y*trunc(x/y); }
-// Hack pow is already a builtin
+// TODO use llvm intrinsics definitions
+#define cos native_cos
+#define sin native_sin
#define pow powr
PURE CONST OVERLOADABLE float mad(float a, float b, float c);
-OVERLOADABLE INLINE uint select(uint src0, uint src1, uint cond) {
+
+INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {
+ return cond ? src1 : src0;
+}
+INLINE OVERLOADABLE int select(int src0, int src1, int cond) {
return cond ? src1 : src0;
}
-OVERLOADABLE INLINE int select(int src0, int src1, int cond) {
+INLINE OVERLOADABLE float select(float src0, float src1, int cond) {
return cond ? src1 : src0;
}
+
// This will be optimized out by LLVM and will output LLVM select instructions
#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
-OVERLOADABLE INLINE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
+INLINE OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
TYPE4 dst; \
const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
const TYPE x1 = src1.x; \
DECL_SELECT4(float4, float, int4, 0x80000000)
#undef DECL_SELECT4
-#if 0
-INLINE OVERLOADABLE float2 mad(float2 a, float2 b, float2 c) {
- return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));
-}
-INLINE OVERLOADABLE float3 mad(float3 a, float3 b, float3 c) {
- return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));
-}
-INLINE OVERLOADABLE float4 mad(float4 a, float4 b, float4 c) {
- return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),
- mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));
-}
-#endif
-
/////////////////////////////////////////////////////////////////////////////
// Common Functions (see 6.11.4 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////
DECL_MIN_MAX(unsigned char)
#undef DECL_MIN_MAX
+INLINE OVERLOADABLE float fmax(float a, float b) { return max(a,b); }
+INLINE OVERLOADABLE float fmin(float a, float b) { return min(a,b); }
INLINE OVERLOADABLE float mix(float x, float y, float a) { return x + (y-x)*a;}
/////////////////////////////////////////////////////////////////////////////
INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}
/////////////////////////////////////////////////////////////////////////////
-// Extensions to manipulate the register file
-/////////////////////////////////////////////////////////////////////////////
-
-// Direct addressing register regions
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int);
-OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int, int);
-
-// Gather from register file
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int);
-OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int, int);
-
-/////////////////////////////////////////////////////////////////////////////
-// Extension to have uniform condition per hardware thread
-/////////////////////////////////////////////////////////////////////////////
-
-OVERLOADABLE unsigned short __gen_ocl_any(unsigned short cond);
-OVERLOADABLE unsigned short __gen_ocl_all(unsigned short cond);
-
-/////////////////////////////////////////////////////////////////////////////
-// Extension to support OBlock reads / writes
-/////////////////////////////////////////////////////////////////////////////
-
-OVERLOADABLE int __gen_ocl_obread(const __global void *address);
-OVERLOADABLE int __gen_ocl_obread(const __constant void *address);
-OVERLOADABLE int __gen_ocl_obread(const __local void *address);
-OVERLOADABLE void __gen_ocl_obwrite(const __global void *address, int);
-OVERLOADABLE void __gen_ocl_obwrite(const __local void *address, int);
-
-/////////////////////////////////////////////////////////////////////////////
// Force the compilation to SIMD8 or SIMD16
/////////////////////////////////////////////////////////////////////////////
int __gen_ocl_force_simd8(void);
int __gen_ocl_force_simd16(void);
-#define DECL_VOTE(TYPE) \
-INLINE OVERLOADABLE TYPE __gen_ocl_any(TYPE cond) { \
- return (TYPE) __gen_ocl_any((unsigned short) cond); \
-} \
-INLINE OVERLOADABLE TYPE __gen_ocl_all(TYPE cond) { \
- return (TYPE) __gen_ocl_all((unsigned short) cond); \
-}
-DECL_VOTE(unsigned int)
-DECL_VOTE(unsigned char)
-DECL_VOTE(int)
-DECL_VOTE(char)
-DECL_VOTE(short)
-DECL_VOTE(bool)
-#undef DECL_VOTE
-
#define NULL ((void*)0)
#undef PURE
#undef CONST
"INLINE OVERLOADABLE float native_tan(float x) {\n"
" return native_sin(x) / native_cos(x);\n"
"}\n"
+"#define E 2.71828182845904523536f\n"
+"INLINE OVERLOADABLE float native_exp(float x) { return native_powr(E, x); }\n"
+"#undef E\n"
"\n"
"// TODO make them actually compliant precision-wise\n"
-"#define cos native_cos // XXX work-around ptx profile: cos already defined\n"
-"#define sin native_sin // XXX work-around ptr profile: sin already defined\n"
"#define sqrt native_sqrt // XXX work-around ptr profile: sin already defined\n"
-"\n"
"INLINE OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }\n"
"INLINE OVERLOADABLE float fabs(float x) { return __gen_ocl_fabs(x); }\n"
"INLINE OVERLOADABLE float trunc(float x) { return __gen_ocl_rndz(x); }\n"
"INLINE OVERLOADABLE float floor(float x) { return __gen_ocl_rndd(x); }\n"
"INLINE OVERLOADABLE float ceil(float x) { return __gen_ocl_rndu(x); }\n"
"INLINE OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }\n"
+"INLINE OVERLOADABLE float exp(float x) { return native_exp(x); }\n"
"INLINE OVERLOADABLE float fmod(float x, float y) { return x-y*trunc(x/y); }\n"
"\n"
-"// Hack pow is already a builtin\n"
+"// TODO use llvm intrinsics definitions\n"
+"#define cos native_cos\n"
+"#define sin native_sin\n"
"#define pow powr\n"
"\n"
"PURE CONST OVERLOADABLE float mad(float a, float b, float c);\n"
-"OVERLOADABLE INLINE uint select(uint src0, uint src1, uint cond) {\n"
+"\n"
+"INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {\n"
+" return cond ? src1 : src0;\n"
+"}\n"
+"INLINE OVERLOADABLE int select(int src0, int src1, int cond) {\n"
" return cond ? src1 : src0;\n"
"}\n"
-"OVERLOADABLE INLINE int select(int src0, int src1, int cond) {\n"
+"INLINE OVERLOADABLE float select(float src0, float src1, int cond) {\n"
" return cond ? src1 : src0;\n"
"}\n"
"\n"
+"\n"
"// This will be optimized out by LLVM and will output LLVM select instructions\n"
"#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \\\n"
-"OVERLOADABLE INLINE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \\\n"
+"INLINE OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \\\n"
" TYPE4 dst; \\\n"
" const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \\\n"
" const TYPE x1 = src1.x; \\\n"
"DECL_SELECT4(float4, float, int4, 0x80000000)\n"
"#undef DECL_SELECT4\n"
"\n"
-"#if 0\n"
-"INLINE OVERLOADABLE float2 mad(float2 a, float2 b, float2 c) {\n"
-" return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));\n"
-"}\n"
-"INLINE OVERLOADABLE float3 mad(float3 a, float3 b, float3 c) {\n"
-" return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));\n"
-"}\n"
-"INLINE OVERLOADABLE float4 mad(float4 a, float4 b, float4 c) {\n"
-" return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),\n"
-" mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));\n"
-"}\n"
-"#endif\n"
-"\n"
"/////////////////////////////////////////////////////////////////////////////\n"
"// Common Functions (see 6.11.4 of OCL 1.1 spec)\n"
"/////////////////////////////////////////////////////////////////////////////\n"
"DECL_MIN_MAX(unsigned char)\n"
"#undef DECL_MIN_MAX\n"
"\n"
+"INLINE OVERLOADABLE float fmax(float a, float b) { return max(a,b); }\n"
+"INLINE OVERLOADABLE float fmin(float a, float b) { return min(a,b); }\n"
"INLINE OVERLOADABLE float mix(float x, float y, float a) { return x + (y-x)*a;}\n"
"\n"
"/////////////////////////////////////////////////////////////////////////////\n"
"INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}\n"
"\n"
"/////////////////////////////////////////////////////////////////////////////\n"
-"// Extensions to manipulate the register file\n"
-"/////////////////////////////////////////////////////////////////////////////\n"
-"\n"
-"// Direct addressing register regions\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int, int);\n"
-"\n"
-"// Gather from register file\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int);\n"
-"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int, int);\n"
-"\n"
-"/////////////////////////////////////////////////////////////////////////////\n"
-"// Extension to have uniform condition per hardware thread\n"
-"/////////////////////////////////////////////////////////////////////////////\n"
-"\n"
-"OVERLOADABLE unsigned short __gen_ocl_any(unsigned short cond);\n"
-"OVERLOADABLE unsigned short __gen_ocl_all(unsigned short cond);\n"
-"\n"
-"/////////////////////////////////////////////////////////////////////////////\n"
-"// Extension to support OBlock reads / writes\n"
-"/////////////////////////////////////////////////////////////////////////////\n"
-"\n"
-"OVERLOADABLE int __gen_ocl_obread(const __global void *address);\n"
-"OVERLOADABLE int __gen_ocl_obread(const __constant void *address);\n"
-"OVERLOADABLE int __gen_ocl_obread(const __local void *address);\n"
-"OVERLOADABLE void __gen_ocl_obwrite(const __global void *address, int);\n"
-"OVERLOADABLE void __gen_ocl_obwrite(const __local void *address, int);\n"
-"\n"
-"/////////////////////////////////////////////////////////////////////////////\n"
"// Force the compilation to SIMD8 or SIMD16\n"
"/////////////////////////////////////////////////////////////////////////////\n"
"\n"
"int __gen_ocl_force_simd8(void);\n"
"int __gen_ocl_force_simd16(void);\n"
"\n"
-"#define DECL_VOTE(TYPE) \\\n"
-"INLINE OVERLOADABLE TYPE __gen_ocl_any(TYPE cond) { \\\n"
-" return (TYPE) __gen_ocl_any((unsigned short) cond); \\\n"
-"} \\\n"
-"INLINE OVERLOADABLE TYPE __gen_ocl_all(TYPE cond) { \\\n"
-" return (TYPE) __gen_ocl_all((unsigned short) cond); \\\n"
-"}\n"
-"DECL_VOTE(unsigned int)\n"
-"DECL_VOTE(unsigned char)\n"
-"DECL_VOTE(int)\n"
-"DECL_VOTE(char)\n"
-"DECL_VOTE(short)\n"
-"DECL_VOTE(bool)\n"
-"#undef DECL_VOTE\n"
-"\n"
"#define NULL ((void*)0)\n"
"#undef PURE\n"
"#undef CONST\n"
--- /dev/null
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define exp native_exp
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 10.f
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__kernel void compiler_chocolux(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec3 s[4];
+ s[0]=(vec3)(0);
+ s[3]=(vec3)(sin(time),cos(time),0);
+ s[1]=s[3].zxy;
+ s[2]=s[3].zzx;
+
+ float t,b,c,h=0.0f;
+ vec3 m,n;
+ vec3 p=(vec3)(.2f);
+ vec3 d=normalize(.001f*(vec3)(gl_FragCoord,.0f)-p);
+
+ for(int i=0;i<4;i++)
+ {
+ t=2.0f;
+    // Inner sphere loop: use a distinct index so it does not shadow the
+    // outer bounce loop's 'i' (same behavior, clearer to read).
+    for(int j=0;j<4;j++)
+    {
+      b=dot(d,n=s[j]-p);
+      c=b*b+.2f-dot(n,n);
+      if(b-c<t)
+      if(c>0.0f)
+      {
+        m=s[j];t=b-c;
+      }
+    }
+ }
+ p+=t*d;
+ d=reflect(d,n=normalize(p-m));
+ h+=pow(n.x*n.x,44.f)+n.x*n.x*.2f;
+ }
+ vec4 gl_FragColor=(vec4)(h,h*h,h*h*h*h,1.f);
+ OUTPUT;
+}
+
--- /dev/null
+// See http://www.iquilezles.org/articles/menger/menger.htm for the
+// full explanation of how this was done
+
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define exp native_exp
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+// fmod is not like glsl mod!
+__attribute__((always_inline, overloadable))
+float glsl_mod(float x,float y) { return x-y*floor(x/y); }
+__attribute__((always_inline, overloadable))
+float2 glsl_mod(float2 a,float2 b) { return (float2)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y)); }
+__attribute__((always_inline, overloadable))
+float3 glsl_mod(float3 a,float3 b) { return (float3)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y), glsl_mod(a.z,b.z)); }
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+// Parameters must be explicitly typed: implicit int is invalid in OpenCL C
+// (and would truncate the float arguments if it were accepted).
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+__attribute__((always_inline))
+float maxcomp(vec3 p) { return max(p.x,max(p.y,p.z));}
+
+__attribute__((always_inline))
+float sdBox(vec3 p, vec3 b)
+{
+ vec3 di = fabs(p) - b;
+ float mc = maxcomp(di);
+ return min(mc,length(max(di,0.0f)));
+}
+
+__attribute__((always_inline))
+vec4 map(vec3 p)
+{
+ float d = sdBox(p,(vec3)(1.0f));
+ float4 res = (vec4)(d,1.f,0.f,0.f);
+
+ float s = 1.0f;
+ for( int m=0; m<3; m++ )
+ {
+ vec3 a = glsl_mod(p*s, 2.0f)-1.0f;
+ s *= 3.0f;
+ float rx = fabs(1.0f - 3.0f*fabs(a.x));
+ float ry = fabs(1.0f - 3.0f*fabs(a.y));
+ float rz = fabs(1.0f - 3.0f*fabs(a.z));
+
+ float da = max(rx,ry);
+ float db = max(ry,rz);
+ float dc = max(rz,rx);
+ float c = (min(da,min(db,dc))-1.0f)/s;
+ if (c > d)
+ {
+ d = c;
+ res = (vec4)(d, 0.2f*da*db*dc, (1.0f+(float)(m))/4.0f, 0.0f);
+ }
+ }
+ return (vec4)(res.x,res.y,res.z,0.f);
+}
+
+// GLSL ES doesn't seem to like loops with conditional break/return...
+#if 1
+__attribute__((always_inline))
+vec4 intersect( vec3 ro, vec3 rd )
+{
+ float t = 0.0f;
+ for(int i=0;i<64;i++)
+ {
+ vec4 h = map(ro + rd*t);
+ if( h.x<0.002f )
+ return (vec4)(t,h.yzw);
+ t += h.x;
+ }
+ return (vec4)(-1.0f);
+}
+#else
+__attribute__((always_inline))
+vec4 intersect( vec3 ro, vec3 rd )
+{
+ float t = 0.0f;
+ vec4 res = (vec4)(-1.0f);
+ for(int i=0;i<64;i++)
+ {
+ vec4 h = map(ro + rd*t);
+ if (h.x<0.002f)
+ {
+ if(res.x<0.0f) res = (vec4)(t,h.yzw);
+ }
+ t += h.x;
+ }
+ return res;
+}
+#endif
+
+__attribute__((always_inline))
+vec3 calcNormal(vec3 pos)
+{
+ vec3 eps = (vec3)(.001f,0.0f,0.0f);
+ vec3 nor;
+ nor.x = map(pos+eps.xyy).x - map(pos-eps.xyy).x;
+ nor.y = map(pos+eps.yxy).x - map(pos-eps.yxy).x;
+ nor.z = map(pos+eps.yyx).x - map(pos-eps.yyx).x;
+ return normalize(nor);
+}
+
+__kernel void compiler_menger_sponge(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+
+ // light
+ vec3 light = normalize((vec3)(1.0f,0.8f,-0.6f));
+
+ float ctime = time;
+ // camera
+ vec3 ro = 1.1f*(vec3)(2.5f*cos(0.5f*ctime),1.5f*cos(ctime*.23f),2.5f*sin(0.5f*ctime));
+ vec3 ww = normalize((vec3)(0.0f) - ro);
+ vec3 uu = normalize(cross( (vec3)(0.0f,1.0f,0.0f), ww ));
+ vec3 vv = normalize(cross(ww,uu));
+ vec3 rd = normalize( p.x*uu + p.y*vv + 1.5f*ww );
+ vec3 col = (vec3)(0.0f);
+ vec4 tmat = intersect(ro,rd);
+
+#if 0
+ if( tmat.x>0.0 )
+ col = (vec3)(
+ 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) );
+
+#else
+ if( tmat.x>0.0f )
+ {
+ vec3 pos = ro + tmat.x*rd;
+ vec3 nor = calcNormal(pos);
+
+ float dif1 = max(0.4f + 0.6f*dot(nor,light),0.0f);
+ float dif2 = max(0.4f + 0.6f*dot(nor,(vec3)(-light.x,light.y,-light.z)),0.0f);
+
+ // shadow
+ float ldis = 4.0f;
+ vec4 shadow = intersect( pos + light*ldis, -light );
+ if( shadow.x>0.0f && shadow.x<(ldis-0.01f) ) dif1=0.0f;
+
+ float ao = tmat.y;
+ col = 1.0f*ao*(vec3) (0.2f,0.2f,0.2f);
+ col += 2.0f*(0.5f+0.5f*ao)*dif1*(vec3)(1.0f,0.97f,0.85f);
+ col += 0.2f*(0.5f+0.5f*ao)*dif2*(vec3)(1.0f,0.97f,0.85f);
+ col += 1.0f*(0.5f+0.5f*ao)*(0.5f+0.5f*nor.y)*(vec3)(0.1f,0.15f,0.2f);
+
+ // gamma lighting
+ col = col*0.5f+0.5f*sqrt(col)*1.2f;
+
+ vec3 matcol = (vec3)(
+ 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z),
+ 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) );
+ col *= matcol;
+ col *= 1.5f*exp(-0.5f*tmat.x);
+ }
+#endif
+
+ vec4 gl_FragColor = (vec4)(col,1.0f);
+ OUTPUT;
+}
+
--- /dev/null
+typedef float2 vec2;
+typedef float3 vec3;
+typedef float4 vec4;
+
+#define sin native_sin
+#define cos native_cos
+#define tan native_tan
+#define exp native_exp
+#define normalize fast_normalize
+#define length fast_length
+#define mod fmod
+#define time 1.f
+
+inline vec3 reflect(vec3 I, vec3 N) {
+ return I - 2.0f * dot(N, I) * N;
+}
+
+// Parameters must be explicitly typed: implicit int is invalid in OpenCL C
+// (and would truncate the float arguments if it were accepted).
+inline float clamp(float x, float m, float M) { return max(min(x,M),m); }
+
+inline uint pack_fp4(float4 u4) {
+ uint u;
+ u = (((uint) u4.x)) |
+ (((uint) u4.y) << 8) |
+ (((uint) u4.z) << 16);
+ return u;
+}
+
+#define OUTPUT do {\
+ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \
+ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \
+} while (0)
+
+inline float e(vec3 c)
+{
+ c=cos((vec3)(cos(c.x+time/6.0f)*c.x-cos(c.y*3.0f+time/5.0f)*c.y,
+ cos(time/4.0f)*c.z/3.0f*c.x-cos(time/7.0f)*c.y,
+ c.x+c.y+c.z+time));
+ return dot(c*c,(vec3)(1.0f))-1.0f;
+}
+
+__kernel void compiler_nautilus(__global uint *dst, float resx, float resy, int w)
+{
+ vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1));
+ vec2 c=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy);
+ vec3 o=(vec3)(c.x,c.y,0.0f),g=(vec3)(c.x,c.y,1.0f)/64.0f,v=(vec3)(0.5f);
+ float m = 0.4f;
+
+ for(int r=0;r<100;r++)
+ {
+ float h=e(o)-m;
+ if(h<0.0f)break;
+ o+=h*10.0f*g;
+ v+=h*0.02f;
+ }
+ // light (who needs a normal?)
+ v+=e(o+0.1f)*(vec3)(0.4f,0.7f,1.0f);
+
+ // ambient occlusion
+ float a=0.0f;
+ for(int q=0;q<100;q++)
+ {
+ float l = e(o+0.5f*(vec3)(cos(1.1f*(float)(q)),cos(1.6f*(float)(q)),cos(1.4f*(float)(q))))-m;
+ a+=clamp(4.0f*l,0.0f,1.0f);
+ }
+ v*=a/100.0f;
+ vec4 gl_FragColor=(vec4)(v,1.0f);
+ OUTPUT;
+}
+
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "compiler_shader_toy.hpp"
-
-static void compiler_clod(void) { run_kernel(256, 256, "compiler_clod"); }
-MAKE_UTEST_FROM_FUNCTION(compiler_clod);
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "compiler_shader_toy.hpp"
-
-static void compiler_ribbon(void) { run_kernel(256, 256, "compiler_ribbon"); }
-MAKE_UTEST_FROM_FUNCTION(compiler_ribbon);
-
* The code here is just to wrap the common code used by all the kernels (to run
* the code and assert its correctness)
*/
-#include "compiler_shader_toy.hpp"
+#include "utest_helper.hpp"
-void run_kernel(int w, int h, const char *name)
+static const int dim = 256;
+
+static void run_kernel(int w, int h, const char *name)
{
const size_t global[2] = {size_t(w), size_t(h)};
const size_t local[2] = {16, 1};
static void NAME(void) { run_kernel(W,H,#NAME); } \
MAKE_UTEST_FROM_FUNCTION(NAME);
-DECL_SHADER_TOY_TEST(256,256,compiler_clod);
-DECL_SHADER_TOY_TEST(256,256,compiler_ribbon);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_clod);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_ribbon);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_chocolux);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_nautilus);
+DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge);
#undef DECL_SHADER_TOY_TEST
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-#ifndef __COMPILER_SHADER_TOY_HPP__
-#define __COMPILER_SHADER_TOY_HPP__
-
-#include "utest_helper.hpp"
-
-/*! Run and check the output */
-void run_kernel(int w, int h, const char *name);
-
-#endif /* __COMPILER_SHADER_TOY_HPP__ */
-