From: Yaxun Liu Date: Tue, 16 Aug 2016 20:49:49 +0000 (+0000) Subject: [OpenCL] AMDGPU: Add extensions cl_amd_media_ops and cl_amd_media_ops2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3317446301c9bf6bd55431c731334bdb5a6aed94;p=platform%2Fupstream%2Fllvm.git [OpenCL] AMDGPU: Add extensions cl_amd_media_ops and cl_amd_media_ops2 Differential Revision: https://reviews.llvm.org/D23322 llvm-svn: 278851 --- diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def index 4919044..360fec4 100644 --- a/clang/include/clang/Basic/OpenCLExtensions.def +++ b/clang/include/clang/Basic/OpenCLExtensions.def @@ -75,6 +75,10 @@ OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U) // Clang Extensions. OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U) +// AMD OpenCL extensions +OPENCLEXT_INTERNAL(cl_amd_media_ops, 100, ~0U) +OPENCLEXT_INTERNAL(cl_amd_media_ops2, 100, ~0U) + #undef OPENCLEXT_INTERNAL #ifdef OPENCLEXT diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index b280b1a..0fecdce 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -2153,6 +2153,8 @@ public: Opts.cl_khr_int64_extended_atomics = 1; Opts.cl_khr_mipmap_image = 1; Opts.cl_khr_3d_image_writes = 1; + Opts.cl_amd_media_ops = 1; + Opts.cl_amd_media_ops2 = 1; } } diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 59f6eb8..ddd2cde 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -16843,6 +16843,196 @@ double __ovld sub_group_scan_inclusive_max(double x); #endif //cl_khr_subgroups cl_intel_subgroups +#ifdef cl_amd_media_ops +uint __ovld amd_bitalign(uint a, uint b, uint c); +uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c); +uint3 __ovld amd_bitalign(uint3 a, uint3 b, uint3 c); +uint4 __ovld amd_bitalign(uint4 a, uint4 b, uint4 c); +uint8 __ovld amd_bitalign(uint8 a, uint8 b, uint8 c); +uint16 __ovld amd_bitalign(uint16 a, uint16 
b, uint16 c); + +uint __ovld amd_bytealign(uint a, uint b, uint c); +uint2 __ovld amd_bytealign(uint2 a, uint2 b, uint2 c); +uint3 __ovld amd_bytealign(uint3 a, uint3 b, uint3 c); +uint4 __ovld amd_bytealign(uint4 a, uint4 b, uint4 c); +uint8 __ovld amd_bytealign(uint8 a, uint8 b, uint8 c); +uint16 __ovld amd_bytealign(uint16 a, uint16 b, uint16 c); + +uint __ovld amd_lerp(uint a, uint b, uint c); +uint2 __ovld amd_lerp(uint2 a, uint2 b, uint2 c); +uint3 __ovld amd_lerp(uint3 a, uint3 b, uint3 c); +uint4 __ovld amd_lerp(uint4 a, uint4 b, uint4 c); +uint8 __ovld amd_lerp(uint8 a, uint8 b, uint8 c); +uint16 __ovld amd_lerp(uint16 a, uint16 b, uint16 c); + +uint __ovld amd_pack(float4 v); + +uint __ovld amd_sad4(uint4 x, uint4 y, uint z); + +uint __ovld amd_sadhi(uint a, uint b, uint c); +uint2 __ovld amd_sadhi(uint2 a, uint2 b, uint2 c); +uint3 __ovld amd_sadhi(uint3 a, uint3 b, uint3 c); +uint4 __ovld amd_sadhi(uint4 a, uint4 b, uint4 c); +uint8 __ovld amd_sadhi(uint8 a, uint8 b, uint8 c); +uint16 __ovld amd_sadhi(uint16 a, uint16 b, uint16 c); + +uint __ovld amd_sad(uint a, uint b, uint c); +uint2 __ovld amd_sad(uint2 a, uint2 b, uint2 c); +uint3 __ovld amd_sad(uint3 a, uint3 b, uint3 c); +uint4 __ovld amd_sad(uint4 a, uint4 b, uint4 c); +uint8 __ovld amd_sad(uint8 a, uint8 b, uint8 c); +uint16 __ovld amd_sad(uint16 a, uint16 b, uint16 c); + +float __ovld amd_unpack0(uint a); +float2 __ovld amd_unpack0(uint2 a); +float3 __ovld amd_unpack0(uint3 a); +float4 __ovld amd_unpack0(uint4 a); +float8 __ovld amd_unpack0(uint8 a); +float16 __ovld amd_unpack0(uint16 a); + +float __ovld amd_unpack1(uint a); +float2 __ovld amd_unpack1(uint2 a); +float3 __ovld amd_unpack1(uint3 a); +float4 __ovld amd_unpack1(uint4 a); +float8 __ovld amd_unpack1(uint8 a); +float16 __ovld amd_unpack1(uint16 a); + +float __ovld amd_unpack2(uint a); +float2 __ovld amd_unpack2(uint2 a); +float3 __ovld amd_unpack2(uint3 a); +float4 __ovld amd_unpack2(uint4 a); +float8 __ovld amd_unpack2(uint8 a); 
+float16 __ovld amd_unpack2(uint16 a); + +float __ovld amd_unpack3(uint a); +float2 __ovld amd_unpack3(uint2 a); +float3 __ovld amd_unpack3(uint3 a); +float4 __ovld amd_unpack3(uint4 a); +float8 __ovld amd_unpack3(uint8 a); +float16 __ovld amd_unpack3(uint16 a); +#endif // cl_amd_media_ops + +#ifdef cl_amd_media_ops2 +int __ovld amd_bfe(int src0, uint src1, uint src2); +int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2); +int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2); +int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2); +int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2); +int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2); + +uint __ovld amd_bfe(uint src0, uint src1, uint src2); +uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2); + +uint __ovld amd_bfm(uint src0, uint src1); +uint2 __ovld amd_bfm(uint2 src0, uint2 src1); +uint3 __ovld amd_bfm(uint3 src0, uint3 src1); +uint4 __ovld amd_bfm(uint4 src0, uint4 src1); +uint8 __ovld amd_bfm(uint8 src0, uint8 src1); +uint16 __ovld amd_bfm(uint16 src0, uint16 src1); + +float __ovld amd_max3(float src0, float src1, float src2); +float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2); +float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2); +float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2); +float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2); +float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2); + +int __ovld amd_max3(int src0, int src1, int src2); +int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2); +int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2); +int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2); +int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2); +int16 __ovld amd_max3(int16 src0, 
int16 src1, int16 src2); + +uint __ovld amd_max3(uint src0, uint src1, uint src2); +uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2); + +float __ovld amd_median3(float src0, float src1, float src2); +float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2); +float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2); +float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2); +float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2); +float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2); + +int __ovld amd_median3(int src0, int src1, int src2); +int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2); +int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2); +int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2); +int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2); +int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2); + +uint __ovld amd_median3(uint src0, uint src1, uint src2); +uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2); + +float __ovld amd_min3(float src0, float src1, float src2); +float2 __ovld amd_min3(float2 src0, float2 src1, float2 src2); +float3 __ovld amd_min3(float3 src0, float3 src1, float3 src2); +float4 __ovld amd_min3(float4 src0, float4 src1, float4 src2); +float8 __ovld amd_min3(float8 src0, float8 src1, float8 src2); +float16 __ovld amd_min3(float16 src0, float16 src1, float16 src2); + +int __ovld amd_min3(int src0, int src1, int src2); +int2 __ovld amd_min3(int2 src0, int2 
src1, int2 src2); +int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2); +int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2); +int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2); +int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2); + +uint __ovld amd_min3(uint src0, uint src1, uint src2); +uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2); + +ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2); +ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2); +ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2); +ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2); +ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2); +ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2); + +ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2); +ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2); +ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2); +ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2); +ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2); +ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2); + +uint __ovld amd_msad(uint src0, uint src1, uint src2); +uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2); + +uint __ovld amd_sadd(uint src0, uint src1, uint src2); +uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld 
amd_sadd(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2); + +uint __ovld amd_sadw(uint src0, uint src1, uint src2); +uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2); +uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2); +uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2); +uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2); +uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2); +#endif // cl_amd_media_ops2 + // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable diff --git a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl index 0bcf8be..3befefd 100644 --- a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl +++ b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl @@ -210,3 +210,14 @@ #endif #pragma OPENCL EXTENSION cl_khr_terminate_context: enable // expected-warning@-1{{unsupported OpenCL extension 'cl_khr_terminate_context' - ignoring}} + +#ifndef cl_amd_media_ops +#error "Missing cl_amd_media_ops define" +#endif +#pragma OPENCL EXTENSION cl_amd_media_ops: enable + +#ifndef cl_amd_media_ops2 +#error "Missing cl_amd_media_ops2 define" +#endif +#pragma OPENCL EXTENSION cl_amd_media_ops2: enable + diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl index 7c70664..6a3cfde 100644 --- a/clang/test/SemaOpenCL/extension-version.cl +++ b/clang/test/SemaOpenCL/extension-version.cl @@ -259,3 +259,14 @@ // expected-warning@+2{{unsupported OpenCL extension 'cl_khr_terminate_context' - ignoring}} #endif #pragma OPENCL EXTENSION cl_khr_terminate_context: enable + +#ifndef cl_amd_media_ops +#error "Missing cl_amd_media_ops define" +#endif +#pragma OPENCL EXTENSION cl_amd_media_ops: enable + +#ifndef cl_amd_media_ops2 +#error "Missing cl_amd_media_ops2 define" +#endif +#pragma OPENCL EXTENSION cl_amd_media_ops2: enable +