# the run-time itself
ADD_SUBDIRECTORY(src)
-ADD_SUBDIRECTORY(tests)
+ADD_SUBDIRECTORY(utests)
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#if 0
-/* ============================================================
-
-Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-
-Redistribution and use of this material is permitted under the following
-conditions:
-
-Redistributions must retain the above copyright notice and all terms of this
-license.
-
-In no event shall anyone redistributing or accessing or using this material
-commence or participate in any arbitration or legal action relating to this
-material against Advanced Micro Devices, Inc. or any copyright holders or
-contributors. The foregoing shall survive any expiration or termination of
-this license or any agreement or access or use related to this material.
-
-ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION
-OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL.
-
-THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT
-HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY
-REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO
-SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE
-FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER
-EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED
-WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
-ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT.
-IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
-OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY
-ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY
-OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES,
-INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS
-(US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS
-THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND
-ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES,
-OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE
-FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE
-CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR
-DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
-CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE
-THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL
-SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR
-ACCESS OR USE RELATED TO THIS MATERIAL.
-
-NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS
-MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO
-RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER
-COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH
-AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS
-DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S.
-MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED,
-EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS,
-INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS,
-COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS.
-MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY
-LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL.
-
-NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is
-provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to
-computer software and technical data, respectively. Use, duplication,
-distribution or disclosure by the U.S. Government and/or DOD agencies is
-subject to the full extent of restrictions in all applicable regulations,
-including those found at FAR52.227 and DFARS252.227 et seq. and any successor
-regulations thereof. Use of this material by the U.S. Government and/or DOD
-agencies is acknowledgment of the proprietary rights of any copyright holders
-and contributors, including those of Advanced Micro Devices, Inc., as well as
-the provisions of FAR52.227-14 through 23 regarding privately developed and/or
-commercial computer software.
-
-This license forms the entire agreement regarding the subject matter hereof and
-supersedes all proposals and prior discussions and writings between the parties
-with respect thereto. This license does not affect any ownership, rights, title,
-or interest in, or relating to, this material. No terms of this license can be
-modified or waived, and no breach of this license can be excused, unless done
-so in a writing signed by all affected parties. Each term of this license is
-separately enforceable. If any term of this license is determined to be or
-becomes unenforceable or illegal, such term shall be reformed to the minimum
-extent necessary in order for this license to remain in effect in accordance
-with its terms as modified by such reformation. This license shall be governed
-by and construed in accordance with the laws of the State of Texas without
-regard to rules on conflicts of law of any state or jurisdiction or the United
-Nations Convention on the International Sale of Goods. All disputes arising out
-of this license shall be subject to the jurisdiction of the federal and state
-courts in Austin, Texas, and all defenses are hereby waived concerning personal
-jurisdiction and venue of these courts.
-
-============================================================ */
-
-
-#include "AESEncryptDecrypt.hpp"
-
-using namespace AES;
-
-int AESEncryptDecrypt::setupAESEncryptDecrypt()
-{
- cl_uint sizeBytes = width*height*sizeof(cl_uchar);
- input = (cl_uchar*)malloc(sizeBytes);
- if(input == NULL)
- {
- sampleCommon->error("Failed to allocate host memory. (input)");
- return SDK_FAILURE;
- }
-
- /* initialize the input array, do NOTHING but assignment when decrypt*/
- int decrypt = 0;
- if(!decrypt)
- convertColorToGray(pixels, input);
- else
- convertGrayToGray(pixels, input);
-
- /* 1 Byte = 8 bits */
- keySize = keySizeBits/8;
-
- /* due to unknown represenation of cl_uchar */
- keySizeBits = keySize*sizeof(cl_uchar);
-
- key = (cl_uchar*)malloc(keySizeBits);
-
- /* random initialization of key */
- sampleCommon->fillRandom<cl_uchar>(key, keySize, 1, 0, 255, seed);
-
- /* expand the key */
- explandedKeySize = (rounds+1)*keySize;
- expandedKey = (cl_uchar*)malloc(explandedKeySize*sizeof(cl_uchar));
- roundKey = (cl_uchar*)malloc(explandedKeySize*sizeof(cl_uchar));
-
- keyExpansion(key, expandedKey, keySize, explandedKeySize);
- for(cl_uint i=0; i< rounds+1; ++i)
- {
- createRoundKey(expandedKey + keySize*i, roundKey + keySize*i);
- }
-
- output = (cl_uchar*)malloc(sizeBytes);
- if(output == NULL)
- {
- sampleCommon->error("Failed to allocate host memory. (output)");
- return SDK_FAILURE;
- }
-
- if(!quiet)
- {
- if(decrypt)
- {
- std::cout << "Decrypting Image ...." << std::endl;
- }
- else
- {
- std::cout << "Encrypting Image ...." << std::endl;
- }
-
- std::cout << "Input Image : " << inFilename << std::endl;
- std::cout << "Key : ";
- for(cl_uint i=0; i < keySize; ++i)
- {
- std::cout << (cl_uint)key[i] << " ";
- }
- std::cout << std::endl;
- }
-
- return SDK_SUCCESS;
-}
-
-void
-AESEncryptDecrypt::convertColorToGray(const uchar4 *pixels, cl_uchar *gray)
-{
- for(cl_int i=0; i< height; ++i)
- for(cl_int j=0; j<width; ++j)
- {
- cl_uint index = i*width + j;
- // gray = (0.3*R + 0.59*G + 0.11*B)
- gray[index] = cl_uchar (pixels[index].x * 0.3 +
- pixels[index].y * 0.59 +
- pixels[index].z * 0.11 );
- }
-}
-
-void
-AESEncryptDecrypt::convertGrayToGray(const uchar4 *pixels, cl_uchar *gray)
-{
- for(cl_int i=0; i< height; ++i)
- for(cl_int j=0; j<width; ++j)
- {
- cl_uint index = i*width + j;
- gray[index] = pixels[index].x;
- }
-}
-
-void
-AESEncryptDecrypt::convertGrayToPixels(const cl_uchar *gray, uchar4 *pixels)
-{
- for(cl_int i=0; i< height; ++i)
- for(cl_int j=0; j<width; ++j)
- {
- cl_uint index = i*width + j;
- pixels[index].x = gray[index];
- pixels[index].y = gray[index];
- pixels[index].z = gray[index];
- }
-}
-
-int
-AESEncryptDecrypt::genBinaryImage()
-{
- cl_int status = CL_SUCCESS;
-
- /*
- * Have a look at the available platforms and pick either
- * the AMD one if available or a reasonable default.
- */
- cl_uint numPlatforms;
- cl_platform_id platform = NULL;
- status = clGetPlatformIDs(0, NULL, &numPlatforms);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetPlatformIDs failed."))
- {
- return SDK_FAILURE;
- }
- if (0 < numPlatforms)
- {
- cl_platform_id* platforms = new cl_platform_id[numPlatforms];
- status = clGetPlatformIDs(numPlatforms, platforms, NULL);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetPlatformIDs failed."))
- {
- return SDK_FAILURE;
- }
-
- char platformName[100];
- for (unsigned i = 0; i < numPlatforms; ++i)
- {
- status = clGetPlatformInfo(platforms[i],
- CL_PLATFORM_VENDOR,
- sizeof(platformName),
- platformName,
- NULL);
-
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetPlatformInfo failed."))
- {
- return SDK_FAILURE;
- }
-
- platform = platforms[i];
- if (!strcmp(platformName, "Advanced Micro Devices, Inc."))
- {
- break;
- }
- }
- std::cout << "Platform found : " << platformName << "\n";
- delete[] platforms;
- }
-
- if(NULL == platform)
- {
- sampleCommon->error("NULL platform found so Exiting Application.");
- return SDK_FAILURE;
- }
-
- /*
- * If we could find our platform, use it. Otherwise use just available platform.
- */
- cl_context_properties cps[5] =
- {
- CL_CONTEXT_PLATFORM,
- (cl_context_properties)platform,
- CL_CONTEXT_OFFLINE_DEVICES_AMD,
- (cl_context_properties)1,
- 0
- };
-
- context = clCreateContextFromtype(cps,
- CL_DEVICE_TYPE_ALL,
- NULL,
- NULL,
- &status);
-
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clCreateContextFromtype failed."))
- {
- return SDK_FAILURE;
- }
-
- /* create a CL program using the kernel source */
- streamsdk::SDKFile kernelFile;
- std::string kernelPath = sampleCommon->getPath();
- kernelPath.append("AESEncryptDecrypt_Kernels.cl");
- if(!kernelFile.open(kernelPath.c_str()))
- {
- std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
- return SDK_FAILURE;
- }
- const char * source = kernelFile.source().c_str();
- size_t sourceSize[] = {strlen(source)};
- program = clCreateProgramWithSource(context,
- 1,
- &source,
- sourceSize,
- &status);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clCreateProgramWithSource failed."))
- {
- return SDK_FAILURE;
- }
-
- /* create a cl program executable for all the devices specified */
- status = clBuildProgram(program,
- 0,
- NULL,
- NULL,
- NULL,
- NULL);
-
- size_t numDevices;
- status = clGetProgramInfo(program,
- CL_PROGRAM_NUM_DEVICES,
- sizeof(numDevices),
- &numDevices,
- NULL );
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetProgramInfo(CL_PROGRAM_NUM_DEVICES) failed."))
- {
- return SDK_FAILURE;
- }
-
- std::cout << "Number of devices found : " << numDevices << "\n\n";
- devices = (cl_device_id *)malloc( sizeof(cl_device_id) * numDevices );
- if(devices == NULL)
- {
- sampleCommon->error("Failed to allocate host memory.(devices)");
- return SDK_FAILURE;
- }
- /* grab the handles to all of the devices in the program. */
- status = clGetProgramInfo(program,
- CL_PROGRAM_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices,
- NULL );
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetProgramInfo(CL_PROGRAM_DEVICES) failed."))
- {
- return SDK_FAILURE;
- }
-
-
- /* figure out the sizes of each of the binaries. */
- size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
- if(devices == NULL)
- {
- sampleCommon->error("Failed to allocate host memory.(binarySizes)");
- return SDK_FAILURE;
- }
-
- status = clGetProgramInfo(program,
- CL_PROGRAM_BINARY_SIZES,
- sizeof(size_t) * numDevices,
- binarySizes, NULL);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES) failed."))
- {
- return SDK_FAILURE;
- }
-
- size_t i = 0;
- /* copy over all of the generated binaries. */
- char **binaries = (char **)malloc( sizeof(char *) * numDevices );
- if(binaries == NULL)
- {
- sampleCommon->error("Failed to allocate host memory.(binaries)");
- return SDK_FAILURE;
- }
-
- for(i = 0; i < numDevices; i++)
- {
- if(binarySizes[i] != 0)
- {
- binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
- if(binaries[i] == NULL)
- {
- sampleCommon->error("Failed to allocate host memory.(binaries[i])");
- return SDK_FAILURE;
- }
- }
- else
- {
- binaries[i] = NULL;
- }
- }
- status = clGetProgramInfo(program,
- CL_PROGRAM_BINARIES,
- sizeof(char *) * numDevices,
- binaries,
- NULL);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetProgramInfo(CL_PROGRAM_BINARIES) failed."))
- {
- return SDK_FAILURE;
- }
-
- /* dump out each binary into its own separate file. */
- for(i = 0; i < numDevices; i++)
- {
- char fileName[100];
- sprintf(fileName, "%s.%d", dumpBinary.c_str(), (int)i);
- if(binarySizes[i] != 0)
- {
- char deviceName[1024];
- status = clGetDeviceInfo(devices[i],
- CL_DEVICE_NAME,
- sizeof(deviceName),
- deviceName,
- NULL);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetDeviceInfo(CL_DEVICE_NAME) failed."))
- {
- return SDK_FAILURE;
- }
-
- printf( "%s binary kernel: %s\n", deviceName, fileName);
- streamsdk::SDKFile BinaryFile;
- if(!BinaryFile.writeBinaryToFile(fileName,
- binaries[i],
- binarySizes[i]))
- {
- std::cout << "Failed to load kernel file : " << fileName << std::endl;
- return SDK_FAILURE;
- }
- }
- else
- {
- printf("Skipping %s since there is no binary data to write!\n",
- fileName);
- }
- }
-
- // Release all resouces and memory
- for(i = 0; i < numDevices; i++)
- {
- if(binaries[i] != NULL)
- {
- free(binaries[i]);
- binaries[i] = NULL;
- }
- }
-
- if(binaries != NULL)
- {
- free(binaries);
- binaries = NULL;
- }
-
- if(binarySizes != NULL)
- {
- free(binarySizes);
- binarySizes = NULL;
- }
-
- if(devices != NULL)
- {
- free(devices);
- devices = NULL;
- }
-
- status = clReleaseProgram(program);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clReleaseProgram failed."))
- {
- return SDK_FAILURE;
- }
-
- status = clReleaseContext(context);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clReleaseContext failed."))
- {
- return SDK_FAILURE;
- }
-
- return SDK_SUCCESS;
-}
-
-
-int
-AESEncryptDecrypt::setupCL(void)
-{
- cl_int status = 0;
- size_t deviceListSize;
-
- cl_device_type dtype;
-
- if(devicetype.compare("cpu") == 0)
- {
- dtype = CL_DEVICE_TYPE_CPU;
- }
- else //devicetype = "gpu"
- {
- dtype = CL_DEVICE_TYPE_GPU;
- }
-
- /*
- * Have a look at the available platforms and pick either
- * the AMD one if available or a reasonable default.
- */
-
- cl_uint numPlatforms;
- cl_platform_id platform = NULL;
- status = clGetPlatformIDs(0, NULL, &numPlatforms);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetPlatformIDs failed."))
- {
- return SDK_FAILURE;
- }
- if (0 < numPlatforms)
- {
- cl_platform_id* platforms = new cl_platform_id[numPlatforms];
- status = clGetPlatformIDs(numPlatforms, platforms, NULL);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetPlatformIDs failed."))
- {
- return SDK_FAILURE;
- }
- for (unsigned i = 0; i < numPlatforms; ++i)
- {
- char pbuf[100];
- status = clGetPlatformInfo(platforms[i],
- CL_PLATFORM_VENDOR,
- sizeof(pbuf),
- pbuf,
- NULL);
-
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clGetPlatformInfo failed."))
- {
- return SDK_FAILURE;
- }
-
- platform = platforms[i];
- if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
- {
- break;
- }
- }
- delete[] platforms;
- }
-
- if(NULL == platform)
- {
- sampleCommon->error("NULL platform found so Exiting Application.");
- return SDK_FAILURE;
- }
-
- // Display available devices.
- if(!sampleCommon->displayDevices(platform, dtype))
- {
- sampleCommon->error("sampleCommon::displayDevices() failed");
- return SDK_FAILURE;
- }
-
- /*
- * If we could find our platform, use it. Otherwise use just available platform.
- */
- cl_context_properties cps[3] =
- {
- CL_CONTEXT_PLATFORM,
- (cl_context_properties)platform,
- 0
- };
-
- context = clCreateContextFromtype(
- cps,
- dtype,
- NULL,
- NULL,
- &status);
-
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clCreateContextFromtype failed."))
- return SDK_FAILURE;
-
- /* First, get the size of device list data */
- status = clGetContextInfo(
- context,
- CL_CONTEXT_DEVICES,
- 0,
- NULL,
- &deviceListSize);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetContextInfo failed."))
- return SDK_FAILURE;
-
- int devicecount = (int)(deviceListSize / sizeof(cl_device_id));
- if(!sampleCommon->validateDeviceId(deviceId, devicecount))
- {
- sampleCommon->error("sampleCommon::validateDeviceId() failed");
- return SDK_FAILURE;
- }
-
- /* Now allocate memory for device list based on the size we got earlier */
- devices = (cl_device_id *)malloc(deviceListSize);
- if(devices == NULL)
- {
- sampleCommon->error("Failed to allocate memory (devices).");
- return SDK_FAILURE;
- }
-
- /* Now, get the device list data */
- status = clGetContextInfo(
- context,
- CL_CONTEXT_DEVICES,
- deviceListSize,
- devices,
- NULL);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetGetContextInfo failed."))
- return SDK_FAILURE;
-
- /* Get Device specific Information */
- status = clGetDeviceInfo(
- devices[deviceId],
- CL_DEVICE_MAX_WORK_GROUP_SIZE,
- sizeof(size_t),
- (void *)&maxWorkGroupSize,
- NULL);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed."))
- return SDK_FAILURE;
-
-
- status = clGetDeviceInfo(
- devices[deviceId],
- CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
- sizeof(cl_uint),
- (void *)&maxDimensions,
- NULL);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed."))
- return SDK_FAILURE;
-
-
- maxWorkItemSizes = (size_t *)malloc(maxDimensions*sizeof(size_t));
-
- status = clGetDeviceInfo(
- devices[deviceId],
- CL_DEVICE_MAX_WORK_ITEM_SIZES,
- sizeof(size_t)*maxDimensions,
- (void *)maxWorkItemSizes,
- NULL);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed."))
- return SDK_FAILURE;
-
-
- status = clGetDeviceInfo(
- devices[deviceId],
- CL_DEVICE_LOCAL_MEM_SIZE,
- sizeof(cl_ulong),
- (void *)&totalLocalMemory,
- NULL);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZES failed."))
- return SDK_FAILURE;
-
-
- {
- /* The block is to move the declaration of prop closer to its use */
- cl_command_queue_properties prop = 0;
- if(timing)
- prop |= CL_QUEUE_PROFILING_ENABLE;
-
- commandQueue = clCreateCommandQueue(
- context,
- devices[deviceId],
- prop,
- &status);
- if(!sampleCommon->checkVal(
- status,
- 0,
- "clCreateCommandQueue failed."))
- return SDK_FAILURE;
- }
- inputBuffer = clCreateBuffer(
- context,
- CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * width * height,
- input,
- &status);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clCreateBuffer failed. (inputBuffer)"))
- return SDK_FAILURE;
-
- outputBuffer = clCreateBuffer(
- context,
- CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * width * height,
- output,
- &status);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clCreateBuffer failed. (outputBuffer)"))
- return SDK_FAILURE;
-
- rKeyBuffer = clCreateBuffer(
- context,
- CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * explandedKeySize,
- roundKey,
- &status);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clCreateBuffer failed. (rKeyBuffer)"))
- return SDK_FAILURE;
-
- cl_uchar * sBox;
- sBox = (cl_uchar *)sbox;
- sBoxBuffer = clCreateBuffer(
- context,
- CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * 256,
- sBox,
- &status);
-
- cl_uchar * rsBox;
- rsBox = (cl_uchar *)rsbox;
- rsBoxBuffer = clCreateBuffer(
- context,
- CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * 256,
- rsBox,
- &status);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clCreateBuffer failed. (sBoxBuffer)"))
- return SDK_FAILURE;
-
- /* create a CL program using the kernel source */
- streamsdk::SDKFile kernelFile;
- std::string kernelPath = sampleCommon->getPath();
-
- if(isLoadBinaryEnabled())
- {
- kernelPath.append(loadBinary.c_str());
- if(!kernelFile.readBinaryFromFile(kernelPath.c_str()))
- {
- std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
- return SDK_FAILURE;
- }
-
- const char * binary = kernelFile.source().c_str();
- size_t binarySize = kernelFile.source().size();
- program = clCreateProgramWithBinary(context,
- 1,
- &devices[deviceId],
- (const size_t *)&binarySize,
- (const unsigned char**)&binary,
- NULL,
- &status);
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clCreateProgramWithBinary failed."))
- {
- return SDK_FAILURE;
- }
-
- }
- else
- {
- kernelPath.append("AESEncryptDecrypt_Kernels.cl");
- if(!kernelFile.open(kernelPath.c_str()))
- {
- std::cout << "Failed to load kernel file: " << kernelPath << std::endl;
- return SDK_FAILURE;
- }
- const char * source = kernelFile.source().c_str();
- size_t sourceSize[] = { strlen(source) };
- program = clCreateProgramWithSource(
- context,
- 1,
- &source,
- sourceSize,
- &status);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clCreateProgramWithSource failed."))
- return SDK_FAILURE;
- }
-
- /* create a cl program executable for all the devices specified */
- status = clBuildProgram(program, 1, &devices[deviceId], NULL, NULL, NULL);
- if(status != CL_SUCCESS)
- {
- if(status == CL_BUILD_PROGRAM_FAILURE)
- {
- cl_int logStatus;
- char * buildLog = NULL;
- size_t buildLogSize = 0;
- logStatus = clGetProgramBuildInfo(program,
- devices[deviceId],
- CL_PROGRAM_BUILD_LOG,
- buildLogSize,
- buildLog,
- &buildLogSize);
- if(!sampleCommon->checkVal(logStatus,
- CL_SUCCESS,
- "clGetProgramBuildInfo failed."))
- {
- return SDK_FAILURE;
- }
-
- buildLog = (char*)malloc(buildLogSize);
- if(buildLog == NULL)
- {
- sampleCommon->error("Failed to allocate host memory. (buildLog)");
- return SDK_FAILURE;
- }
- memset(buildLog, 0, buildLogSize);
-
- logStatus = clGetProgramBuildInfo(program,
- devices[deviceId],
- CL_PROGRAM_BUILD_LOG,
- buildLogSize,
- buildLog,
- NULL);
- if(!sampleCommon->checkVal(logStatus,
- CL_SUCCESS,
- "clGetProgramBuildInfo failed."))
- {
- free(buildLog);
- return SDK_FAILURE;
- }
-
- std::cout << " \n\t\t\tBUILD LOG\n";
- std::cout << " ************************************************\n";
- std::cout << buildLog << std::endl;
- std::cout << " ************************************************\n";
- free(buildLog);
- }
-
- if(!sampleCommon->checkVal(status,
- CL_SUCCESS,
- "clBuildProgram failed."))
- {
- return SDK_FAILURE;
- }
- }
-
- /* get a kernel object handle for a kernel with the given name */
- if(decrypt)
- {
- kernel = clCreateKernel(program, "AESDecrypt", &status);
- }
- else
- {
- kernel = clCreateKernel(program, "AESEncrypt", &status);
- }
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clCreateKernel failed."))
- return SDK_FAILURE;
-
- return SDK_SUCCESS;
-}
-
-
-int
-AESEncryptDecrypt::runCLKernels(void)
-{
- cl_int status;
- cl_event events[2];
-
- size_t globalThreads[2]= {width/4, height};
- size_t localThreads[2] = {1, 4};
-
- status = clGetKernelWorkGroupInfo(
- kernel,
- devices[deviceId],
- CL_KERNEL_LOCAL_MEM_SIZE,
- sizeof(cl_ulong),
- &usedLocalMemory,
- NULL);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetKernelWorkGroupInfo failed.(usedLocalMemory)"))
- return SDK_FAILURE;
-
- availableLocalMemory = totalLocalMemory - usedLocalMemory;
-
- /* two local memories buffers of sizeof(cl_uchar)*keySize */
- neededLocalMemory = 2*sizeof(cl_uchar)*keySize;
-
- if(neededLocalMemory > availableLocalMemory)
- {
- std::cout << "Unsupported: Insufficient local memory on device." << std::endl;
- return SDK_SUCCESS;
- }
-
- if(localThreads[0] > maxWorkItemSizes[0] ||
- localThreads[1] > maxWorkItemSizes[1] ||
- localThreads[0]*localThreads[1] > maxWorkGroupSize)
- {
- std::cout << "Unsupported: Device does not support requested number of work items."<<std::endl;
- return SDK_SUCCESS;
- }
-
- /* Check group size against kernelWorkGroupSize */
- status = clGetKernelWorkGroupInfo(kernel,
- devices[deviceId],
- CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(size_t),
- &kernelWorkGroupSize,
- 0);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clGetKernelWorkGroupInfo failed."))
- {
- return SDK_FAILURE;
- }
-
- if((cl_uint)(localThreads[0]*localThreads[1]) > kernelWorkGroupSize )
- {
- std::cout << "Out of Resources!" << std::endl;
- std::cout << "Group Size specified : " << localThreads[0] * localThreads[1] << std::endl;
- std::cout << "Max Group Size supported on the kernel : "
- << kernelWorkGroupSize << std::endl;
- return SDK_FAILURE;
- }
-
- /*** Set appropriate arguments to the kernel ***/
- status = clSetKernelArg(
- kernel,
- 0,
- sizeof(cl_mem),
- (void *)&outputBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (outputBuffer)"))
- return SDK_FAILURE;
-
- status = clSetKernelArg(
- kernel,
- 1,
- sizeof(cl_mem),
- (void *)&inputBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (inputBuffer)"))
- return SDK_FAILURE;
-
- status = clSetKernelArg(
- kernel,
- 2,
- sizeof(cl_mem),
- (void *)&rKeyBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (rKeyBuffer)"))
- return SDK_FAILURE;
-
- if(decrypt)
- {
- status = clSetKernelArg(
- kernel,
- 3,
- sizeof(cl_mem),
- (void *)&rsBoxBuffer);
- }
- else
- {
- status = clSetKernelArg(
- kernel,
- 3,
- sizeof(cl_mem),
- (void *)&sBoxBuffer);
- }
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (SBoxBuffer)"))
- return SDK_FAILURE;
-
- status = clSetKernelArg(
- kernel,
- 4,
- sizeof(cl_uchar)*keySize,
- NULL);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (block0)"))
- return SDK_FAILURE;
-
- status = clSetKernelArg(
- kernel,
- 5,
- sizeof(cl_uchar)*keySize,
- NULL);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (block1)"))
- return SDK_FAILURE;
-
- status = clSetKernelArg(
- kernel,
- 6,
- sizeof(cl_uint),
- (void *)&width);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (width)"))
- return SDK_FAILURE;
-
- status = clSetKernelArg(
- kernel,
- 7,
- sizeof(cl_uint),
- (void *)&rounds);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clSetKernelArg failed. (rounds)"))
- return SDK_FAILURE;
-
-
- /*
- * Enqueue a kernel run call.
- */
- status = clEnqueueNDRangeKernel(
- commandQueue,
- kernel,
- 2,
- NULL,
- globalThreads,
- localThreads,
- 0,
- NULL,
- &events[0]);
-
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clEnqueueNDRangeKernel failed."))
- return SDK_FAILURE;
-
-
- /* wait for the kernel call to finish execution */
- status = clWaitForEvents(1, &events[0]);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clWaitForEvents failed."))
- return SDK_FAILURE;
-
- /* Enqueue the results to application pointer*/
- status = clEnqueueReadBuffer(
- commandQueue,
- outputBuffer,
- CL_TRUE,
- 0,
- width * height * sizeof(cl_uchar),
- output,
- 0,
- NULL,
- &events[1]);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clEnqueueReadBuffer failed."))
- return SDK_FAILURE;
-
- /* Wait for the read buffer to finish execution */
- status = clWaitForEvents(1, &events[1]);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clWaitForEvents failed."))
- return SDK_FAILURE;
-
- clReleaseEvent(events[0]);
- clReleaseEvent(events[1]);
-
- return SDK_SUCCESS;
-}
-
-cl_uchar
-AESEncryptDecrypt::getRconValue(cl_uint num)
-{
- return Rcon[num];
-}
-
-void
-AESEncryptDecrypt::rotate(cl_uchar * word)
-{
- cl_uchar c = word[0];
- for(cl_uint i=0; i<3; ++i)
- {
- word[i] = word[i+1];
- }
- word[3] = c;
-}
-
-void
-AESEncryptDecrypt::core(cl_uchar * word, cl_uint iter)
-{
- rotate(word);
-
- for(cl_uint i=0; i < 4; ++i)
- {
- word[i] = getSBoxValue(word[i]);
- }
-
- word[0] = word[0]^getRconValue(iter);
-}
-
-void
-AESEncryptDecrypt::keyExpansion(cl_uchar * key, cl_uchar * expandedKey,
- cl_uint keySize, cl_uint explandedKeySize)
-{
- cl_uint currentSize = 0;
- cl_uint rConIteration = 1;
- cl_uchar temp[4] = {0};
-
- for(cl_uint i=0; i < keySize; ++i)
- {
- expandedKey[i] = key[i];
- }
-
- currentSize += keySize;
-
- while(currentSize < explandedKeySize)
- {
- for(cl_uint i=0; i < 4; ++i)
- {
- temp[i] = expandedKey[(currentSize - 4) + i];
- }
-
- if(currentSize%keySize == 0)
- {
- core(temp, rConIteration++);
- }
-
- //XXX: add extra SBOX here if the keySize is 32 Bytes
-
- for(cl_uint i=0; i < 4; ++i)
- {
- expandedKey[currentSize] = expandedKey[currentSize - keySize]^temp[i];
- currentSize++;
- }
- }
-}
-
-cl_uchar
-AESEncryptDecrypt::getSBoxValue(cl_uint num)
-{
- return sbox[num];
-}
-
-cl_uchar
-AESEncryptDecrypt::getSBoxInvert(cl_uint num)
-{
- return rsbox[num];
-}
-
-cl_uchar
-AESEncryptDecrypt::galoisMultiplication(cl_uchar a, cl_uchar b)
-{
- cl_uchar p = 0;
- for(cl_uint i=0; i < 8; ++i)
- {
- if((b&1) == 1)
- {
- p^=a;
- }
- cl_uchar hiBitSet = (a & 0x80);
- a <<= 1;
- if(hiBitSet == 0x80)
- {
- a ^= 0x1b;
- }
- b >>= 1;
- }
- return p;
-}
-
-void
-AESEncryptDecrypt::mixColumn(cl_uchar *column)
-{
- cl_uchar cpy[4];
- for(cl_uint i=0; i < 4; ++i)
- {
- cpy[i] = column[i];
- }
- column[0] = galoisMultiplication(cpy[0], 2)^
- galoisMultiplication(cpy[3], 1)^
- galoisMultiplication(cpy[2], 1)^
- galoisMultiplication(cpy[1], 3);
-
- column[1] = galoisMultiplication(cpy[1], 2)^
- galoisMultiplication(cpy[0], 1)^
- galoisMultiplication(cpy[3], 1)^
- galoisMultiplication(cpy[2], 3);
-
- column[2] = galoisMultiplication(cpy[2], 2)^
- galoisMultiplication(cpy[1], 1)^
- galoisMultiplication(cpy[0], 1)^
- galoisMultiplication(cpy[3], 3);
-
- column[3] = galoisMultiplication(cpy[3], 2)^
- galoisMultiplication(cpy[2], 1)^
- galoisMultiplication(cpy[1], 1)^
- galoisMultiplication(cpy[0], 3);
-}
-
-void
-AESEncryptDecrypt::mixColumnInv(cl_uchar *column)
-{
- cl_uchar cpy[4];
- for(cl_uint i=0; i < 4; ++i)
- {
- cpy[i] = column[i];
- }
- column[0] = galoisMultiplication(cpy[0], 14 )^
- galoisMultiplication(cpy[3], 9 )^
- galoisMultiplication(cpy[2], 13)^
- galoisMultiplication(cpy[1], 11);
-
- column[1] = galoisMultiplication(cpy[1], 14 )^
- galoisMultiplication(cpy[0], 9 )^
- galoisMultiplication(cpy[3], 13)^
- galoisMultiplication(cpy[2], 11);
-
- column[2] = galoisMultiplication(cpy[2], 14 )^
- galoisMultiplication(cpy[1], 9 )^
- galoisMultiplication(cpy[0], 13)^
- galoisMultiplication(cpy[3], 11);
-
- column[3] = galoisMultiplication(cpy[3], 14 )^
- galoisMultiplication(cpy[2], 9 )^
- galoisMultiplication(cpy[1], 13)^
- galoisMultiplication(cpy[0], 11);
-}
-
-void
-AESEncryptDecrypt::mixColumns(cl_uchar * state, cl_bool inverse)
-{
- cl_uchar column[4];
- for(cl_uint i=0; i < 4; ++i)
- {
- for(cl_uint j=0; j < 4; ++j)
- {
- column[j] = state[j*4 + i];
- }
-
- if(inverse)
- {
- mixColumnInv(column);
- }
- else
- {
- mixColumn(column);
- }
-
- for(cl_uint j=0; j < 4; ++j)
- {
- state[j*4 + i] = column[j];
- }
- }
-}
-
-void
-AESEncryptDecrypt::subBytes(cl_uchar * state, cl_bool inverse)
-{
- for(cl_uint i=0; i < keySize; ++i)
- {
- state[i] = inverse ? getSBoxInvert(state[i]): getSBoxValue(state[i]);
- }
-}
-
-void
-AESEncryptDecrypt::shiftRow(cl_uchar *state, cl_uchar nbr)
-{
- for(cl_uint i=0; i < nbr; ++i)
- {
- cl_uchar tmp = state[0];
- for(cl_uint j = 0; j < 3; ++j)
- {
- state[j] = state[j+1];
- }
- state[3] = tmp;
- }
-}
-
-void
-AESEncryptDecrypt::shiftRowInv(cl_uchar *state, cl_uchar nbr)
-{
- for(cl_uint i=0; i < nbr; ++i)
- {
- cl_uchar tmp = state[3];
- for(cl_uint j = 3; j > 0; --j)
- {
- state[j] = state[j-1];
- }
- state[0] = tmp;
- }
-}
-void
-AESEncryptDecrypt::shiftRows(cl_uchar * state, cl_bool inverse)
-{
- for(cl_uint i=0; i < 4; ++i)
- {
- if(inverse)
- shiftRowInv(state + i*4, i);
- else
- shiftRow(state + i*4, i);
- }
-}
-
-void
-AESEncryptDecrypt::addRoundKey(cl_uchar * state, cl_uchar * rKey)
-{
- for(cl_uint i=0; i < keySize; ++i)
- {
- state[i] = state[i] ^ rKey[i];
- }
-}
-
-void
-AESEncryptDecrypt::createRoundKey(cl_uchar * eKey, cl_uchar * rKey)
-{
- for(cl_uint i=0; i < 4; ++i)
- for(cl_uint j=0; j < 4; ++j)
- {
- rKey[i+ j*4] = eKey[i*4 + j];
- }
-}
-
-void
-AESEncryptDecrypt::aesRound(cl_uchar * state, cl_uchar * rKey)
-{
- subBytes(state, decrypt);
- shiftRows(state, decrypt);
- mixColumns(state, decrypt);
- addRoundKey(state, rKey);
-}
-
-void
-AESEncryptDecrypt::aesMain(cl_uchar * state, cl_uchar * rKey, cl_uint rounds)
-{
- addRoundKey(state, rKey);
- for(cl_uint i=1; i < rounds; ++i)
- {
- aesRound(state, rKey + keySize*i);
- }
- subBytes(state, decrypt);
- shiftRows(state, decrypt);
- addRoundKey(state, rKey + keySize*rounds);
-}
-
-void
-AESEncryptDecrypt::aesRoundInv(cl_uchar * state, cl_uchar * rKey)
-{
- shiftRows(state, decrypt);
- subBytes(state, decrypt);
- addRoundKey(state, rKey);
- mixColumns(state, decrypt);
-}
-
-void
-AESEncryptDecrypt::aesMainInv(cl_uchar * state, cl_uchar * rKey, cl_uint rounds)
-{
- addRoundKey(state, rKey + keySize*rounds);
- for(cl_uint i=rounds-1; i > 0; --i)
- {
- aesRoundInv(state, rKey + keySize*i);
- }
- shiftRows(state, decrypt);
- subBytes(state, decrypt);
- addRoundKey(state, rKey);
-}
-
-/**
- *
- *
- */
-void
-AESEncryptDecrypt::AESEncryptDecryptCPUReference(cl_uchar * output ,
- cl_uchar * input ,
- cl_uchar * rKey ,
- cl_uint explandedKeySize,
- cl_uint width ,
- cl_uint height ,
- cl_bool inverse )
-{
- cl_uchar block[16];
-
- for(cl_uint blocky = 0; blocky < height/4; ++blocky)
- for(cl_uint blockx= 0; blockx < width/4; ++blockx)
- {
- for(cl_uint i=0; i < 4; ++i)
- {
- for(cl_uint j=0; j < 4; ++j)
- {
- cl_uint index = (((blocky * width/4) + blockx) * keySize )+ (i*4 + j);
- block[i*4 + j] = input[index];
- }
- }
-
- if(inverse)
- aesMainInv(block, rKey, rounds);
- else
- aesMain(block, rKey, rounds);
-
- for(cl_uint i=0; i <4 ; ++i)
- {
- for(cl_uint j=0; j <4; ++j)
- {
- cl_uint index = (((blocky * width/4) + blockx) * keySize )+ (i*4 + j);
- output[index] = block[i*4 + j];
- }
- }
- }
-}
-
-
-int
-AESEncryptDecrypt::initialize()
-{
- // Call base class Initialize to get default configuration
- if(!this->SDKSample::initialize())
- return SDK_FAILURE;
-
- streamsdk::Option* ifilename_opt = new streamsdk::Option;
- if(!ifilename_opt)
- {
- sampleCommon->error("Memory allocation error.\n");
- return SDK_FAILURE;
- }
- ifilename_opt->_sVersion = "x";
- ifilename_opt->_lVersion = "input";
- ifilename_opt->_description = "Image as Input";
- ifilename_opt->_type = streamsdk::CA_ARG_STRING;
- ifilename_opt->_value = &inFilename;
- sampleArgs->AddOption(ifilename_opt);
-
- delete ifilename_opt;
-
- ////////////////
- streamsdk::Option* ofilename_opt = new streamsdk::Option;
- if(!ofilename_opt)
- {
- sampleCommon->error("Memory allocation error.\n");
- return SDK_FAILURE;
- }
- ofilename_opt->_sVersion = "y";
- ofilename_opt->_lVersion = "output";
- ofilename_opt->_description = "Image as Ouput";
- ofilename_opt->_type = streamsdk::CA_ARG_STRING;
- ofilename_opt->_value = &outFilename;
- sampleArgs->AddOption(ofilename_opt);
-
- delete ofilename_opt;
-
- ////////////////
- streamsdk::Option* decrypt_opt = new streamsdk::Option;
- if(!decrypt_opt)
- {
- sampleCommon->error("Memory allocation error.\n");
- return SDK_FAILURE;
- }
- decrypt_opt->_sVersion = "z";
- decrypt_opt->_lVersion = "decrypt";
- decrypt_opt->_description = "Decrypt the Input Image";
- decrypt_opt->_type = streamsdk::CA_NO_ARGUMENT;
- decrypt_opt->_value = &decrypt;
- sampleArgs->AddOption(decrypt_opt);
-
- delete decrypt_opt;
-
- streamsdk::Option* num_iterations = new streamsdk::Option;
- if(!num_iterations)
- {
- sampleCommon->error("Memory allocation error.\n");
- return SDK_FAILURE;
- }
-
- num_iterations->_sVersion = "i";
- num_iterations->_lVersion = "iterations";
- num_iterations->_description = "Number of iterations for kernel execution";
- num_iterations->_type = streamsdk::CA_ARG_INT;
- num_iterations->_value = &iterations;
-
- sampleArgs->AddOption(num_iterations);
-
- delete num_iterations;
-
- return SDK_SUCCESS;
-}
-
-int
-AESEncryptDecrypt::setup()
-{
-
- std::string filePath = sampleCommon->getPath() + inFilename;
- image.load(filePath.c_str());
-
- width = image.getWidth();
- height = image.getHeight();
-
- /* check condition for the bitmap to be initialized */
- if(width<0 || height <0)
- return SDK_FAILURE;
-
- pixels = image.getPixels();
-
- if(setupAESEncryptDecrypt()!=SDK_SUCCESS)
- return SDK_FAILURE;
-
- int timer = sampleCommon->createTimer();
- sampleCommon->resetTimer(timer);
- sampleCommon->startTimer(timer);
-
- if(setupCL()!=SDK_SUCCESS)
- return SDK_FAILURE;
-
- sampleCommon->stopTimer(timer);
-
- setupTime = (double)(sampleCommon->readTimer(timer));
-
- return SDK_SUCCESS;
-}
-
-
-int
-AESEncryptDecrypt::run()
-{
- int timer = sampleCommon->createTimer();
- sampleCommon->resetTimer(timer);
- sampleCommon->startTimer(timer);
-
- std::cout << "Executing kernel for " << iterations <<
- " iterations" << std::endl;
- std::cout << "-------------------------------------------" << std::endl;
-
- for(int i = 0; i < iterations; i++)
- {
- /* Arguments are set and execution call is enqueued on command buffer */
- if(runCLKernels()!=SDK_SUCCESS)
- return SDK_FAILURE;
- }
-
- sampleCommon->stopTimer(timer);
- totalKernelTime = (double)(sampleCommon->readTimer(timer)) / iterations;
-
-
- //XXX: Write output to an output Image
-
- convertGrayToPixels(output, pixels);
- image.write(outFilename.c_str());
-
- if(!quiet) {
- std::cout << "Output Filename : " << outFilename << std::endl;
- }
-
-
- return SDK_SUCCESS;
-}
-
-int
-AESEncryptDecrypt::verifyResults()
-{
- if(verify)
- {
- verificationOutput = (cl_uchar *) malloc(width*height*sizeof(cl_uchar));
- if(verificationOutput==NULL) {
- sampleCommon->error("Failed to allocate host memory. (verificationOutput)");
- return SDK_FAILURE;
- }
-
- /*
- * reference implementation
- */
- int refTimer = sampleCommon->createTimer();
- sampleCommon->resetTimer(refTimer);
- sampleCommon->startTimer(refTimer);
- AESEncryptDecryptCPUReference(verificationOutput, input, roundKey, explandedKeySize,
- width, height, decrypt);
- sampleCommon->stopTimer(refTimer);
- referenceKernelTime = sampleCommon->readTimer(refTimer);
-
- /* compare the results and see if they match */
- if(memcmp(output, verificationOutput, height*width*sizeof(cl_uchar)) == 0)
- {
- std::cout<<"Passed!\n";
- return SDK_SUCCESS;
- }
- else
- {
- std::cout<<"Failed\n";
- return SDK_FAILURE;
- }
- }
-
- return SDK_SUCCESS;
-}
-
-void AESEncryptDecrypt::printStats()
-{
- std::string strArray[4] = {"Width", "Height", "Time(sec)", "KernelTime(sec)"};
- std::string stats[4];
-
- totalTime = setupTime + totalKernelTime;
-
- stats[0] = sampleCommon->toString(width , std::dec);
- stats[1] = sampleCommon->toString(height , std::dec);
- stats[2] = sampleCommon->toString(totalTime, std::dec);
- stats[3] = sampleCommon->toString(totalKernelTime, std::dec);
-
- this->SDKSample::printStats(strArray, stats, 4);
-}
-
-int AESEncryptDecrypt::cleanup()
-{
- /* Releases OpenCL resources (Context, Memory etc.) */
- cl_int status;
-
- status = clReleaseKernel(kernel);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseKernel failed."))
- return SDK_FAILURE;
-
- status = clReleaseProgram(program);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseProgram failed."))
- return SDK_FAILURE;
-
- status = clReleaseMemObject(inputBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseMemObject failed."))
- return SDK_FAILURE;
-
- status = clReleaseMemObject(outputBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseMemObject failed."))
- return SDK_FAILURE;
-
- status = clReleaseMemObject(rKeyBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseMemObject failed."))
- return SDK_FAILURE;
-
- status = clReleaseMemObject(sBoxBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseMemObject failed."))
- return SDK_FAILURE;
-
- status = clReleaseMemObject(rsBoxBuffer);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseMemObject failed."))
- return SDK_FAILURE;
-
- status = clReleaseCommandQueue(commandQueue);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseCommandQueue failed."))
- return SDK_FAILURE;
-
- status = clReleaseContext(context);
- if(!sampleCommon->checkVal(
- status,
- CL_SUCCESS,
- "clReleaseContext failed."))
- return SDK_FAILURE;
-
- /* release program resources (input memory etc.) */
- if(input)
- free(input);
-
- if(key)
- free(key);
-
- if(expandedKey)
- free(expandedKey);
-
- if(roundKey)
- free(roundKey);
-
- if(output)
- free(output);
-
- if(verificationOutput)
- free(verificationOutput);
-
- if(devices)
- free(devices);
-
- if(maxWorkItemSizes)
- free(maxWorkItemSizes);
-
- return SDK_SUCCESS;
-}
-
-int
-main(int argc, char * argv[])
-{
- AESEncryptDecrypt clAESEncryptDecrypt("OpenCL AES Encrypt Decrypt");
-
- if(clAESEncryptDecrypt.initialize()!=SDK_SUCCESS)
- return SDK_FAILURE;
- if(!clAESEncryptDecrypt.parseCommandLine(argc, argv))
- return SDK_FAILURE;
-
- if(clAESEncryptDecrypt.isDumpBinaryEnabled())
- {
- return clAESEncryptDecrypt.genBinaryImage();
- }
- else
- {
- if(clAESEncryptDecrypt.setup()!=SDK_SUCCESS)
- return SDK_FAILURE;
- if(clAESEncryptDecrypt.run()!=SDK_SUCCESS)
- return SDK_FAILURE;
- if(clAESEncryptDecrypt.verifyResults()!=SDK_SUCCESS)
- return SDK_FAILURE;
- if(clAESEncryptDecrypt.cleanup()!=SDK_SUCCESS)
- return SDK_FAILURE;
- clAESEncryptDecrypt.printStats();
- }
-
- return SDK_SUCCESS;
-}
-
-#endif
-
-#include "common.h"
-void verify();
-
- cl_uchar sbox[256] =
- { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 //0
- , 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 //1
- , 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 //2
- , 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 //3
- , 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 //4
- , 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf //5
- , 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 //6
- , 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 //7
- , 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 //8
- , 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb //9
- , 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 //A
- , 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 //B
- , 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a //C
- , 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e //D
- , 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf //E
- , 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};//F
- //0 1 2 3 4 5 6 7 8 9 A B C D E F
-
-
- cl_uchar rsbox[256] =
- { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
- , 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
- , 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
- , 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
- , 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
- , 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
- , 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
- , 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
- , 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
- , 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
- , 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
- , 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
- , 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
- , 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
- , 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
- , 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};
-
-
- cl_uchar Rcon[255] =
- { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a
- , 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39
- , 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a
- , 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8
- , 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef
- , 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc
- , 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b
- , 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3
- , 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94
- , 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20
- , 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35
- , 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f
- , 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04
- , 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63
- , 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd
- , 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb };
-
-char* filename = "input512.bmp";
-int width, height;
-cl_uchar *input;
-cl_uchar *output;
-
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err, i;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel e_kernel = getKernel(device, context, "aes_kernels.cl", "AESEncrypt");
- cl_kernel d_kernel = getKernel(device, context, "aes_kernels.cl", "AESDecrypt");
-
- cl_uchar4 *pixels = (cl_uchar4 *) readBmp(filename, &width, &height);
-
- cl_uint sizeBytes = width*height*sizeof(cl_uchar);
- input = newBuffer(sizeBytes, 0);
-
- /* initialize the input array, do NOTHING but assignment when decrypt*/
- int decrypt = 0;
-#if 0
- if(!decrypt)
- convertColorToGray(pixels, input);
- else
- convertGrayToGray(pixels, input);
-#endif
- output = (cl_uchar*)malloc(sizeBytes);
-
-
-
- cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * width * height, input, &err); CHK_ERR(err);
-
- cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * width * height, output, &err); CHK_ERR(err);
-
-#if 0
- cl_mem rKeyBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * explandedKeySize, roundKey, &err); CHK_ERR(err);
-
- cl_uchar *sBox = (cl_uchar *)sbox;
- cl_mem sBoxBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * 256, sBox, &err); CHK_ERR(err);
-
- cl_uchar *rsBox = (cl_uchar *)rsbox;
- clmem rsBoxBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_uchar ) * 256, rsBox, &err); CHK_ERR(err);
-#endif
-
-#if 0
-#if 0
- for (i = 0; i < MAX; i++) {
- a[i] = b[i] = (float) i;
- c[i] = 0.0f;
- }
-#else
- a = newBuffer(MAX * sizeof(float), 'f');
- b = newBuffer(MAX * sizeof(float), 'f');
- c = newBuffer(MAX * sizeof(float), '0');
- d = newBuffer(MAX * sizeof(float), '0');
-#endif
-
- cl_mem da = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), a, &err); CHK_ERR(err);
- cl_mem db = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), b, &err); CHK_ERR(err);
- cl_mem dc = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), c, &err); CHK_ERR(err);
-
-
- /* Execute */
- int gws = MAX;
- int lws = 16;
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &da); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &db); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dc); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(size_t), &gws); CHK_ERR(err);
-
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &gws, &lws, 0, NULL, NULL); CHK_ERR(err);
- err = clEnqueueReadBuffer(queue, dc, CL_TRUE, 0, MAX*sizeof(float), d, 0, NULL, NULL); CHK_ERR(err);
-
- verify();
-}
-
-void verify()
-{
- int i;
- for (i = 0; i < MAX; i++) {
- float err = d[i] - (a[i] + b[i]);
-
- err = fabsf(err);
-#define EPS 1.0e-7f // 1.0e-8 fails on cpu
- if (err >= EPS) {
- printf("Mismatch: %8d: %8f %8f %8f (err=%g 0x%.08x)\n", i, d[i], a[i], b[i], err, *(unsigned int *) &err);
- printf("Failed\n");
- exit(-1);
- }
- }
- printf("Passed\n");
-#endif
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-
-float *randArray, *output;
-float *refOutput, *stepsArray;
-int numSamples = 64;
-int numSteps = 255;
-
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err;
- char *ker_path = NULL;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("binomialOption_kernels_0.bin", device);
-#else
- ker_path = do_kiss_path("binomialOption_kernels_0.bin8", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "binomial_options");
-
- randArray = newBuffer(numSamples * sizeof(cl_float4), 'f');
- output = newBuffer(numSamples * sizeof(cl_float4), 0 );
- cl_mem randBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- numSamples * sizeof(cl_float4), randArray, &err);
- CHK_ERR(err);
- cl_mem outBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- numSamples * sizeof(cl_float4), output, &err);
- CHK_ERR(err);
-
- /* Execute */
- size_t gws[1] = {numSamples * (numSteps+1)};
- size_t lws[1] = {numSteps + 1};
-
- err = clSetKernelArg(kernel, 0, sizeof(int), &numSteps); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &randBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &outBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, (numSteps + 1) * sizeof(cl_float4), NULL); CHK_ERR(err);
- err = clSetKernelArg(kernel, 4, numSteps * sizeof(cl_float4), NULL); CHK_ERR(err);
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, gws, lws, 0, NULL, NULL); CHK_ERR(err);
-#if 0
- err = clEnqueueReadBuffer(queue, outBuffer, CL_TRUE, 0,
- numSamples * sizeof(cl_float4), output,
- 0, NULL, NULL);
-#else
- output = clIntelMapBuffer(outBuffer, &err);
- CHK_ERR(err);
-#endif
-
- verify();
- return 0;
-}
-
-#define RISKFREE 0.02f
-#define VOLATILITY 0.30f
-
-/*
- * Reduces the input array (in place)
- * length specifies the length of the array
- */
- void
-binomialOptionCPU()
-{
- refOutput = newBuffer(numSamples * sizeof(cl_float4), 0);
- stepsArray = newBuffer((numSteps + 1) * sizeof(cl_float4), 0);
- int bid;
-
- /* Iterate for all samples */
- for(bid = 0; bid < numSamples; ++bid)
- {
- float s[4];
- float x[4];
- float vsdt[4];
- float puByr[4];
- float pdByr[4];
- float optionYears[4];
-
- float inRand[4];
- int i, j, k;
-
-
- for(i = 0; i < 4; ++i)
- {
- inRand[i] = randArray[bid + i];
- s[i] = (1.0f - inRand[i]) * 5.0f + inRand[i] * 30.f;
- x[i] = (1.0f - inRand[i]) * 1.0f + inRand[i] * 100.f;
- optionYears[i] = (1.0f - inRand[i]) * 0.25f + inRand[i] * 10.f;
- float dt = optionYears[i] * (1.0f / (float)numSteps);
- vsdt[i] = VOLATILITY * sqrtf(dt);
- float rdt = RISKFREE * dt;
- float r = expf(rdt);
- float rInv = 1.0f / r;
- float u = expf(vsdt[i]);
- float d = 1.0f / u;
- float pu = (r - d)/(u - d);
- float pd = 1.0f - pu;
- puByr[i] = pu * rInv;
- pdByr[i] = pd * rInv;
- }
- // Compute values at expiration date:
- // Call option value at period end is v(t) = s(t) - x
- // If s(t) is greater than x, or zero otherwise...
- // The computation is similar for put options...
- for(j = 0; j <= numSteps; j++)
- {
- for(i = 0; i < 4; ++i)
- {
- float profit = s[i] * expf(vsdt[i] * (2.0f * j - numSteps)) - x[i];
- stepsArray[j * 4 + i] = profit > 0.0f ? profit : 0.0f;
- }
- }
-
- //walk backwards up on the binomial tree of depth numSteps
- //Reduce the price step by step
- for(j = numSteps; j > 0; --j)
- {
- for(k = 0; k <= j - 1; ++k)
- {
- for(i = 0; i < 4; ++i)
- {
- stepsArray[k * 4 + i] = pdByr[i] * stepsArray[(k + 1) * 4 + i] + puByr[i] * stepsArray[k * 4 + i];
- }
- }
- }
-
- //Copy the root to result
- refOutput[bid] = stepsArray[0];
- }
-}
-
-void verify()
-{
- binomialOptionCPU();
- int i;
- for (i=0; i<20; i++) {
- printf("%13.8f %13.8f\n", output[i], refOutput[i]);
- }
- int resC = comparef(output, refOutput, numSamples * 4, 0.001f);
- resC ? printf("Passed\n") : printf("Failed\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-
-int length = 1024;
-int *input, *refInput;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err, i;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernel(device, context, "bitonic_kernels.cl", "bitonicSort");
-
- int length_bytes = length * sizeof(cl_uint);
-
- input = newBuffer(length_bytes, 0);
- refInput = newBuffer(length_bytes, 0);
- for (i = 0; i < length; i++) {
- input[i] = refInput[i] = rand() & 0x0fffff;
- }
- cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
- length_bytes, input, &err);
- CHK_ERR(err);
-
- /*
- * Execute
- *
- * This algorithm is run as NS stages. Each stage has NP passes.
- * so the total number of times the kernel call is enqueued is NS * NP.
- *
- * For every stage S, we have S + 1 passes.
- * eg: For stage S = 0, we have 1 pass.
- * For stage S = 1, we have 2 passes.
- *
- * if length is 2^N, then the number of stages (numStages) is N.
- * Do keep in mind the fact that the algorithm only works for
- * arrays whose size is a power of 2.
- *
- * here, numStages is N.
- *
- * For an explanation of how the algorithm works, please go through
- * the documentation of this sample.
- */
-
- /*
- * 2^numStages should be equal to length.
- * i.e the number of times you halve length to get 1 should be numStages
- */
- int numStages = 0;
- int sortDescending = 1;
- for(i = length; i > 1; i >>= 1)
- ++numStages;
-
- /*** Set appropriate arguments to the kernel ***/
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(cl_uint), &length); CHK_ERR(err);
- err = clSetKernelArg(kernel, 4, sizeof(cl_uint), &sortDescending); CHK_ERR(err);
-
- int stage, passOfStage;
- size_t globalThreads[1] = {length / 2};
- size_t localThreads[1] = {1};
-
- for(stage = 0; stage < numStages; stage++) {
- /* stage of the algorithm */
- err = clSetKernelArg(kernel, 1, sizeof(cl_uint), &stage);
- CHK_ERR(err);
-
- /* Every stage has stage+1 passes. */
- for(passOfStage = 0; passOfStage < stage + 1; passOfStage++) {
- /* pass of the current stage */
- err = clSetKernelArg( kernel, 2, sizeof(cl_uint), &passOfStage);
- CHK_ERR(err);
-
- /*
- * Enqueue a kernel run call.
- * For simplicity, the groupsize used is 1.
- *
- * Each thread writes a sorted pair.
- * So, the number of threads (global) is half the length.
- */
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
- globalThreads, localThreads,
- 0, NULL, NULL);
- CHK_ERR(err);
-
- /* wait for the kernel call to finish execution */
- err = clFinish(queue);
- CHK_ERR(err);
- }
- }
- err = clEnqueueReadBuffer(queue, inputBuffer, CL_TRUE, 0,
- length_bytes, input,
- 0, NULL, NULL);
- CHK_ERR(err);
-
- verify();
-
-}
-
-int compare(const void *x, const void *y)
-{
- unsigned int a = *(unsigned int *)x;
- unsigned int b = *(unsigned int *)y;
- return (a-b);
-}
-
-
-void verify()
-{
- int i;
-
- qsort(refInput, length, sizeof(int), compare);
- for (i = 0; i < length; i++) {
- if (i < 20) {
- printf("%8d %8d\n", input[i], refInput[i]);
- }
- if (input[i] != refInput[i]) {
- printf("Failed at %d: %d vs. %d\n", i,
- input[i], refInput[i]);
- printf("Failed");
- exit(-1);
- }
- }
- printf("Passed\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-#include "cl_test.h"
-
-void verify(int compareCall, int comparePut);
-
-float *randArray, *devCallPrice, *devPutPrice, *hostCallPrice, *hostPutPrice;
-int width = 32;
-int height = 16;
-
-int main(int argc, char **argv)
-{
- struct args args = { 0 };
- char *ker_path = NULL;
- int err;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("blackscholes_kernel_0.bin8", device);
-#else
- ker_path = do_kiss_path("blackscholes_kernel_0.bin", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err);
- CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);
- CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "blackScholes");
-
- int whf4 = width * height * sizeof(cl_float4);
-
- randArray = newBuffer(whf4, 'f');
- devCallPrice = newBuffer(whf4, 0);
- //devPutPrice = newBuffer(whf4, 0);
- hostCallPrice = newBuffer(whf4, 0);
- hostPutPrice = newBuffer(whf4, 0);
-
- cl_mem randBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, whf4, randArray, &err);
- CHK_ERR(err);
- cl_mem callPriceBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, whf4, NULL, &err);
- CHK_ERR(err);
- cl_mem putPriceBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, whf4, NULL, &err);
- CHK_ERR(err);
-
- size_t globalThreads[2] = { width, height };
- //size_t localThreads[2] = { 4, 4 };
- size_t localThreads[2] = { 16, 1 };
-
- /* whether sort is to be in increasing order. CL_TRUE implies increasing */
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &randBuf);
- CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(width), (const void *) &width);
- CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &callPriceBuf);
- CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &putPriceBuf);
- CHK_ERR(err);
-
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
- CHK_ERR(err);
-
- devPutPrice = clIntelMapBuffer(putPriceBuf, &err);
- CHK_ERR(err);
- devCallPrice = clIntelMapBuffer(callPriceBuf, &err);
- CHK_ERR(err);
- verify(1, 1);
-#if 0
- int i;
- for (i = 0; i < width*height*4; i++) {
- printf("%12.8f, %12.8f | %12.8f %12.8f\n",
- devCallPrice[i],
- hostCallPrice[i],
- devPutPrice[i],
- hostPutPrice[i]);
- fflush(stdout);
- }
-#endif
- clIntelUnmapBuffer(putPriceBuf);
- return err;
-}
-
-#define S_LOWER_LIMIT 10.0f
-#define S_UPPER_LIMIT 100.0f
-#define K_LOWER_LIMIT 10.0f
-#define K_UPPER_LIMIT 100.0f
-#define T_LOWER_LIMIT 1.0f
-#define T_UPPER_LIMIT 10.0f
-#define R_LOWER_LIMIT 0.01f
-#define R_UPPER_LIMIT 0.05f
-#define SIGMA_LOWER_LIMIT 0.01f
-#define SIGMA_UPPER_LIMIT 0.10f
-
-float phi(float X)
-{
- float y, absX, t;
-
- // the coeffs
- const float c1 = 0.319381530f;
- const float c2 = -0.356563782f;
- const float c3 = 1.781477937f;
- const float c4 = -1.821255978f;
- const float c5 = 1.330274429f;
-
- const float oneBySqrt2pi = 0.398942280f;
-
- absX = fabsf(X);
- t = 1.0f / (1.0f + 0.2316419f * absX);
-
- y = 1.0f - oneBySqrt2pi * exp(-X * X / 2.0f) *
- t * (c1 + t * (c2 + t * (c3 + t * (c4 + t * c5))));
-
- return (X < 0) ? (1.0f - y) : y;
-}
-
-void blackScholesCPU(int width, int height, float *randArray,
- float *hostCallPrice, float *hostPutPrice)
-{
- int y;
- for (y = 0; y < width * height * 4; ++y) {
- float d1, d2;
- float sigmaSqrtT;
- float KexpMinusRT;
- float s = S_LOWER_LIMIT * randArray[y] + S_UPPER_LIMIT * (1.0f - randArray [y]);
- float k = K_LOWER_LIMIT * randArray[y] + K_UPPER_LIMIT * (1.0f - randArray [y]);
- float t = T_LOWER_LIMIT * randArray[y] + T_UPPER_LIMIT * (1.0f - randArray [y]);
- float r = R_LOWER_LIMIT * randArray[y] + R_UPPER_LIMIT * (1.0f - randArray [y]);
- float sigma =
- SIGMA_LOWER_LIMIT * randArray[y] +
- SIGMA_UPPER_LIMIT * (1.0f - randArray[y]);
-
- sigmaSqrtT = sigma * sqrt(t);
-
- d1 = (log(s / k) +
- (r + sigma * sigma / 2.0f) * t) / sigmaSqrtT;
- d2 = d1 - sigmaSqrtT;
-
- KexpMinusRT = k * exp(-r * t);
- hostCallPrice[y] = s * phi(d1) - KexpMinusRT * phi(d2);
- hostPutPrice[y] = KexpMinusRT * phi(-d2) - s * phi(-d1);
- }
-}
-
-void verify(int compareCall, int comparePut)
-{
- blackScholesCPU(width, height, randArray, hostCallPrice, hostPutPrice);
- int resC = 1, resP = 1;
- if (compareCall)
- resC = comparef(hostCallPrice, devCallPrice, width * height * 4, 1.0e-5f);
- if (comparePut)
- resP = comparef(hostPutPrice, devPutPrice, width * height * 4, 1.0e-5f);
- printf("resC=%d, resP=%d: ", resC, resP);
- if (resC && resP)
- printf("Passed!\n");
- else
- printf("Failed!\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-#include "cl_test.h"
-
-void verify(float*);
-
-int width = 16;
-int height = 16;
-int blockWidth = 8;
-int blockSize = 8 * 8;
-int inverse = 0;
-
-float *input, *output, *ref;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err, i;
- char *ker_path = NULL;
-
- const cl_float a = cos(M_PI/16)/2;
- const cl_float b = cos(M_PI/8 )/2;
- const cl_float c = cos(3*M_PI/16)/2;
- const cl_float d = cos(5*M_PI/16)/2;
- const cl_float e = cos(3*M_PI/8)/2;
- const cl_float f = cos(7*M_PI/16)/2;
- const cl_float g = 1.0f/sqrt(8.0f);
-
- cl_float dct8x8[64] = {
- g, a, b, c, g, d, e, f,
- g, c, e, -f, -g, -a, -b, -d,
- g, d, -e, -a, -g, f, b, c,
- g, f, -b, -d, g, c, -e, -a,
- g, -f, -b, d, g, -c, -e, a,
- g, -d, -e, a, -g, -f, b, -c,
- g, -c, e, f, -g, a, -b, d,
- g, -a, b, -c, g, -d, e, f
- };
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("dct_kernels_0.bin8", device);
-#else
- ker_path = do_kiss_path("dct_kernels_0.bin", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "DCT");
-
- int bytes = width * height * sizeof(cl_float);
- input = newBuffer(bytes, 0);
- output = newBuffer(bytes, 0);
-
- for (i = 0; i < width*height; i++)
- input[i] = randf() * 255.0f;
-
- cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, input, &err);
- CHK_ERR(err);
- cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, bytes, output, &err);
- CHK_ERR(err);
- cl_mem dctBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * blockSize, dct8x8, &err);
- CHK_ERR(err);
-
- /* Execute */
- /*** Set appropriate arguments to the kernel ***/
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &outputBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dctBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, blockWidth * blockWidth * sizeof(cl_float), NULL); CHK_ERR(err);
- err = clSetKernelArg(kernel, 4, sizeof(cl_uint), &width); CHK_ERR(err);
- err = clSetKernelArg(kernel, 5, sizeof(cl_uint), &blockWidth); CHK_ERR(err);
- err = clSetKernelArg(kernel, 6, sizeof(cl_uint), &inverse); CHK_ERR(err);
-
- size_t globalThreads[2] = {width, height};
- size_t localThreads[2] = {blockWidth, blockWidth};
-
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
- CHK_ERR(err);
-
- output = clIntelMapBuffer(outputBuffer, &err);
- verify(dct8x8);
- return 0;
-}
-
-/*
- * Reference implementation of the Discrete Cosine Transfrom on the CPU
- */
- cl_uint
-getIdx(cl_uint blockIdx, cl_uint blockIdy, cl_uint localIdx, cl_uint localIdy, cl_uint blockWidth, cl_uint globalWidth)
-{
- cl_uint globalIdx = blockIdx * blockWidth + localIdx;
- cl_uint globalIdy = blockIdy * blockWidth + localIdy;
-
- return (globalIdy * globalWidth + globalIdx);
-}
-
- void
-DCTCPUReference(cl_float *ref,
- const cl_float * input ,
- const cl_float * dct8x8 ,
- const cl_uint width,
- const cl_uint height,
- const cl_uint numBlocksX,
- const cl_uint numBlocksY,
- const cl_uint inverse)
-{
- cl_float * temp = (cl_float *)malloc(width*height*sizeof(cl_float));
-
- /* for each block in the image */
- cl_uint blockIdy, blockIdx, i,j, k;
- for(blockIdy=0; blockIdy < numBlocksY; ++blockIdy)
- for(blockIdx=0; blockIdx < numBlocksX; ++blockIdx)
- {
- // First calculate A^T * X
- for(j=0; j < blockWidth ; ++j)
- for(i = 0; i < blockWidth ; ++i)
- {
- cl_uint index = getIdx(blockIdx, blockIdy, i, j, blockWidth, width);
- cl_float tmp = 0.0f;
- for(k=0; k < blockWidth; ++k)
- {
- // multiply with dct8x8(k,i) if forward DCT and dct8x8(i,k) if inverse DCT
- cl_uint index1 = (inverse) ? i*blockWidth +k : k*blockWidth + i;
- //input(k,j)
- cl_uint index2 = getIdx(blockIdx, blockIdy, j, k, blockWidth, width);
-
- tmp += dct8x8[index1]*input[index2];
- }
- temp[index] = tmp;
- //ref[index] = tmp;
- }
- // And now multiply the result of previous step with A i.e. calculate (A^T * X) * A
- for(j=0; j < blockWidth ; ++j)
- for(i = 0; i < blockWidth ; ++i)
- {
- cl_uint index = getIdx(blockIdx, blockIdy, i, j, blockWidth, width);
- cl_float tmp = 0.0f;
- for(k=0; k < blockWidth; ++k)
- {
- //input(i,k)
- cl_uint index1 = getIdx(blockIdx, blockIdy, k, i, blockWidth, width);
-
- // multiply with dct8x8(k,j) if forward DCT and dct8x8(j,k) if inverse DCT
- cl_uint index2 = (inverse) ? j*blockWidth +k : k*blockWidth + j;
-
- tmp += temp[index1]*dct8x8[index2];
- }
- ref[index] = tmp;
- }
- }
- free(temp);
-}
-
-void verify(float* dct8x8)
-{
- printf("Passed\n");
- ref = (cl_float*) newBuffer(width*height*sizeof(cl_float), 0);
- DCTCPUReference(ref, input, dct8x8, width, height, width/blockWidth, height/blockWidth, inverse);
-#if 1
- int i, j;
- for (j=0; j<height; j++, printf("\n"))
- for (i=0; i<width; i++) {
- printf("[%4.4f %4.4f]", output[i+j*width], ref[i+j*width]);
- }
-#endif
- int resC = comparef(output, ref, width * height, 1.0e-6f);
- printf("%d\n", resC);
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-#include "cl_test.h"
-
-void verify();
-
-int length = 2048;
-float *input, *output, *refout;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err;
- char *ker_path = NULL;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("fastWalsh_kernels_0.bin8", device);
-#else
- ker_path = do_kiss_path("fastWalsh_kernels_0.bin", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "fastWalshTransform");
-
- input = newBuffer(length * sizeof(float), 'p');
- refout = newBuffer(length * sizeof(float), 0 );
- memcpy(refout, input, length * sizeof(float));
-
- cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, length * sizeof(float), input, &err); CHK_ERR(err);
-
- /* Execute */
- size_t globalThreads[1] = { length/2};
- size_t localThreads[1] = { 256};
-
- /* the input array - also acts as output*/
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); CHK_ERR(err);
-
- int step;
-
- for(step = 1; step < length; step<<= 1) {
- /* stage of the algorithm */
- err = clSetKernelArg( kernel, 1, sizeof(int), &step); CHK_ERR(err);
-
- /* Enqueue a kernel run call */
- err = clEnqueueNDRangeKernel(queue,
- kernel, 1, NULL,
- globalThreads,
- localThreads,
- 0,
- NULL,
- NULL); CHK_ERR(err);
-
- /* wait for the kernel call to finish execution */
- /* err = clWaitForEvents(1, &eND); CHK_ERR(err); */
- }
-
-#if 0
- /* Enqueue readBuffer*/
- err = clEnqueueReadBuffer(queue, inputBuffer, CL_TRUE, 0, length * sizeof(cl_float), output, 0, NULL, NULL); CHK_ERR(err);
-#else
- output = clIntelMapBuffer(inputBuffer, &err);
-
-#endif
- verify();
- return 0;
-}
-
-void fastWalshCPU(float *vinput, int length)
-{
- int step;
-
- /* for each pass of the algorithm */
- for(step=1; step < length; step <<=1) {
- /* length of each block */
- cl_uint jump = step << 1;
-
- /* for each blocks */
- cl_uint group;
- for(group = 0; group < step; ++group) {
- /* for each pair of elements with in the block */
- cl_uint pair;
-
- for(pair = group; pair < length; pair += jump) {
- /* find its partner */
- cl_uint match = pair + step;
-
- cl_float T1 = vinput[pair];
- cl_float T2 = vinput[match];
-
- /* store the sum and difference of the numbers in the same locations */
- vinput[pair] = T1 + T2;
- vinput[match] = T1 - T2;
- }
- }
- }
-}
-
-void verify()
-{
- int i;
-
- fastWalshCPU(refout, length);
- comparef(output, refout, length, 1.0e-6);
- for (i = 0; i < 20; i++) {
- printf("%20.5f %20.8f\n", output[i], refout[i]);
- }
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-#include "cl_test.h"
-
-void verify();
-
-int length = 1024;
-float *input_i, *input_r, *output_i, *output_r, *refOutput_r, *refOutput_i;
-
-int main(int argc, char**argv)
-{
- char *ker_path = NULL;
- struct args args = {0};
- int err;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("fft_kernels_0.bin", device);
-#else
- ker_path = do_kiss_path("fft_kernels_0.bin8", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "kfft");
-
- cl_uint inputSizeBytes = length * sizeof(cl_float);
-
- /* allocate and init memory used by host */
- input_r = newBuffer(inputSizeBytes, 'p');
- input_i = newBuffer(inputSizeBytes, 0 );
- output_i = newBuffer(inputSizeBytes, 0 );
- output_r = newBuffer(inputSizeBytes, 0 );
-
- memcpy(output_i, input_i, inputSizeBytes);
- memcpy(output_r, input_r, inputSizeBytes);
-
- cl_mem buffer_r = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, inputSizeBytes, input_r, &err); CHK_ERR(err);
- cl_mem buffer_i = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, inputSizeBytes, input_i, &err); CHK_ERR(err);
-
- size_t globalThreads[1] = {64};
- size_t localThreads[1] = {64};
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer_r); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer_i); CHK_ERR(err);
-
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL);
- CHK_ERR(err);
-
- output_i = clIntelMapBuffer(buffer_i, &err); CHK_ERR(err);
- output_r = clIntelMapBuffer(buffer_r, &err); CHK_ERR(err);
- verify();
-
- return 0;
-}
-
-/*
- This computes an in-place complex-to-complex FFT
- x and y are the real and imaginary arrays of 2^m points.
- dir = 1 gives forward transform
- dir = -1 gives reverse transform
- */
-void fftCPU(int dir,long m,cl_float *x,cl_float *y)
-{
- long n,i,i1,j,k,i2,l,l1,l2;
- double c1,c2,tx,ty,t1,t2,u1,u2,z;
-
- /* Calculate the number of points */
- n = 1;
- for (i=0;i<m;i++)
- n *= 2;
-
- /* Do the bit reversal */
- i2 = n >> 1;
- j = 0;
- for (i=0;i<n-1;i++)
- {
- if (i < j)
- {
- tx = x[i];
- ty = y[i];
- x[i] = x[j];
- y[i] = y[j];
- x[j] = (cl_float)tx;
- y[j] = (cl_float)ty;
- }
- k = i2;
- while (k <= j)
- {
- j -= k;
- k >>= 1;
- }
- j += k;
- }
-
- /* Compute the FFT */
- c1 = -1.0;
- c2 = 0.0;
- l2 = 1;
- for (l=0;l<m;l++)
- {
- l1 = l2;
- l2 <<= 1;
- u1 = 1.0;
- u2 = 0.0;
- for (j=0;j<l1;j++)
- {
- for (i=j;i<n;i+=l2)
- {
- i1 = i + l1;
- t1 = u1 * x[i1] - u2 * y[i1];
- t2 = u1 * y[i1] + u2 * x[i1];
- x[i1] = (cl_float)(x[i] - t1);
- y[i1] = (cl_float)(y[i] - t2);
- x[i] += (cl_float)t1;
- y[i] += (cl_float)t2;
- }
- z = u1 * c1 - u2 * c2;
- u2 = u1 * c2 + u2 * c1;
- u1 = z;
- }
- c2 = sqrt((1.0 - c1) / 2.0);
- if (dir == 1)
- c2 = -c2;
- c1 = sqrt((1.0 + c1) / 2.0);
- }
-
- /* Scaling for forward transform */
- /*if (dir == 1) {
- for (i=0;i<n;i++) {
- x[i] /= n;
- y[i] /= n;
- }
- }*/
-}
-
-/**
- * Reference CPU implementation of FFT Convolution
- * for performance comparison
- */
-void fftCPUReference()
-{
- refOutput_r = newBuffer(length * sizeof(float), 0 );
- refOutput_i = newBuffer(length * sizeof(float), 0 );
-
- /* Copy data from input to reference buffers */
- memcpy(refOutput_r, input_r, length * sizeof(cl_float));
- memcpy(refOutput_i, input_i, length * sizeof(cl_float));
-
- /* Compute reference FFT */
- fftCPU(1, 10, refOutput_r, refOutput_i);
-}
-
-void verify()
-{
- fftCPUReference();
- int i;
- comparef(refOutput_r, output_r, length, 1.0e-5f);
- comparef(refOutput_i, output_i, length, 1.0e-5f);
- printf("Passed\n");
-
- for (i = 0; i < length; i++) {
- printf("%i %12.5f, %12.5f | %12.5f, %12.5f | %12.5f, %12.5f\n",
- i, input_r[i], input_i[i],
- refOutput_r[i], output_r[i],
- refOutput_i[i], output_i[i]);
- }
- printf("...\n");
-
- for (i = 0; i < 8; i++) {
- printf("%12.5f, %12.5f | %12.5f, %12.5f | %12.5f, %12.5f\n",
- input_r[length-1-i], input_i[length-1-i],
- refOutput_r[length-1-i], output_r[length-1-i],
- refOutput_i[length-1-i], output_i[length-1-i]);
- }
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-static int *dst = NULL;
-static const size_t w = 64;
-static const size_t h = 64;
-static const size_t iter = 4;
-
-int
-main(int argc, char **argv)
-{
- const size_t global[2] = {w, h};
- const size_t local[2] = {16, 1};
- const size_t sz = w * h * sizeof(char[4]);
- int status = 0;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("mandelbrot_0.bin8", "render")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("mandelbrot_0.bin", "render")) != 0)
- goto error;
-#endif
-
- /* One dry run */
- cl_mem cl_dst = clCreateBuffer(ctx, CL_MEM_PINNABLE, sz, NULL, &status);
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
- dst = (int *) clIntelMapBuffer(cl_dst, &status);
-
- writeBmp(dst, w, h, "mandelbrot.bmp");
- CALL (clIntelUnmapBuffer, cl_dst);
- CALL (clReleaseMemObject, cl_dst);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-
-float *input0, *input1, *output, *refout;
-int vectorsize = 4;
-int blockSize = 8;
-
-/* must be multiples of vectorsize and blocksize */
-#define M 32
-#define N 32
-#define K 32
-
-int width0 = M;
-int height0 = N;
-
-int width1 = K;
-int height1 = M;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- char *ker_path = NULL;
- int err;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- ker_path = do_kiss_path("matmul_kernels_0.bin", device);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "mmmKernel");
-
- input0 = (cl_float *) newBuffer(width0 * height0 * sizeof(cl_float), 'p');
- input1 = (cl_float *) newBuffer(width1 * height1 * sizeof(cl_float), 'p');
- output = (cl_float *) newBuffer(width1 * height0 * sizeof(cl_float), 0 );
- refout = (cl_float *) newBuffer(width1 * height0 * sizeof(cl_float), 0 );
-
- cl_mem inputBuffer0 = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- width0 * height0 * sizeof(cl_float), input0, &err); CHK_ERR(err);
- cl_mem inputBuffer1 = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- width1 * height1 * sizeof(cl_float), input1, &err); CHK_ERR(err);
- cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- width1 * height0 * sizeof(cl_float), output, &err); CHK_ERR(err);
-
- /* Execute */
- size_t globalThreads[2] = {width1 / 4, height0/ 4};
- size_t localThreads [2] = {blockSize, blockSize};
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer0); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer1); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(cl_int), &width0); CHK_ERR(err);
- err = clSetKernelArg(kernel, 4, sizeof(cl_int), &width1); CHK_ERR(err);
-
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
- output = clIntelMapBuffer(outputBuffer, &err);
-
- verify();
- clIntelUnmapBuffer(outputBuffer);
- return err;
-}
-
-void
-matmulCPU(float *output, float *input0, float * input1, int y, int x, int z)
-{
- int i, j, k;
-
- for(i=0; i < y; i++)
- for(j=0; j < z; j++)
- for(k=0; k < x; k++)
- output[i*z + j] += (input0[i*x + k]*input1[k*z + j]);
-}
-
-void verify()
-{
- matmulCPU(refout, input0, input1, height0, width0, width1);
- comparef(refout, output, width1 * height0, 1.0e-5f );
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-#include "cl_test.h"
-#include "CL/cl_intel.h"
-
-void verify();
-
-int width = 256;
-int height = 256;
-int blockSizeX = 8;
-int blockSizeY = 8;
-int mulFactor = 2;
-float *seeds, *deviceResult;
-
-int main(int argc, char**argv)
-{
- char *ker_path = NULL;
- struct args args = {0};
- int err, i;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("mersenne_kernels_0.bin8", device);
-#else
- ker_path = do_kiss_path("mersenne_kernels_0.bin", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "gaussianRand");
-
- seeds = newBuffer( width * height * sizeof(cl_uint4), 0);
-// deviceResult = newBuffer( width * height * mulFactor * sizeof(cl_float4), 0);
- for (i = 0; i < width * height * 4; ++i) {
- seeds[i] = (unsigned int)rand();
- }
-
- cl_mem seedsBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, width * height * sizeof(cl_float4), seeds, &err); CHK_ERR(err);
- cl_mem resultBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * height * sizeof(cl_float4) * mulFactor, NULL, &err); CHK_ERR(err);
-
- size_t globalThreads[2] = {width, height};
- size_t localThreads[2] = {blockSizeX, blockSizeY};
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &seedsBuf); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_uint), &width); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_uint), &mulFactor); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &resultBuf); CHK_ERR(err);
-
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
-#if 0
- err = clEnqueueReadBuffer(queue, resultBuf, CL_TRUE, 0, width * height * mulFactor * sizeof(cl_float4),
- deviceResult, 1, &eND, NULL);
-#endif
- deviceResult = clIntelMapBuffer(resultBuf, &err);
- CHK_ERR(err);
- verify();
- clIntelUnmapBuffer(resultBuf);
-
- return err;
-}
-
-void verify()
-{
- int i;
- // comparef(d, c, MAX, 1.0e-6);
- /* check mean value of generated random numbers */
- float meanVal = 0.0f;
-
- for(i = 0; i < height * width * (int)mulFactor * 4; ++i) {
- meanVal += deviceResult[i];
- }
-
- meanVal = fabs(meanVal) / (height * width * (int)mulFactor * 4);
- printf("Mean Value of random numbers: %12.8f\n", meanVal);
- printf("%s\n", (meanVal < 0.1f) ? "Passed" : "Failed");
-
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-int width = 64;
-int height = 64;
-int blockSizeX = 256;
-int blockSizeY = 1;
-
-
-cl_float *sigma; /**< Array of sigma values */
-cl_float *price; /**< Array of price values */
-cl_float *vega; /**< Array of vega values */
-cl_float *refPrice; /**< Array of reference price values */
-cl_float *refVega; /**< Array of reference vega values */
-cl_uint *randNum; /**< Array of random numbers */
-cl_float *priceVals; /**< Array of price values for given samples */
-cl_float *priceDeriv; /**< Array of price derivative values for given samples */
-cl_int steps = 10;
-
- typedef struct _MonteCalroAttrib
- {
- cl_float4 strikePrice;
- cl_float4 c1;
- cl_float4 c2;
- cl_float4 c3;
- cl_float4 initPrice;
- cl_float4 sigma;
- cl_float4 timeStep;
- }MonteCarloAttrib;
-
- float maturity = 1.f;
- int noOfSum = 12;
- int noOfTraj = 1024;
- float initPrice = 50.f;
- float strikePrice = 55.f;
- float interest = 0.06f;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err, i;
- const cl_float finalValue = 0.8f;
- const cl_float stepValue = finalValue / (cl_float)steps;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernel(device, context, "monteCarloAsian_kernels.cl", "calPriceVega");
-
- sigma = (cl_float*) newBuffer(steps * sizeof(cl_float), 0);
- sigma[0] = 0.01f;
- for (i = 1; i < steps; i++) {
- sigma[i] = sigma[i - 1] + stepValue;
- }
-
- price = (cl_float*) newBuffer(steps * sizeof(cl_float), 0);
- vega = (cl_float*) newBuffer(steps * sizeof(cl_float), 0);
- refPrice = (cl_float*) newBuffer(steps * sizeof(cl_float), 0);
- refVega = (cl_float*) newBuffer(steps * sizeof(cl_float), 0);
-
- /* Set samples and exercize points */
-
- width = noOfTraj / 4;
- height = noOfTraj / 2;
-
- randNum = (cl_uint*) newBuffer(width * height * sizeof(cl_uint4), 0);
- priceVals = (cl_float*) newBuffer(width * height * 2 * sizeof(cl_float4), 0);
- priceDeriv = (cl_float*) newBuffer(width * height * 2 * sizeof(cl_float4), 0);
-
- cl_mem randBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_uint4) * width * height, randNum, &err);
- CHK_ERR(err);
-
- cl_mem priceBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * width * height * 2, NULL, &err);
- CHK_ERR(err);
-
- cl_mem priceDerivBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * width * height * 2, NULL, &err);
- CHK_ERR(err);
-
- cl_event eND;
- size_t globalThreads[2] = {width, height};
- size_t localThreads[2] = {blockSizeX, blockSizeY};
-
- MonteCarloAttrib attributes;
-
- err = clSetKernelArg(kernel, 2, sizeof(cl_uint), &width); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &randBuf); CHK_ERR(err);
- err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &priceBuf); CHK_ERR(err);
- err = clSetKernelArg(kernel, 5, sizeof(cl_mem), &priceDerivBuf); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_int), &noOfSum); CHK_ERR(err);
-
- float timeStep = maturity / (noOfSum - 1);
- // Initialize random number generator
- srand(1);
-
- int k, j;
- for(k = 0; k < steps; k++) {
- for(j = 0; j < (width * height * 4); j++) {
- randNum[j] = (cl_uint)rand();
- }
-
- float c1 = (interest - 0.5f * sigma[k] * sigma[k]) * timeStep;
- float c2 = sigma[k] * sqrt(timeStep);
- float c3 = (interest + 0.5f * sigma[k] * sigma[k]);
-
- const cl_float4 c1F4 = {c1, c1, c1, c1};
- attributes.c1 = c1F4;
-
- const cl_float4 c2F4 = {c2, c2, c2, c2};
- attributes.c2 = c2F4;
-
- const cl_float4 c3F4 = {c3, c3, c3, c3};
- attributes.c3 = c3F4;
-
- cl_float4 initPriceF4 = {initPrice, initPrice, initPrice, initPrice};
- attributes.initPrice = initPriceF4;
-
- const cl_float4 strikePriceF4 = {strikePrice, strikePrice, strikePrice, strikePrice};
- attributes.strikePrice = strikePriceF4;
-
- const cl_float4 sigmaF4 = {sigma[k], sigma[k], sigma[k], sigma[k]};
- attributes.sigma = sigmaF4;
-
- const cl_float4 timeStepF4 = {timeStep, timeStep, timeStep, timeStep};
- attributes.timeStep = timeStepF4;
-
-
- /* Set appropriate arguments to the kernel */
- /* the input array - also acts as output for this pass (input for next) */
- err = clSetKernelArg(kernel, 0, sizeof(attributes), &attributes);
- CHK_ERR(err);
-
- /*
- * Enqueue a kernel run call.
- */
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads,
- 0, NULL, &eND);
- CHK_ERR(err);
-
- /* Enqueue the results to application pointer*/
- err = clEnqueueReadBuffer(queue, priceBuf, CL_TRUE, 0, width * height * 2 * sizeof(cl_float4), priceVals,
- 1, &eND, NULL);
- CHK_ERR(err);
-
- /* Enqueue the results to application pointer*/
- err = clEnqueueReadBuffer(queue, priceDerivBuf, CL_TRUE, 0, width * height * 2 * sizeof(cl_float4), priceDeriv,
- 0, NULL, NULL);
- CHK_ERR(err);
-
- /* Replace Following "for" loop with reduction kernel */
- for (i = 0; i < noOfTraj * noOfTraj; i++) {
- price[k] += priceVals[i];
- vega[k] += priceDeriv[i];
- }
-
- price[k] /= (noOfTraj * noOfTraj);
- vega[k] /= (noOfTraj * noOfTraj);
-
- price[k] = exp(-interest * maturity) * price[k];
- vega[k] = exp(-interest * maturity) * vega[k];
- }
- verify();
-}
-
-void
-lshift128(unsigned int* input,
- unsigned int shift,
- unsigned int * output)
-{
- unsigned int invshift = 32u - shift;
-
- output[0] = input[0] << shift;
- output[1] = (input[1] << shift) | (input[0] >> invshift);
- output[2] = (input[2] << shift) | (input[1] >> invshift);
- output[3] = (input[3] << shift) | (input[2] >> invshift);
-}
-
-void
-rshift128(unsigned int* input,
- unsigned int shift,
- unsigned int* output)
-{
- unsigned int invshift = 32u - shift;
- output[3]= input[3] >> shift;
- output[2] = (input[2] >> shift) | (input[0] >> invshift);
- output[1] = (input[1] >> shift) | (input[1] >> invshift);
- output[0] = (input[0] >> shift) | (input[2] >> invshift);
-}
-
-void
-generateRand(unsigned int* seed,
- float *gaussianRand1,
- float *gaussianRand2,
- unsigned int* nextRand)
-{
-
- unsigned int mulFactor = 4;
- unsigned int temp[8][4];
-
- unsigned int state1[4] = {seed[0], seed[1], seed[2], seed[3]};
- unsigned int state2[4] = {0u, 0u, 0u, 0u};
- unsigned int state3[4] = {0u, 0u, 0u, 0u};
- unsigned int state4[4] = {0u, 0u, 0u, 0u};
- unsigned int state5[4] = {0u, 0u, 0u, 0u};
-
- unsigned int stateMask = 1812433253u;
- unsigned int thirty = 30u;
- unsigned int mask4[4] = {stateMask, stateMask, stateMask, stateMask};
- unsigned int thirty4[4] = {thirty, thirty, thirty, thirty};
- unsigned int one4[4] = {1u, 1u, 1u, 1u};
- unsigned int two4[4] = {2u, 2u, 2u, 2u};
- unsigned int three4[4] = {3u, 3u, 3u, 3u};
- unsigned int four4[4] = {4u, 4u, 4u, 4u};
-
- unsigned int r1[4] = {0u, 0u, 0u, 0u};
- unsigned int r2[4] = {0u, 0u, 0u, 0u};
-
- unsigned int a[4] = {0u, 0u, 0u, 0u};
- unsigned int b[4] = {0u, 0u, 0u, 0u};
-
- unsigned int e[4] = {0u, 0u, 0u, 0u};
- unsigned int f[4] = {0u, 0u, 0u, 0u};
-
- unsigned int thirteen = 13u;
- unsigned int fifteen = 15u;
- unsigned int shift = 8u * 3u;
-
- unsigned int mask11 = 0xfdff37ffu;
- unsigned int mask12 = 0xef7f3f7du;
- unsigned int mask13 = 0xff777b7du;
- unsigned int mask14 = 0x7ff7fb2fu;
-
- const float one = 1.0f;
- const float intMax = 4294967296.0f;
- const float PI = 3.14159265358979f;
- const float two = 2.0f;
-
- float r[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float phi[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-
- float temp1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float temp2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-
- int c;
-
- //Initializing states.
- for(c = 0; c < 4; ++c) {
- state2[c] = mask4[c] * (state1[c] ^ (state1[c] >> thirty4[c])) + one4[c];
- state3[c] = mask4[c] * (state2[c] ^ (state2[c] >> thirty4[c])) + two4[c];
- state4[c] = mask4[c] * (state3[c] ^ (state3[c] >> thirty4[c])) + three4[c];
- state5[c] = mask4[c] * (state4[c] ^ (state4[c] >> thirty4[c])) + four4[c];
- }
-
- unsigned int i = 0;
- for(i = 0; i < mulFactor; ++i) {
- switch(i)
- {
- case 0:
- for(c = 0; c < 4; ++c)
- {
- r1[c] = state4[c];
- r2[c] = state5[c];
- a[c] = state1[c];
- b[c] = state3[c];
- }
- break;
- case 1:
- for(c = 0; c < 4; ++c)
- {
- r1[c] = r2[c];
- r2[c] = temp[0][c];
- a[c] = state2[c];
- b[c] = state4[c];
- }
- break;
- case 2:
- for(c = 0; c < 4; ++c)
- {
- r1[c] = r2[c];
- r2[c] = temp[1][c];
- a[c] = state3[c];
- b[c] = state5[c];
- }
- break;
- case 3:
- for(c = 0; c < 4; ++c)
- {
- r1[c] = r2[c];
- r2[c] = temp[2][c];
- a[c] = state4[c];
- b[c] = state1[c];
- }
- break;
- default:
- break;
-
- }
-
- lshift128(a, shift, e);
- rshift128(r1, shift, f);
-
- temp[i][0] = a[0] ^ e[0] ^ ((b[0] >> thirteen) & mask11) ^ f[0] ^ (r2[0] << fifteen);
- temp[i][1] = a[1] ^ e[1] ^ ((b[1] >> thirteen) & mask12) ^ f[1] ^ (r2[1] << fifteen);
- temp[i][2] = a[2] ^ e[2] ^ ((b[2] >> thirteen) & mask13) ^ f[2] ^ (r2[2] << fifteen);
- temp[i][3] = a[3] ^ e[3] ^ ((b[3] >> thirteen) & mask14) ^ f[3] ^ (r2[3] << fifteen);
-
- }
-
- for(c = 0; c < 4; ++c) {
- temp1[c] = temp[0][c] * one / intMax;
- temp2[c] = temp[1][c] * one / intMax;
- }
-
- for(c = 0; c < 4; ++c) {
- // Applying Box Mullar Transformations.
- r[c] = sqrt((-two) * log(temp1[c]));
- phi[c] = two * PI * temp2[c];
- gaussianRand1[c] = r[c] * cos(phi[c]);
- gaussianRand2[c] = r[c] * sin(phi[c]);
-
- nextRand[c] = temp[2][c];
- }
-}
-
-void
-calOutputs(float strikePrice,
- float* meanDeriv1,
- float* meanDeriv2,
- float* meanPrice1,
- float* meanPrice2,
- float* pathDeriv1,
- float* pathDeriv2,
- float* priceVec1,
- float* priceVec2)
-{
- float temp1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float temp2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float temp3[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float temp4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-
- float tempDiff1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float tempDiff2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-
- int c;
-
- for(c = 0; c < 4; ++c)
- {
- tempDiff1[c] = meanPrice1[c] - strikePrice;
- tempDiff2[c] = meanPrice2[c] - strikePrice;
- }
- if(tempDiff1[0] > 0.0f)
- {
- temp1[0] = 1.0f;
- temp3[0] = tempDiff1[0];
- }
- if(tempDiff1[1] > 0.0f)
- {
- temp1[1] = 1.0f;
- temp3[1] = tempDiff1[1];
- }
- if(tempDiff1[2] > 0.0f)
- {
- temp1[2] = 1.0f;
- temp3[2] = tempDiff1[2];
- }
- if(tempDiff1[3] > 0.0f)
- {
- temp1[3] = 1.0f;
- temp3[3] = tempDiff1[3];
- }
-
- if(tempDiff2[0] > 0.0f)
- {
- temp2[0] = 1.0f;
- temp4[0] = tempDiff2[0];
- }
- if(tempDiff2[1] > 0.0f)
- {
- temp2[1] = 1.0f;
- temp4[1] = tempDiff2[1];
- }
- if(tempDiff2[2] > 0.0f)
- {
- temp2[2] = 1.0f;
- temp4[2] = tempDiff2[2];
- }
- if(tempDiff2[3] > 0.0f)
- {
- temp2[3] = 1.0f;
- temp4[3] = tempDiff2[3];
- }
-
- for(c = 0; c < 4; ++c) {
- pathDeriv1[c] = meanDeriv1[c] * temp1[c];
- pathDeriv2[c] = meanDeriv2[c] * temp2[c];
- priceVec1[c] = temp3[c];
- priceVec2[c] = temp4[c];
- }
-}
-
-void cpuRef()
-{
-
- float timeStep = maturity / (noOfSum - 1);
-
- // Initialize random number generator
- srand(1);
- int i, j, k, c;
-
- for(k = 0; k < steps; k++) {
- float c1 = (interest - 0.5f * sigma[k] * sigma[k]) * timeStep;
- float c2 = sigma[k] * sqrt(timeStep);
- float c3 = (interest + 0.5f * sigma[k] * sigma[k]);
-
- for(j = 0; j < (width * height); j++) {
- unsigned int nextRand[4] = {0u, 0u, 0u, 0u};
- for (c = 0; c < 4; ++c)
- nextRand[c] = (cl_uint)rand();
-
- float trajPrice1[4] = {initPrice, initPrice, initPrice, initPrice};
- float sumPrice1[4] = {initPrice, initPrice, initPrice, initPrice};
- float sumDeriv1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float meanPrice1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float meanDeriv1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float price1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float pathDeriv1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-
- float trajPrice2[4] = {initPrice, initPrice, initPrice, initPrice};
- float sumPrice2[4] = {initPrice, initPrice, initPrice, initPrice};
- float sumDeriv2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float meanPrice2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float meanDeriv2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float price2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float pathDeriv2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-
- //Run the Monte Carlo simulation a total of Num_Sum - 1 times
- for(i = 1; i < noOfSum; i++) {
- unsigned int tempRand[4] = {0u, 0u, 0u, 0u};
- for(c = 0; c < 4; ++c)
- tempRand[c] = nextRand[c];
-
- float gaussian1[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float gaussian2[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- generateRand(tempRand, gaussian1, gaussian2, nextRand);
-
- //Calculate the trajectory price and sum price for all trajectories
- for(c = 0; c < 4; ++c) {
- trajPrice1[c] = trajPrice1[c] * exp(c1 + c2 * gaussian1[c]);
- trajPrice2[c] = trajPrice2[c] * exp(c1 + c2 * gaussian2[c]);
-
- sumPrice1[c] = sumPrice1[c] + trajPrice1[c];
- sumPrice2[c] = sumPrice2[c] + trajPrice2[c];
-
- float temp = c3 * timeStep * i;
-
- // Calculate the derivative price for all trajectories
- sumDeriv1[c] = sumDeriv1[c] + trajPrice1[c]
- * ((log(trajPrice1[c] / initPrice) - temp) / sigma[k]);
-
- sumDeriv2[c] = sumDeriv2[c] + trajPrice2[c]
- * ((log(trajPrice2[c] / initPrice) - temp) / sigma[k]);
- }
- }
-
- //Calculate the average price and \93average derivative\94 of each simulated path
- for(c = 0; c < 4; ++c) {
- meanPrice1[c] = sumPrice1[c] / noOfSum;
- meanPrice2[c] = sumPrice2[c] / noOfSum;
- meanDeriv1[c] = sumDeriv1[c] / noOfSum;
- meanDeriv2[c] = sumDeriv2[c] / noOfSum;
- }
-
- calOutputs(strikePrice, meanDeriv1, meanDeriv2, meanPrice1, meanPrice2,
- pathDeriv1, pathDeriv2, price1, price2);
-
- for(c = 0; c < 4; ++c) {
- priceVals[j * 8 + c] = price1[c];
- priceVals[j * 8 + 1 * 4 + c] = price2[c];
- priceDeriv[j * 8 + c] = pathDeriv1[c];
- priceDeriv[j * 8 + 1 * 4 + c] = pathDeriv2[c];
- }
- }
-
- /* Replace Following "for" loop with reduction kernel */
- for(i = 0; i < noOfTraj * noOfTraj; i++) {
- refPrice[k] += priceVals[i];
- refVega[k] += priceDeriv[i];
- }
-
- refPrice[k] /= (noOfTraj * noOfTraj);
- refVega[k] /= (noOfTraj * noOfTraj);
-
- refPrice[k] = exp(-interest * maturity) * refPrice[k];
- refVega[k] = exp(-interest * maturity) * refVega[k];
- }
-
- /* compare the results and see if they match */
- for(i = 0; i < steps; ++i)
- {
- if(fabs(price[i] - refPrice[i]) > 0.2f)
- {
- printf("Failed\n");
- exit(-1);
- }
- if(fabs(vega[i] - refVega[i]) > 0.2f)
- {
- printf("Failed\n");
- exit(-1);
- }
- }
- printf("Passed\n");
-}
-
-void verify()
-{
- int i;
- // comparef(d, c, MAX, 1.0e-6);
- cpuRef();
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-#include "cl_test.h"
-
-void verify();
-
-float *initPos, *initVel, *pos, *vel;
-int numBodies = 1024;
-float delT = 0.005f;
-float espSqr = 50.0f;
-int GROUP_SIZE = 256;
-int ITERATIONS = 1;
-
-int main(int argc, char**argv)
-{
- char *ker_path = NULL;
- struct args args = {0};
- int err, i;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
-#if TEST_SIMD8
- ker_path = do_kiss_path("nbody_kernels_0.bin8", device);
-#else
- ker_path = do_kiss_path("nbody_kernels_0.bin", device);
-#endif
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "nbody_sim");
-
- initPos = newBuffer(numBodies * sizeof(cl_float4), 0);
- initVel = newBuffer(numBodies * sizeof(cl_float4), 0);
- pos = newBuffer(numBodies * sizeof(cl_float4), 0);
- vel = newBuffer(numBodies * sizeof(cl_float4), 0);
-
- /* initialization of inputs */
- for (i = 0; i < numBodies; ++i) {
- int index = 4 * i;
- int j;
-
- // First 3 values are position in x,y and z direction
- for(j = 0; j < 3; ++j) {
- initPos[index + j] = randf2(3.0f, 50.0f);
- }
-
- // Mass value
- initPos[index + 3] = randf2(1.0, 1000.0);
-
- // First 3 values are velocity in x,y and z direction
- for (j = 0; j < 3; ++j) {
- initVel[index + j] = 0.0f;
- }
-
- // unused
- initVel[3] = 0.0f;
- }
-
- memcpy(pos, initPos, 4 * numBodies * sizeof(cl_float));
- memcpy(vel, initVel, 4 * numBodies * sizeof(cl_float));
-
- cl_mem curPosBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, numBodies * sizeof(cl_float4), pos, &err); CHK_ERR(err);
- cl_mem newPosBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, numBodies * sizeof(cl_float4), 0, &err); CHK_ERR(err);
- cl_mem curVelBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, numBodies * sizeof(cl_float4), vel, &err); CHK_ERR(err);
- cl_mem newVelBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, numBodies * sizeof(cl_float4), 0, &err); CHK_ERR(err);
-
- /* Execute */
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &curPosBuf);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &curVelBuf);
- err = clSetKernelArg(kernel, 2, sizeof(cl_int), &numBodies);
- err = clSetKernelArg(kernel, 3, sizeof(cl_float), &delT);
- err = clSetKernelArg(kernel, 4, sizeof(cl_float), &espSqr);
- err = clSetKernelArg(kernel, 5, GROUP_SIZE * 4 * sizeof(float), NULL);
- err = clSetKernelArg(kernel, 6, sizeof(cl_mem), &newPosBuf);
- err = clSetKernelArg(kernel, 7, sizeof(cl_mem), &newVelBuf);
-
- size_t globalThreads[] = {numBodies};
- size_t localThreads[] = {GROUP_SIZE};
-
- assert(ITERATIONS == 1);
- for (i = 0; i < ITERATIONS; i++) {
-
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); CHK_ERR(err);
-
- /* Copy data from new to old */
- // err = clEnqueueCopyBuffer(queue, newPosBuf, curPosBuf, 0, 0, sizeof(cl_float4) * numBodies, 0, 0, 0); CHK_ERR(err);
- // err = clEnqueueCopyBuffer(queue, newVelBuf, curVelBuf, 0, 0, sizeof(cl_float4) * numBodies, 0, 0, 0); CHK_ERR(err);
- // err = clFinish(queue); CHK_ERR(err);
-
- /* Enqueue readBuffer*/
- /* Wait for the read buffer to finish execution */
- // err = clEnqueueReadBuffer(queue, curPosBuf, CL_TRUE, 0, numBodies* sizeof(cl_float4), pos, 0, NULL, NULL); CHK_ERR(err);
- }
-
- pos = clIntelMapBuffer(newPosBuf, &err);
- verify();
- return 0;
-}
-
-void
-nBodyCPU(float* refPos, float* refVel)
-{
- int i, j, k;
-
- //Iterate for all samples
- for(i = 0; i < numBodies; ++i) {
- int myIndex = 4 * i;
- float acc[3] = {0.0f, 0.0f, 0.0f};
-
- for(j = 0; j < numBodies; ++j) {
- float r[3];
- int index = 4 * j;
-
- float distSqr = 0.0f;
- for(k = 0; k < 3; ++k) {
- r[k] = refPos[index + k] - refPos[myIndex + k];
- distSqr += r[k] * r[k];
- }
-
- float invDist = 1.0f / sqrt(distSqr + espSqr);
- float invDistCube = invDist * invDist * invDist;
- float s = refPos[index + 3] * invDistCube;
-
- for(k = 0; k < 3; ++k) {
- acc[k] += s * r[k];
- }
- }
-
- for(k = 0; k < 3; ++k) {
- refPos[myIndex + k] += refVel[myIndex + k] * delT + 0.5f * acc[k] * delT * delT;
- refVel[myIndex + k] += acc[k] * delT;
- }
- }
-}
-
-void verify()
-{
- int i;
-
- float* refPos = newBuffer(numBodies * sizeof(cl_float4), 0);
- float* refVel = newBuffer(numBodies * sizeof(cl_float4), 0);
- memcpy(refPos, initPos, 4 * numBodies * sizeof(cl_float));
- memcpy(refVel, initVel, 4 * numBodies * sizeof(cl_float));
-
- for (i = 0; i < ITERATIONS; i++) {
- nBodyCPU(refPos, refVel);
- }
-
- comparef(refPos, pos, numBodies * 4, 1.0e-4f);
- for (i = 0; i < 20; i++) {
- printf("%12.8f | %12.8f\n", refPos[i], pos[i]);
- }
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <CL/cl.h>
-#include <assert.h>
-
-#ifndef _WIN32
-#include "CL/cl_intel.h"
-#include "common.h"
-#endif
-
-#define USE_CL_SOURCE
-
-#ifdef USE_CL_SOURCE
-const char *progsrc =
-"__kernel void test1( \n" \
-" __global char* svm, \n" \
-" uint svmBase, \n" \
-" uint context) \n" \
-"{ \n" \
-" int i = get_global_id(0); \n" \
-" //__global int* ptr = (__global int*)(svm+context-svmBase); \n" \
-" //__global int* ptr = (__global int*)&svm[context-svmBase]; \n" \
-" svm -= svmBase; \n" \
-" __global int *ptr = (__global int *)&svm[context]; \n" \
-" ptr[i]=i; \n" \
-"} \n" \
-"\n";
-#else
-static const char* progsrc =
-" SHADER test1 \n"
-" VERSION_2_1 \n"
-" DCL_THREADGROUP VARIABLE; \n"
-" DCL_UAVRAW u0, TRUE = \n"
-" { \n"
-" KERNEL_ARGUMENT,0 \n"
-" }; \n"
-" DCL_CONSTANTREGISTER c0 = \n"
-" { \n"
-" KERNEL_ARGUMENT, 1, 0, \n"
-" KERNEL_ARGUMENT, 2, 0, \n"
-" UNUSED, 0, 0, \n"
-" UNUSED, 0, 0 \n"
-" }; \n"
-" DCL_INPUT i0.xyz, THREAD_ID; \n"
-" DCL_POINTER ptr1; \n"
-" DCL_TEMP r0; \n"
-" DCL_TEMP r1; \n"
-" DCL_POINTER ptr0; \n"
-" ADDRESS_OF ptr0, u0; \n"
-" DCL_TEMP r2; \n"
-" DCL_TEMP r3; \n"
-" DCL_TEMP r4; \n"
-" DCL_POINTER ptr2; \n"
-" DCL_TEMP r5; \n"
-" MOV r2.x, i0.x; \n"
-" PADD ptr1, ptr0,-c0.x; \n"
-" PADD ptr1, ptr1,c0.y;\n"
-" MOV r0.x, r2.x; \n"
-" MOV r5.x, r0.x; \n"
-" MOV r1.x, r5.x; \n"
-" SHL r3.x, r1.x, 2; \n"
-//" MOV r4.x, 0; \n"
-" PADD ptr2, ptr1, r3.x; \n"
-//" UADD r4.x, r4.x, 2; \n"
-//" STORE_RAW_PTR ptr2.x, r4.x; \n"
-" STORE_RAW_PTR ptr2.x, r2.x; \n"
-" RET; \n"
-" END \n"
-;
-#endif
-
-const char *src;
-const char *kernel_name;
-
-#define PAGE_SIZE (4 << 10)
-#define PAGE_ALIGNMENT_MASK (~(PAGE_SIZE-1))
-#define SVM_SIZE (128 << 20)
-
-cl_device_id device_id; // device ID
-cl_context context; // context
-cl_command_queue queue; // command queue
-char* svmBase = NULL;
-cl_mem svmMemObject;
-
-char *load_program_source(const char *filename)
-{
- FILE *fh;
- struct stat statbuf;
- char *source;
-
- if (!(fh=fopen(filename,"r")))
- return NULL;
-
- stat(filename, &statbuf);
- source = (char *)malloc(statbuf.st_size+1);
- fread(source, statbuf.st_size,1,fh);
- source[statbuf.st_size]=0;
- return source;
-}
-
-int init() {
- cl_uint num_devices;
- char device_name[1024];
- cl_int err;
- cl_platform_id platform;
-
- err = clGetPlatformIDs(1, &platform, NULL);
- if (err != CL_SUCCESS) {
- fprintf(stderr, "Error: clGetPlatformIDs failed. Error code = %d.\n", err);
- return err;
- }
-
- // Get an ID for the device
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1,
- &device_id, &num_devices);
- if (err != CL_SUCCESS)
- {
- printf("Error: clGetDeviceIDs");
- return err;
- }
- printf("num_gpu_devices: %d\n",num_devices);
- err = clGetDeviceInfo(device_id,
- CL_DEVICE_NAME,
- 1024,
- device_name,
- NULL);
- if (err != CL_SUCCESS) {
- printf("Error: clGetDeviceInfo %d\n",err);
- return err;
- }
-
- printf("device name is %s\n",device_name);
- context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
- if (!context)
- {
- printf("Error: clCreateContext. error code=%d\n",err);
- return err;
- }
-
- // Create a command queue [5]
- //
- queue = clCreateCommandQueue(context, device_id, 0, &err);
- if (!queue)
- {
- printf("Error:clCreateCommandQueue %d",err);
- return err;
- }
-#ifdef _WIN32
- svmBase = malloc(SVM_SIZE+PAGE_SIZE);
- (cl_uint)svmBase &= PAGE_ALIGNMENT_MASK;
- svmMemObject = clCreateBuffer(context, CL_MEM_READ_WRITE |
- CL_MEM_USE_HOST_PTR,
- (size_t)SVM_SIZE,
- svmBase,
- &err);
- if (err != CL_SUCCESS) {
- printf("Error: clCreateBuffer of SVM: %d\n",err);
- return err;
- }
-#else // Linux
- svmMemObject = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_PINNABLE,
- SVM_SIZE, NULL, &err);
- if (err != CL_SUCCESS) {
- printf("Error: clCreateBuffer of SVM: %d\n",err);
- return err;
- }
- if ((err = clIntelPinBuffer(svmMemObject)) != CL_SUCCESS) {
- printf("Error: clIntelPinBuffer: %d\n",err);
- return err;
- }
- svmBase = clIntelMapBuffer(svmMemObject,&err);
- if (err != CL_SUCCESS) {
- printf("Error: clIntelMapBuffer: %d\n",err);
- return err;
- }
-#endif
-
- printf("initializing svm with zeros\n");
- memset(svmBase,0,SVM_SIZE);
- return err;
-}
-
-int test2() {
- size_t global; // global domain size for our calculation
- size_t local; // local domain size for our calculation
- int err; // error code returned from api calls
- cl_kernel kernel;
-#ifndef _WIN32
- /*cl_event event;*/
-#endif
- int i;
- char *start;
- size_t offset;
- int failed;
-
-#ifdef _WIN32
- // Create the compute program from the source buffer [6]
- //
- cl_program program = clCreateProgramWithSource(context, 1,
- (const char **) & src, NULL, &err);
- if (!program)
- {
- printf("Error:clCreateProgramWithSource");
- return 0;
- }
- // Build the program executable [7]
- //
- err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- size_t len;
- char buffer[2048];
- //printf("Error: Failed to build program executable\n"); [8]
- clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
- sizeof(buffer), buffer, &len);
- printf("%s\n", buffer);
- exit(1);
- }
-#else
- char *ker_path = NULL;
-#endif
-
- // Create the compute kernel in the program we wish to run [9]
- //
-#ifdef _WIN32
- kernel = clCreateKernel(program, kernel_name, &err);
- if (!kernel || err != CL_SUCCESS)
- {
- printf("Error: clCreateKernel");
- return 0;
- }
-#else
- ker_path = do_kiss_path("svm_test_kernel_0.bin", device_id);
- kernel = getKernelFromBinary(device_id, context, ker_path, "test1");
-#endif
- // Set the arguments to our compute kernel [12]
- //
- //offset = SVM_SIZE >> 1;
- offset = 0;
- //offset = 16;
- //offset = 1;
- //global = (64 << 20) / sizeof(int);
- //global = SVM_SIZE / sizeof(int);
- //global = (SVM_SIZE >> 1) / sizeof(int);
- global = 1<<10;
- start = svmBase + offset;
- err = 0;
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &svmMemObject);
- err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), &svmBase);
- err |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &start);
- if (err != CL_SUCCESS)
- {
- printf("Error:clSetKernelArg %d",err);
- return 0;
- }
-
-#if 0
- // Get the maximum work-group size for executing the kernel on the device
- // [13]
- err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(size_t), &local, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error:clGetKernelWorkGroupInfo: %d",err);
- return 0;
- }
- printf("workgroup size is %d\n",(int)local);
-#endif
- local = 16;
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
- &global, &local /*NULL*/,
- 0, NULL, NULL /*&event*/);
- if (err != CL_SUCCESS)
- {
- printf("Error:clEnqueueNDRangeKernel: %d",err);
- return 0;
- }
-
- clFinish(queue);
-
- failed = 0;
- for (i=0;i<(int)global;i++) {
- if (((int*)start)[i]!=i) {
- printf("svmBase[%d]=%d.\n",(int) i+(int) offset,(int) start[i]);
- failed = 1;
- }
- }
-
- printf(failed?"Test failed\n":"Test passed.\n");
-
- return 0;
-}
-
-
-int main(int argc, char** argv)
-{
- //src = load_program_source(argv[1]);
- src = progsrc;
- //kernel_name = argv[2];
- kernel_name = "test1";
- if (init() != CL_SUCCESS)
- return -1;
- test2();
- return 0;
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-typedef union kernel_arg {
- float f;
- uint32_t u32;
- uint16_t u16;
- uint8_t u8;
- int32_t i32;
- int16_t i16;
- int8_t i8;
-} kernel_arg_t;
-
-#define W 32
-#define H 32
-
-int
-main (int argc, char *argv[])
-{
- cl_mem dst0, dst1, src;
- uint32_t *src_buffer = NULL;
- IF_DEBUG (uint32_t *dst0_buffer = NULL);
- IF_DEBUG (uint32_t *dst1_buffer = NULL);
- const size_t n = W*H;
- const size_t global_work_size[2] = {W, H};
- const size_t local_work_size[2] = {8,8};
- kernel_arg_t arg;
- int status = 0, i, j;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_2d_copy_kernels_0.bin8", "test_2d_copy")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_2d_copy_kernels_0.bin", "test_2d_copy")) != 0)
- goto error;
-#endif
-
- /* Fill the buffer with random values */
- if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < n; ++i)
- src_buffer[i] = rand();
-
- /* Allocate the two buffers */
- dst0 = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
- dst1 = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
- src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status);
- if (status != CL_SUCCESS)
- goto error;
- free(src_buffer);
-
- /* Set source and destination */
- arg.i32 = W;
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst0);
- CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst1);
- CALL (clSetKernelArg, kernel, 2, sizeof(cl_mem), &src);
- CALL (clSetKernelArg, kernel, 3, sizeof(float), &arg);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 2,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst0_buffer = (uint32_t *) clIntelMapBuffer(dst0, &status);
- if (status != CL_SUCCESS)
- goto error;
- dst1_buffer = (uint32_t *) clIntelMapBuffer(dst1, &status);
- if (status != CL_SUCCESS)
- goto error;
-#endif /* NDEBUG */
- src_buffer = (uint32_t *) clIntelMapBuffer(src, &status);
- if (status != CL_SUCCESS)
- goto error;
-
- for (j = 0; j < H; ++j)
- for (i = 0; i < W; ++i) {
- assert(dst0_buffer[i+j*W] == src_buffer[i+j*W]);
- assert(dst1_buffer[i+j*W] == i+j);
- }
- IF_DEBUG(CALL (clIntelUnmapBuffer, dst0));
- IF_DEBUG(CALL (clIntelUnmapBuffer, dst1));
- CALL (clIntelUnmapBuffer, src);
- CALL (clReleaseMemObject, dst0);
- CALL (clReleaseMemObject, dst1);
- CALL (clReleaseMemObject, src);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-typedef union kernel_arg {
- float f;
- uint32_t u32;
- uint16_t u16;
- uint8_t u8;
- int32_t i32;
- int16_t i16;
- int8_t i8;
-} kernel_arg_t;
-
-#define N 1024
-int
-main (int argc, char *argv[])
-{
- cl_mem dst, src;
- IF_DEBUG(uint32_t *dst_buffer = NULL);
- uint32_t *src_buffer = NULL;
- const size_t n = N;
- const size_t global_work_size = N;
- const size_t local_work_size = 256;
- kernel_arg_t arg;
- int status = 0, i;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_barrier_kernels_0.bin8", "test_barrier")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_barrier_kernels_0.bin", "test_barrier")) != 0)
- goto error;
-#endif
-
- /* Fill the buffer with random values */
- if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < n; ++i)
- src_buffer[i] = i;
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
- src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status);
- if (status != CL_SUCCESS)
- goto error;
- free(src_buffer);
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst);
- CALL (clSetKernelArg, kernel, 4, sizeof(cl_mem), &src);
- arg.f = 1.f;
- CALL (clSetKernelArg, kernel, 1, sizeof(float), &arg);
- arg.i8 = 2;
- CALL (clSetKernelArg, kernel, 2, sizeof(int8_t), &arg);
- arg.i32 = 3;
- CALL (clSetKernelArg, kernel, 3, sizeof(int32_t), &arg);
- arg.i16 = 4;
- CALL (clSetKernelArg, kernel, 5, sizeof(int16_t), &arg);
- arg.u32 = 5;
- CALL (clSetKernelArg, kernel, 6, sizeof(uint32_t), &arg);
- arg.i32 = 6;
- CALL (clSetKernelArg, kernel, 7, sizeof(int32_t), &arg);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
-#endif
- src_buffer = (uint32_t *) clIntelMapBuffer(src, &status);
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < N; ++i)
- assert(dst_buffer[i] == src_buffer[i] + 21);
-
- IF_DEBUG(CALL (clIntelUnmapBuffer, dst));
- CALL (clIntelUnmapBuffer, src);
- CALL (clReleaseMemObject, dst);
- CALL (clReleaseMemObject, src);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifndef NDEBUG
-static const uint32_t wrk_value[32] = {
- 71, 69, 67, 65, 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35,
- 33, 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9
-};
-#endif /* NDEBUG */
-
-#define N 512
-int
-main (int argc, char *argv[])
-{
- cl_mem dst, constants;
- IF_DEBUG(uint32_t *dst_buffer = NULL);
- uint32_t *cst_buffer = NULL;
- const size_t n = N;
- const size_t global_work_size = N;
- const size_t local_work_size = 32;
- int status = 0, i;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_constant_memory_0.bin8", "test_constant_memory")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_constant_memory_0.bin", "test_constant_memory")) != 0)
- goto error;
-#endif
-
- /* Allocate the dst buffer */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
-
- /* Allocate the constant buffer and fill it with ones */
- if ((cst_buffer = (uint32_t*) malloc(sizeof(uint32_t) * 32)) == NULL) {
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < 32; ++i)
- cst_buffer[i] = 1;
-
- constants = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR,
- 32 * sizeof(uint32_t),
- cst_buffer,
- &status);
- if (status != CL_SUCCESS)
- goto error;
- free(cst_buffer);
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst);
- CALL (clSetKernelArg, kernel, 1, 32*sizeof(int), NULL);
- CALL (clSetKernelArg, kernel, 2, sizeof(cl_mem), &constants);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < N; ++i)
- assert(dst_buffer[i] == wrk_value[i % 32]);
- CALL (clIntelUnmapBuffer, dst);
-#endif /* NDEBUG */
-
- CALL (clReleaseMemObject, dst);
- CALL (clReleaseMemObject, constants);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-int
-main (int argc, char *argv[])
-{
- cl_mem dst, src;
- IF_DEBUG(uint32_t *dst_buffer = NULL);
- uint32_t *src_buffer = NULL;
- const size_t w = 64, h = 64;
- const size_t n = w * h;
- const size_t global_work_size[2] = {w,h};
- const size_t local_work_size[2] = {16,1};
- int status = 0, i;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_copy_image_0.bin8", "test_copy_image")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_copy_image_0.bin", "test_copy_image")) != 0)
- goto error;
-#endif
-
- /* Fill the buffer with random values */
- if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < n; ++i)
- src_buffer[i] = i;
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
- const cl_image_format fmt = {
- .image_channel_order = CL_RGBA,
- .image_channel_data_type = CL_UNSIGNED_INT8
- };
- src = clCreateImage2D(ctx,
- CL_MEM_COPY_HOST_PTR,
- &fmt,
- w,
- h,
- w * sizeof(uint32_t),
- src_buffer,
- &status);
- if (status != CL_SUCCESS)
- goto error;
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src);
- CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 2,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < n; ++i) assert(src_buffer[i] == dst_buffer[i]);
- CALL (clIntelUnmapBuffer, dst);
-#endif /* NDEBUG */
-
- free(src_buffer);
- CALL (clReleaseMemObject, dst);
- CALL (clReleaseMemObject, src);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-int
-main (int argc, char *argv[])
-{
- cl_mem dst, src;
- uint32_t *dst_buffer = NULL, *src_buffer = NULL;
- const size_t n = 16;
- const size_t global_work_size = n;
- const size_t local_work_size = 16;
- int status = 0, i;
-
- if ((status = cl_test_init("CopyBuffer_0.bin", "CopyBuffer")) != 0)
- goto error;
-
- /* Fill the buffer with random values */
- if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- if ((dst_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < n; ++i)
- src_buffer[i] = i;
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR | CL_MEM_WRITE_ONLY, n * sizeof(uint32_t), dst_buffer, &status);
- if (status != CL_SUCCESS)
- goto error;
- src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, n * sizeof(uint32_t), src_buffer, &status);
- if (status != CL_SUCCESS)
- goto error;
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src);
- CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
- CALL (clEnqueueReadBuffer, queue,
- dst,
- CL_TRUE,
- 0,
- n* sizeof(uint32_t),
- (void *)dst_buffer,
- 0,
- NULL,
- NULL );
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < n; ++i) {
- printf("src_buffer[%d]:[%d], dst_buffer[%d]:[%d]\n",
- i, src_buffer[i], i, dst_buffer[i]);
-// assert(src_buffer[i] == dst_buffer[i]);
- }
-
- free(src_buffer);
- free(dst_buffer);
- CALL (clReleaseMemObject, dst);
- CALL (clReleaseMemObject, src);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-typedef union kernel_arg {
- float f;
- uint32_t u32;
- uint16_t u16;
- uint8_t u8;
- int32_t i32;
- int16_t i16;
- int8_t i8;
-} kernel_arg_t;
-
-int
-main (int argc, char *argv[])
-{
- cl_mem dst, src;
- IF_DEBUG(uint32_t *dst_buffer = NULL);
- uint32_t *src_buffer = NULL;
- const size_t n = 6*16;
- const size_t global_work_size = 16;
- const size_t local_work_size = 16;
- kernel_arg_t arg;
- int status = 0, i, j;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_imm_parameters_kernels_0.bin8", "test_imm_parameters")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_imm_parameters_kernels_0.bin", "test_imm_parameters")) != 0)
- goto error;
-#endif
-
- /* Fill the buffer with random values */
- if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < n; ++i)
- src_buffer[i] = i;
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
- src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status);
- if (status != CL_SUCCESS)
- goto error;
- free(src_buffer);
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst);
- CALL (clSetKernelArg, kernel, 4, sizeof(cl_mem), &src);
- arg.f = 1.f; CALL (clSetKernelArg, kernel, 1, sizeof(float), &arg);
- arg.i8 = 2; CALL (clSetKernelArg, kernel, 2, sizeof(int8_t), &arg);
- arg.i32 = 3; CALL (clSetKernelArg, kernel, 3, sizeof(int32_t), &arg);
- arg.i16 = 4; CALL (clSetKernelArg, kernel, 5, sizeof(int16_t), &arg);
- arg.u32 = 5; CALL (clSetKernelArg, kernel, 6, sizeof(uint32_t), &arg);
- arg.i32 = 6; CALL (clSetKernelArg, kernel, 7, sizeof(int32_t), &arg);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
-#endif /* NDEBUG */
-
- src_buffer = (uint32_t *) clIntelMapBuffer(src, &status);
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < 16; ++i)
- for (j = 0; j < 6; ++j)
- assert(dst_buffer[6*i+j] == src_buffer[i] + 1 + j);
-
- IF_DEBUG(CALL (clIntelUnmapBuffer, dst));
- CALL (clIntelUnmapBuffer, src);
- CALL (clReleaseMemObject, dst);
- CALL (clReleaseMemObject, src);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifndef NDEBUG
-static const uint32_t wrk_value[32] = {
- 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26,
- 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
-};
-#endif /* NDEBUG */
-
-#define N 1024
-int
-main (int argc, char *argv[])
-{
- cl_mem dst;
- IF_DEBUG(uint32_t *dst_buffer = NULL);
- const size_t n = N;
- const size_t global_work_size = N;
- const size_t local_work_size = 32;
- int status = 0;
-#ifndef NDEBUG
- int i;
-#endif /* NDEBUG */
-
-#if 0 //TEST_SIMD8
- if ((status = cl_test_init("test_local_memory_0.bin8", "test_local_memory")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_local_memory_0.bin", "test_local_memory")) != 0)
- goto error;
-#endif
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst);
- CALL (clSetKernelArg, kernel, 1, 32*sizeof(int), NULL);
- CALL (clSetKernelArg, kernel, 2, 32*sizeof(int), NULL);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < N; ++i)
- assert(dst_buffer[i] == wrk_value[i % 32]);
- CALL (clIntelUnmapBuffer, dst);
-#endif /* NDEBUG */
-
- CALL (clReleaseMemObject, dst);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-int
-main (int argc, char *argv[])
-{
- cl_mem dst, src;
- uint32_t *src_buffer = NULL;
- const size_t n = 1024;
- //const size_t global_work_size = 1024;
- //const size_t local_work_size = 16;
- int status = 0, i;
-
- if ((status = cl_test_init("CopyBuffer_0.bin", "CopyBuffer")) != 0)
- goto error;
-
- /* Fill the buffer with random values */
- if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) {
- fprintf(stderr, "Allocation failed\n");
- status = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- for (i = 0; i < n; ++i)
- src_buffer[i] = rand();
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
- src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status);
- if (status != CL_SUCCESS)
- goto error;
- free(src_buffer);
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src);
- CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst);
-
- /* Release the buffers we allocate here */
- cl_test_destroy();
- CALL (clReleaseMemObject, dst);
- CALL (clReleaseMemObject, src);
- printf("%i memory leaks\n", clIntelReportUnfreed());
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifndef NDEBUG
-static const uint32_t wrk_value[32] = {
- 64, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26,
- 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
-};
-#endif /* NDEBUG */
-
-#define N 16
-int
-main (int argc, char *argv[])
-{
- cl_mem dst;
- uint32_t *dst_buffer = NULL;
- const size_t n = N;
- const size_t global_work_size = N;
- const size_t local_work_size = 16;
- int status = 0;
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_private_memory_0.bin8", "test_private_memory")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_private_memory_0.bin", "test_private_memory")) != 0)
- goto error;
-#endif
-
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
- int i;
- for (i = 0; i < N; ++i)
- //assert(dst_buffer[i] == wrk_value[i % 32]);
- //printf("[%u %u]", (dst_buffer[i] >> 8)& 0xf, dst_buffer[i] & 0x3);
- printf("[%u]", dst_buffer[i]);
- assert(dst_buffer[0] == 496);
- assert(dst_buffer[1] == 528);
- assert(dst_buffer[2] == 560);
- assert(dst_buffer[3] == 592);
- assert(dst_buffer[4] == 624);
- assert(dst_buffer[5] == 656);
- assert(dst_buffer[6] == 688);
- assert(dst_buffer[7] == 720);
- assert(dst_buffer[8] == 752);
- assert(dst_buffer[9] == 784);
- assert(dst_buffer[10] == 816);
- assert(dst_buffer[11] == 848);
- assert(dst_buffer[12] == 880);
- assert(dst_buffer[13] == 912);
- assert(dst_buffer[14] == 944);
- assert(dst_buffer[15] == 976);
-
- CALL (clIntelUnmapBuffer, dst);
- CALL (clReleaseMemObject, dst);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "cl_test.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifndef NDEBUG
-static const uint32_t wrk_value[32] = {
- 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26,
- 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
-};
-#endif /* NDEBUG */
-
-#define N 544
-int
-main (int argc, char *argv[])
-{
- cl_mem dst;
- IF_DEBUG(uint32_t *dst_buffer = NULL);
- const size_t n = N;
- const size_t global_work_size = N;
- const size_t local_work_size = 32;
- int status = 0;
- IF_DEBUG(int i);
-
-#if TEST_SIMD8
- if ((status = cl_test_init("test_static_local_memory_0.bin8", "test_static_local_memory")) != 0)
- goto error;
-#else
- if ((status = cl_test_init("test_static_local_memory_0.bin", "test_static_local_memory")) != 0)
- goto error;
-#endif
-
- /* Allocate the two buffers */
- dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
- if (status != CL_SUCCESS)
- goto error;
-
- /* Set source and destination */
- CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst);
-
- /* Run the kernel */
- CALL (clEnqueueNDRangeKernel, queue,
- kernel,
- 1,
- NULL,
- &global_work_size,
- &local_work_size,
- 0,
- NULL,
- NULL);
-
- /* Be sure that everything run fine */
-#ifndef NDEBUG
- dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status);
- if (status != CL_SUCCESS)
- goto error;
- for (i = 0; i < N; ++i)
- assert(dst_buffer[i] == wrk_value[i % 32]);
- CALL (clIntelUnmapBuffer, dst);
-#endif /* NDEBUG */
-
- CALL (clReleaseMemObject, dst);
- cl_test_destroy();
- printf("%i memory leaks\n", clIntelReportUnfreed());
- assert(clIntelReportUnfreed() == 0);
-
-error:
- cl_report_error(status);
- return status;
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-float *input, *output, *refout;
-int width = 128;
-int height = 128;
-int blockSize = 16;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err, i;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernel(device, context, "transpose_kernels.cl", "matrixTranspose");
-
- input = newBuffer(width * height * sizeof(cl_float), 'p');
- output = newBuffer(width * height * sizeof(cl_float), '0');
- refout = newBuffer(width * height * sizeof(cl_float), '0');
-
-
- cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_float) * width * height, input, &err); CHK_ERR(err);
- cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
- sizeof(cl_float) * width * height, output, &err); CHK_ERR(err);
-
- /* Execute */
- size_t globalThreads[2] = {width, height};
- size_t localThreads [2] = {blockSize, blockSize};
-
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &outputBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_float)*blockSize*blockSize, NULL); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(cl_int), &width); CHK_ERR(err);
- err = clSetKernelArg(kernel, 4, sizeof(cl_int), &height); CHK_ERR(err);
- err = clSetKernelArg(kernel, 5, sizeof(cl_int), &blockSize); CHK_ERR(err);
-
- cl_event eND;
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads,
- 0, NULL, &eND);
- CHK_ERR(err);
-
- err = clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, width * height * sizeof(cl_float), output,
- 0, NULL, &eND);
- CHK_ERR(err);
-
- verify();
-}
-void transposeCPU(float *output, float *input, int width, int height)
-{
- int i, j;
-
- for(j=0; j < height; j++)
- for(i=0; i < width; i++)
- output[i*height + j] = input[j*width + i];
-}
-
-
-void verify()
-{
- int i;
- transposeCPU(refout, input, width, height);
- comparef(refout, output, width * height, 1e-5f);
- for (i=0; i<20; i++) {
- printf("%12.8f %12.8f\n", refout[i], input[i]);
- }
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-static cl_uchar4 *inputImageData, *outputImageData, *refOut;
-static int *pixelData;
-static int width;
-static int height;
-static int pixelSize = sizeof(cl_uchar4);
-static int factor = 25.0;
-static int blockSizeX = 64;
-static int blockSizeY = 1;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err;
- char *ker_path = NULL;
- char *input_path = NULL;
- char *output_path = NULL;
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- ker_path = do_kiss_path("urng_kernels_0.bin", device);
- input_path = do_kiss_path("urng_input.bmp", NULL);
- output_path = do_kiss_path("urng_output.bmp", NULL);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "noise_uniform");
-
- pixelData = readBmp(input_path, &width, &height);
- inputImageData = (cl_uchar4*) newBuffer(width * height * pixelSize, 0);
- outputImageData = (cl_uchar4*) newBuffer(width * height * pixelSize, 0);
- refOut = (cl_uchar4*) newBuffer(width * height * pixelSize, 0);
- memcpy(inputImageData, pixelData, width * height * pixelSize);
-
- cl_mem inputImageBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, width * height * pixelSize, inputImageData, &err); CHK_ERR(err);
- cl_mem outputImageBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, width * height * pixelSize,
- outputImageData, &err); CHK_ERR(err);
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImageBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImageBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(factor), &factor); CHK_ERR(err);
-
- size_t globalThreads[] = {width, height};
- size_t localThreads[] = {blockSizeX, blockSizeY};
-
- err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
- CHK_ERR(err);
- outputImageData = clIntelMapBuffer(outputImageBuffer, &err);
- CHK_ERR(err);
-
- writeBmp((int *) outputImageData, width, height, output_path);
- verify();
- clIntelUnmapBuffer(outputImageBuffer);
- return err;
-}
-
-void verify()
-{
- int i;
- // comparef(d, c, MAX, 1.0e-6);
- float mean = 0.0f;
- for(i = 0; i < (int)(width * height); i++) {
- mean += outputImageData[i].s[0] - inputImageData[i].s[0];
- }
- mean /= (width * height * factor);
-
- if(fabs(mean) < 1.0) {
- printf("%12.8f Passed!\n", mean);
- } else {
- printf("%12.8f Failed!\n", mean);
- }
- printf("Done\n");
-}
-
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#include "common.h"
-void verify();
-
-#define MAX 1000000
-float *a, *b, *c;
-
-int main(int argc, char**argv)
-{
- struct args args = {0};
- int err, i;
-
- parseArgs(argc, argv, &args);
-
- cl_device_id device = getDeviceID(args.d);
- cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err);
- cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err);
- cl_kernel kernel = getKernel(device, context, "vadd_kernel.cl", "vadd_gpu");
-
- a = newBuffer(MAX * sizeof(float), 'f');
- b = newBuffer(MAX * sizeof(float), 'f');
- c = newBuffer(MAX * sizeof(float), '0');
-
- cl_mem aBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), a, &err); CHK_ERR(err);
- cl_mem bBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), b, &err); CHK_ERR(err);
- cl_mem cBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), c, &err); CHK_ERR(err);
-
- /* Execute */
- int gws = MAX;
- int lws = 16;
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &aBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &cBuffer); CHK_ERR(err);
- err = clSetKernelArg(kernel, 3, sizeof(size_t), &gws); CHK_ERR(err);
-
- cl_event eND;
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &gws, &lws, 0, NULL, &eND); CHK_ERR(err);
- err = clEnqueueReadBuffer(queue, cBuffer, CL_TRUE, 0, MAX*sizeof(float), c, 1, &eND, NULL); CHK_ERR(err);
-
- verify();
-}
-
-void verify()
-{
- int i;
- float *d;
-
- d = newBuffer(MAX * sizeof(float), '0');
- for (i = 0; i < MAX; i++) {
- d[i] = a[i] + b[i];
- }
- comparef(d, c, MAX, 1.0e-6);
- printf("Done\n");
-}
-