From: Benjamin Segovia Date: Thu, 3 May 2012 15:14:53 +0000 (+0000) Subject: tests -> utests X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1a9bcd8ff623a0b96cb034df711e4b02bfab8c6e;p=contrib%2Fbeignet.git tests -> utests --- diff --git a/CMakeLists.txt b/CMakeLists.txt index a7548d2..d70be5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,5 +98,5 @@ ENDIF(GBE_FOUND) # the run-time itself ADD_SUBDIRECTORY(src) -ADD_SUBDIRECTORY(tests) +ADD_SUBDIRECTORY(utests) diff --git a/tests/TODO/aes.c b/tests/TODO/aes.c deleted file mode 100644 index 065b216..0000000 --- a/tests/TODO/aes.c +++ /dev/null @@ -1,1982 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#if 0 -/* ============================================================ - -Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved. - -Redistribution and use of this material is permitted under the following -conditions: - -Redistributions must retain the above copyright notice and all terms of this -license. - -In no event shall anyone redistributing or accessing or using this material -commence or participate in any arbitration or legal action relating to this -material against Advanced Micro Devices, Inc. or any copyright holders or -contributors. The foregoing shall survive any expiration or termination of -this license or any agreement or access or use related to this material. - -ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION -OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. - -THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT -HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY -REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO -SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE -FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER -EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED -WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, -ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. -IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT -OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY -ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY -OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, -INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS -(US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS -THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND -ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, -OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE -FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE -CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR -DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR -CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE -THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL -SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR -ACCESS OR USE RELATED TO THIS MATERIAL. - -NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS -MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO -RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER -COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH -AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS -DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. -MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, -EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, -INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, -COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. -MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY -LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. - -NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is -provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to -computer software and technical data, respectively. Use, duplication, -distribution or disclosure by the U.S. Government and/or DOD agencies is -subject to the full extent of restrictions in all applicable regulations, -including those found at FAR52.227 and DFARS252.227 et seq. and any successor -regulations thereof. Use of this material by the U.S. Government and/or DOD -agencies is acknowledgment of the proprietary rights of any copyright holders -and contributors, including those of Advanced Micro Devices, Inc., as well as -the provisions of FAR52.227-14 through 23 regarding privately developed and/or -commercial computer software. - -This license forms the entire agreement regarding the subject matter hereof and -supersedes all proposals and prior discussions and writings between the parties -with respect thereto. This license does not affect any ownership, rights, title, -or interest in, or relating to, this material. No terms of this license can be -modified or waived, and no breach of this license can be excused, unless done -so in a writing signed by all affected parties. Each term of this license is -separately enforceable. If any term of this license is determined to be or -becomes unenforceable or illegal, such term shall be reformed to the minimum -extent necessary in order for this license to remain in effect in accordance -with its terms as modified by such reformation. This license shall be governed -by and construed in accordance with the laws of the State of Texas without -regard to rules on conflicts of law of any state or jurisdiction or the United -Nations Convention on the International Sale of Goods. All disputes arising out -of this license shall be subject to the jurisdiction of the federal and state -courts in Austin, Texas, and all defenses are hereby waived concerning personal -jurisdiction and venue of these courts. - -============================================================ */ - - -#include "AESEncryptDecrypt.hpp" - -using namespace AES; - -int AESEncryptDecrypt::setupAESEncryptDecrypt() -{ - cl_uint sizeBytes = width*height*sizeof(cl_uchar); - input = (cl_uchar*)malloc(sizeBytes); - if(input == NULL) - { - sampleCommon->error("Failed to allocate host memory. (input)"); - return SDK_FAILURE; - } - - /* initialize the input array, do NOTHING but assignment when decrypt*/ - int decrypt = 0; - if(!decrypt) - convertColorToGray(pixels, input); - else - convertGrayToGray(pixels, input); - - /* 1 Byte = 8 bits */ - keySize = keySizeBits/8; - - /* due to unknown represenation of cl_uchar */ - keySizeBits = keySize*sizeof(cl_uchar); - - key = (cl_uchar*)malloc(keySizeBits); - - /* random initialization of key */ - sampleCommon->fillRandom(key, keySize, 1, 0, 255, seed); - - /* expand the key */ - explandedKeySize = (rounds+1)*keySize; - expandedKey = (cl_uchar*)malloc(explandedKeySize*sizeof(cl_uchar)); - roundKey = (cl_uchar*)malloc(explandedKeySize*sizeof(cl_uchar)); - - keyExpansion(key, expandedKey, keySize, explandedKeySize); - for(cl_uint i=0; i< rounds+1; ++i) - { - createRoundKey(expandedKey + keySize*i, roundKey + keySize*i); - } - - output = (cl_uchar*)malloc(sizeBytes); - if(output == NULL) - { - sampleCommon->error("Failed to allocate host memory. (output)"); - return SDK_FAILURE; - } - - if(!quiet) - { - if(decrypt) - { - std::cout << "Decrypting Image ...." << std::endl; - } - else - { - std::cout << "Encrypting Image ...." << std::endl; - } - - std::cout << "Input Image : " << inFilename << std::endl; - std::cout << "Key : "; - for(cl_uint i=0; i < keySize; ++i) - { - std::cout << (cl_uint)key[i] << " "; - } - std::cout << std::endl; - } - - return SDK_SUCCESS; -} - -void -AESEncryptDecrypt::convertColorToGray(const uchar4 *pixels, cl_uchar *gray) -{ - for(cl_int i=0; i< height; ++i) - for(cl_int j=0; jcheckVal(status, - CL_SUCCESS, - "clGetPlatformIDs failed.")) - { - return SDK_FAILURE; - } - if (0 < numPlatforms) - { - cl_platform_id* platforms = new cl_platform_id[numPlatforms]; - status = clGetPlatformIDs(numPlatforms, platforms, NULL); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetPlatformIDs failed.")) - { - return SDK_FAILURE; - } - - char platformName[100]; - for (unsigned i = 0; i < numPlatforms; ++i) - { - status = clGetPlatformInfo(platforms[i], - CL_PLATFORM_VENDOR, - sizeof(platformName), - platformName, - NULL); - - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetPlatformInfo failed.")) - { - return SDK_FAILURE; - } - - platform = platforms[i]; - if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) - { - break; - } - } - std::cout << "Platform found : " << platformName << "\n"; - delete[] platforms; - } - - if(NULL == platform) - { - sampleCommon->error("NULL platform found so Exiting Application."); - return SDK_FAILURE; - } - - /* - * If we could find our platform, use it. Otherwise use just available platform. - */ - cl_context_properties cps[5] = - { - CL_CONTEXT_PLATFORM, - (cl_context_properties)platform, - CL_CONTEXT_OFFLINE_DEVICES_AMD, - (cl_context_properties)1, - 0 - }; - - context = clCreateContextFromtype(cps, - CL_DEVICE_TYPE_ALL, - NULL, - NULL, - &status); - - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clCreateContextFromtype failed.")) - { - return SDK_FAILURE; - } - - /* create a CL program using the kernel source */ - streamsdk::SDKFile kernelFile; - std::string kernelPath = sampleCommon->getPath(); - kernelPath.append("AESEncryptDecrypt_Kernels.cl"); - if(!kernelFile.open(kernelPath.c_str())) - { - std::cout << "Failed to load kernel file : " << kernelPath << std::endl; - return SDK_FAILURE; - } - const char * source = kernelFile.source().c_str(); - size_t sourceSize[] = {strlen(source)}; - program = clCreateProgramWithSource(context, - 1, - &source, - sourceSize, - &status); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clCreateProgramWithSource failed.")) - { - return SDK_FAILURE; - } - - /* create a cl program executable for all the devices specified */ - status = clBuildProgram(program, - 0, - NULL, - NULL, - NULL, - NULL); - - size_t numDevices; - status = clGetProgramInfo(program, - CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL ); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetProgramInfo(CL_PROGRAM_NUM_DEVICES) failed.")) - { - return SDK_FAILURE; - } - - std::cout << "Number of devices found : " << numDevices << "\n\n"; - devices = (cl_device_id *)malloc( sizeof(cl_device_id) * numDevices ); - if(devices == NULL) - { - sampleCommon->error("Failed to allocate host memory.(devices)"); - return SDK_FAILURE; - } - /* grab the handles to all of the devices in the program. */ - status = clGetProgramInfo(program, - CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL ); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetProgramInfo(CL_PROGRAM_DEVICES) failed.")) - { - return SDK_FAILURE; - } - - - /* figure out the sizes of each of the binaries. */ - size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); - if(devices == NULL) - { - sampleCommon->error("Failed to allocate host memory.(binarySizes)"); - return SDK_FAILURE; - } - - status = clGetProgramInfo(program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, - binarySizes, NULL); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES) failed.")) - { - return SDK_FAILURE; - } - - size_t i = 0; - /* copy over all of the generated binaries. */ - char **binaries = (char **)malloc( sizeof(char *) * numDevices ); - if(binaries == NULL) - { - sampleCommon->error("Failed to allocate host memory.(binaries)"); - return SDK_FAILURE; - } - - for(i = 0; i < numDevices; i++) - { - if(binarySizes[i] != 0) - { - binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]); - if(binaries[i] == NULL) - { - sampleCommon->error("Failed to allocate host memory.(binaries[i])"); - return SDK_FAILURE; - } - } - else - { - binaries[i] = NULL; - } - } - status = clGetProgramInfo(program, - CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, - binaries, - NULL); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetProgramInfo(CL_PROGRAM_BINARIES) failed.")) - { - return SDK_FAILURE; - } - - /* dump out each binary into its own separate file. */ - for(i = 0; i < numDevices; i++) - { - char fileName[100]; - sprintf(fileName, "%s.%d", dumpBinary.c_str(), (int)i); - if(binarySizes[i] != 0) - { - char deviceName[1024]; - status = clGetDeviceInfo(devices[i], - CL_DEVICE_NAME, - sizeof(deviceName), - deviceName, - NULL); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetDeviceInfo(CL_DEVICE_NAME) failed.")) - { - return SDK_FAILURE; - } - - printf( "%s binary kernel: %s\n", deviceName, fileName); - streamsdk::SDKFile BinaryFile; - if(!BinaryFile.writeBinaryToFile(fileName, - binaries[i], - binarySizes[i])) - { - std::cout << "Failed to load kernel file : " << fileName << std::endl; - return SDK_FAILURE; - } - } - else - { - printf("Skipping %s since there is no binary data to write!\n", - fileName); - } - } - - // Release all resouces and memory - for(i = 0; i < numDevices; i++) - { - if(binaries[i] != NULL) - { - free(binaries[i]); - binaries[i] = NULL; - } - } - - if(binaries != NULL) - { - free(binaries); - binaries = NULL; - } - - if(binarySizes != NULL) - { - free(binarySizes); - binarySizes = NULL; - } - - if(devices != NULL) - { - free(devices); - devices = NULL; - } - - status = clReleaseProgram(program); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clReleaseProgram failed.")) - { - return SDK_FAILURE; - } - - status = clReleaseContext(context); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clReleaseContext failed.")) - { - return SDK_FAILURE; - } - - return SDK_SUCCESS; -} - - -int -AESEncryptDecrypt::setupCL(void) -{ - cl_int status = 0; - size_t deviceListSize; - - cl_device_type dtype; - - if(devicetype.compare("cpu") == 0) - { - dtype = CL_DEVICE_TYPE_CPU; - } - else //devicetype = "gpu" - { - dtype = CL_DEVICE_TYPE_GPU; - } - - /* - * Have a look at the available platforms and pick either - * the AMD one if available or a reasonable default. - */ - - cl_uint numPlatforms; - cl_platform_id platform = NULL; - status = clGetPlatformIDs(0, NULL, &numPlatforms); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetPlatformIDs failed.")) - { - return SDK_FAILURE; - } - if (0 < numPlatforms) - { - cl_platform_id* platforms = new cl_platform_id[numPlatforms]; - status = clGetPlatformIDs(numPlatforms, platforms, NULL); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetPlatformIDs failed.")) - { - return SDK_FAILURE; - } - for (unsigned i = 0; i < numPlatforms; ++i) - { - char pbuf[100]; - status = clGetPlatformInfo(platforms[i], - CL_PLATFORM_VENDOR, - sizeof(pbuf), - pbuf, - NULL); - - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clGetPlatformInfo failed.")) - { - return SDK_FAILURE; - } - - platform = platforms[i]; - if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) - { - break; - } - } - delete[] platforms; - } - - if(NULL == platform) - { - sampleCommon->error("NULL platform found so Exiting Application."); - return SDK_FAILURE; - } - - // Display available devices. - if(!sampleCommon->displayDevices(platform, dtype)) - { - sampleCommon->error("sampleCommon::displayDevices() failed"); - return SDK_FAILURE; - } - - /* - * If we could find our platform, use it. Otherwise use just available platform. - */ - cl_context_properties cps[3] = - { - CL_CONTEXT_PLATFORM, - (cl_context_properties)platform, - 0 - }; - - context = clCreateContextFromtype( - cps, - dtype, - NULL, - NULL, - &status); - - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clCreateContextFromtype failed.")) - return SDK_FAILURE; - - /* First, get the size of device list data */ - status = clGetContextInfo( - context, - CL_CONTEXT_DEVICES, - 0, - NULL, - &deviceListSize); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetContextInfo failed.")) - return SDK_FAILURE; - - int devicecount = (int)(deviceListSize / sizeof(cl_device_id)); - if(!sampleCommon->validateDeviceId(deviceId, devicecount)) - { - sampleCommon->error("sampleCommon::validateDeviceId() failed"); - return SDK_FAILURE; - } - - /* Now allocate memory for device list based on the size we got earlier */ - devices = (cl_device_id *)malloc(deviceListSize); - if(devices == NULL) - { - sampleCommon->error("Failed to allocate memory (devices)."); - return SDK_FAILURE; - } - - /* Now, get the device list data */ - status = clGetContextInfo( - context, - CL_CONTEXT_DEVICES, - deviceListSize, - devices, - NULL); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetGetContextInfo failed.")) - return SDK_FAILURE; - - /* Get Device specific Information */ - status = clGetDeviceInfo( - devices[deviceId], - CL_DEVICE_MAX_WORK_GROUP_SIZE, - sizeof(size_t), - (void *)&maxWorkGroupSize, - NULL); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed.")) - return SDK_FAILURE; - - - status = clGetDeviceInfo( - devices[deviceId], - CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - sizeof(cl_uint), - (void *)&maxDimensions, - NULL); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed.")) - return SDK_FAILURE; - - - maxWorkItemSizes = (size_t *)malloc(maxDimensions*sizeof(size_t)); - - status = clGetDeviceInfo( - devices[deviceId], - CL_DEVICE_MAX_WORK_ITEM_SIZES, - sizeof(size_t)*maxDimensions, - (void *)maxWorkItemSizes, - NULL); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed.")) - return SDK_FAILURE; - - - status = clGetDeviceInfo( - devices[deviceId], - CL_DEVICE_LOCAL_MEM_SIZE, - sizeof(cl_ulong), - (void *)&totalLocalMemory, - NULL); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZES failed.")) - return SDK_FAILURE; - - - { - /* The block is to move the declaration of prop closer to its use */ - cl_command_queue_properties prop = 0; - if(timing) - prop |= CL_QUEUE_PROFILING_ENABLE; - - commandQueue = clCreateCommandQueue( - context, - devices[deviceId], - prop, - &status); - if(!sampleCommon->checkVal( - status, - 0, - "clCreateCommandQueue failed.")) - return SDK_FAILURE; - } - inputBuffer = clCreateBuffer( - context, - CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * width * height, - input, - &status); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clCreateBuffer failed. (inputBuffer)")) - return SDK_FAILURE; - - outputBuffer = clCreateBuffer( - context, - CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * width * height, - output, - &status); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clCreateBuffer failed. (outputBuffer)")) - return SDK_FAILURE; - - rKeyBuffer = clCreateBuffer( - context, - CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * explandedKeySize, - roundKey, - &status); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clCreateBuffer failed. (rKeyBuffer)")) - return SDK_FAILURE; - - cl_uchar * sBox; - sBox = (cl_uchar *)sbox; - sBoxBuffer = clCreateBuffer( - context, - CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * 256, - sBox, - &status); - - cl_uchar * rsBox; - rsBox = (cl_uchar *)rsbox; - rsBoxBuffer = clCreateBuffer( - context, - CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * 256, - rsBox, - &status); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clCreateBuffer failed. (sBoxBuffer)")) - return SDK_FAILURE; - - /* create a CL program using the kernel source */ - streamsdk::SDKFile kernelFile; - std::string kernelPath = sampleCommon->getPath(); - - if(isLoadBinaryEnabled()) - { - kernelPath.append(loadBinary.c_str()); - if(!kernelFile.readBinaryFromFile(kernelPath.c_str())) - { - std::cout << "Failed to load kernel file : " << kernelPath << std::endl; - return SDK_FAILURE; - } - - const char * binary = kernelFile.source().c_str(); - size_t binarySize = kernelFile.source().size(); - program = clCreateProgramWithBinary(context, - 1, - &devices[deviceId], - (const size_t *)&binarySize, - (const unsigned char**)&binary, - NULL, - &status); - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clCreateProgramWithBinary failed.")) - { - return SDK_FAILURE; - } - - } - else - { - kernelPath.append("AESEncryptDecrypt_Kernels.cl"); - if(!kernelFile.open(kernelPath.c_str())) - { - std::cout << "Failed to load kernel file: " << kernelPath << std::endl; - return SDK_FAILURE; - } - const char * source = kernelFile.source().c_str(); - size_t sourceSize[] = { strlen(source) }; - program = clCreateProgramWithSource( - context, - 1, - &source, - sourceSize, - &status); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clCreateProgramWithSource failed.")) - return SDK_FAILURE; - } - - /* create a cl program executable for all the devices specified */ - status = clBuildProgram(program, 1, &devices[deviceId], NULL, NULL, NULL); - if(status != CL_SUCCESS) - { - if(status == CL_BUILD_PROGRAM_FAILURE) - { - cl_int logStatus; - char * buildLog = NULL; - size_t buildLogSize = 0; - logStatus = clGetProgramBuildInfo(program, - devices[deviceId], - CL_PROGRAM_BUILD_LOG, - buildLogSize, - buildLog, - &buildLogSize); - if(!sampleCommon->checkVal(logStatus, - CL_SUCCESS, - "clGetProgramBuildInfo failed.")) - { - return SDK_FAILURE; - } - - buildLog = (char*)malloc(buildLogSize); - if(buildLog == NULL) - { - sampleCommon->error("Failed to allocate host memory. (buildLog)"); - return SDK_FAILURE; - } - memset(buildLog, 0, buildLogSize); - - logStatus = clGetProgramBuildInfo(program, - devices[deviceId], - CL_PROGRAM_BUILD_LOG, - buildLogSize, - buildLog, - NULL); - if(!sampleCommon->checkVal(logStatus, - CL_SUCCESS, - "clGetProgramBuildInfo failed.")) - { - free(buildLog); - return SDK_FAILURE; - } - - std::cout << " \n\t\t\tBUILD LOG\n"; - std::cout << " ************************************************\n"; - std::cout << buildLog << std::endl; - std::cout << " ************************************************\n"; - free(buildLog); - } - - if(!sampleCommon->checkVal(status, - CL_SUCCESS, - "clBuildProgram failed.")) - { - return SDK_FAILURE; - } - } - - /* get a kernel object handle for a kernel with the given name */ - if(decrypt) - { - kernel = clCreateKernel(program, "AESDecrypt", &status); - } - else - { - kernel = clCreateKernel(program, "AESEncrypt", &status); - } - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clCreateKernel failed.")) - return SDK_FAILURE; - - return SDK_SUCCESS; -} - - -int -AESEncryptDecrypt::runCLKernels(void) -{ - cl_int status; - cl_event events[2]; - - size_t globalThreads[2]= {width/4, height}; - size_t localThreads[2] = {1, 4}; - - status = clGetKernelWorkGroupInfo( - kernel, - devices[deviceId], - CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(cl_ulong), - &usedLocalMemory, - NULL); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clGetKernelWorkGroupInfo failed.(usedLocalMemory)")) - return SDK_FAILURE; - - availableLocalMemory = totalLocalMemory - usedLocalMemory; - - /* two local memories buffers of sizeof(cl_uchar)*keySize */ - neededLocalMemory = 2*sizeof(cl_uchar)*keySize; - - if(neededLocalMemory > availableLocalMemory) - { - std::cout << "Unsupported: Insufficient local memory on device." << std::endl; - return SDK_SUCCESS; - } - - if(localThreads[0] > maxWorkItemSizes[0] || - localThreads[1] > maxWorkItemSizes[1] || - localThreads[0]*localThreads[1] > maxWorkGroupSize) - { - std::cout << "Unsupported: Device does not support requested number of work items."<checkVal( - status, - CL_SUCCESS, - "clGetKernelWorkGroupInfo failed.")) - { - return SDK_FAILURE; - } - - if((cl_uint)(localThreads[0]*localThreads[1]) > kernelWorkGroupSize ) - { - std::cout << "Out of Resources!" << std::endl; - std::cout << "Group Size specified : " << localThreads[0] * localThreads[1] << std::endl; - std::cout << "Max Group Size supported on the kernel : " - << kernelWorkGroupSize << std::endl; - return SDK_FAILURE; - } - - /*** Set appropriate arguments to the kernel ***/ - status = clSetKernelArg( - kernel, - 0, - sizeof(cl_mem), - (void *)&outputBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (outputBuffer)")) - return SDK_FAILURE; - - status = clSetKernelArg( - kernel, - 1, - sizeof(cl_mem), - (void *)&inputBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (inputBuffer)")) - return SDK_FAILURE; - - status = clSetKernelArg( - kernel, - 2, - sizeof(cl_mem), - (void *)&rKeyBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (rKeyBuffer)")) - return SDK_FAILURE; - - if(decrypt) - { - status = clSetKernelArg( - kernel, - 3, - sizeof(cl_mem), - (void *)&rsBoxBuffer); - } - else - { - status = clSetKernelArg( - kernel, - 3, - sizeof(cl_mem), - (void *)&sBoxBuffer); - } - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (SBoxBuffer)")) - return SDK_FAILURE; - - status = clSetKernelArg( - kernel, - 4, - sizeof(cl_uchar)*keySize, - NULL); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (block0)")) - return SDK_FAILURE; - - status = clSetKernelArg( - kernel, - 5, - sizeof(cl_uchar)*keySize, - NULL); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (block1)")) - return SDK_FAILURE; - - status = clSetKernelArg( - kernel, - 6, - sizeof(cl_uint), - (void *)&width); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (width)")) - return SDK_FAILURE; - - status = clSetKernelArg( - kernel, - 7, - sizeof(cl_uint), - (void *)&rounds); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clSetKernelArg failed. (rounds)")) - return SDK_FAILURE; - - - /* - * Enqueue a kernel run call. - */ - status = clEnqueueNDRangeKernel( - commandQueue, - kernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - &events[0]); - - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clEnqueueNDRangeKernel failed.")) - return SDK_FAILURE; - - - /* wait for the kernel call to finish execution */ - status = clWaitForEvents(1, &events[0]); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clWaitForEvents failed.")) - return SDK_FAILURE; - - /* Enqueue the results to application pointer*/ - status = clEnqueueReadBuffer( - commandQueue, - outputBuffer, - CL_TRUE, - 0, - width * height * sizeof(cl_uchar), - output, - 0, - NULL, - &events[1]); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clEnqueueReadBuffer failed.")) - return SDK_FAILURE; - - /* Wait for the read buffer to finish execution */ - status = clWaitForEvents(1, &events[1]); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clWaitForEvents failed.")) - return SDK_FAILURE; - - clReleaseEvent(events[0]); - clReleaseEvent(events[1]); - - return SDK_SUCCESS; -} - -cl_uchar -AESEncryptDecrypt::getRconValue(cl_uint num) -{ - return Rcon[num]; -} - -void -AESEncryptDecrypt::rotate(cl_uchar * word) -{ - cl_uchar c = word[0]; - for(cl_uint i=0; i<3; ++i) - { - word[i] = word[i+1]; - } - word[3] = c; -} - -void -AESEncryptDecrypt::core(cl_uchar * word, cl_uint iter) -{ - rotate(word); - - for(cl_uint i=0; i < 4; ++i) - { - word[i] = getSBoxValue(word[i]); - } - - word[0] = word[0]^getRconValue(iter); -} - -void -AESEncryptDecrypt::keyExpansion(cl_uchar * key, cl_uchar * expandedKey, - cl_uint keySize, cl_uint explandedKeySize) -{ - cl_uint currentSize = 0; - cl_uint rConIteration = 1; - cl_uchar temp[4] = {0}; - - for(cl_uint i=0; i < keySize; ++i) - { - expandedKey[i] = key[i]; - } - - currentSize += keySize; - - while(currentSize < explandedKeySize) - { - for(cl_uint i=0; i < 4; ++i) - { - temp[i] = expandedKey[(currentSize - 4) + i]; - } - - if(currentSize%keySize == 0) - { - core(temp, rConIteration++); - } - - //XXX: add extra SBOX here if the keySize is 32 Bytes - - for(cl_uint i=0; i < 4; ++i) - { - expandedKey[currentSize] = expandedKey[currentSize - keySize]^temp[i]; - currentSize++; - } - } -} - -cl_uchar -AESEncryptDecrypt::getSBoxValue(cl_uint num) -{ - return sbox[num]; -} - -cl_uchar -AESEncryptDecrypt::getSBoxInvert(cl_uint num) -{ - return rsbox[num]; -} - -cl_uchar -AESEncryptDecrypt::galoisMultiplication(cl_uchar a, cl_uchar b) -{ - cl_uchar p = 0; - for(cl_uint i=0; i < 8; ++i) - { - if((b&1) == 1) - { - p^=a; - } - cl_uchar hiBitSet = (a & 0x80); - a <<= 1; - if(hiBitSet == 0x80) - { - a ^= 0x1b; - } - b >>= 1; - } - return p; -} - -void -AESEncryptDecrypt::mixColumn(cl_uchar *column) -{ - cl_uchar cpy[4]; - for(cl_uint i=0; i < 4; ++i) - { - cpy[i] = column[i]; - } - column[0] = galoisMultiplication(cpy[0], 2)^ - galoisMultiplication(cpy[3], 1)^ - galoisMultiplication(cpy[2], 1)^ - galoisMultiplication(cpy[1], 3); - - column[1] = galoisMultiplication(cpy[1], 2)^ - galoisMultiplication(cpy[0], 1)^ - galoisMultiplication(cpy[3], 1)^ - galoisMultiplication(cpy[2], 3); - - column[2] = galoisMultiplication(cpy[2], 2)^ - galoisMultiplication(cpy[1], 1)^ - galoisMultiplication(cpy[0], 1)^ - galoisMultiplication(cpy[3], 3); - - column[3] = galoisMultiplication(cpy[3], 2)^ - galoisMultiplication(cpy[2], 1)^ - galoisMultiplication(cpy[1], 1)^ - galoisMultiplication(cpy[0], 3); -} - -void -AESEncryptDecrypt::mixColumnInv(cl_uchar *column) -{ - cl_uchar cpy[4]; - for(cl_uint i=0; i < 4; ++i) - { - cpy[i] = column[i]; - } - column[0] = galoisMultiplication(cpy[0], 14 )^ - galoisMultiplication(cpy[3], 9 )^ - galoisMultiplication(cpy[2], 13)^ - galoisMultiplication(cpy[1], 11); - - column[1] = galoisMultiplication(cpy[1], 14 )^ - galoisMultiplication(cpy[0], 9 )^ - galoisMultiplication(cpy[3], 13)^ - galoisMultiplication(cpy[2], 11); - - column[2] = galoisMultiplication(cpy[2], 14 )^ - galoisMultiplication(cpy[1], 9 )^ - galoisMultiplication(cpy[0], 13)^ - galoisMultiplication(cpy[3], 11); - - column[3] = galoisMultiplication(cpy[3], 14 )^ - galoisMultiplication(cpy[2], 9 )^ - galoisMultiplication(cpy[1], 13)^ - galoisMultiplication(cpy[0], 11); -} - -void -AESEncryptDecrypt::mixColumns(cl_uchar * state, cl_bool inverse) -{ - cl_uchar column[4]; - for(cl_uint i=0; i < 4; ++i) - { - for(cl_uint j=0; j < 4; ++j) - { - column[j] = state[j*4 + i]; - } - - if(inverse) - { - mixColumnInv(column); - } - else - { - mixColumn(column); - } - - for(cl_uint j=0; j < 4; ++j) - { - state[j*4 + i] = column[j]; - } - } -} - -void -AESEncryptDecrypt::subBytes(cl_uchar * state, cl_bool inverse) -{ - for(cl_uint i=0; i < keySize; ++i) - { - state[i] = inverse ? getSBoxInvert(state[i]): getSBoxValue(state[i]); - } -} - -void -AESEncryptDecrypt::shiftRow(cl_uchar *state, cl_uchar nbr) -{ - for(cl_uint i=0; i < nbr; ++i) - { - cl_uchar tmp = state[0]; - for(cl_uint j = 0; j < 3; ++j) - { - state[j] = state[j+1]; - } - state[3] = tmp; - } -} - -void -AESEncryptDecrypt::shiftRowInv(cl_uchar *state, cl_uchar nbr) -{ - for(cl_uint i=0; i < nbr; ++i) - { - cl_uchar tmp = state[3]; - for(cl_uint j = 3; j > 0; --j) - { - state[j] = state[j-1]; - } - state[0] = tmp; - } -} -void -AESEncryptDecrypt::shiftRows(cl_uchar * state, cl_bool inverse) -{ - for(cl_uint i=0; i < 4; ++i) - { - if(inverse) - shiftRowInv(state + i*4, i); - else - shiftRow(state + i*4, i); - } -} - -void -AESEncryptDecrypt::addRoundKey(cl_uchar * state, cl_uchar * rKey) -{ - for(cl_uint i=0; i < keySize; ++i) - { - state[i] = state[i] ^ rKey[i]; - } -} - -void -AESEncryptDecrypt::createRoundKey(cl_uchar * eKey, cl_uchar * rKey) -{ - for(cl_uint i=0; i < 4; ++i) - for(cl_uint j=0; j < 4; ++j) - { - rKey[i+ j*4] = eKey[i*4 + j]; - } -} - -void -AESEncryptDecrypt::aesRound(cl_uchar * state, cl_uchar * rKey) -{ - subBytes(state, decrypt); - shiftRows(state, decrypt); - mixColumns(state, decrypt); - addRoundKey(state, rKey); -} - -void -AESEncryptDecrypt::aesMain(cl_uchar * state, cl_uchar * rKey, cl_uint rounds) -{ - addRoundKey(state, rKey); - for(cl_uint i=1; i < rounds; ++i) - { - aesRound(state, rKey + keySize*i); - } - subBytes(state, decrypt); - shiftRows(state, decrypt); - addRoundKey(state, rKey + keySize*rounds); -} - -void -AESEncryptDecrypt::aesRoundInv(cl_uchar * state, cl_uchar * rKey) -{ - shiftRows(state, decrypt); - subBytes(state, decrypt); - addRoundKey(state, rKey); - mixColumns(state, decrypt); -} - -void -AESEncryptDecrypt::aesMainInv(cl_uchar * state, cl_uchar * rKey, cl_uint rounds) -{ - addRoundKey(state, rKey + keySize*rounds); - for(cl_uint i=rounds-1; i > 0; --i) - { - aesRoundInv(state, rKey + keySize*i); - } - shiftRows(state, decrypt); - subBytes(state, decrypt); - addRoundKey(state, rKey); -} - -/** - * - * - */ -void -AESEncryptDecrypt::AESEncryptDecryptCPUReference(cl_uchar * output , - cl_uchar * input , - cl_uchar * rKey , - cl_uint explandedKeySize, - cl_uint width , - cl_uint height , - cl_bool inverse ) -{ - cl_uchar block[16]; - - for(cl_uint blocky = 0; blocky < height/4; ++blocky) - for(cl_uint blockx= 0; blockx < width/4; ++blockx) - { - for(cl_uint i=0; i < 4; ++i) - { - for(cl_uint j=0; j < 4; ++j) - { - cl_uint index = (((blocky * width/4) + blockx) * keySize )+ (i*4 + j); - block[i*4 + j] = input[index]; - } - } - - if(inverse) - aesMainInv(block, rKey, rounds); - else - aesMain(block, rKey, rounds); - - for(cl_uint i=0; i <4 ; ++i) - { - for(cl_uint j=0; j <4; ++j) - { - cl_uint index = (((blocky * width/4) + blockx) * keySize )+ (i*4 + j); - output[index] = block[i*4 + j]; - } - } - } -} - - -int -AESEncryptDecrypt::initialize() -{ - // Call base class Initialize to get default configuration - if(!this->SDKSample::initialize()) - return SDK_FAILURE; - - streamsdk::Option* ifilename_opt = new streamsdk::Option; - if(!ifilename_opt) - { - sampleCommon->error("Memory allocation error.\n"); - return SDK_FAILURE; - } - ifilename_opt->_sVersion = "x"; - ifilename_opt->_lVersion = "input"; - ifilename_opt->_description = "Image as Input"; - ifilename_opt->_type = streamsdk::CA_ARG_STRING; - ifilename_opt->_value = &inFilename; - sampleArgs->AddOption(ifilename_opt); - - delete ifilename_opt; - - //////////////// - streamsdk::Option* ofilename_opt = new streamsdk::Option; - if(!ofilename_opt) - { - sampleCommon->error("Memory allocation error.\n"); - return SDK_FAILURE; - } - ofilename_opt->_sVersion = "y"; - ofilename_opt->_lVersion = "output"; - ofilename_opt->_description = "Image as Ouput"; - ofilename_opt->_type = streamsdk::CA_ARG_STRING; - ofilename_opt->_value = &outFilename; - sampleArgs->AddOption(ofilename_opt); - - delete ofilename_opt; - - //////////////// - streamsdk::Option* decrypt_opt = new streamsdk::Option; - if(!decrypt_opt) - { - sampleCommon->error("Memory allocation error.\n"); - return SDK_FAILURE; - } - decrypt_opt->_sVersion = "z"; - decrypt_opt->_lVersion = "decrypt"; - decrypt_opt->_description = "Decrypt the Input Image"; - decrypt_opt->_type = streamsdk::CA_NO_ARGUMENT; - decrypt_opt->_value = &decrypt; - sampleArgs->AddOption(decrypt_opt); - - delete decrypt_opt; - - streamsdk::Option* num_iterations = new streamsdk::Option; - if(!num_iterations) - { - sampleCommon->error("Memory allocation error.\n"); - return SDK_FAILURE; - } - - num_iterations->_sVersion = "i"; - num_iterations->_lVersion = "iterations"; - num_iterations->_description = "Number of iterations for kernel execution"; - num_iterations->_type = streamsdk::CA_ARG_INT; - num_iterations->_value = &iterations; - - sampleArgs->AddOption(num_iterations); - - delete num_iterations; - - return SDK_SUCCESS; -} - -int -AESEncryptDecrypt::setup() -{ - - std::string filePath = sampleCommon->getPath() + inFilename; - image.load(filePath.c_str()); - - width = image.getWidth(); - height = image.getHeight(); - - /* check condition for the bitmap to be initialized */ - if(width<0 || height <0) - return SDK_FAILURE; - - pixels = image.getPixels(); - - if(setupAESEncryptDecrypt()!=SDK_SUCCESS) - return SDK_FAILURE; - - int timer = sampleCommon->createTimer(); - sampleCommon->resetTimer(timer); - sampleCommon->startTimer(timer); - - if(setupCL()!=SDK_SUCCESS) - return SDK_FAILURE; - - sampleCommon->stopTimer(timer); - - setupTime = (double)(sampleCommon->readTimer(timer)); - - return SDK_SUCCESS; -} - - -int -AESEncryptDecrypt::run() -{ - int timer = sampleCommon->createTimer(); - sampleCommon->resetTimer(timer); - sampleCommon->startTimer(timer); - - std::cout << "Executing kernel for " << iterations << - " iterations" << std::endl; - std::cout << "-------------------------------------------" << std::endl; - - for(int i = 0; i < iterations; i++) - { - /* Arguments are set and execution call is enqueued on command buffer */ - if(runCLKernels()!=SDK_SUCCESS) - return SDK_FAILURE; - } - - sampleCommon->stopTimer(timer); - totalKernelTime = (double)(sampleCommon->readTimer(timer)) / iterations; - - - //XXX: Write output to an output Image - - convertGrayToPixels(output, pixels); - image.write(outFilename.c_str()); - - if(!quiet) { - std::cout << "Output Filename : " << outFilename << std::endl; - } - - - return SDK_SUCCESS; -} - -int -AESEncryptDecrypt::verifyResults() -{ - if(verify) - { - verificationOutput = (cl_uchar *) malloc(width*height*sizeof(cl_uchar)); - if(verificationOutput==NULL) { - sampleCommon->error("Failed to allocate host memory. (verificationOutput)"); - return SDK_FAILURE; - } - - /* - * reference implementation - */ - int refTimer = sampleCommon->createTimer(); - sampleCommon->resetTimer(refTimer); - sampleCommon->startTimer(refTimer); - AESEncryptDecryptCPUReference(verificationOutput, input, roundKey, explandedKeySize, - width, height, decrypt); - sampleCommon->stopTimer(refTimer); - referenceKernelTime = sampleCommon->readTimer(refTimer); - - /* compare the results and see if they match */ - if(memcmp(output, verificationOutput, height*width*sizeof(cl_uchar)) == 0) - { - std::cout<<"Passed!\n"; - return SDK_SUCCESS; - } - else - { - std::cout<<"Failed\n"; - return SDK_FAILURE; - } - } - - return SDK_SUCCESS; -} - -void AESEncryptDecrypt::printStats() -{ - std::string strArray[4] = {"Width", "Height", "Time(sec)", "KernelTime(sec)"}; - std::string stats[4]; - - totalTime = setupTime + totalKernelTime; - - stats[0] = sampleCommon->toString(width , std::dec); - stats[1] = sampleCommon->toString(height , std::dec); - stats[2] = sampleCommon->toString(totalTime, std::dec); - stats[3] = sampleCommon->toString(totalKernelTime, std::dec); - - this->SDKSample::printStats(strArray, stats, 4); -} - -int AESEncryptDecrypt::cleanup() -{ - /* Releases OpenCL resources (Context, Memory etc.) */ - cl_int status; - - status = clReleaseKernel(kernel); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseKernel failed.")) - return SDK_FAILURE; - - status = clReleaseProgram(program); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseProgram failed.")) - return SDK_FAILURE; - - status = clReleaseMemObject(inputBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseMemObject failed.")) - return SDK_FAILURE; - - status = clReleaseMemObject(outputBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseMemObject failed.")) - return SDK_FAILURE; - - status = clReleaseMemObject(rKeyBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseMemObject failed.")) - return SDK_FAILURE; - - status = clReleaseMemObject(sBoxBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseMemObject failed.")) - return SDK_FAILURE; - - status = clReleaseMemObject(rsBoxBuffer); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseMemObject failed.")) - return SDK_FAILURE; - - status = clReleaseCommandQueue(commandQueue); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseCommandQueue failed.")) - return SDK_FAILURE; - - status = clReleaseContext(context); - if(!sampleCommon->checkVal( - status, - CL_SUCCESS, - "clReleaseContext failed.")) - return SDK_FAILURE; - - /* release program resources (input memory etc.) */ - if(input) - free(input); - - if(key) - free(key); - - if(expandedKey) - free(expandedKey); - - if(roundKey) - free(roundKey); - - if(output) - free(output); - - if(verificationOutput) - free(verificationOutput); - - if(devices) - free(devices); - - if(maxWorkItemSizes) - free(maxWorkItemSizes); - - return SDK_SUCCESS; -} - -int -main(int argc, char * argv[]) -{ - AESEncryptDecrypt clAESEncryptDecrypt("OpenCL AES Encrypt Decrypt"); - - if(clAESEncryptDecrypt.initialize()!=SDK_SUCCESS) - return SDK_FAILURE; - if(!clAESEncryptDecrypt.parseCommandLine(argc, argv)) - return SDK_FAILURE; - - if(clAESEncryptDecrypt.isDumpBinaryEnabled()) - { - return clAESEncryptDecrypt.genBinaryImage(); - } - else - { - if(clAESEncryptDecrypt.setup()!=SDK_SUCCESS) - return SDK_FAILURE; - if(clAESEncryptDecrypt.run()!=SDK_SUCCESS) - return SDK_FAILURE; - if(clAESEncryptDecrypt.verifyResults()!=SDK_SUCCESS) - return SDK_FAILURE; - if(clAESEncryptDecrypt.cleanup()!=SDK_SUCCESS) - return SDK_FAILURE; - clAESEncryptDecrypt.printStats(); - } - - return SDK_SUCCESS; -} - -#endif - -#include "common.h" -void verify(); - - cl_uchar sbox[256] = - { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 //0 - , 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 //1 - , 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 //2 - , 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 //3 - , 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 //4 - , 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf //5 - , 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 //6 - , 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 //7 - , 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 //8 - , 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb //9 - , 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 //A - , 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 //B - , 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a //C - , 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e //D - , 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf //E - , 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};//F - //0 1 2 3 4 5 6 7 8 9 A B C D E F - - - cl_uchar rsbox[256] = - { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - , 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - , 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - , 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - , 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - , 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - , 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - , 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - , 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - , 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - , 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - , 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - , 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - , 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - , 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - , 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}; - - - cl_uchar Rcon[255] = - { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a - , 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39 - , 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a - , 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8 - , 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef - , 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc - , 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b - , 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3 - , 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94 - , 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20 - , 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35 - , 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f - , 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04 - , 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63 - , 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd - , 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb }; - -char* filename = "input512.bmp"; -int width, height; -cl_uchar *input; -cl_uchar *output; - - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err, i; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel e_kernel = getKernel(device, context, "aes_kernels.cl", "AESEncrypt"); - cl_kernel d_kernel = getKernel(device, context, "aes_kernels.cl", "AESDecrypt"); - - cl_uchar4 *pixels = (cl_uchar4 *) readBmp(filename, &width, &height); - - cl_uint sizeBytes = width*height*sizeof(cl_uchar); - input = newBuffer(sizeBytes, 0); - - /* initialize the input array, do NOTHING but assignment when decrypt*/ - int decrypt = 0; -#if 0 - if(!decrypt) - convertColorToGray(pixels, input); - else - convertGrayToGray(pixels, input); -#endif - output = (cl_uchar*)malloc(sizeBytes); - - - - cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * width * height, input, &err); CHK_ERR(err); - - cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * width * height, output, &err); CHK_ERR(err); - -#if 0 - cl_mem rKeyBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * explandedKeySize, roundKey, &err); CHK_ERR(err); - - cl_uchar *sBox = (cl_uchar *)sbox; - cl_mem sBoxBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * 256, sBox, &err); CHK_ERR(err); - - cl_uchar *rsBox = (cl_uchar *)rsbox; - clmem rsBoxBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_uchar ) * 256, rsBox, &err); CHK_ERR(err); -#endif - -#if 0 -#if 0 - for (i = 0; i < MAX; i++) { - a[i] = b[i] = (float) i; - c[i] = 0.0f; - } -#else - a = newBuffer(MAX * sizeof(float), 'f'); - b = newBuffer(MAX * sizeof(float), 'f'); - c = newBuffer(MAX * sizeof(float), '0'); - d = newBuffer(MAX * sizeof(float), '0'); -#endif - - cl_mem da = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), a, &err); CHK_ERR(err); - cl_mem db = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), b, &err); CHK_ERR(err); - cl_mem dc = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), c, &err); CHK_ERR(err); - - - /* Execute */ - int gws = MAX; - int lws = 16; - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &da); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &db); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dc); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(size_t), &gws); CHK_ERR(err); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &gws, &lws, 0, NULL, NULL); CHK_ERR(err); - err = clEnqueueReadBuffer(queue, dc, CL_TRUE, 0, MAX*sizeof(float), d, 0, NULL, NULL); CHK_ERR(err); - - verify(); -} - -void verify() -{ - int i; - for (i = 0; i < MAX; i++) { - float err = d[i] - (a[i] + b[i]); - - err = fabsf(err); -#define EPS 1.0e-7f // 1.0e-8 fails on cpu - if (err >= EPS) { - printf("Mismatch: %8d: %8f %8f %8f (err=%g 0x%.08x)\n", i, d[i], a[i], b[i], err, *(unsigned int *) &err); - printf("Failed\n"); - exit(-1); - } - } - printf("Passed\n"); -#endif -} diff --git a/tests/TODO/binomialOption.c b/tests/TODO/binomialOption.c deleted file mode 100644 index ab84110..0000000 --- a/tests/TODO/binomialOption.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); - -float *randArray, *output; -float *refOutput, *stepsArray; -int numSamples = 64; -int numSteps = 255; - - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err; - char *ker_path = NULL; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("binomialOption_kernels_0.bin", device); -#else - ker_path = do_kiss_path("binomialOption_kernels_0.bin8", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "binomial_options"); - - randArray = newBuffer(numSamples * sizeof(cl_float4), 'f'); - output = newBuffer(numSamples * sizeof(cl_float4), 0 ); - cl_mem randBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - numSamples * sizeof(cl_float4), randArray, &err); - CHK_ERR(err); - cl_mem outBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - numSamples * sizeof(cl_float4), output, &err); - CHK_ERR(err); - - /* Execute */ - size_t gws[1] = {numSamples * (numSteps+1)}; - size_t lws[1] = {numSteps + 1}; - - err = clSetKernelArg(kernel, 0, sizeof(int), &numSteps); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &randBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &outBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, (numSteps + 1) * sizeof(cl_float4), NULL); CHK_ERR(err); - err = clSetKernelArg(kernel, 4, numSteps * sizeof(cl_float4), NULL); CHK_ERR(err); - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, gws, lws, 0, NULL, NULL); CHK_ERR(err); -#if 0 - err = clEnqueueReadBuffer(queue, outBuffer, CL_TRUE, 0, - numSamples * sizeof(cl_float4), output, - 0, NULL, NULL); -#else - output = clIntelMapBuffer(outBuffer, &err); - CHK_ERR(err); -#endif - - verify(); - return 0; -} - -#define RISKFREE 0.02f -#define VOLATILITY 0.30f - -/* - * Reduces the input array (in place) - * length specifies the length of the array - */ - void -binomialOptionCPU() -{ - refOutput = newBuffer(numSamples * sizeof(cl_float4), 0); - stepsArray = newBuffer((numSteps + 1) * sizeof(cl_float4), 0); - int bid; - - /* Iterate for all samples */ - for(bid = 0; bid < numSamples; ++bid) - { - float s[4]; - float x[4]; - float vsdt[4]; - float puByr[4]; - float pdByr[4]; - float optionYears[4]; - - float inRand[4]; - int i, j, k; - - - for(i = 0; i < 4; ++i) - { - inRand[i] = randArray[bid + i]; - s[i] = (1.0f - inRand[i]) * 5.0f + inRand[i] * 30.f; - x[i] = (1.0f - inRand[i]) * 1.0f + inRand[i] * 100.f; - optionYears[i] = (1.0f - inRand[i]) * 0.25f + inRand[i] * 10.f; - float dt = optionYears[i] * (1.0f / (float)numSteps); - vsdt[i] = VOLATILITY * sqrtf(dt); - float rdt = RISKFREE * dt; - float r = expf(rdt); - float rInv = 1.0f / r; - float u = expf(vsdt[i]); - float d = 1.0f / u; - float pu = (r - d)/(u - d); - float pd = 1.0f - pu; - puByr[i] = pu * rInv; - pdByr[i] = pd * rInv; - } - // Compute values at expiration date: - // Call option value at period end is v(t) = s(t) - x - // If s(t) is greater than x, or zero otherwise... - // The computation is similar for put options... - for(j = 0; j <= numSteps; j++) - { - for(i = 0; i < 4; ++i) - { - float profit = s[i] * expf(vsdt[i] * (2.0f * j - numSteps)) - x[i]; - stepsArray[j * 4 + i] = profit > 0.0f ? profit : 0.0f; - } - } - - //walk backwards up on the binomial tree of depth numSteps - //Reduce the price step by step - for(j = numSteps; j > 0; --j) - { - for(k = 0; k <= j - 1; ++k) - { - for(i = 0; i < 4; ++i) - { - stepsArray[k * 4 + i] = pdByr[i] * stepsArray[(k + 1) * 4 + i] + puByr[i] * stepsArray[k * 4 + i]; - } - } - } - - //Copy the root to result - refOutput[bid] = stepsArray[0]; - } -} - -void verify() -{ - binomialOptionCPU(); - int i; - for (i=0; i<20; i++) { - printf("%13.8f %13.8f\n", output[i], refOutput[i]); - } - int resC = comparef(output, refOutput, numSamples * 4, 0.001f); - resC ? printf("Passed\n") : printf("Failed\n"); -} - diff --git a/tests/TODO/bitonicSort.c b/tests/TODO/bitonicSort.c deleted file mode 100644 index 0d5eaf2..0000000 --- a/tests/TODO/bitonicSort.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); - -int length = 1024; -int *input, *refInput; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err, i; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernel(device, context, "bitonic_kernels.cl", "bitonicSort"); - - int length_bytes = length * sizeof(cl_uint); - - input = newBuffer(length_bytes, 0); - refInput = newBuffer(length_bytes, 0); - for (i = 0; i < length; i++) { - input[i] = refInput[i] = rand() & 0x0fffff; - } - cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - length_bytes, input, &err); - CHK_ERR(err); - - /* - * Execute - * - * This algorithm is run as NS stages. Each stage has NP passes. - * so the total number of times the kernel call is enqueued is NS * NP. - * - * For every stage S, we have S + 1 passes. - * eg: For stage S = 0, we have 1 pass. - * For stage S = 1, we have 2 passes. - * - * if length is 2^N, then the number of stages (numStages) is N. - * Do keep in mind the fact that the algorithm only works for - * arrays whose size is a power of 2. - * - * here, numStages is N. - * - * For an explanation of how the algorithm works, please go through - * the documentation of this sample. - */ - - /* - * 2^numStages should be equal to length. - * i.e the number of times you halve length to get 1 should be numStages - */ - int numStages = 0; - int sortDescending = 1; - for(i = length; i > 1; i >>= 1) - ++numStages; - - /*** Set appropriate arguments to the kernel ***/ - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(cl_uint), &length); CHK_ERR(err); - err = clSetKernelArg(kernel, 4, sizeof(cl_uint), &sortDescending); CHK_ERR(err); - - int stage, passOfStage; - size_t globalThreads[1] = {length / 2}; - size_t localThreads[1] = {1}; - - for(stage = 0; stage < numStages; stage++) { - /* stage of the algorithm */ - err = clSetKernelArg(kernel, 1, sizeof(cl_uint), &stage); - CHK_ERR(err); - - /* Every stage has stage+1 passes. */ - for(passOfStage = 0; passOfStage < stage + 1; passOfStage++) { - /* pass of the current stage */ - err = clSetKernelArg( kernel, 2, sizeof(cl_uint), &passOfStage); - CHK_ERR(err); - - /* - * Enqueue a kernel run call. - * For simplicity, the groupsize used is 1. - * - * Each thread writes a sorted pair. - * So, the number of threads (global) is half the length. - */ - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, - globalThreads, localThreads, - 0, NULL, NULL); - CHK_ERR(err); - - /* wait for the kernel call to finish execution */ - err = clFinish(queue); - CHK_ERR(err); - } - } - err = clEnqueueReadBuffer(queue, inputBuffer, CL_TRUE, 0, - length_bytes, input, - 0, NULL, NULL); - CHK_ERR(err); - - verify(); - -} - -int compare(const void *x, const void *y) -{ - unsigned int a = *(unsigned int *)x; - unsigned int b = *(unsigned int *)y; - return (a-b); -} - - -void verify() -{ - int i; - - qsort(refInput, length, sizeof(int), compare); - for (i = 0; i < length; i++) { - if (i < 20) { - printf("%8d %8d\n", input[i], refInput[i]); - } - if (input[i] != refInput[i]) { - printf("Failed at %d: %d vs. %d\n", i, - input[i], refInput[i]); - printf("Failed"); - exit(-1); - } - } - printf("Passed\n"); -} - diff --git a/tests/TODO/blackscholes.c b/tests/TODO/blackscholes.c deleted file mode 100644 index 3282a2d..0000000 --- a/tests/TODO/blackscholes.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -#include "cl_test.h" - -void verify(int compareCall, int comparePut); - -float *randArray, *devCallPrice, *devPutPrice, *hostCallPrice, *hostPutPrice; -int width = 32; -int height = 16; - -int main(int argc, char **argv) -{ - struct args args = { 0 }; - char *ker_path = NULL; - int err; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("blackscholes_kernel_0.bin8", device); -#else - ker_path = do_kiss_path("blackscholes_kernel_0.bin", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); - CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); - CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "blackScholes"); - - int whf4 = width * height * sizeof(cl_float4); - - randArray = newBuffer(whf4, 'f'); - devCallPrice = newBuffer(whf4, 0); - //devPutPrice = newBuffer(whf4, 0); - hostCallPrice = newBuffer(whf4, 0); - hostPutPrice = newBuffer(whf4, 0); - - cl_mem randBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, whf4, randArray, &err); - CHK_ERR(err); - cl_mem callPriceBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, whf4, NULL, &err); - CHK_ERR(err); - cl_mem putPriceBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, whf4, NULL, &err); - CHK_ERR(err); - - size_t globalThreads[2] = { width, height }; - //size_t localThreads[2] = { 4, 4 }; - size_t localThreads[2] = { 16, 1 }; - - /* whether sort is to be in increasing order. CL_TRUE implies increasing */ - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &randBuf); - CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(width), (const void *) &width); - CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &callPriceBuf); - CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &putPriceBuf); - CHK_ERR(err); - - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); - CHK_ERR(err); - - devPutPrice = clIntelMapBuffer(putPriceBuf, &err); - CHK_ERR(err); - devCallPrice = clIntelMapBuffer(callPriceBuf, &err); - CHK_ERR(err); - verify(1, 1); -#if 0 - int i; - for (i = 0; i < width*height*4; i++) { - printf("%12.8f, %12.8f | %12.8f %12.8f\n", - devCallPrice[i], - hostCallPrice[i], - devPutPrice[i], - hostPutPrice[i]); - fflush(stdout); - } -#endif - clIntelUnmapBuffer(putPriceBuf); - return err; -} - -#define S_LOWER_LIMIT 10.0f -#define S_UPPER_LIMIT 100.0f -#define K_LOWER_LIMIT 10.0f -#define K_UPPER_LIMIT 100.0f -#define T_LOWER_LIMIT 1.0f -#define T_UPPER_LIMIT 10.0f -#define R_LOWER_LIMIT 0.01f -#define R_UPPER_LIMIT 0.05f -#define SIGMA_LOWER_LIMIT 0.01f -#define SIGMA_UPPER_LIMIT 0.10f - -float phi(float X) -{ - float y, absX, t; - - // the coeffs - const float c1 = 0.319381530f; - const float c2 = -0.356563782f; - const float c3 = 1.781477937f; - const float c4 = -1.821255978f; - const float c5 = 1.330274429f; - - const float oneBySqrt2pi = 0.398942280f; - - absX = fabsf(X); - t = 1.0f / (1.0f + 0.2316419f * absX); - - y = 1.0f - oneBySqrt2pi * exp(-X * X / 2.0f) * - t * (c1 + t * (c2 + t * (c3 + t * (c4 + t * c5)))); - - return (X < 0) ? (1.0f - y) : y; -} - -void blackScholesCPU(int width, int height, float *randArray, - float *hostCallPrice, float *hostPutPrice) -{ - int y; - for (y = 0; y < width * height * 4; ++y) { - float d1, d2; - float sigmaSqrtT; - float KexpMinusRT; - float s = S_LOWER_LIMIT * randArray[y] + S_UPPER_LIMIT * (1.0f - randArray [y]); - float k = K_LOWER_LIMIT * randArray[y] + K_UPPER_LIMIT * (1.0f - randArray [y]); - float t = T_LOWER_LIMIT * randArray[y] + T_UPPER_LIMIT * (1.0f - randArray [y]); - float r = R_LOWER_LIMIT * randArray[y] + R_UPPER_LIMIT * (1.0f - randArray [y]); - float sigma = - SIGMA_LOWER_LIMIT * randArray[y] + - SIGMA_UPPER_LIMIT * (1.0f - randArray[y]); - - sigmaSqrtT = sigma * sqrt(t); - - d1 = (log(s / k) + - (r + sigma * sigma / 2.0f) * t) / sigmaSqrtT; - d2 = d1 - sigmaSqrtT; - - KexpMinusRT = k * exp(-r * t); - hostCallPrice[y] = s * phi(d1) - KexpMinusRT * phi(d2); - hostPutPrice[y] = KexpMinusRT * phi(-d2) - s * phi(-d1); - } -} - -void verify(int compareCall, int comparePut) -{ - blackScholesCPU(width, height, randArray, hostCallPrice, hostPutPrice); - int resC = 1, resP = 1; - if (compareCall) - resC = comparef(hostCallPrice, devCallPrice, width * height * 4, 1.0e-5f); - if (comparePut) - resP = comparef(hostPutPrice, devPutPrice, width * height * 4, 1.0e-5f); - printf("resC=%d, resP=%d: ", resC, resP); - if (resC && resP) - printf("Passed!\n"); - else - printf("Failed!\n"); -} - diff --git a/tests/TODO/dct.c b/tests/TODO/dct.c deleted file mode 100644 index a5cfc5f..0000000 --- a/tests/TODO/dct.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -#include "cl_test.h" - -void verify(float*); - -int width = 16; -int height = 16; -int blockWidth = 8; -int blockSize = 8 * 8; -int inverse = 0; - -float *input, *output, *ref; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err, i; - char *ker_path = NULL; - - const cl_float a = cos(M_PI/16)/2; - const cl_float b = cos(M_PI/8 )/2; - const cl_float c = cos(3*M_PI/16)/2; - const cl_float d = cos(5*M_PI/16)/2; - const cl_float e = cos(3*M_PI/8)/2; - const cl_float f = cos(7*M_PI/16)/2; - const cl_float g = 1.0f/sqrt(8.0f); - - cl_float dct8x8[64] = { - g, a, b, c, g, d, e, f, - g, c, e, -f, -g, -a, -b, -d, - g, d, -e, -a, -g, f, b, c, - g, f, -b, -d, g, c, -e, -a, - g, -f, -b, d, g, -c, -e, a, - g, -d, -e, a, -g, -f, b, -c, - g, -c, e, f, -g, a, -b, d, - g, -a, b, -c, g, -d, e, f - }; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("dct_kernels_0.bin8", device); -#else - ker_path = do_kiss_path("dct_kernels_0.bin", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "DCT"); - - int bytes = width * height * sizeof(cl_float); - input = newBuffer(bytes, 0); - output = newBuffer(bytes, 0); - - for (i = 0; i < width*height; i++) - input[i] = randf() * 255.0f; - - cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, input, &err); - CHK_ERR(err); - cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, bytes, output, &err); - CHK_ERR(err); - cl_mem dctBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * blockSize, dct8x8, &err); - CHK_ERR(err); - - /* Execute */ - /*** Set appropriate arguments to the kernel ***/ - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &outputBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dctBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, blockWidth * blockWidth * sizeof(cl_float), NULL); CHK_ERR(err); - err = clSetKernelArg(kernel, 4, sizeof(cl_uint), &width); CHK_ERR(err); - err = clSetKernelArg(kernel, 5, sizeof(cl_uint), &blockWidth); CHK_ERR(err); - err = clSetKernelArg(kernel, 6, sizeof(cl_uint), &inverse); CHK_ERR(err); - - size_t globalThreads[2] = {width, height}; - size_t localThreads[2] = {blockWidth, blockWidth}; - - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); - CHK_ERR(err); - - output = clIntelMapBuffer(outputBuffer, &err); - verify(dct8x8); - return 0; -} - -/* - * Reference implementation of the Discrete Cosine Transfrom on the CPU - */ - cl_uint -getIdx(cl_uint blockIdx, cl_uint blockIdy, cl_uint localIdx, cl_uint localIdy, cl_uint blockWidth, cl_uint globalWidth) -{ - cl_uint globalIdx = blockIdx * blockWidth + localIdx; - cl_uint globalIdy = blockIdy * blockWidth + localIdy; - - return (globalIdy * globalWidth + globalIdx); -} - - void -DCTCPUReference(cl_float *ref, - const cl_float * input , - const cl_float * dct8x8 , - const cl_uint width, - const cl_uint height, - const cl_uint numBlocksX, - const cl_uint numBlocksY, - const cl_uint inverse) -{ - cl_float * temp = (cl_float *)malloc(width*height*sizeof(cl_float)); - - /* for each block in the image */ - cl_uint blockIdy, blockIdx, i,j, k; - for(blockIdy=0; blockIdy < numBlocksY; ++blockIdy) - for(blockIdx=0; blockIdx < numBlocksX; ++blockIdx) - { - // First calculate A^T * X - for(j=0; j < blockWidth ; ++j) - for(i = 0; i < blockWidth ; ++i) - { - cl_uint index = getIdx(blockIdx, blockIdy, i, j, blockWidth, width); - cl_float tmp = 0.0f; - for(k=0; k < blockWidth; ++k) - { - // multiply with dct8x8(k,i) if forward DCT and dct8x8(i,k) if inverse DCT - cl_uint index1 = (inverse) ? i*blockWidth +k : k*blockWidth + i; - //input(k,j) - cl_uint index2 = getIdx(blockIdx, blockIdy, j, k, blockWidth, width); - - tmp += dct8x8[index1]*input[index2]; - } - temp[index] = tmp; - //ref[index] = tmp; - } - // And now multiply the result of previous step with A i.e. calculate (A^T * X) * A - for(j=0; j < blockWidth ; ++j) - for(i = 0; i < blockWidth ; ++i) - { - cl_uint index = getIdx(blockIdx, blockIdy, i, j, blockWidth, width); - cl_float tmp = 0.0f; - for(k=0; k < blockWidth; ++k) - { - //input(i,k) - cl_uint index1 = getIdx(blockIdx, blockIdy, k, i, blockWidth, width); - - // multiply with dct8x8(k,j) if forward DCT and dct8x8(j,k) if inverse DCT - cl_uint index2 = (inverse) ? j*blockWidth +k : k*blockWidth + j; - - tmp += temp[index1]*dct8x8[index2]; - } - ref[index] = tmp; - } - } - free(temp); -} - -void verify(float* dct8x8) -{ - printf("Passed\n"); - ref = (cl_float*) newBuffer(width*height*sizeof(cl_float), 0); - DCTCPUReference(ref, input, dct8x8, width, height, width/blockWidth, height/blockWidth, inverse); -#if 1 - int i, j; - for (j=0; j. - * - * Author: Benjamin Segovia - */ - -#include "common.h" -#include "cl_test.h" - -void verify(); - -int length = 2048; -float *input, *output, *refout; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err; - char *ker_path = NULL; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("fastWalsh_kernels_0.bin8", device); -#else - ker_path = do_kiss_path("fastWalsh_kernels_0.bin", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "fastWalshTransform"); - - input = newBuffer(length * sizeof(float), 'p'); - refout = newBuffer(length * sizeof(float), 0 ); - memcpy(refout, input, length * sizeof(float)); - - cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, length * sizeof(float), input, &err); CHK_ERR(err); - - /* Execute */ - size_t globalThreads[1] = { length/2}; - size_t localThreads[1] = { 256}; - - /* the input array - also acts as output*/ - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); CHK_ERR(err); - - int step; - - for(step = 1; step < length; step<<= 1) { - /* stage of the algorithm */ - err = clSetKernelArg( kernel, 1, sizeof(int), &step); CHK_ERR(err); - - /* Enqueue a kernel run call */ - err = clEnqueueNDRangeKernel(queue, - kernel, 1, NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); CHK_ERR(err); - - /* wait for the kernel call to finish execution */ - /* err = clWaitForEvents(1, &eND); CHK_ERR(err); */ - } - -#if 0 - /* Enqueue readBuffer*/ - err = clEnqueueReadBuffer(queue, inputBuffer, CL_TRUE, 0, length * sizeof(cl_float), output, 0, NULL, NULL); CHK_ERR(err); -#else - output = clIntelMapBuffer(inputBuffer, &err); - -#endif - verify(); - return 0; -} - -void fastWalshCPU(float *vinput, int length) -{ - int step; - - /* for each pass of the algorithm */ - for(step=1; step < length; step <<=1) { - /* length of each block */ - cl_uint jump = step << 1; - - /* for each blocks */ - cl_uint group; - for(group = 0; group < step; ++group) { - /* for each pair of elements with in the block */ - cl_uint pair; - - for(pair = group; pair < length; pair += jump) { - /* find its partner */ - cl_uint match = pair + step; - - cl_float T1 = vinput[pair]; - cl_float T2 = vinput[match]; - - /* store the sum and difference of the numbers in the same locations */ - vinput[pair] = T1 + T2; - vinput[match] = T1 - T2; - } - } - } -} - -void verify() -{ - int i; - - fastWalshCPU(refout, length); - comparef(output, refout, length, 1.0e-6); - for (i = 0; i < 20; i++) { - printf("%20.5f %20.8f\n", output[i], refout[i]); - } - printf("Done\n"); -} - diff --git a/tests/TODO/fft.c b/tests/TODO/fft.c deleted file mode 100644 index eee5239..0000000 --- a/tests/TODO/fft.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -#include "cl_test.h" - -void verify(); - -int length = 1024; -float *input_i, *input_r, *output_i, *output_r, *refOutput_r, *refOutput_i; - -int main(int argc, char**argv) -{ - char *ker_path = NULL; - struct args args = {0}; - int err; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("fft_kernels_0.bin", device); -#else - ker_path = do_kiss_path("fft_kernels_0.bin8", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "kfft"); - - cl_uint inputSizeBytes = length * sizeof(cl_float); - - /* allocate and init memory used by host */ - input_r = newBuffer(inputSizeBytes, 'p'); - input_i = newBuffer(inputSizeBytes, 0 ); - output_i = newBuffer(inputSizeBytes, 0 ); - output_r = newBuffer(inputSizeBytes, 0 ); - - memcpy(output_i, input_i, inputSizeBytes); - memcpy(output_r, input_r, inputSizeBytes); - - cl_mem buffer_r = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, inputSizeBytes, input_r, &err); CHK_ERR(err); - cl_mem buffer_i = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, inputSizeBytes, input_i, &err); CHK_ERR(err); - - size_t globalThreads[1] = {64}; - size_t localThreads[1] = {64}; - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer_r); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer_i); CHK_ERR(err); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); - CHK_ERR(err); - - output_i = clIntelMapBuffer(buffer_i, &err); CHK_ERR(err); - output_r = clIntelMapBuffer(buffer_r, &err); CHK_ERR(err); - verify(); - - return 0; -} - -/* - This computes an in-place complex-to-complex FFT - x and y are the real and imaginary arrays of 2^m points. - dir = 1 gives forward transform - dir = -1 gives reverse transform - */ -void fftCPU(int dir,long m,cl_float *x,cl_float *y) -{ - long n,i,i1,j,k,i2,l,l1,l2; - double c1,c2,tx,ty,t1,t2,u1,u2,z; - - /* Calculate the number of points */ - n = 1; - for (i=0;i> 1; - j = 0; - for (i=0;i>= 1; - } - j += k; - } - - /* Compute the FFT */ - c1 = -1.0; - c2 = 0.0; - l2 = 1; - for (l=0;l. - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -static int *dst = NULL; -static const size_t w = 64; -static const size_t h = 64; -static const size_t iter = 4; - -int -main(int argc, char **argv) -{ - const size_t global[2] = {w, h}; - const size_t local[2] = {16, 1}; - const size_t sz = w * h * sizeof(char[4]); - int status = 0; - -#if TEST_SIMD8 - if ((status = cl_test_init("mandelbrot_0.bin8", "render")) != 0) - goto error; -#else - if ((status = cl_test_init("mandelbrot_0.bin", "render")) != 0) - goto error; -#endif - - /* One dry run */ - cl_mem cl_dst = clCreateBuffer(ctx, CL_MEM_PINNABLE, sz, NULL, &status); - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); - dst = (int *) clIntelMapBuffer(cl_dst, &status); - - writeBmp(dst, w, h, "mandelbrot.bmp"); - CALL (clIntelUnmapBuffer, cl_dst); - CALL (clReleaseMemObject, cl_dst); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/matmul.c b/tests/TODO/matmul.c deleted file mode 100644 index c26c87e..0000000 --- a/tests/TODO/matmul.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); - -float *input0, *input1, *output, *refout; -int vectorsize = 4; -int blockSize = 8; - -/* must be multiples of vectorsize and blocksize */ -#define M 32 -#define N 32 -#define K 32 - -int width0 = M; -int height0 = N; - -int width1 = K; -int height1 = M; - -int main(int argc, char**argv) -{ - struct args args = {0}; - char *ker_path = NULL; - int err; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - ker_path = do_kiss_path("matmul_kernels_0.bin", device); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "mmmKernel"); - - input0 = (cl_float *) newBuffer(width0 * height0 * sizeof(cl_float), 'p'); - input1 = (cl_float *) newBuffer(width1 * height1 * sizeof(cl_float), 'p'); - output = (cl_float *) newBuffer(width1 * height0 * sizeof(cl_float), 0 ); - refout = (cl_float *) newBuffer(width1 * height0 * sizeof(cl_float), 0 ); - - cl_mem inputBuffer0 = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - width0 * height0 * sizeof(cl_float), input0, &err); CHK_ERR(err); - cl_mem inputBuffer1 = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - width1 * height1 * sizeof(cl_float), input1, &err); CHK_ERR(err); - cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - width1 * height0 * sizeof(cl_float), output, &err); CHK_ERR(err); - - /* Execute */ - size_t globalThreads[2] = {width1 / 4, height0/ 4}; - size_t localThreads [2] = {blockSize, blockSize}; - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer0); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer1); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(cl_int), &width0); CHK_ERR(err); - err = clSetKernelArg(kernel, 4, sizeof(cl_int), &width1); CHK_ERR(err); - - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); - output = clIntelMapBuffer(outputBuffer, &err); - - verify(); - clIntelUnmapBuffer(outputBuffer); - return err; -} - -void -matmulCPU(float *output, float *input0, float * input1, int y, int x, int z) -{ - int i, j, k; - - for(i=0; i < y; i++) - for(j=0; j < z; j++) - for(k=0; k < x; k++) - output[i*z + j] += (input0[i*x + k]*input1[k*z + j]); -} - -void verify() -{ - matmulCPU(refout, input0, input1, height0, width0, width1); - comparef(refout, output, width1 * height0, 1.0e-5f ); - printf("Done\n"); -} - diff --git a/tests/TODO/mersenneTwister.c b/tests/TODO/mersenneTwister.c deleted file mode 100644 index 83200ad..0000000 --- a/tests/TODO/mersenneTwister.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -#include "cl_test.h" -#include "CL/cl_intel.h" - -void verify(); - -int width = 256; -int height = 256; -int blockSizeX = 8; -int blockSizeY = 8; -int mulFactor = 2; -float *seeds, *deviceResult; - -int main(int argc, char**argv) -{ - char *ker_path = NULL; - struct args args = {0}; - int err, i; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("mersenne_kernels_0.bin8", device); -#else - ker_path = do_kiss_path("mersenne_kernels_0.bin", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "gaussianRand"); - - seeds = newBuffer( width * height * sizeof(cl_uint4), 0); -// deviceResult = newBuffer( width * height * mulFactor * sizeof(cl_float4), 0); - for (i = 0; i < width * height * 4; ++i) { - seeds[i] = (unsigned int)rand(); - } - - cl_mem seedsBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, width * height * sizeof(cl_float4), seeds, &err); CHK_ERR(err); - cl_mem resultBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * height * sizeof(cl_float4) * mulFactor, NULL, &err); CHK_ERR(err); - - size_t globalThreads[2] = {width, height}; - size_t localThreads[2] = {blockSizeX, blockSizeY}; - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &seedsBuf); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_uint), &width); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_uint), &mulFactor); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &resultBuf); CHK_ERR(err); - - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); -#if 0 - err = clEnqueueReadBuffer(queue, resultBuf, CL_TRUE, 0, width * height * mulFactor * sizeof(cl_float4), - deviceResult, 1, &eND, NULL); -#endif - deviceResult = clIntelMapBuffer(resultBuf, &err); - CHK_ERR(err); - verify(); - clIntelUnmapBuffer(resultBuf); - - return err; -} - -void verify() -{ - int i; - // comparef(d, c, MAX, 1.0e-6); - /* check mean value of generated random numbers */ - float meanVal = 0.0f; - - for(i = 0; i < height * width * (int)mulFactor * 4; ++i) { - meanVal += deviceResult[i]; - } - - meanVal = fabs(meanVal) / (height * width * (int)mulFactor * 4); - printf("Mean Value of random numbers: %12.8f\n", meanVal); - printf("%s\n", (meanVal < 0.1f) ? "Passed" : "Failed"); - - printf("Done\n"); -} - diff --git a/tests/TODO/monteCarloAsian.c b/tests/TODO/monteCarloAsian.c deleted file mode 100644 index 9138cc2..0000000 --- a/tests/TODO/monteCarloAsian.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); -int width = 64; -int height = 64; -int blockSizeX = 256; -int blockSizeY = 1; - - -cl_float *sigma; /**< Array of sigma values */ -cl_float *price; /**< Array of price values */ -cl_float *vega; /**< Array of vega values */ -cl_float *refPrice; /**< Array of reference price values */ -cl_float *refVega; /**< Array of reference vega values */ -cl_uint *randNum; /**< Array of random numbers */ -cl_float *priceVals; /**< Array of price values for given samples */ -cl_float *priceDeriv; /**< Array of price derivative values for given samples */ -cl_int steps = 10; - - typedef struct _MonteCalroAttrib - { - cl_float4 strikePrice; - cl_float4 c1; - cl_float4 c2; - cl_float4 c3; - cl_float4 initPrice; - cl_float4 sigma; - cl_float4 timeStep; - }MonteCarloAttrib; - - float maturity = 1.f; - int noOfSum = 12; - int noOfTraj = 1024; - float initPrice = 50.f; - float strikePrice = 55.f; - float interest = 0.06f; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err, i; - const cl_float finalValue = 0.8f; - const cl_float stepValue = finalValue / (cl_float)steps; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernel(device, context, "monteCarloAsian_kernels.cl", "calPriceVega"); - - sigma = (cl_float*) newBuffer(steps * sizeof(cl_float), 0); - sigma[0] = 0.01f; - for (i = 1; i < steps; i++) { - sigma[i] = sigma[i - 1] + stepValue; - } - - price = (cl_float*) newBuffer(steps * sizeof(cl_float), 0); - vega = (cl_float*) newBuffer(steps * sizeof(cl_float), 0); - refPrice = (cl_float*) newBuffer(steps * sizeof(cl_float), 0); - refVega = (cl_float*) newBuffer(steps * sizeof(cl_float), 0); - - /* Set samples and exercize points */ - - width = noOfTraj / 4; - height = noOfTraj / 2; - - randNum = (cl_uint*) newBuffer(width * height * sizeof(cl_uint4), 0); - priceVals = (cl_float*) newBuffer(width * height * 2 * sizeof(cl_float4), 0); - priceDeriv = (cl_float*) newBuffer(width * height * 2 * sizeof(cl_float4), 0); - - cl_mem randBuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_uint4) * width * height, randNum, &err); - CHK_ERR(err); - - cl_mem priceBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * width * height * 2, NULL, &err); - CHK_ERR(err); - - cl_mem priceDerivBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * width * height * 2, NULL, &err); - CHK_ERR(err); - - cl_event eND; - size_t globalThreads[2] = {width, height}; - size_t localThreads[2] = {blockSizeX, blockSizeY}; - - MonteCarloAttrib attributes; - - err = clSetKernelArg(kernel, 2, sizeof(cl_uint), &width); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &randBuf); CHK_ERR(err); - err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &priceBuf); CHK_ERR(err); - err = clSetKernelArg(kernel, 5, sizeof(cl_mem), &priceDerivBuf); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_int), &noOfSum); CHK_ERR(err); - - float timeStep = maturity / (noOfSum - 1); - // Initialize random number generator - srand(1); - - int k, j; - for(k = 0; k < steps; k++) { - for(j = 0; j < (width * height * 4); j++) { - randNum[j] = (cl_uint)rand(); - } - - float c1 = (interest - 0.5f * sigma[k] * sigma[k]) * timeStep; - float c2 = sigma[k] * sqrt(timeStep); - float c3 = (interest + 0.5f * sigma[k] * sigma[k]); - - const cl_float4 c1F4 = {c1, c1, c1, c1}; - attributes.c1 = c1F4; - - const cl_float4 c2F4 = {c2, c2, c2, c2}; - attributes.c2 = c2F4; - - const cl_float4 c3F4 = {c3, c3, c3, c3}; - attributes.c3 = c3F4; - - cl_float4 initPriceF4 = {initPrice, initPrice, initPrice, initPrice}; - attributes.initPrice = initPriceF4; - - const cl_float4 strikePriceF4 = {strikePrice, strikePrice, strikePrice, strikePrice}; - attributes.strikePrice = strikePriceF4; - - const cl_float4 sigmaF4 = {sigma[k], sigma[k], sigma[k], sigma[k]}; - attributes.sigma = sigmaF4; - - const cl_float4 timeStepF4 = {timeStep, timeStep, timeStep, timeStep}; - attributes.timeStep = timeStepF4; - - - /* Set appropriate arguments to the kernel */ - /* the input array - also acts as output for this pass (input for next) */ - err = clSetKernelArg(kernel, 0, sizeof(attributes), &attributes); - CHK_ERR(err); - - /* - * Enqueue a kernel run call. - */ - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, - 0, NULL, &eND); - CHK_ERR(err); - - /* Enqueue the results to application pointer*/ - err = clEnqueueReadBuffer(queue, priceBuf, CL_TRUE, 0, width * height * 2 * sizeof(cl_float4), priceVals, - 1, &eND, NULL); - CHK_ERR(err); - - /* Enqueue the results to application pointer*/ - err = clEnqueueReadBuffer(queue, priceDerivBuf, CL_TRUE, 0, width * height * 2 * sizeof(cl_float4), priceDeriv, - 0, NULL, NULL); - CHK_ERR(err); - - /* Replace Following "for" loop with reduction kernel */ - for (i = 0; i < noOfTraj * noOfTraj; i++) { - price[k] += priceVals[i]; - vega[k] += priceDeriv[i]; - } - - price[k] /= (noOfTraj * noOfTraj); - vega[k] /= (noOfTraj * noOfTraj); - - price[k] = exp(-interest * maturity) * price[k]; - vega[k] = exp(-interest * maturity) * vega[k]; - } - verify(); -} - -void -lshift128(unsigned int* input, - unsigned int shift, - unsigned int * output) -{ - unsigned int invshift = 32u - shift; - - output[0] = input[0] << shift; - output[1] = (input[1] << shift) | (input[0] >> invshift); - output[2] = (input[2] << shift) | (input[1] >> invshift); - output[3] = (input[3] << shift) | (input[2] >> invshift); -} - -void -rshift128(unsigned int* input, - unsigned int shift, - unsigned int* output) -{ - unsigned int invshift = 32u - shift; - output[3]= input[3] >> shift; - output[2] = (input[2] >> shift) | (input[0] >> invshift); - output[1] = (input[1] >> shift) | (input[1] >> invshift); - output[0] = (input[0] >> shift) | (input[2] >> invshift); -} - -void -generateRand(unsigned int* seed, - float *gaussianRand1, - float *gaussianRand2, - unsigned int* nextRand) -{ - - unsigned int mulFactor = 4; - unsigned int temp[8][4]; - - unsigned int state1[4] = {seed[0], seed[1], seed[2], seed[3]}; - unsigned int state2[4] = {0u, 0u, 0u, 0u}; - unsigned int state3[4] = {0u, 0u, 0u, 0u}; - unsigned int state4[4] = {0u, 0u, 0u, 0u}; - unsigned int state5[4] = {0u, 0u, 0u, 0u}; - - unsigned int stateMask = 1812433253u; - unsigned int thirty = 30u; - unsigned int mask4[4] = {stateMask, stateMask, stateMask, stateMask}; - unsigned int thirty4[4] = {thirty, thirty, thirty, thirty}; - unsigned int one4[4] = {1u, 1u, 1u, 1u}; - unsigned int two4[4] = {2u, 2u, 2u, 2u}; - unsigned int three4[4] = {3u, 3u, 3u, 3u}; - unsigned int four4[4] = {4u, 4u, 4u, 4u}; - - unsigned int r1[4] = {0u, 0u, 0u, 0u}; - unsigned int r2[4] = {0u, 0u, 0u, 0u}; - - unsigned int a[4] = {0u, 0u, 0u, 0u}; - unsigned int b[4] = {0u, 0u, 0u, 0u}; - - unsigned int e[4] = {0u, 0u, 0u, 0u}; - unsigned int f[4] = {0u, 0u, 0u, 0u}; - - unsigned int thirteen = 13u; - unsigned int fifteen = 15u; - unsigned int shift = 8u * 3u; - - unsigned int mask11 = 0xfdff37ffu; - unsigned int mask12 = 0xef7f3f7du; - unsigned int mask13 = 0xff777b7du; - unsigned int mask14 = 0x7ff7fb2fu; - - const float one = 1.0f; - const float intMax = 4294967296.0f; - const float PI = 3.14159265358979f; - const float two = 2.0f; - - float r[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float phi[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - float temp1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float temp2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - int c; - - //Initializing states. - for(c = 0; c < 4; ++c) { - state2[c] = mask4[c] * (state1[c] ^ (state1[c] >> thirty4[c])) + one4[c]; - state3[c] = mask4[c] * (state2[c] ^ (state2[c] >> thirty4[c])) + two4[c]; - state4[c] = mask4[c] * (state3[c] ^ (state3[c] >> thirty4[c])) + three4[c]; - state5[c] = mask4[c] * (state4[c] ^ (state4[c] >> thirty4[c])) + four4[c]; - } - - unsigned int i = 0; - for(i = 0; i < mulFactor; ++i) { - switch(i) - { - case 0: - for(c = 0; c < 4; ++c) - { - r1[c] = state4[c]; - r2[c] = state5[c]; - a[c] = state1[c]; - b[c] = state3[c]; - } - break; - case 1: - for(c = 0; c < 4; ++c) - { - r1[c] = r2[c]; - r2[c] = temp[0][c]; - a[c] = state2[c]; - b[c] = state4[c]; - } - break; - case 2: - for(c = 0; c < 4; ++c) - { - r1[c] = r2[c]; - r2[c] = temp[1][c]; - a[c] = state3[c]; - b[c] = state5[c]; - } - break; - case 3: - for(c = 0; c < 4; ++c) - { - r1[c] = r2[c]; - r2[c] = temp[2][c]; - a[c] = state4[c]; - b[c] = state1[c]; - } - break; - default: - break; - - } - - lshift128(a, shift, e); - rshift128(r1, shift, f); - - temp[i][0] = a[0] ^ e[0] ^ ((b[0] >> thirteen) & mask11) ^ f[0] ^ (r2[0] << fifteen); - temp[i][1] = a[1] ^ e[1] ^ ((b[1] >> thirteen) & mask12) ^ f[1] ^ (r2[1] << fifteen); - temp[i][2] = a[2] ^ e[2] ^ ((b[2] >> thirteen) & mask13) ^ f[2] ^ (r2[2] << fifteen); - temp[i][3] = a[3] ^ e[3] ^ ((b[3] >> thirteen) & mask14) ^ f[3] ^ (r2[3] << fifteen); - - } - - for(c = 0; c < 4; ++c) { - temp1[c] = temp[0][c] * one / intMax; - temp2[c] = temp[1][c] * one / intMax; - } - - for(c = 0; c < 4; ++c) { - // Applying Box Mullar Transformations. - r[c] = sqrt((-two) * log(temp1[c])); - phi[c] = two * PI * temp2[c]; - gaussianRand1[c] = r[c] * cos(phi[c]); - gaussianRand2[c] = r[c] * sin(phi[c]); - - nextRand[c] = temp[2][c]; - } -} - -void -calOutputs(float strikePrice, - float* meanDeriv1, - float* meanDeriv2, - float* meanPrice1, - float* meanPrice2, - float* pathDeriv1, - float* pathDeriv2, - float* priceVec1, - float* priceVec2) -{ - float temp1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float temp2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float temp3[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float temp4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - float tempDiff1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float tempDiff2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - int c; - - for(c = 0; c < 4; ++c) - { - tempDiff1[c] = meanPrice1[c] - strikePrice; - tempDiff2[c] = meanPrice2[c] - strikePrice; - } - if(tempDiff1[0] > 0.0f) - { - temp1[0] = 1.0f; - temp3[0] = tempDiff1[0]; - } - if(tempDiff1[1] > 0.0f) - { - temp1[1] = 1.0f; - temp3[1] = tempDiff1[1]; - } - if(tempDiff1[2] > 0.0f) - { - temp1[2] = 1.0f; - temp3[2] = tempDiff1[2]; - } - if(tempDiff1[3] > 0.0f) - { - temp1[3] = 1.0f; - temp3[3] = tempDiff1[3]; - } - - if(tempDiff2[0] > 0.0f) - { - temp2[0] = 1.0f; - temp4[0] = tempDiff2[0]; - } - if(tempDiff2[1] > 0.0f) - { - temp2[1] = 1.0f; - temp4[1] = tempDiff2[1]; - } - if(tempDiff2[2] > 0.0f) - { - temp2[2] = 1.0f; - temp4[2] = tempDiff2[2]; - } - if(tempDiff2[3] > 0.0f) - { - temp2[3] = 1.0f; - temp4[3] = tempDiff2[3]; - } - - for(c = 0; c < 4; ++c) { - pathDeriv1[c] = meanDeriv1[c] * temp1[c]; - pathDeriv2[c] = meanDeriv2[c] * temp2[c]; - priceVec1[c] = temp3[c]; - priceVec2[c] = temp4[c]; - } -} - -void cpuRef() -{ - - float timeStep = maturity / (noOfSum - 1); - - // Initialize random number generator - srand(1); - int i, j, k, c; - - for(k = 0; k < steps; k++) { - float c1 = (interest - 0.5f * sigma[k] * sigma[k]) * timeStep; - float c2 = sigma[k] * sqrt(timeStep); - float c3 = (interest + 0.5f * sigma[k] * sigma[k]); - - for(j = 0; j < (width * height); j++) { - unsigned int nextRand[4] = {0u, 0u, 0u, 0u}; - for (c = 0; c < 4; ++c) - nextRand[c] = (cl_uint)rand(); - - float trajPrice1[4] = {initPrice, initPrice, initPrice, initPrice}; - float sumPrice1[4] = {initPrice, initPrice, initPrice, initPrice}; - float sumDeriv1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float meanPrice1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float meanDeriv1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float price1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float pathDeriv1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - float trajPrice2[4] = {initPrice, initPrice, initPrice, initPrice}; - float sumPrice2[4] = {initPrice, initPrice, initPrice, initPrice}; - float sumDeriv2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float meanPrice2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float meanDeriv2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float price2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float pathDeriv2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - //Run the Monte Carlo simulation a total of Num_Sum - 1 times - for(i = 1; i < noOfSum; i++) { - unsigned int tempRand[4] = {0u, 0u, 0u, 0u}; - for(c = 0; c < 4; ++c) - tempRand[c] = nextRand[c]; - - float gaussian1[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float gaussian2[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - generateRand(tempRand, gaussian1, gaussian2, nextRand); - - //Calculate the trajectory price and sum price for all trajectories - for(c = 0; c < 4; ++c) { - trajPrice1[c] = trajPrice1[c] * exp(c1 + c2 * gaussian1[c]); - trajPrice2[c] = trajPrice2[c] * exp(c1 + c2 * gaussian2[c]); - - sumPrice1[c] = sumPrice1[c] + trajPrice1[c]; - sumPrice2[c] = sumPrice2[c] + trajPrice2[c]; - - float temp = c3 * timeStep * i; - - // Calculate the derivative price for all trajectories - sumDeriv1[c] = sumDeriv1[c] + trajPrice1[c] - * ((log(trajPrice1[c] / initPrice) - temp) / sigma[k]); - - sumDeriv2[c] = sumDeriv2[c] + trajPrice2[c] - * ((log(trajPrice2[c] / initPrice) - temp) / sigma[k]); - } - } - - //Calculate the average price and “average derivative” of each simulated path - for(c = 0; c < 4; ++c) { - meanPrice1[c] = sumPrice1[c] / noOfSum; - meanPrice2[c] = sumPrice2[c] / noOfSum; - meanDeriv1[c] = sumDeriv1[c] / noOfSum; - meanDeriv2[c] = sumDeriv2[c] / noOfSum; - } - - calOutputs(strikePrice, meanDeriv1, meanDeriv2, meanPrice1, meanPrice2, - pathDeriv1, pathDeriv2, price1, price2); - - for(c = 0; c < 4; ++c) { - priceVals[j * 8 + c] = price1[c]; - priceVals[j * 8 + 1 * 4 + c] = price2[c]; - priceDeriv[j * 8 + c] = pathDeriv1[c]; - priceDeriv[j * 8 + 1 * 4 + c] = pathDeriv2[c]; - } - } - - /* Replace Following "for" loop with reduction kernel */ - for(i = 0; i < noOfTraj * noOfTraj; i++) { - refPrice[k] += priceVals[i]; - refVega[k] += priceDeriv[i]; - } - - refPrice[k] /= (noOfTraj * noOfTraj); - refVega[k] /= (noOfTraj * noOfTraj); - - refPrice[k] = exp(-interest * maturity) * refPrice[k]; - refVega[k] = exp(-interest * maturity) * refVega[k]; - } - - /* compare the results and see if they match */ - for(i = 0; i < steps; ++i) - { - if(fabs(price[i] - refPrice[i]) > 0.2f) - { - printf("Failed\n"); - exit(-1); - } - if(fabs(vega[i] - refVega[i]) > 0.2f) - { - printf("Failed\n"); - exit(-1); - } - } - printf("Passed\n"); -} - -void verify() -{ - int i; - // comparef(d, c, MAX, 1.0e-6); - cpuRef(); - printf("Done\n"); -} - diff --git a/tests/TODO/nbody.c b/tests/TODO/nbody.c deleted file mode 100644 index a205a6b..0000000 --- a/tests/TODO/nbody.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -#include "cl_test.h" - -void verify(); - -float *initPos, *initVel, *pos, *vel; -int numBodies = 1024; -float delT = 0.005f; -float espSqr = 50.0f; -int GROUP_SIZE = 256; -int ITERATIONS = 1; - -int main(int argc, char**argv) -{ - char *ker_path = NULL; - struct args args = {0}; - int err, i; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); -#if TEST_SIMD8 - ker_path = do_kiss_path("nbody_kernels_0.bin8", device); -#else - ker_path = do_kiss_path("nbody_kernels_0.bin", device); -#endif - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "nbody_sim"); - - initPos = newBuffer(numBodies * sizeof(cl_float4), 0); - initVel = newBuffer(numBodies * sizeof(cl_float4), 0); - pos = newBuffer(numBodies * sizeof(cl_float4), 0); - vel = newBuffer(numBodies * sizeof(cl_float4), 0); - - /* initialization of inputs */ - for (i = 0; i < numBodies; ++i) { - int index = 4 * i; - int j; - - // First 3 values are position in x,y and z direction - for(j = 0; j < 3; ++j) { - initPos[index + j] = randf2(3.0f, 50.0f); - } - - // Mass value - initPos[index + 3] = randf2(1.0, 1000.0); - - // First 3 values are velocity in x,y and z direction - for (j = 0; j < 3; ++j) { - initVel[index + j] = 0.0f; - } - - // unused - initVel[3] = 0.0f; - } - - memcpy(pos, initPos, 4 * numBodies * sizeof(cl_float)); - memcpy(vel, initVel, 4 * numBodies * sizeof(cl_float)); - - cl_mem curPosBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, numBodies * sizeof(cl_float4), pos, &err); CHK_ERR(err); - cl_mem newPosBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, numBodies * sizeof(cl_float4), 0, &err); CHK_ERR(err); - cl_mem curVelBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, numBodies * sizeof(cl_float4), vel, &err); CHK_ERR(err); - cl_mem newVelBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, numBodies * sizeof(cl_float4), 0, &err); CHK_ERR(err); - - /* Execute */ - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &curPosBuf); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &curVelBuf); - err = clSetKernelArg(kernel, 2, sizeof(cl_int), &numBodies); - err = clSetKernelArg(kernel, 3, sizeof(cl_float), &delT); - err = clSetKernelArg(kernel, 4, sizeof(cl_float), &espSqr); - err = clSetKernelArg(kernel, 5, GROUP_SIZE * 4 * sizeof(float), NULL); - err = clSetKernelArg(kernel, 6, sizeof(cl_mem), &newPosBuf); - err = clSetKernelArg(kernel, 7, sizeof(cl_mem), &newVelBuf); - - size_t globalThreads[] = {numBodies}; - size_t localThreads[] = {GROUP_SIZE}; - - assert(ITERATIONS == 1); - for (i = 0; i < ITERATIONS; i++) { - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); CHK_ERR(err); - - /* Copy data from new to old */ - // err = clEnqueueCopyBuffer(queue, newPosBuf, curPosBuf, 0, 0, sizeof(cl_float4) * numBodies, 0, 0, 0); CHK_ERR(err); - // err = clEnqueueCopyBuffer(queue, newVelBuf, curVelBuf, 0, 0, sizeof(cl_float4) * numBodies, 0, 0, 0); CHK_ERR(err); - // err = clFinish(queue); CHK_ERR(err); - - /* Enqueue readBuffer*/ - /* Wait for the read buffer to finish execution */ - // err = clEnqueueReadBuffer(queue, curPosBuf, CL_TRUE, 0, numBodies* sizeof(cl_float4), pos, 0, NULL, NULL); CHK_ERR(err); - } - - pos = clIntelMapBuffer(newPosBuf, &err); - verify(); - return 0; -} - -void -nBodyCPU(float* refPos, float* refVel) -{ - int i, j, k; - - //Iterate for all samples - for(i = 0; i < numBodies; ++i) { - int myIndex = 4 * i; - float acc[3] = {0.0f, 0.0f, 0.0f}; - - for(j = 0; j < numBodies; ++j) { - float r[3]; - int index = 4 * j; - - float distSqr = 0.0f; - for(k = 0; k < 3; ++k) { - r[k] = refPos[index + k] - refPos[myIndex + k]; - distSqr += r[k] * r[k]; - } - - float invDist = 1.0f / sqrt(distSqr + espSqr); - float invDistCube = invDist * invDist * invDist; - float s = refPos[index + 3] * invDistCube; - - for(k = 0; k < 3; ++k) { - acc[k] += s * r[k]; - } - } - - for(k = 0; k < 3; ++k) { - refPos[myIndex + k] += refVel[myIndex + k] * delT + 0.5f * acc[k] * delT * delT; - refVel[myIndex + k] += acc[k] * delT; - } - } -} - -void verify() -{ - int i; - - float* refPos = newBuffer(numBodies * sizeof(cl_float4), 0); - float* refVel = newBuffer(numBodies * sizeof(cl_float4), 0); - memcpy(refPos, initPos, 4 * numBodies * sizeof(cl_float)); - memcpy(refVel, initVel, 4 * numBodies * sizeof(cl_float)); - - for (i = 0; i < ITERATIONS; i++) { - nBodyCPU(refPos, refVel); - } - - comparef(refPos, pos, numBodies * 4, 1.0e-4f); - for (i = 0; i < 20; i++) { - printf("%12.8f | %12.8f\n", refPos[i], pos[i]); - } - printf("Done\n"); -} - diff --git a/tests/TODO/svm_test.c b/tests/TODO/svm_test.c deleted file mode 100644 index 2468065..0000000 --- a/tests/TODO/svm_test.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef _WIN32 -#include "CL/cl_intel.h" -#include "common.h" -#endif - -#define USE_CL_SOURCE - -#ifdef USE_CL_SOURCE -const char *progsrc = -"__kernel void test1( \n" \ -" __global char* svm, \n" \ -" uint svmBase, \n" \ -" uint context) \n" \ -"{ \n" \ -" int i = get_global_id(0); \n" \ -" //__global int* ptr = (__global int*)(svm+context-svmBase); \n" \ -" //__global int* ptr = (__global int*)&svm[context-svmBase]; \n" \ -" svm -= svmBase; \n" \ -" __global int *ptr = (__global int *)&svm[context]; \n" \ -" ptr[i]=i; \n" \ -"} \n" \ -"\n"; -#else -static const char* progsrc = -" SHADER test1 \n" -" VERSION_2_1 \n" -" DCL_THREADGROUP VARIABLE; \n" -" DCL_UAVRAW u0, TRUE = \n" -" { \n" -" KERNEL_ARGUMENT,0 \n" -" }; \n" -" DCL_CONSTANTREGISTER c0 = \n" -" { \n" -" KERNEL_ARGUMENT, 1, 0, \n" -" KERNEL_ARGUMENT, 2, 0, \n" -" UNUSED, 0, 0, \n" -" UNUSED, 0, 0 \n" -" }; \n" -" DCL_INPUT i0.xyz, THREAD_ID; \n" -" DCL_POINTER ptr1; \n" -" DCL_TEMP r0; \n" -" DCL_TEMP r1; \n" -" DCL_POINTER ptr0; \n" -" ADDRESS_OF ptr0, u0; \n" -" DCL_TEMP r2; \n" -" DCL_TEMP r3; \n" -" DCL_TEMP r4; \n" -" DCL_POINTER ptr2; \n" -" DCL_TEMP r5; \n" -" MOV r2.x, i0.x; \n" -" PADD ptr1, ptr0,-c0.x; \n" -" PADD ptr1, ptr1,c0.y;\n" -" MOV r0.x, r2.x; \n" -" MOV r5.x, r0.x; \n" -" MOV r1.x, r5.x; \n" -" SHL r3.x, r1.x, 2; \n" -//" MOV r4.x, 0; \n" -" PADD ptr2, ptr1, r3.x; \n" -//" UADD r4.x, r4.x, 2; \n" -//" STORE_RAW_PTR ptr2.x, r4.x; \n" -" STORE_RAW_PTR ptr2.x, r2.x; \n" -" RET; \n" -" END \n" -; -#endif - -const char *src; -const char *kernel_name; - -#define PAGE_SIZE (4 << 10) -#define PAGE_ALIGNMENT_MASK (~(PAGE_SIZE-1)) -#define SVM_SIZE (128 << 20) - -cl_device_id device_id; // device ID -cl_context context; // context -cl_command_queue queue; // command queue -char* svmBase = NULL; -cl_mem svmMemObject; - -char *load_program_source(const char *filename) -{ - FILE *fh; - struct stat statbuf; - char *source; - - if (!(fh=fopen(filename,"r"))) - return NULL; - - stat(filename, &statbuf); - source = (char *)malloc(statbuf.st_size+1); - fread(source, statbuf.st_size,1,fh); - source[statbuf.st_size]=0; - return source; -} - -int init() { - cl_uint num_devices; - char device_name[1024]; - cl_int err; - cl_platform_id platform; - - err = clGetPlatformIDs(1, &platform, NULL); - if (err != CL_SUCCESS) { - fprintf(stderr, "Error: clGetPlatformIDs failed. Error code = %d.\n", err); - return err; - } - - // Get an ID for the device - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, - &device_id, &num_devices); - if (err != CL_SUCCESS) - { - printf("Error: clGetDeviceIDs"); - return err; - } - printf("num_gpu_devices: %d\n",num_devices); - err = clGetDeviceInfo(device_id, - CL_DEVICE_NAME, - 1024, - device_name, - NULL); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo %d\n",err); - return err; - } - - printf("device name is %s\n",device_name); - context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); - if (!context) - { - printf("Error: clCreateContext. error code=%d\n",err); - return err; - } - - // Create a command queue [5] - // - queue = clCreateCommandQueue(context, device_id, 0, &err); - if (!queue) - { - printf("Error:clCreateCommandQueue %d",err); - return err; - } -#ifdef _WIN32 - svmBase = malloc(SVM_SIZE+PAGE_SIZE); - (cl_uint)svmBase &= PAGE_ALIGNMENT_MASK; - svmMemObject = clCreateBuffer(context, CL_MEM_READ_WRITE | - CL_MEM_USE_HOST_PTR, - (size_t)SVM_SIZE, - svmBase, - &err); - if (err != CL_SUCCESS) { - printf("Error: clCreateBuffer of SVM: %d\n",err); - return err; - } -#else // Linux - svmMemObject = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_PINNABLE, - SVM_SIZE, NULL, &err); - if (err != CL_SUCCESS) { - printf("Error: clCreateBuffer of SVM: %d\n",err); - return err; - } - if ((err = clIntelPinBuffer(svmMemObject)) != CL_SUCCESS) { - printf("Error: clIntelPinBuffer: %d\n",err); - return err; - } - svmBase = clIntelMapBuffer(svmMemObject,&err); - if (err != CL_SUCCESS) { - printf("Error: clIntelMapBuffer: %d\n",err); - return err; - } -#endif - - printf("initializing svm with zeros\n"); - memset(svmBase,0,SVM_SIZE); - return err; -} - -int test2() { - size_t global; // global domain size for our calculation - size_t local; // local domain size for our calculation - int err; // error code returned from api calls - cl_kernel kernel; -#ifndef _WIN32 - /*cl_event event;*/ -#endif - int i; - char *start; - size_t offset; - int failed; - -#ifdef _WIN32 - // Create the compute program from the source buffer [6] - // - cl_program program = clCreateProgramWithSource(context, 1, - (const char **) & src, NULL, &err); - if (!program) - { - printf("Error:clCreateProgramWithSource"); - return 0; - } - // Build the program executable [7] - // - err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); - if (err != CL_SUCCESS) - { - size_t len; - char buffer[2048]; - //printf("Error: Failed to build program executable\n"); [8] - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, - sizeof(buffer), buffer, &len); - printf("%s\n", buffer); - exit(1); - } -#else - char *ker_path = NULL; -#endif - - // Create the compute kernel in the program we wish to run [9] - // -#ifdef _WIN32 - kernel = clCreateKernel(program, kernel_name, &err); - if (!kernel || err != CL_SUCCESS) - { - printf("Error: clCreateKernel"); - return 0; - } -#else - ker_path = do_kiss_path("svm_test_kernel_0.bin", device_id); - kernel = getKernelFromBinary(device_id, context, ker_path, "test1"); -#endif - // Set the arguments to our compute kernel [12] - // - //offset = SVM_SIZE >> 1; - offset = 0; - //offset = 16; - //offset = 1; - //global = (64 << 20) / sizeof(int); - //global = SVM_SIZE / sizeof(int); - //global = (SVM_SIZE >> 1) / sizeof(int); - global = 1<<10; - start = svmBase + offset; - err = 0; - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &svmMemObject); - err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), &svmBase); - err |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &start); - if (err != CL_SUCCESS) - { - printf("Error:clSetKernelArg %d",err); - return 0; - } - -#if 0 - // Get the maximum work-group size for executing the kernel on the device - // [13] - err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(size_t), &local, NULL); - if (err != CL_SUCCESS) - { - printf("Error:clGetKernelWorkGroupInfo: %d",err); - return 0; - } - printf("workgroup size is %d\n",(int)local); -#endif - local = 16; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, - &global, &local /*NULL*/, - 0, NULL, NULL /*&event*/); - if (err != CL_SUCCESS) - { - printf("Error:clEnqueueNDRangeKernel: %d",err); - return 0; - } - - clFinish(queue); - - failed = 0; - for (i=0;i<(int)global;i++) { - if (((int*)start)[i]!=i) { - printf("svmBase[%d]=%d.\n",(int) i+(int) offset,(int) start[i]); - failed = 1; - } - } - - printf(failed?"Test failed\n":"Test passed.\n"); - - return 0; -} - - -int main(int argc, char** argv) -{ - //src = load_program_source(argv[1]); - src = progsrc; - //kernel_name = argv[2]; - kernel_name = "test1"; - if (init() != CL_SUCCESS) - return -1; - test2(); - return 0; -} diff --git a/tests/TODO/test_2d_copy.c b/tests/TODO/test_2d_copy.c deleted file mode 100644 index 4a320b4..0000000 --- a/tests/TODO/test_2d_copy.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -typedef union kernel_arg { - float f; - uint32_t u32; - uint16_t u16; - uint8_t u8; - int32_t i32; - int16_t i16; - int8_t i8; -} kernel_arg_t; - -#define W 32 -#define H 32 - -int -main (int argc, char *argv[]) -{ - cl_mem dst0, dst1, src; - uint32_t *src_buffer = NULL; - IF_DEBUG (uint32_t *dst0_buffer = NULL); - IF_DEBUG (uint32_t *dst1_buffer = NULL); - const size_t n = W*H; - const size_t global_work_size[2] = {W, H}; - const size_t local_work_size[2] = {8,8}; - kernel_arg_t arg; - int status = 0, i, j; - -#if TEST_SIMD8 - if ((status = cl_test_init("test_2d_copy_kernels_0.bin8", "test_2d_copy")) != 0) - goto error; -#else - if ((status = cl_test_init("test_2d_copy_kernels_0.bin", "test_2d_copy")) != 0) - goto error; -#endif - - /* Fill the buffer with random values */ - if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < n; ++i) - src_buffer[i] = rand(); - - /* Allocate the two buffers */ - dst0 = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - dst1 = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status); - if (status != CL_SUCCESS) - goto error; - free(src_buffer); - - /* Set source and destination */ - arg.i32 = W; - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst0); - CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst1); - CALL (clSetKernelArg, kernel, 2, sizeof(cl_mem), &src); - CALL (clSetKernelArg, kernel, 3, sizeof(float), &arg); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 2, - NULL, - global_work_size, - local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst0_buffer = (uint32_t *) clIntelMapBuffer(dst0, &status); - if (status != CL_SUCCESS) - goto error; - dst1_buffer = (uint32_t *) clIntelMapBuffer(dst1, &status); - if (status != CL_SUCCESS) - goto error; -#endif /* NDEBUG */ - src_buffer = (uint32_t *) clIntelMapBuffer(src, &status); - if (status != CL_SUCCESS) - goto error; - - for (j = 0; j < H; ++j) - for (i = 0; i < W; ++i) { - assert(dst0_buffer[i+j*W] == src_buffer[i+j*W]); - assert(dst1_buffer[i+j*W] == i+j); - } - IF_DEBUG(CALL (clIntelUnmapBuffer, dst0)); - IF_DEBUG(CALL (clIntelUnmapBuffer, dst1)); - CALL (clIntelUnmapBuffer, src); - CALL (clReleaseMemObject, dst0); - CALL (clReleaseMemObject, dst1); - CALL (clReleaseMemObject, src); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_barrier.c b/tests/TODO/test_barrier.c deleted file mode 100644 index 6ad04f2..0000000 --- a/tests/TODO/test_barrier.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -typedef union kernel_arg { - float f; - uint32_t u32; - uint16_t u16; - uint8_t u8; - int32_t i32; - int16_t i16; - int8_t i8; -} kernel_arg_t; - -#define N 1024 -int -main (int argc, char *argv[]) -{ - cl_mem dst, src; - IF_DEBUG(uint32_t *dst_buffer = NULL); - uint32_t *src_buffer = NULL; - const size_t n = N; - const size_t global_work_size = N; - const size_t local_work_size = 256; - kernel_arg_t arg; - int status = 0, i; - -#if TEST_SIMD8 - if ((status = cl_test_init("test_barrier_kernels_0.bin8", "test_barrier")) != 0) - goto error; -#else - if ((status = cl_test_init("test_barrier_kernels_0.bin", "test_barrier")) != 0) - goto error; -#endif - - /* Fill the buffer with random values */ - if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < n; ++i) - src_buffer[i] = i; - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status); - if (status != CL_SUCCESS) - goto error; - free(src_buffer); - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst); - CALL (clSetKernelArg, kernel, 4, sizeof(cl_mem), &src); - arg.f = 1.f; - CALL (clSetKernelArg, kernel, 1, sizeof(float), &arg); - arg.i8 = 2; - CALL (clSetKernelArg, kernel, 2, sizeof(int8_t), &arg); - arg.i32 = 3; - CALL (clSetKernelArg, kernel, 3, sizeof(int32_t), &arg); - arg.i16 = 4; - CALL (clSetKernelArg, kernel, 5, sizeof(int16_t), &arg); - arg.u32 = 5; - CALL (clSetKernelArg, kernel, 6, sizeof(uint32_t), &arg); - arg.i32 = 6; - CALL (clSetKernelArg, kernel, 7, sizeof(int32_t), &arg); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; -#endif - src_buffer = (uint32_t *) clIntelMapBuffer(src, &status); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < N; ++i) - assert(dst_buffer[i] == src_buffer[i] + 21); - - IF_DEBUG(CALL (clIntelUnmapBuffer, dst)); - CALL (clIntelUnmapBuffer, src); - CALL (clReleaseMemObject, dst); - CALL (clReleaseMemObject, src); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_constant_memory.c b/tests/TODO/test_constant_memory.c deleted file mode 100644 index bac8b49..0000000 --- a/tests/TODO/test_constant_memory.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -#ifndef NDEBUG -static const uint32_t wrk_value[32] = { - 71, 69, 67, 65, 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, - 33, 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9 -}; -#endif /* NDEBUG */ - -#define N 512 -int -main (int argc, char *argv[]) -{ - cl_mem dst, constants; - IF_DEBUG(uint32_t *dst_buffer = NULL); - uint32_t *cst_buffer = NULL; - const size_t n = N; - const size_t global_work_size = N; - const size_t local_work_size = 32; - int status = 0, i; - -#if TEST_SIMD8 - if ((status = cl_test_init("test_constant_memory_0.bin8", "test_constant_memory")) != 0) - goto error; -#else - if ((status = cl_test_init("test_constant_memory_0.bin", "test_constant_memory")) != 0) - goto error; -#endif - - /* Allocate the dst buffer */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - - /* Allocate the constant buffer and fill it with ones */ - if ((cst_buffer = (uint32_t*) malloc(sizeof(uint32_t) * 32)) == NULL) { - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < 32; ++i) - cst_buffer[i] = 1; - - constants = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, - 32 * sizeof(uint32_t), - cst_buffer, - &status); - if (status != CL_SUCCESS) - goto error; - free(cst_buffer); - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst); - CALL (clSetKernelArg, kernel, 1, 32*sizeof(int), NULL); - CALL (clSetKernelArg, kernel, 2, sizeof(cl_mem), &constants); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < N; ++i) - assert(dst_buffer[i] == wrk_value[i % 32]); - CALL (clIntelUnmapBuffer, dst); -#endif /* NDEBUG */ - - CALL (clReleaseMemObject, dst); - CALL (clReleaseMemObject, constants); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_copy_image.c b/tests/TODO/test_copy_image.c deleted file mode 100644 index 33a4f8d..0000000 --- a/tests/TODO/test_copy_image.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -int -main (int argc, char *argv[]) -{ - cl_mem dst, src; - IF_DEBUG(uint32_t *dst_buffer = NULL); - uint32_t *src_buffer = NULL; - const size_t w = 64, h = 64; - const size_t n = w * h; - const size_t global_work_size[2] = {w,h}; - const size_t local_work_size[2] = {16,1}; - int status = 0, i; - -#if TEST_SIMD8 - if ((status = cl_test_init("test_copy_image_0.bin8", "test_copy_image")) != 0) - goto error; -#else - if ((status = cl_test_init("test_copy_image_0.bin", "test_copy_image")) != 0) - goto error; -#endif - - /* Fill the buffer with random values */ - if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < n; ++i) - src_buffer[i] = i; - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - const cl_image_format fmt = { - .image_channel_order = CL_RGBA, - .image_channel_data_type = CL_UNSIGNED_INT8 - }; - src = clCreateImage2D(ctx, - CL_MEM_COPY_HOST_PTR, - &fmt, - w, - h, - w * sizeof(uint32_t), - src_buffer, - &status); - if (status != CL_SUCCESS) - goto error; - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src); - CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 2, - NULL, - global_work_size, - local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < n; ++i) assert(src_buffer[i] == dst_buffer[i]); - CALL (clIntelUnmapBuffer, dst); -#endif /* NDEBUG */ - - free(src_buffer); - CALL (clReleaseMemObject, dst); - CALL (clReleaseMemObject, src); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_enqueue_read.c b/tests/TODO/test_enqueue_read.c deleted file mode 100644 index 9f3addd..0000000 --- a/tests/TODO/test_enqueue_read.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -int -main (int argc, char *argv[]) -{ - cl_mem dst, src; - uint32_t *dst_buffer = NULL, *src_buffer = NULL; - const size_t n = 16; - const size_t global_work_size = n; - const size_t local_work_size = 16; - int status = 0, i; - - if ((status = cl_test_init("CopyBuffer_0.bin", "CopyBuffer")) != 0) - goto error; - - /* Fill the buffer with random values */ - if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - if ((dst_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < n; ++i) - src_buffer[i] = i; - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR | CL_MEM_WRITE_ONLY, n * sizeof(uint32_t), dst_buffer, &status); - if (status != CL_SUCCESS) - goto error; - src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, n * sizeof(uint32_t), src_buffer, &status); - if (status != CL_SUCCESS) - goto error; - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src); - CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ - CALL (clEnqueueReadBuffer, queue, - dst, - CL_TRUE, - 0, - n* sizeof(uint32_t), - (void *)dst_buffer, - 0, - NULL, - NULL ); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < n; ++i) { - printf("src_buffer[%d]:[%d], dst_buffer[%d]:[%d]\n", - i, src_buffer[i], i, dst_buffer[i]); -// assert(src_buffer[i] == dst_buffer[i]); - } - - free(src_buffer); - free(dst_buffer); - CALL (clReleaseMemObject, dst); - CALL (clReleaseMemObject, src); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_imm_parameters.c b/tests/TODO/test_imm_parameters.c deleted file mode 100644 index 88d8786..0000000 --- a/tests/TODO/test_imm_parameters.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -typedef union kernel_arg { - float f; - uint32_t u32; - uint16_t u16; - uint8_t u8; - int32_t i32; - int16_t i16; - int8_t i8; -} kernel_arg_t; - -int -main (int argc, char *argv[]) -{ - cl_mem dst, src; - IF_DEBUG(uint32_t *dst_buffer = NULL); - uint32_t *src_buffer = NULL; - const size_t n = 6*16; - const size_t global_work_size = 16; - const size_t local_work_size = 16; - kernel_arg_t arg; - int status = 0, i, j; - -#if TEST_SIMD8 - if ((status = cl_test_init("test_imm_parameters_kernels_0.bin8", "test_imm_parameters")) != 0) - goto error; -#else - if ((status = cl_test_init("test_imm_parameters_kernels_0.bin", "test_imm_parameters")) != 0) - goto error; -#endif - - /* Fill the buffer with random values */ - if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < n; ++i) - src_buffer[i] = i; - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status); - if (status != CL_SUCCESS) - goto error; - free(src_buffer); - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst); - CALL (clSetKernelArg, kernel, 4, sizeof(cl_mem), &src); - arg.f = 1.f; CALL (clSetKernelArg, kernel, 1, sizeof(float), &arg); - arg.i8 = 2; CALL (clSetKernelArg, kernel, 2, sizeof(int8_t), &arg); - arg.i32 = 3; CALL (clSetKernelArg, kernel, 3, sizeof(int32_t), &arg); - arg.i16 = 4; CALL (clSetKernelArg, kernel, 5, sizeof(int16_t), &arg); - arg.u32 = 5; CALL (clSetKernelArg, kernel, 6, sizeof(uint32_t), &arg); - arg.i32 = 6; CALL (clSetKernelArg, kernel, 7, sizeof(int32_t), &arg); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; -#endif /* NDEBUG */ - - src_buffer = (uint32_t *) clIntelMapBuffer(src, &status); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < 16; ++i) - for (j = 0; j < 6; ++j) - assert(dst_buffer[6*i+j] == src_buffer[i] + 1 + j); - - IF_DEBUG(CALL (clIntelUnmapBuffer, dst)); - CALL (clIntelUnmapBuffer, src); - CALL (clReleaseMemObject, dst); - CALL (clReleaseMemObject, src); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_local_memory.c b/tests/TODO/test_local_memory.c deleted file mode 100644 index aab7d0f..0000000 --- a/tests/TODO/test_local_memory.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -#ifndef NDEBUG -static const uint32_t wrk_value[32] = { - 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, - 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 -}; -#endif /* NDEBUG */ - -#define N 1024 -int -main (int argc, char *argv[]) -{ - cl_mem dst; - IF_DEBUG(uint32_t *dst_buffer = NULL); - const size_t n = N; - const size_t global_work_size = N; - const size_t local_work_size = 32; - int status = 0; -#ifndef NDEBUG - int i; -#endif /* NDEBUG */ - -#if 0 //TEST_SIMD8 - if ((status = cl_test_init("test_local_memory_0.bin8", "test_local_memory")) != 0) - goto error; -#else - if ((status = cl_test_init("test_local_memory_0.bin", "test_local_memory")) != 0) - goto error; -#endif - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst); - CALL (clSetKernelArg, kernel, 1, 32*sizeof(int), NULL); - CALL (clSetKernelArg, kernel, 2, 32*sizeof(int), NULL); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < N; ++i) - assert(dst_buffer[i] == wrk_value[i % 32]); - CALL (clIntelUnmapBuffer, dst); -#endif /* NDEBUG */ - - CALL (clReleaseMemObject, dst); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_memory_leak.c b/tests/TODO/test_memory_leak.c deleted file mode 100644 index 4ddcdd8..0000000 --- a/tests/TODO/test_memory_leak.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -int -main (int argc, char *argv[]) -{ - cl_mem dst, src; - uint32_t *src_buffer = NULL; - const size_t n = 1024; - //const size_t global_work_size = 1024; - //const size_t local_work_size = 16; - int status = 0, i; - - if ((status = cl_test_init("CopyBuffer_0.bin", "CopyBuffer")) != 0) - goto error; - - /* Fill the buffer with random values */ - if ((src_buffer = malloc(sizeof(uint32_t) * n)) == NULL) { - fprintf(stderr, "Allocation failed\n"); - status = CL_OUT_OF_HOST_MEMORY; - goto error; - } - for (i = 0; i < n; ++i) - src_buffer[i] = rand(); - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - src = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer, &status); - if (status != CL_SUCCESS) - goto error; - free(src_buffer); - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src); - CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &dst); - - /* Release the buffers we allocate here */ - cl_test_destroy(); - CALL (clReleaseMemObject, dst); - CALL (clReleaseMemObject, src); - printf("%i memory leaks\n", clIntelReportUnfreed()); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_private_memory.c b/tests/TODO/test_private_memory.c deleted file mode 100644 index 4c83d26..0000000 --- a/tests/TODO/test_private_memory.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -#ifndef NDEBUG -static const uint32_t wrk_value[32] = { - 64, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, - 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 -}; -#endif /* NDEBUG */ - -#define N 16 -int -main (int argc, char *argv[]) -{ - cl_mem dst; - uint32_t *dst_buffer = NULL; - const size_t n = N; - const size_t global_work_size = N; - const size_t local_work_size = 16; - int status = 0; - -#if TEST_SIMD8 - if ((status = cl_test_init("test_private_memory_0.bin8", "test_private_memory")) != 0) - goto error; -#else - if ((status = cl_test_init("test_private_memory_0.bin", "test_private_memory")) != 0) - goto error; -#endif - - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; - int i; - for (i = 0; i < N; ++i) - //assert(dst_buffer[i] == wrk_value[i % 32]); - //printf("[%u %u]", (dst_buffer[i] >> 8)& 0xf, dst_buffer[i] & 0x3); - printf("[%u]", dst_buffer[i]); - assert(dst_buffer[0] == 496); - assert(dst_buffer[1] == 528); - assert(dst_buffer[2] == 560); - assert(dst_buffer[3] == 592); - assert(dst_buffer[4] == 624); - assert(dst_buffer[5] == 656); - assert(dst_buffer[6] == 688); - assert(dst_buffer[7] == 720); - assert(dst_buffer[8] == 752); - assert(dst_buffer[9] == 784); - assert(dst_buffer[10] == 816); - assert(dst_buffer[11] == 848); - assert(dst_buffer[12] == 880); - assert(dst_buffer[13] == 912); - assert(dst_buffer[14] == 944); - assert(dst_buffer[15] == 976); - - CALL (clIntelUnmapBuffer, dst); - CALL (clReleaseMemObject, dst); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/test_static_local_memory.c b/tests/TODO/test_static_local_memory.c deleted file mode 100644 index 245a5ac..0000000 --- a/tests/TODO/test_static_local_memory.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "cl_test.h" - -#include -#include -#include - -#ifndef NDEBUG -static const uint32_t wrk_value[32] = { - 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, - 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 -}; -#endif /* NDEBUG */ - -#define N 544 -int -main (int argc, char *argv[]) -{ - cl_mem dst; - IF_DEBUG(uint32_t *dst_buffer = NULL); - const size_t n = N; - const size_t global_work_size = N; - const size_t local_work_size = 32; - int status = 0; - IF_DEBUG(int i); - -#if TEST_SIMD8 - if ((status = cl_test_init("test_static_local_memory_0.bin8", "test_static_local_memory")) != 0) - goto error; -#else - if ((status = cl_test_init("test_static_local_memory_0.bin", "test_static_local_memory")) != 0) - goto error; -#endif - - /* Allocate the two buffers */ - dst = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status); - if (status != CL_SUCCESS) - goto error; - - /* Set source and destination */ - CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst); - - /* Run the kernel */ - CALL (clEnqueueNDRangeKernel, queue, - kernel, - 1, - NULL, - &global_work_size, - &local_work_size, - 0, - NULL, - NULL); - - /* Be sure that everything run fine */ -#ifndef NDEBUG - dst_buffer = (uint32_t *) clIntelMapBuffer(dst, &status); - if (status != CL_SUCCESS) - goto error; - for (i = 0; i < N; ++i) - assert(dst_buffer[i] == wrk_value[i % 32]); - CALL (clIntelUnmapBuffer, dst); -#endif /* NDEBUG */ - - CALL (clReleaseMemObject, dst); - cl_test_destroy(); - printf("%i memory leaks\n", clIntelReportUnfreed()); - assert(clIntelReportUnfreed() == 0); - -error: - cl_report_error(status); - return status; -} - diff --git a/tests/TODO/transpose.c b/tests/TODO/transpose.c deleted file mode 100644 index 556527a..0000000 --- a/tests/TODO/transpose.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); -float *input, *output, *refout; -int width = 128; -int height = 128; -int blockSize = 16; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err, i; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernel(device, context, "transpose_kernels.cl", "matrixTranspose"); - - input = newBuffer(width * height * sizeof(cl_float), 'p'); - output = newBuffer(width * height * sizeof(cl_float), '0'); - refout = newBuffer(width * height * sizeof(cl_float), '0'); - - - cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_float) * width * height, input, &err); CHK_ERR(err); - cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(cl_float) * width * height, output, &err); CHK_ERR(err); - - /* Execute */ - size_t globalThreads[2] = {width, height}; - size_t localThreads [2] = {blockSize, blockSize}; - - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &outputBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_float)*blockSize*blockSize, NULL); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(cl_int), &width); CHK_ERR(err); - err = clSetKernelArg(kernel, 4, sizeof(cl_int), &height); CHK_ERR(err); - err = clSetKernelArg(kernel, 5, sizeof(cl_int), &blockSize); CHK_ERR(err); - - cl_event eND; - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, - 0, NULL, &eND); - CHK_ERR(err); - - err = clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, width * height * sizeof(cl_float), output, - 0, NULL, &eND); - CHK_ERR(err); - - verify(); -} -void transposeCPU(float *output, float *input, int width, int height) -{ - int i, j; - - for(j=0; j < height; j++) - for(i=0; i < width; i++) - output[i*height + j] = input[j*width + i]; -} - - -void verify() -{ - int i; - transposeCPU(refout, input, width, height); - comparef(refout, output, width * height, 1e-5f); - for (i=0; i<20; i++) { - printf("%12.8f %12.8f\n", refout[i], input[i]); - } - printf("Done\n"); -} - diff --git a/tests/TODO/urng.c b/tests/TODO/urng.c deleted file mode 100644 index a96ba2f..0000000 --- a/tests/TODO/urng.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); -static cl_uchar4 *inputImageData, *outputImageData, *refOut; -static int *pixelData; -static int width; -static int height; -static int pixelSize = sizeof(cl_uchar4); -static int factor = 25.0; -static int blockSizeX = 64; -static int blockSizeY = 1; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err; - char *ker_path = NULL; - char *input_path = NULL; - char *output_path = NULL; - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - ker_path = do_kiss_path("urng_kernels_0.bin", device); - input_path = do_kiss_path("urng_input.bmp", NULL); - output_path = do_kiss_path("urng_output.bmp", NULL); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernelFromBinary(device, context, ker_path, "noise_uniform"); - - pixelData = readBmp(input_path, &width, &height); - inputImageData = (cl_uchar4*) newBuffer(width * height * pixelSize, 0); - outputImageData = (cl_uchar4*) newBuffer(width * height * pixelSize, 0); - refOut = (cl_uchar4*) newBuffer(width * height * pixelSize, 0); - memcpy(inputImageData, pixelData, width * height * pixelSize); - - cl_mem inputImageBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, width * height * pixelSize, inputImageData, &err); CHK_ERR(err); - cl_mem outputImageBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, width * height * pixelSize, - outputImageData, &err); CHK_ERR(err); - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImageBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImageBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(factor), &factor); CHK_ERR(err); - - size_t globalThreads[] = {width, height}; - size_t localThreads[] = {blockSizeX, blockSizeY}; - - err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); - CHK_ERR(err); - outputImageData = clIntelMapBuffer(outputImageBuffer, &err); - CHK_ERR(err); - - writeBmp((int *) outputImageData, width, height, output_path); - verify(); - clIntelUnmapBuffer(outputImageBuffer); - return err; -} - -void verify() -{ - int i; - // comparef(d, c, MAX, 1.0e-6); - float mean = 0.0f; - for(i = 0; i < (int)(width * height); i++) { - mean += outputImageData[i].s[0] - inputImageData[i].s[0]; - } - mean /= (width * height * factor); - - if(fabs(mean) < 1.0) { - printf("%12.8f Passed!\n", mean); - } else { - printf("%12.8f Failed!\n", mean); - } - printf("Done\n"); -} - diff --git a/tests/TODO/vadd.c b/tests/TODO/vadd.c deleted file mode 100644 index 0cb3aa2..0000000 --- a/tests/TODO/vadd.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Author: Benjamin Segovia - */ - -#include "common.h" -void verify(); - -#define MAX 1000000 -float *a, *b, *c; - -int main(int argc, char**argv) -{ - struct args args = {0}; - int err, i; - - parseArgs(argc, argv, &args); - - cl_device_id device = getDeviceID(args.d); - cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &err); CHK_ERR(err); - cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err); CHK_ERR(err); - cl_kernel kernel = getKernel(device, context, "vadd_kernel.cl", "vadd_gpu"); - - a = newBuffer(MAX * sizeof(float), 'f'); - b = newBuffer(MAX * sizeof(float), 'f'); - c = newBuffer(MAX * sizeof(float), '0'); - - cl_mem aBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), a, &err); CHK_ERR(err); - cl_mem bBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), b, &err); CHK_ERR(err); - cl_mem cBuffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, MAX * sizeof(float), c, &err); CHK_ERR(err); - - /* Execute */ - int gws = MAX; - int lws = 16; - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &aBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &cBuffer); CHK_ERR(err); - err = clSetKernelArg(kernel, 3, sizeof(size_t), &gws); CHK_ERR(err); - - cl_event eND; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &gws, &lws, 0, NULL, &eND); CHK_ERR(err); - err = clEnqueueReadBuffer(queue, cBuffer, CL_TRUE, 0, MAX*sizeof(float), c, 1, &eND, NULL); CHK_ERR(err); - - verify(); -} - -void verify() -{ - int i; - float *d; - - d = newBuffer(MAX * sizeof(float), '0'); - for (i = 0; i < MAX; i++) { - d[i] = a[i] + b[i]; - } - comparef(d, c, MAX, 1.0e-6); - printf("Done\n"); -} - diff --git a/tests/CMakeLists.txt b/utests/CMakeLists.txt similarity index 100% rename from tests/CMakeLists.txt rename to utests/CMakeLists.txt diff --git a/tests/compiler_copy_buffer.cpp b/utests/compiler_copy_buffer.cpp similarity index 100% rename from tests/compiler_copy_buffer.cpp rename to utests/compiler_copy_buffer.cpp diff --git a/tests/compiler_copy_buffer_row.cpp b/utests/compiler_copy_buffer_row.cpp similarity index 100% rename from tests/compiler_copy_buffer_row.cpp rename to utests/compiler_copy_buffer_row.cpp diff --git a/tests/compiler_eot.cpp b/utests/compiler_eot.cpp similarity index 100% rename from tests/compiler_eot.cpp rename to utests/compiler_eot.cpp diff --git a/tests/compiler_write_only.cpp b/utests/compiler_write_only.cpp similarity index 100% rename from tests/compiler_write_only.cpp rename to utests/compiler_write_only.cpp diff --git a/tests/runtime_flat_address_space.cpp b/utests/runtime_flat_address_space.cpp similarity index 100% rename from tests/runtime_flat_address_space.cpp rename to utests/runtime_flat_address_space.cpp diff --git a/tests/utest.cpp b/utests/utest.cpp similarity index 100% rename from tests/utest.cpp rename to utests/utest.cpp diff --git a/tests/utest.hpp b/utests/utest.hpp similarity index 100% rename from tests/utest.hpp rename to utests/utest.hpp diff --git a/tests/utest_assert.cpp b/utests/utest_assert.cpp similarity index 100% rename from tests/utest_assert.cpp rename to utests/utest_assert.cpp diff --git a/tests/utest_assert.hpp b/utests/utest_assert.hpp similarity index 100% rename from tests/utest_assert.hpp rename to utests/utest_assert.hpp diff --git a/tests/utest_error.c b/utests/utest_error.c similarity index 100% rename from tests/utest_error.c rename to utests/utest_error.c diff --git a/tests/utest_error.h b/utests/utest_error.h similarity index 100% rename from tests/utest_error.h rename to utests/utest_error.h diff --git a/tests/utest_exception.hpp b/utests/utest_exception.hpp similarity index 100% rename from tests/utest_exception.hpp rename to utests/utest_exception.hpp diff --git a/tests/utest_file_map.cpp b/utests/utest_file_map.cpp similarity index 100% rename from tests/utest_file_map.cpp rename to utests/utest_file_map.cpp diff --git a/tests/utest_file_map.hpp b/utests/utest_file_map.hpp similarity index 100% rename from tests/utest_file_map.hpp rename to utests/utest_file_map.hpp diff --git a/tests/utest_helper.cpp b/utests/utest_helper.cpp similarity index 100% rename from tests/utest_helper.cpp rename to utests/utest_helper.cpp diff --git a/tests/utest_helper.hpp b/utests/utest_helper.hpp similarity index 100% rename from tests/utest_helper.hpp rename to utests/utest_helper.hpp