From 4e9e197c26c34b7055b2c5de6d4a770d317b468c Mon Sep 17 00:00:00 2001 From: Eurogiciel-BOT Date: Thu, 18 Sep 2014 08:22:10 +0000 Subject: [PATCH] Upstream version 9.38.207.0 Upstream commit-id 03026c9cdacf33376ad909d86a31a20c92d1bc20 Change-Id: I8d64c250e6604cc2a1e264fac85dbf5e1349d5f2 Signed-off-by: Eurogiciel-BOT --- packaging/crosswalk.spec | 2 +- src/third_party/khronos/CL/cl.h | 1214 ++ src/third_party/khronos/CL/cl.hpp | 12452 +++++++++++++++++++ src/third_party/khronos/CL/cl_d3d10.h | 126 + src/third_party/khronos/CL/cl_d3d11.h | 126 + src/third_party/khronos/CL/cl_dx9_media_sharing.h | 127 + src/third_party/khronos/CL/cl_egl.h | 133 + src/third_party/khronos/CL/cl_ext.h | 316 + src/third_party/khronos/CL/cl_gl.h | 162 + src/third_party/khronos/CL/cl_gl_ext.h | 69 + src/third_party/khronos/CL/cl_platform.h | 1278 ++ src/third_party/khronos/CL/opencl.h | 54 + src/v8/include/v8.h | 87 +- src/v8/src/api.h | 9 + src/v8/src/arm/assembler-arm-inl.h | 1 + src/v8/src/arm/assembler-arm.h | 29 + src/v8/src/arm/deoptimizer-arm.cc | 18 +- src/v8/src/arm/lithium-arm.cc | 74 +- src/v8/src/arm/lithium-arm.h | 18 +- src/v8/src/arm/lithium-codegen-arm.cc | 163 + src/v8/src/arm/lithium-codegen-arm.h | 5 + src/v8/src/arm/macro-assembler-arm.cc | 13 + src/v8/src/arm/macro-assembler-arm.h | 7 + src/v8/src/assembler.h | 1 + src/v8/src/bootstrapper.cc | 145 +- src/v8/src/contexts.h | 29 + src/v8/src/d8.h | 3 + src/v8/src/deoptimizer.cc | 392 +- src/v8/src/deoptimizer.h | 74 +- src/v8/src/elements-kind.cc | 7 + src/v8/src/elements-kind.h | 60 + src/v8/src/elements.cc | 17 + src/v8/src/factory.cc | 24 + src/v8/src/factory.h | 9 + src/v8/src/flag-definitions.h | 1 + src/v8/src/globals.h | 31 +- src/v8/src/heap/heap.cc | 48 + src/v8/src/heap/heap.h | 42 + src/v8/src/heap/mark-compact.cc | 3 + src/v8/src/heap/objects-visiting.cc | 7 +- src/v8/src/hydrogen-instructions.cc | 162 +- src/v8/src/hydrogen-instructions.h | 439 +- src/v8/src/hydrogen-representation-changes.cc | 15 +- 
src/v8/src/hydrogen-types.cc | 22 + src/v8/src/hydrogen-types.h | 19 +- src/v8/src/hydrogen.cc | 446 +- src/v8/src/hydrogen.h | 27 +- src/v8/src/ia32/assembler-ia32-inl.h | 1 + src/v8/src/ia32/assembler-ia32.cc | 388 +- src/v8/src/ia32/assembler-ia32.h | 78 +- src/v8/src/ia32/deoptimizer-ia32.cc | 49 +- src/v8/src/ia32/disasm-ia32.cc | 209 +- src/v8/src/ia32/lithium-codegen-ia32.cc | 1231 +- src/v8/src/ia32/lithium-codegen-ia32.h | 23 + src/v8/src/ia32/lithium-gap-resolver-ia32.cc | 54 + src/v8/src/ia32/lithium-ia32.cc | 362 +- src/v8/src/ia32/lithium-ia32.h | 223 +- src/v8/src/ia32/macro-assembler-ia32.cc | 136 +- src/v8/src/ia32/macro-assembler-ia32.h | 28 + src/v8/src/lithium-allocator-inl.h | 3 +- src/v8/src/lithium-allocator.cc | 89 +- src/v8/src/lithium-allocator.h | 11 + src/v8/src/lithium.cc | 36 +- src/v8/src/lithium.h | 74 +- src/v8/src/macros.py | 3 + src/v8/src/objects-debug.cc | 24 + src/v8/src/objects-inl.h | 287 + src/v8/src/objects-printer.cc | 76 + src/v8/src/objects.cc | 167 +- src/v8/src/objects.h | 406 +- src/v8/src/property-details.h | 13 + src/v8/src/runtime.cc | 773 +- src/v8/src/runtime.h | 102 +- src/v8/src/runtime.js | 90 + src/v8/src/simd128.js | 1063 ++ src/v8/src/types.cc | 3 + src/v8/src/x64/assembler-x64-inl.h | 5 + src/v8/src/x64/assembler-x64.cc | 600 +- src/v8/src/x64/assembler-x64.h | 75 + src/v8/src/x64/deoptimizer-x64.cc | 43 +- src/v8/src/x64/disasm-x64.cc | 227 +- src/v8/src/x64/lithium-codegen-x64.cc | 1242 +- src/v8/src/x64/lithium-codegen-x64.h | 15 + src/v8/src/x64/lithium-gap-resolver-x64.cc | 53 + src/v8/src/x64/lithium-x64.cc | 353 +- src/v8/src/x64/lithium-x64.h | 211 +- src/v8/src/x64/macro-assembler-x64.cc | 140 +- src/v8/src/x64/macro-assembler-x64.h | 33 + src/v8/test/cctest/test-disasm-ia32.cc | 80 + src/v8/test/cctest/test-heap.cc | 10 +- src/v8/test/cctest/test-mark-compact.cc | 2 + src/v8/test/fuzz-natives/testcfg.py | 4 +- .../test/mjsunit/runtime-gen/allocatefloat32x4.js | 4 + 
.../test/mjsunit/runtime-gen/allocatefloat64x2.js | 4 + src/v8/test/mjsunit/runtime-gen/allocateint32x4.js | 4 + src/v8/test/mjsunit/runtime-gen/float32x4clamp.js | 7 + .../mjsunit/runtime-gen/float32x4getsignmask.js | 5 + src/v8/test/mjsunit/runtime-gen/float32x4select.js | 7 + src/v8/test/mjsunit/runtime-gen/float64x2clamp.js | 7 + .../mjsunit/runtime-gen/float64x2getsignmask.js | 5 + .../test/mjsunit/runtime-gen/int32x4getsignmask.js | 5 + src/v8/test/mjsunit/runtime-gen/int32x4select.js | 7 + src/v8/test/mjsunit/simd/argument_object.js | 124 + src/v8/test/mjsunit/simd/builtin_operator.js | 183 + .../mjsunit/simd/builtin_operator_float64x2.js | 167 + src/v8/test/mjsunit/simd/captured_object.js | 80 + src/v8/test/mjsunit/simd/conversions.js | 81 + src/v8/test/mjsunit/simd/deopt.js | 78 + src/v8/test/mjsunit/simd/float32x4.js | 938 ++ src/v8/test/mjsunit/simd/float64x2.js | 520 + src/v8/test/mjsunit/simd/int32x4.js | 946 ++ src/v8/test/mjsunit/simd/osr.js | 43 + src/v8/test/mjsunit/simd/prototype.js | 60 + src/v8/test/mjsunit/simd/representation_change.js | 53 + src/v8/tools/generate-runtime-tests.py | 34 +- src/v8/tools/gyp/v8.gyp | 1 + src/v8/tools/js2c.py | 2 +- src/xwalk/DEPS.xwalk | 7 +- src/xwalk/VERSION | 2 +- .../org/xwalk/app/XWalkRuntimeActivityBase.java | 47 +- .../xwalk/app/runtime/XWalkCoreProviderImpl.java | 9 +- .../app/runtime/XWalkRuntimeLibraryException.java | 49 - src/xwalk/app/tools/android/extension_manager.py | 18 +- src/xwalk/app/tools/android/make_apk.py | 35 +- src/xwalk/app/tools/android/make_apk_test.py | 54 +- .../android/test_data/keystore/xwalk-test.keystore | Bin 2186 -> 4403 bytes src/xwalk/application/browser/application.cc | 21 +- src/xwalk/application/browser/application.h | 4 +- .../application/browser/application_protocols.cc | 6 +- .../application/browser/application_service.cc | 49 +- .../application/browser/application_service.h | 8 +- .../application/browser/application_system.cc | 19 +- 
src/xwalk/application/common/application_data.cc | 72 +- src/xwalk/application/common/application_data.h | 37 +- .../application/common/application_file_util.cc | 182 +- .../application/common/application_file_util.h | 32 +- .../common/application_file_util_unittest.cc | 15 +- .../common/application_manifest_constants.cc | 20 +- .../common/application_manifest_constants.h | 10 +- src/xwalk/application/common/id_util.cc | 7 +- src/xwalk/application/common/manifest.cc | 27 +- src/xwalk/application/common/manifest.h | 21 +- src/xwalk/application/common/manifest_handler.cc | 16 +- src/xwalk/application/common/manifest_handler.h | 5 +- .../common/manifest_handler_unittest.cc | 26 +- .../common/manifest_handlers/csp_handler.cc | 12 +- .../common/manifest_handlers/csp_handler.h | 4 +- .../manifest_handlers/csp_handler_unittest.cc | 11 +- .../permissions_handler_unittest.cc | 15 +- .../manifest_handlers/warp_handler_unittest.cc | 10 +- .../manifest_handlers/widget_handler_unittest.cc | 12 +- src/xwalk/application/common/manifest_unittest.cc | 24 - src/xwalk/application/common/package/package.h | 11 +- .../application/common/package/wgt_package.cc | 2 +- .../application/common/package/xpk_package.cc | 2 +- src/xwalk/application/common/security_policy.cc | 6 +- .../common/tizen/application_storage_impl.cc | 53 +- .../tizen/{package_path.cc => package_query.cc} | 61 +- .../tizen/{package_path.h => package_query.h} | 9 +- .../common/xwalk_application_common.gypi | 5 +- .../application/test/application_browsertest.cc | 10 +- .../application/test/application_multi_app_test.cc | 21 +- .../application/test/application_testapi_test.cc | 10 +- .../tools/tizen/xwalk_backend_plugin.cc | 86 +- .../application/tools/tizen/xwalk_backend_plugin.h | 9 +- .../tools/tizen/xwalk_package_installer.cc | 31 +- src/xwalk/packaging/crosswalk.spec | 2 +- .../core/src/org/xwalk/core/SharedXWalkView.java | 27 +- .../runtime/browser/xwalk_browser_main_parts.cc | 2 - 169 files changed, 31347 
insertions(+), 852 deletions(-) create mode 100644 src/third_party/khronos/CL/cl.h create mode 100644 src/third_party/khronos/CL/cl.hpp create mode 100644 src/third_party/khronos/CL/cl_d3d10.h create mode 100644 src/third_party/khronos/CL/cl_d3d11.h create mode 100644 src/third_party/khronos/CL/cl_dx9_media_sharing.h create mode 100644 src/third_party/khronos/CL/cl_egl.h create mode 100644 src/third_party/khronos/CL/cl_ext.h create mode 100644 src/third_party/khronos/CL/cl_gl.h create mode 100644 src/third_party/khronos/CL/cl_gl_ext.h create mode 100644 src/third_party/khronos/CL/cl_platform.h create mode 100644 src/third_party/khronos/CL/opencl.h create mode 100644 src/v8/src/simd128.js create mode 100644 src/v8/test/mjsunit/runtime-gen/allocatefloat32x4.js create mode 100644 src/v8/test/mjsunit/runtime-gen/allocatefloat64x2.js create mode 100644 src/v8/test/mjsunit/runtime-gen/allocateint32x4.js create mode 100644 src/v8/test/mjsunit/runtime-gen/float32x4clamp.js create mode 100644 src/v8/test/mjsunit/runtime-gen/float32x4getsignmask.js create mode 100644 src/v8/test/mjsunit/runtime-gen/float32x4select.js create mode 100644 src/v8/test/mjsunit/runtime-gen/float64x2clamp.js create mode 100644 src/v8/test/mjsunit/runtime-gen/float64x2getsignmask.js create mode 100644 src/v8/test/mjsunit/runtime-gen/int32x4getsignmask.js create mode 100644 src/v8/test/mjsunit/runtime-gen/int32x4select.js create mode 100644 src/v8/test/mjsunit/simd/argument_object.js create mode 100644 src/v8/test/mjsunit/simd/builtin_operator.js create mode 100644 src/v8/test/mjsunit/simd/builtin_operator_float64x2.js create mode 100644 src/v8/test/mjsunit/simd/captured_object.js create mode 100644 src/v8/test/mjsunit/simd/conversions.js create mode 100644 src/v8/test/mjsunit/simd/deopt.js create mode 100644 src/v8/test/mjsunit/simd/float32x4.js create mode 100644 src/v8/test/mjsunit/simd/float64x2.js create mode 100644 src/v8/test/mjsunit/simd/int32x4.js create mode 100644 
src/v8/test/mjsunit/simd/osr.js create mode 100644 src/v8/test/mjsunit/simd/prototype.js create mode 100644 src/v8/test/mjsunit/simd/representation_change.js delete mode 100644 src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkRuntimeLibraryException.java rename src/xwalk/application/common/tizen/{package_path.cc => package_query.cc} (57%) rename src/xwalk/application/common/tizen/{package_path.h => package_query.h} (64%) diff --git a/packaging/crosswalk.spec b/packaging/crosswalk.spec index 9b332ec..7444d33 100644 --- a/packaging/crosswalk.spec +++ b/packaging/crosswalk.spec @@ -16,7 +16,7 @@ %endif Name: crosswalk -Version: 9.38.205.0 +Version: 9.38.207.0 Release: 0 Summary: Chromium-based app runtime License: (BSD-3-Clause and LGPL-2.1+) diff --git a/src/third_party/khronos/CL/cl.h b/src/third_party/khronos/CL/cl.h new file mode 100644 index 0000000..316565d --- /dev/null +++ b/src/third_party/khronos/CL/cl.h @@ -0,0 +1,1214 @@ +/******************************************************************************* + * Copyright (c) 2008 - 2012 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. 
*/ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; + cl_mem buffer; +} cl_image_desc; + +typedef struct _cl_buffer_region 
{ + size_t origin; + size_t size; +} cl_buffer_region; + + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION 
-59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 + +/* OpenCL Version */ +#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1 + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define 
CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F 
+#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define 
CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 + +/* cl_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define 
CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 + 
+/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +/* cl_kernel_arg_type_qualifer */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 
+ +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern 
CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id /* in_device */, + const cl_device_partition_property * /* properties */, + cl_uint /* num_devices */, + cl_device_id * /* out_devices */, + cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL 
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem /* buffer */, + cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + const cl_image_desc * /* image_desc */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) 
CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback( cl_mem /* memobj */, + void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; + +/* Sampler APIs */ +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) 
CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* kernel_names */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_headers */, + const cl_program * /* input_headers */, + const char ** /* header_include_names */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context /* context */, 
+ cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_programs */, + const cl_program * /* input_programs */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */, + cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; + + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* 
param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel /* kernel */, + cl_uint /* arg_indx */, + cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback( cl_event /* event */, + cl_int /* command_exec_callback_type */, + void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event /* event */, + cl_profiling_info /* 
param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* size */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* size */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* 
buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin */, + const size_t * /* dst_origin */, + const size_t * /* region */, + size_t /* src_row_pitch */, + size_t /* src_slice_pitch */, + size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int 
CL_API_CALL +clEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + const void * /* fill_color */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY 
void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_objects */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueTask(cl_command_queue /* 
command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (CL_CALLBACK * /*user_func*/)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. 
+ */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, + const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +#ifdef 
__cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ + diff --git a/src/third_party/khronos/CL/cl.hpp b/src/third_party/khronos/CL/cl.hpp new file mode 100644 index 0000000..38fac19 --- /dev/null +++ b/src/third_party/khronos/CL/cl.hpp @@ -0,0 +1,12452 @@ +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and + * OpenCL 1.2 (rev 15) + * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes + * + * Additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. 
+ * Tom Deakin and Simon McIntosh-Smith, July 2013 + * + * \version 1.2.6 + * \date August 2013 + * + * Optional extension support + * + * cl + * cl_ext_device_fission + * #define USE_CL_DEVICE_FISSION + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * + * The interface is contained with a single C++ header file \em cl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings it is enough to simply include \em cl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * For detail documentation on the bindings see: + * + * The OpenCL C++ Wrapper API 1.2 (revision 09) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * decriptions of these features. 
+ * + * \code + * #define __CL_ENABLE_EXCEPTIONS + * + * #if defined(__APPLE__) || defined(__MACOSX) + * #include + * #else + * #include + * #endif + * #include + * #include + * #include + * + * const char * helloStr = "__kernel void " + * "hello(void) " + * "{ " + * " " + * "} "; + * + * int + * main(void) + * { + * cl_int err = CL_SUCCESS; + * try { + * + * std::vector platforms; + * cl::Platform::get(&platforms); + * if (platforms.size() == 0) { + * std::cout << "Platform size 0\n"; + * return -1; + * } + * + * cl_context_properties properties[] = + * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; + * cl::Context context(CL_DEVICE_TYPE_CPU, properties); + * + * std::vector devices = context.getInfo(); + * + * cl::Program::Sources source(1, + * std::make_pair(helloStr,strlen(helloStr))); + * cl::Program program_ = cl::Program(context, source); + * program_.build(devices); + * + * cl::Kernel kernel(program_, "hello", &err); + * + * cl::Event event; + * cl::CommandQueue queue(context, devices[0], 0, &err); + * queue.enqueueNDRangeKernel( + * kernel, + * cl::NullRange, + * cl::NDRange(4,4), + * cl::NullRange, + * NULL, + * &event); + * + * event.wait(); + * } + * catch (cl::Error err) { + * std::cerr + * << "ERROR: " + * << err.what() + * << "(" + * << err.err() + * << ")" + * << std::endl; + * } + * + * return EXIT_SUCCESS; + * } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +#ifdef _WIN32 + +#include +#include +#include +#include + +#if defined(__CL_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(__CL_ENABLE_EXCEPTIONS) + +#pragma push_macro("max") +#undef max +#if defined(USE_DX_INTEROP) +#include +#include +#endif +#endif // _WIN32 + +// +#if defined(USE_CL_DEVICE_FISSION) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#include +#include +#else +#include +#include +#endif // !__APPLE__ + +// To avoid accidentally taking ownership of core OpenCL types +// such as cl_kernel constructors are made 
explicit +// under OpenCL 1.2 +#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS explicit +#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include +#include + +#if !defined(__NO_STD_VECTOR) +#include +#endif + +#if !defined(__NO_STD_STRING) +#include +#endif + +#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) +#include + +#include +#include +#endif // linux + +#include + + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + +class Memory; + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __INIT_CL_EXT_FCN_PTR(name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +#if defined(CL_VERSION_1_2) +#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +class Program; +class Device; +class Context; +class CommandQueue; +class Memory; +class Buffer; + +#if defined(__CL_ENABLE_EXCEPTIONS) +/*! 
\brief Exception class + * + * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. + */ +class Error : public std::exception +{ +private: + cl_int err_; + const char * errStr_; +public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } +}; + +#define __ERR_STR(x) #x +#else +#define __ERR_STR(x) NULL +#endif // __CL_ENABLE_EXCEPTIONS + + +namespace detail +{ +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + (void) errStr; // suppress unused variable warning + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS +} + + + +//! 
\cond DOXYGEN_DETAIL +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) +#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#if defined(CL_VERSION_1_2) +#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) +#endif // #if defined(CL_VERSION_1_2) +#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __COPY_ERR __ERR_STR(cl::copy) +#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) +#if defined(CL_VERSION_1_2) +#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) +#endif // #if defined(CL_VERSION_1_2) +#define 
__CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) + +#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#if defined(CL_VERSION_1_2) +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) +#endif // #if defined(CL_VERSION_1_2) +#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#if defined(CL_VERSION_1_2) +#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) + +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) + +#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) +#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) +#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) 
+#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) +#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) +#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) +#endif // #if defined(CL_VERSION_1_2) + +#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) + + +#define __RETAIN_ERR __ERR_STR(Retain Object) +#define __RELEASE_ERR __ERR_STR(Release Object) +#define __FLUSH_ERR __ERR_STR(clFlush) +#define __FINISH_ERR __ERR_STR(clFinish) +#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) + +/** + * CL 1.2 version that uses device fission. 
+ */ +#if defined(CL_VERSION_1_2) +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) +#else +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // #if defined(CL_VERSION_1_2) + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#endif // #if defined(CL_VERSION_1_1) + +#endif // __CL_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + +/** + * CL 1.2 marker and barrier commands + */ +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) +#endif // #if defined(CL_VERSION_1_2) + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) + +/*! \class string + * \brief Simple string class, that provides a limited subset of std::string + * functionality but avoids many of the issues that come with that class. + + * \note Deprecated. Please use std::string as default or + * re-define the string class to match the std::string + * interface by defining STRING_CLASS + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + ::size_t size_; + char * str_; +public: + //! \brief Constructs an empty string, allocating no memory. + string(void) : size_(0), str_(NULL) + { + } + + /*! 
\brief Constructs a string populated from an arbitrary value of + * specified size. + * + * An extra '\0' is added, in case none was contained in str. + * + * \param str the initial value of the string instance. Note that '\0' + * characters receive no special treatment. If NULL, + * the string is left empty, with a size of 0. + * + * \param size the number of characters to copy from str. + */ + string(const char * str, ::size_t size) : + size_(size), + str_(NULL) + { + if( size > 0 ) { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } + } + } + + /*! \brief Constructs a string populated from a null-terminated value. + * + * \param str the null-terminated initial value of the string instance. + * If NULL, the string is left empty, with a size of 0. + */ + string(const char * str) : + size_(0), + str_(NULL) + { + if( str ) { + size_= ::strlen(str); + } + if( size_ > 0 ) { + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } + } + } + + void resize( ::size_t n ) + { + if( size_ == n ) { + return; + } + if (n == 0) { + if( str_ ) { + delete [] str_; + } + str_ = NULL; + size_ = 0; + } + else { + char *newString = new char[n + 1]; + int copySize = n; + if( size_ < n ) { + copySize = size_; + } + size_ = n; + + if(str_) { + memcpy(newString, str_, (copySize + 1) * sizeof(char)); + } + if( copySize < size_ ) { + memset(newString + copySize, 0, size_ - copySize); + } + newString[size_] = '\0'; + + delete [] str_; + str_ = newString; + } + } + + const char& operator[] ( ::size_t pos ) const + { + return str_[pos]; + } + + char& operator[] ( ::size_t pos ) + { + return str_[pos]; + } + + /*! \brief Copies the value of another string to this one. + * + * \param rhs the string to copy. + * + * \returns a reference to the modified instance. 
+ */ + string& operator=(const string& rhs) + { + if (this == &rhs) { + return *this; + } + + if( str_ != NULL ) { + delete [] str_; + str_ = NULL; + size_ = 0; + } + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + str_ = NULL; + size_ = 0; + } + else { + str_ = new char[rhs.size_ + 1]; + size_ = rhs.size_; + + if (str_ != NULL) { + memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + return *this; + } + + /*! \brief Constructs a string by copying the value of another instance. + * + * \param rhs the string to copy. + */ + string(const string& rhs) : + size_(0), + str_(NULL) + { + *this = rhs; + } + + //! \brief Destructor - frees memory used to hold the current value. + ~string() + { + delete[] str_; + str_ = NULL; + } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t size(void) const { return size_; } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t length(void) const { return size(); } + + /*! \brief Returns a pointer to the private copy held by this instance, + * or "" if empty/unset. + */ + const char * c_str(void) const { return (str_) ? str_ : "";} +}; +typedef cl::string STRING_CLASS; +#endif // #elif !defined(__USE_DEV_STRING) + +#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) +#define VECTOR_CLASS std::vector +#elif !defined(__USE_DEV_VECTOR) +#define VECTOR_CLASS cl::vector + +#if !defined(__MAX_DEFAULT_VECTOR_SIZE) +#define __MAX_DEFAULT_VECTOR_SIZE 10 +#endif + +/*! \class vector + * \brief Fixed sized vector implementation that mirroring + * + * \note Deprecated. Please use std::vector as default or + * re-define the vector class to match the std::vector + * interface by defining VECTOR_CLASS + + * \note Not recommended for use with custom objects as + * current implementation will construct N elements + * + * std::vector functionality. + * \brief Fixed sized vector compatible with std::vector. 
+ * + * \note + * This differs from std::vector<> not just in memory allocation, + * but also in terms of when members are constructed, destroyed, + * and assigned instead of being copy constructed. + * + * \param T type of element contained in the vector. + * + * \param N maximum size of the vector. + */ +template +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + T data_[N]; + unsigned int size_; + +public: + //! \brief Constructs an empty vector with no memory allocated. + vector() : + size_(static_cast(0)) + {} + + //! \brief Deallocates the vector's memory and destroys all of its elements. + ~vector() + { + clear(); + } + + //! \brief Returns the number of elements currently contained. + unsigned int size(void) const + { + return size_; + } + + /*! \brief Empties the vector of all elements. + * \note + * This does not deallocate memory but will invoke destructors + * on contained elements. + */ + void clear() + { + while(!empty()) { + pop_back(); + } + } + + /*! \brief Appends an element after the last valid element. + * Calling this on a vector that has reached capacity will throw an + * exception if exceptions are enabled. + */ + void push_back (const T& x) + { + if (size() < N) { + new (&data_[size_]) T(x); + size_++; + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Removes the last valid element from the vector. + * Calling this on an empty vector will throw an exception + * if exceptions are enabled. + */ + void pop_back(void) + { + if (size_ != 0) { + --size_; + data_[size_].~T(); + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Constructs with a value copied from another. + * + * \param vec the vector to copy. + */ + vector(const vector& vec) : + size_(vec.size_) + { + if (size_ != 0) { + assign(vec.begin(), vec.end()); + } + } + + /*! 
\brief Constructs with a specified number of initial elements. + * + * \param size number of initial elements. + * + * \param val value of initial elements. + */ + vector(unsigned int size, const T& val = T()) : + size_(0) + { + for (unsigned int i = 0; i < size; i++) { + push_back(val); + } + } + + /*! \brief Overwrites the current content with that copied from another + * instance. + * + * \param rhs vector to copy. + * + * \returns a reference to this. + */ + vector& operator=(const vector& rhs) + { + if (this == &rhs) { + return *this; + } + + if (rhs.size_ != 0) { + assign(rhs.begin(), rhs.end()); + } else { + clear(); + } + + return *this; + } + + /*! \brief Tests equality against another instance. + * + * \param vec the vector against which to compare. + */ + bool operator==(vector &vec) + { + if (size() != vec.size()) { + return false; + } + + for( unsigned int i = 0; i < size(); ++i ) { + if( operator[](i) != vec[i] ) { + return false; + } + } + return true; + } + + //! \brief Conversion operator to T*. + operator T* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const T* () const { return data_; } + + //! \brief Tests whether this instance has any elements. + bool empty (void) const + { + return size_==0; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int max_size (void) const + { + return N; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int capacity () const + { + return N; + } + + /*! \brief Returns a reference to a given element. + * + * \param index which element to access. * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + T& operator[](int index) + { + return data_[index]; + } + + /*! \brief Returns a const reference to a given element. + * + * \param index which element to access. + * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). 
+ */ + const T& operator[](int index) const + { + return data_[index]; + } + + /*! \brief Assigns elements of the vector based on a source iterator range. + * + * \param start Beginning iterator of source range + * \param end Enditerator of source range + * + * \note + * Will throw an exception if exceptions are enabled and size exceeded. + */ + template + void assign(I start, I end) + { + clear(); + while(start != end) { + push_back(*start); + start++; + } + } + + /*! \class iterator + * \brief Const iterator class for vectors + */ + class iterator + { + private: + const vector *vec_; + int index_; + + /** + * Internal iterator constructor to capture reference + * to the vector it iterates over rather than taking + * the vector by copy. + */ + iterator (const vector &vec, int index) : + vec_(&vec) + { + if( !vec.empty() ) { + index_ = index; + } else { + index_ = -1; + } + } + + public: + iterator(void) : + index_(-1), + vec_(NULL) + { + } + + iterator(const iterator& rhs) : + vec_(rhs.vec_), + index_(rhs.index_) + { + } + + ~iterator(void) {} + + static iterator begin(const cl::vector &vec) + { + iterator i(vec, 0); + + return i; + } + + static iterator end(const cl::vector &vec) + { + iterator i(vec, vec.size()); + + return i; + } + + bool operator==(iterator i) + { + return ((vec_ == i.vec_) && + (index_ == i.index_)); + } + + bool operator!=(iterator i) + { + return (!(*this==i)); + } + + iterator& operator++() + { + ++index_; + return *this; + } + + iterator operator++(int) + { + iterator retVal(*this); + ++index_; + return retVal; + } + + iterator& operator--() + { + --index_; + return *this; + } + + iterator operator--(int) + { + iterator retVal(*this); + --index_; + return retVal; + } + + const T& operator *() const + { + return (*vec_)[index_]; + } + }; + + iterator begin(void) + { + return iterator::begin(*this); + } + + iterator begin(void) const + { + return iterator::begin(*this); + } + + iterator end(void) + { + return iterator::end(*this); + } + + 
iterator end(void) const + { + return iterator::end(*this); + } + + T& front(void) + { + return data_[0]; + } + + T& back(void) + { + return data_[size_]; + } + + const T& front(void) const + { + return data_[0]; + } + + const T& back(void) const + { + return data_[size_-1]; + } +}; +#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) + + + + + +namespace detail { +#define __DEFAULT_NOT_INITIALIZED 1 +#define __DEFAULT_BEING_INITIALIZED 2 +#define __DEFAULT_INITIALIZED 4 + + /* + * Compare and exchange primitives are needed for handling of defaults + */ + inline int compare_exchange(volatile int * dest, int exchange, int comparand) + { +#ifdef _WIN32 + return (int)(InterlockedCompareExchange( + (volatile long*)dest, + (long)exchange, + (long)comparand)); +#elif defined(__APPLE__) || defined(__MACOSX) + return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); +#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) + return (__sync_val_compare_and_swap( + dest, + comparand, + exchange)); +#endif // !_WIN32 + } + + inline void fence() { _mm_mfence(); } +}; // namespace detail + + +/*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ +template +class size_t +{ +private: + ::size_t data_[N]; + +public: + //! \brief Initialize size_t to all 0s + size_t() + { + for( int i = 0; i < N; ++i ) { + data_[i] = 0; + } + } + + ::size_t& operator[](int index) + { + return data_[index]; + } + + const ::size_t& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator ::size_t* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const ::size_t* () const { return data_; } +}; + +namespace detail { + +// Generic getInfoHelper. 
The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) +{ + return f(name, sizeof(T), param, NULL); +} + +// Specialized getInfoHelper for VECTOR_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; +} + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. 
+ */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + typename T::cl_type * value = (typename T::cl_type *) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t elements = required / sizeof(typename T::cl_type); + param->assign(&value[0], &value[elements]); + for (::size_t i = 0; i < elements; i++) + { + if (value[i] != NULL) + { + err = (*param)[i].retain(); + if (err != CL_SUCCESS) { + return err; + } + } + } + return CL_SUCCESS; +} + +// Specialized for getInfo +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) +{ + cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); + + if (err != CL_SUCCESS) { + return err; + } + + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for STRING_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + char* value = (char*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + *param = value; + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for cl::size_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t* value = (::size_t*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + for(int i = 0; i < N; ++i) { + (*param)[i] = value[i]; + } + + return CL_SUCCESS; +} + +template struct ReferenceHandler; + +/* Specialization for reference-counted types. 
This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + if (value != NULL) + { + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } + } + return CL_SUCCESS; +} + +#define __PARAM_NAME_INFO_1_0(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, 
CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ + F(cl_device_info, CL_DRIVER_VERSION, 
STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ + F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ 
+ F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#if defined(CL_VERSION_1_1) +#define __PARAM_NAME_INFO_1_1(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, 
cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) +#endif // CL_VERSION_1_1 + + +#if defined(CL_VERSION_1_2) +#define __PARAM_NAME_INFO_1_2(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ + \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) +#endif // #if defined(CL_VERSION_1_2) + +#if defined(USE_CL_DEVICE_FISSION) +#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ + 
F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) +#endif // USE_CL_DEVICE_FISSION + +template +struct param_traits {}; + +#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) +#if defined(CL_VERSION_1_1) +__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 +#if defined(CL_VERSION_1_2) +__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); +#endif // USE_CL_DEVICE_FISSION + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return getInfoHelper(f, name, param, 0); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* 
size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return getInfoHelper(f0, name, param, 0); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return getInfoHelper(f0, name, param, 0); +} + +template +struct ReferenceHandler +{ }; + +#if defined(CL_VERSION_1_2) +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // #if defined(CL_VERSION_1_2) +/** + * OpenCL 1.1 devices do not have retain/release. + */ +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_SUCCESS; } +}; +#endif // #if defined(CL_VERSION_1_2) + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_SUCCESS; } + // cl_platform_id does not have release(). 
+ static cl_int release(cl_platform_id) + { return CL_SUCCESS; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + + +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const char *versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' 
) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' ) { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + ::size_t size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + char *versionInfo = (char *) alloca(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + ::size_t size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + cl_device_id *devices = (cl_device_id *) alloca(size); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +template +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj) : object_(obj) { } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), 
__RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + cl_int retain() const + { + return ReferenceHandler::retain(object_); + } + + cl_int release() const + { + return ReferenceHandler::release(object_); + } +}; + +template <> +class Wrapper +{ +public: + typedef cl_device_id cl_type; + +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } + return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } 
+ +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); + + cl_int retain() const + { + if( referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if( referenceCountable_ ) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +} // namespace detail +//! \endcond + +/*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ +struct ImageFormat : public cl_image_format +{ + //! \brief Default constructor - performs no initialization. + ImageFormat(){} + + //! \brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Assignment operator. + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ +class Device : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Device() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const Device& device) : detail::Wrapper(device) { } + + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const cl_device_id &device) : detail::Wrapper(device) { } + + /*! 
\brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault(cl_int * err = NULL); + + /*! \brief Assignment operator from Device. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const Device& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! \brief Wrapper for clGetDeviceInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /** + * CL 1.2 version + */ +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clCreateSubDevicesEXT(). 
+ cl_int createSubDevices( + const cl_device_partition_property * properties, + VECTOR_CLASS* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = clCreateSubDevices(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(CL_VERSION_1_2) + +/** + * CL 1.1 version that uses device fission. + */ +#if defined(CL_VERSION_1_1) +#if defined(USE_CL_DEVICE_FISSION) + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(USE_CL_DEVICE_FISSION) +#endif // #if defined(CL_VERSION_1_1) +}; + +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. 
+ * + * \see cl_platform_id + */ +class Platform : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Platform() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const Platform& platform) : detail::Wrapper(platform) { } + + /*! \brief Constructor from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } + + /*! \brief Assignment operator from Platform. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const Platform& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetPlatformInfo(). + cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). 
+ */ + cl_int getDevices( + cl_device_type type, + VECTOR_CLASS* devices) const + { + cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + +#if defined(USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. 
+ */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + VECTOR_CLASS* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif + + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). 
+ */ + static cl_int get( + VECTOR_CLASS* platforms) + { + cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + platforms->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_uint n = 0; + + if( platform == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + *platform = ids[0]; + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * Wraps clGetPlatformIDs(), returning the first result. 
+ */ + static Platform get( + cl_int * errResult = NULL) + { + Platform platform; + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (errResult != NULL) { + *errResult = err; + } + + return ids[0]; + } + + static Platform getDefault( + cl_int *errResult = NULL ) + { + return get(errResult); + } + + +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clUnloadCompiler(). + cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // #if defined(CL_VERSION_1_2) +}; // class Platform + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} +#endif // #if defined(CL_VERSION_1_1) + +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper +{ +private: + static volatile int default_initialized_; + static Context default_; + static volatile cl_int default_error_; +public: + /*! \brief Destructor. + * + * This calls clReleaseContext() on the value held by this instance. + */ + ~Context() { } + + /*! 
\brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). + */ + Context( + const VECTOR_CLASS& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint) numDevices, + deviceIDs, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + Context( + const Device& device, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). 
+ */ + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) || !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + VECTOR_CLASS platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + VECTOR_CLASS devices; + +#if defined(__CL_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(__CL_ENABLE_EXCEPTIONS) + } catch (Error) {} + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! 
\brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + NULL, + NULL, + NULL, + &error); + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + //! \brief Default constructor - initializes to NULL. + Context() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This calls clRetainContext() on the parameter's cl_context. + */ + Context(const Context& context) : detail::Wrapper(context) { } + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } + + /*! \brief Assignment operator from Context. + * + * This calls clRetainContext() on the parameter and clReleaseContext() on + * the previous value held by this instance. + */ + Context& operator = (const Context& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_context - takes ownership. 
+ * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). + */ + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + VECTOR_CLASS* formats) const + { + cl_uint numEntries; + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + ImageFormat* value = (ImageFormat*) + alloca(numEntries * sizeof(ImageFormat)); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*) value, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(&value[0], &value[numEntries]); + return CL_SUCCESS; + } +}; + +inline Device Device::getDefault(cl_int * err) +{ + cl_int error; + Device device; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + device = context.getInfo()[0]; + if (err 
!= NULL) { + *err = CL_SUCCESS; + } + } + + return device; +} + + +#ifdef _WIN32 +__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) Context Context::default_; +__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) Context Context::default_; +__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; +#endif + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ +class Event : public detail::Wrapper +{ +public: + /*! \brief Destructor. + * + * This calls clReleaseEvent() on the value held by this instance. + */ + ~Event() { } + + //! \brief Default constructor - initializes to NULL. + Event() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This calls clRetainEvent() on the parameter's cl_event. + */ + Event(const Event& event) : detail::Wrapper(event) { } + + /*! \brief Constructor from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + Event(const cl_event& event) : detail::Wrapper(event) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event& operator = (const Event& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_event. + * + * This calls clRetainEvent() on the parameter and clReleaseEvent() on + * the previous value held by this instance. 
+ */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. + template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif + + /*! 
\brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + static cl_int + waitForEvents(const VECTOR_CLASS& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); + } +}; + +#if defined(CL_VERSION_1_1) +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ +class UserEvent : public Event +{ +public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + UserEvent() : Event() { } + + //! \brief Copy constructor - performs shallow copy. + UserEvent(const UserEvent& event) : Event(event) { } + + //! \brief Assignment Operator - performs shallow copy. + UserEvent& operator = (const UserEvent& rhs) + { + if (this != &rhs) { + Event::operator=(rhs); + } + return *this; + } + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif + +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ +inline static cl_int +WaitForEvents(const VECTOR_CLASS& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \brief Class interface for cl_mem. 
+ * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ +class Memory : public detail::Wrapper +{ +public: + + /*! \brief Destructor. + * + * This calls clReleaseMemObject() on the value held by this instance. + */ + ~Memory() {} + + //! \brief Default constructor - initializes to NULL. + Memory() : detail::Wrapper() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainMemObject() on the parameter's cl_mem. + */ + Memory(const Memory& memory) : detail::Wrapper(memory) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_mem + * into the new Memory object. + */ + __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } + + /*! \brief Assignment operator from Memory. + * + * This calls clRetainMemObject() on the parameter and clReleaseMemObject() + * on the previous value held by this instance. + */ + Memory& operator = (const Memory& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. 
+ template <cl_int name> typename + detail::param_traits<detail::cl_mem_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, &param);
+ * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. 
+ */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Buffer() : Memory() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Buffer(const Buffer& buffer) : Memory(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } + + /*! \brief Assignment from Buffer - performs shallow copy. + * + * See Memory for further details. 
+ */ + Buffer& operator = (const Buffer& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +#if defined(CL_VERSION_1_1) + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). + */ + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif +}; + +#if defined (USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferD3D10 : public Buffer +{ +public: + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). 
+ */ + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) + { + static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + +#if defined(CL_VERSION_1_2) + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); +#endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferD3D10() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferD3D10 - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const BufferD3D10& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } +}; +#endif + +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferGL : public Buffer +{ +public: + /*! 
\brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ + BufferGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferGL() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL(const BufferGL& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const BufferGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferRenderGL : public Buffer +{ +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). 
+ */ + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferRenderGL() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const BufferRenderGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image(const Image& image) : Memory(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. 
+ */ + __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } + + /*! \brief Assignment from Image - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const Image& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +#if defined(CL_VERSION_1_2) +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D, + width, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Copy constructor - performs shallow copy. 
+ * + * See Memory for further details. + */ + Image1D(const Image1D& image1D) : Image(image1D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } + + /*! \brief Assignment from Image1D - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const Image1D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + width, + 0, 0, 0, 0, 0, 0, 0, + buffer() + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } + + Image1DBuffer& operator = (const Image1DBuffer& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. 
+ */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_ARRAY, + width, + 0, 0, // height, depth (unused) + arraySize, + rowPitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image1DArray& operator = (const Image1DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image2D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). 
+ */ + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image2D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image2D(const Image2D& image2D) : Image(image2D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } + + /*! \brief Assignment from Image2D - performs shallow copy. + * + * See Memory for further details. 
+ */ + Image2D& operator = (const Image2D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D +{ +public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + + } + + //! \brief Default constructor - initializes to NULL. + Image2DGL() : Image2D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL(const Image2DGL& image) : Image2D(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } + + /*! \brief Assignment from Image2DGL - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const Image2DGL& rhs) + { + if (this != &rhs) { + Image2D::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. 
+ * + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t height, + ::size_t rowPitch, + ::size_t slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D_ARRAY, + width, + height, + 0, // depth (unused) + arraySize, + rowPitch, + slicePitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image2DArray& operator = (const Image2DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3D : public Image +{ +public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). 
+ */ + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t depth, + ::size_t row_pitch = 0, + ::size_t slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE3D, + width, + height, + depth, + 0, // array size (unused) + row_pitch, + slice_pitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image3D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image3D(const Image3D& image3D) : Image(image3D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } + + /*! \brief Assignment from Image3D - performs shallow copy. 
+ * + * See Memory for further details. + */ + Image3D& operator = (const Image3D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3DGL : public Image3D +{ +public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ + Image3DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image3DGL() : Image3D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL(const Image3DGL& image) : Image3D(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } + + /*! \brief Assignment from Image3DGL - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const Image3DGL& rhs) + { + if (this != &rhs) { + Image3D::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. 
+ */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + ImageGL(const ImageGL& image) : Image(image) { } + + __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } + + ImageGL& operator = (const ImageGL& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ +class Sampler : public detail::Wrapper +{ +public: + /*! \brief Destructor. + * + * This calls clReleaseSampler() on the value held by this instance. + */ + ~Sampler() { } + + //! \brief Default constructor - initializes to NULL. + Sampler() { } + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). 
+ */ + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainSampler() on the parameter's cl_sampler. + */ + Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Assignment operator from Sampler. + * + * This calls clRetainSampler() on the parameter and clReleaseSampler() + * on the previous value held by this instance. + */ + Sampler& operator = (const Sampler& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. 
+ template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +class Program; +class CommandQueue; +class Kernel; + +//! \brief Class interface for specifying NDRange values. +class NDRange +{ +private: + size_t<3> sizes_; + cl_uint dimensions_; + +public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() + : dimensions_(0) + { } + + //! \brief Constructs one-dimensional range. + NDRange(::size_t size0) + : dimensions_(1) + { + sizes_[0] = size0; + } + + //! \brief Constructs two-dimensional range. + NDRange(::size_t size0, ::size_t size1) + : dimensions_(2) + { + sizes_[0] = size0; + sizes_[1] = size1; + } + + //! \brief Constructs three-dimensional range. + NDRange(::size_t size0, ::size_t size1, ::size_t size2) + : dimensions_(3) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + /*! \brief Conversion operator to const ::size_t *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const ::size_t*() const { + return (const ::size_t*) sizes_; + } + + //! \brief Queries the number of dimensions in the range. + ::size_t dimensions() const { return dimensions_; } +}; + +//! \brief A zero-dimensional range. +static const NDRange NullRange; + +//! \brief Local address wrapper for use with Kernel::setArg +struct LocalSpaceArg +{ + ::size_t size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler +{ + static ::size_t size(const T&) { return sizeof(T); } + static T* ptr(T& value) { return &value; } +}; + +template <> +struct KernelArgumentHandler +{ + static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static void* ptr(LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +/*! 
__local + * \brief Helper function for generating LocalSpaceArg objects. + * Deprecated. Replaced with Local. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg +__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline LocalSpaceArg +__local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +//class KernelFunctor; + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + /*! \brief Destructor. + * + * This calls clReleaseKernel() on the value held by this instance. + */ + ~Kernel() { } + + //! \brief Default constructor - initializes to NULL. + Kernel() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainKernel() on the parameter's cl_kernel. + */ + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Assignment operator from Kernel. + * + * This calls clRetainKernel() on the parameter and clReleaseKernel() + * on the previous value held by this instance. + */ + Kernel& operator = (const Kernel& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_kernel - takes ownership. 
+ * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_2) + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if defined(CL_VERSION_1_2) + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int setArg(cl_uint index, 
T value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, ::size_t size, void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } +}; + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: + typedef VECTOR_CLASS > Binaries; + typedef VECTOR_CLASS > Sources; + + Program( + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t n = (::size_t)sources.size(); + ::size_t* lengths = (::size_t*) 
alloca(n * sizeof(::size_t)); + const char** strings = (const char**) alloca(n * sizeof(const char*)); + + for (::size_t i = 0; i < n; ++i) { + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings, lengths, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. 
+ */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const Binaries& binaries, + VECTOR_CLASS* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); + + for (::size_t i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } + + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + deviceIDs, + lengths, images, binaryStatus != NULL + ? &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if defined(CL_VERSION_1_2) + /** + * Create program using builtin kernels. 
+ * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const STRING_CLASS& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs, + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) + + Program() { } + + Program(const Program& program) : detail::Wrapper(program) { } + + __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } + + Program& operator = (const Program& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + cl_int build( + const VECTOR_CLASS& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + 
object_, + 0, + NULL, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + +#if defined(CL_VERSION_1_2) + cl_int compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data), + __COMPILE_PROGRAM_ERR); + } +#endif + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int createKernels(VECTOR_CLASS* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + err = ::clCreateKernelsInProgram( + object_, numKernels, (cl_kernel*) value, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + 
kernels->assign(&value[0], &value[numKernels]); + return CL_SUCCESS; + } +}; + +#if defined(CL_VERSION_1_2) +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(); + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} + +inline Program linkProgram( + VECTOR_CLASS inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); + + if (programs != NULL) { + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + } + + cl_program prog = ::clLinkProgram( + Context::getDefault()(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} +#endif + +template<> +inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const +{ + VECTOR_CLASS< ::size_t> sizes = getInfo(); + VECTOR_CLASS binaries; + for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) + { + char *ptr = NULL; + if (*s != 0) + ptr = new char[*s]; + binaries.push_back(ptr); + } + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (err != NULL) { + *err = result; + } + return binaries; +} + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int 
error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +private: + static volatile int default_initialized_; + static CommandQueue default_; + static volatile cl_int default_error_; +public: + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + } + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + VECTOR_CLASS devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + + object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (err != NULL) { + *err = error; + } + + } + + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + static CommandQueue 
getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + default_ = CommandQueue(context, device, 0, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... 
+ default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + CommandQueue() { } + + CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue& operator = (const CommandQueue& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + (const ::size_t *)src_origin, + (const ::size_t *)dst_origin, + (const ::size_t *)region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified a as vector. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *)dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. 
+ * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *) region, dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, src(), dst(), src_offset, + (const ::size_t *) dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t * row_pitch, + ::size_t * slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + (const ::size_t *) origin, (const ::size_t *) region, + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. 
this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. 
+ */ + cl_int enqueueMigrateMemObjects( + const VECTOR_CLASS &memObjects, + cl_mem_migration_flags flags, + const VECTOR_CLASS* events = NULL, + Event* event = NULL + ) + { + cl_event tmp; + + cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + static_cast(localMemObjects), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, + (const ::size_t*) global, + local.dimensions() != 0 ? (const ::size_t*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueTask( + const Kernel& kernel, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueNativeKernel( + void (CL_CALLBACK *userFptr)(void *), + std::pair args, + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* mem_locs = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) + ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) + : NULL; + + if (mems != NULL) { + for (unsigned int i = 0; i < mem_objects->size(); i++) { + mems[i] = ((*mem_objects)[i])(); + } + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems, + (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueMarker(object_, (cl_event*) event), + __ENQUEUE_MARKER_ERR); + } + + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + (const cl_event*) &events.front()), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int enqueueAcquireGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_2) +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_1) + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; + +#ifdef _WIN32 +__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) CommandQueue CommandQueue::default_; +__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) CommandQueue CommandQueue::default_; +__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#endif + +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + 
if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. 
+ */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. 
+ */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +#if defined(CL_VERSION_1_1) +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + 
CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + 
event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +// Kernel Functor support +// New interface as of September 2011 +// Requires the C++11 std::tr1::function (note do not support TR1) +// Visual Studio 2010 and GCC 4.2 + +struct EnqueueArgs +{ + CommandQueue 
queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + VECTOR_CLASS events_; + + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + 
global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + +namespace detail { + +class NullType {}; + +template +struct SetArg +{ + static void set (Kernel kernel, T0 arg) + { + kernel.setArg(index, arg); + } +}; + +template +struct SetArg +{ + static void set (Kernel, NullType) + { + } +}; + +template < + typename T0, typename T1, typename T2, typename T3, + typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, + typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, + typename T20, typename T21, typename T22, typename T23, + typename T24, typename T25, 
typename T26, typename T27, + typename T28, typename T29, typename T30, typename T31 +> +class KernelFunctorGlobal +{ +private: + Kernel kernel_; + +public: + KernelFunctorGlobal( + Kernel kernel) : + kernel_(kernel) + {} + + KernelFunctorGlobal( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + Event operator() ( + const EnqueueArgs& args, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType() + ) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, 
T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + +}; + +//------------------------------------------------------------------------------------------------------ + + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31> +struct functionImplementation_ +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! 
\brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, 
+ T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + 
typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> FunctorType; + + 
FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27> 
+struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType 
&functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + 
T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && 
defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef 
detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + 
typename T18, + typename T19, + typename T20, + typename T21, + typename T22> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) + // Fail variadic expansion for dev11 + static_assert(0, 
"Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18> +struct functionImplementation_ +< T0, + T1, + 
T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. 
If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) + 
// Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + 
functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + 
FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) 
&& defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count 
for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! 
\brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3); + } + + +}; + +template< + typename T0, + typename T1, + typename T2> +struct functionImplementation_ +< T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2); + } + + +}; + +template< + typename T0, + typename T1> +struct functionImplementation_ +< T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0, + T1); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1) + { + return functor_( + enqueueArgs, + arg0, + arg1); + } + + +}; + +template< + typename T0> +struct functionImplementation_ +< T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. 
+ typedef Event type_( + const EnqueueArgs&, + T0); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0) + { + return functor_( + enqueueArgs, + arg0); + } + + +}; + + + + + +} // namespace detail + +//---------------------------------------------------------------------------------------------- + +template < + typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, + typename T3 = detail::NullType, typename T4 = detail::NullType, + typename T5 = detail::NullType, typename T6 = detail::NullType, + typename T7 = detail::NullType, typename T8 = detail::NullType, + typename T9 = detail::NullType, typename T10 = detail::NullType, + typename T11 = detail::NullType, typename T12 = detail::NullType, + typename T13 = detail::NullType, typename T14 = detail::NullType, + typename T15 = detail::NullType, typename T16 = detail::NullType, + typename T17 = detail::NullType, typename T18 = detail::NullType, + typename T19 = detail::NullType, typename T20 = detail::NullType, + typename T21 = detail::NullType, typename T22 = detail::NullType, + typename T23 = detail::NullType, typename T24 = detail::NullType, + typename T25 = detail::NullType, typename T26 = detail::NullType, + typename T27 = detail::NullType, typename T28 = detail::NullType, + typename T29 = detail::NullType, typename T30 = detail::NullType, + typename T31 = detail::NullType +> +struct make_kernel : + public detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > +{ +public: + typedef detail::KernelFunctorGlobal< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > FunctorType; + + make_kernel( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, 
+ T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(kernel)) + {} +}; + + +//---------------------------------------------------------------------------------------------------------------------- + +#undef __ERR_STR +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR + +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR + +#undef __CREATE_BUFFER_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_SAMPLER_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR + +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR +#undef __SET_PRINTF_CALLBACK_ERR + +#undef __WAIT_FOR_EVENTS_ERR + +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +#undef __BUILD_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR + +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef 
__ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_TASK_ERR +#undef __ENQUEUE_NATIVE_KERNEL + +#undef __CL_EXPLICIT_CONSTRUCTORS + +#undef __UNLOAD_COMPILER_ERR +#endif //__CL_USER_OVERRIDE_ERROR_STRINGS + +#undef __CL_FUNCTION_TYPE + +// Extensions +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_VERSION_1_1) +#undef __INIT_CL_EXT_FCN_PTR +#endif // #if defined(CL_VERSION_1_1) +#undef __CREATE_SUB_DEVICES + +#if defined(USE_CL_DEVICE_FISSION) +#undef __PARAM_NAME_DEVICE_FISSION +#endif // USE_CL_DEVICE_FISSION + +#undef __DEFAULT_NOT_INITIALIZED +#undef __DEFAULT_BEING_INITIALIZED +#undef __DEFAULT_INITIALIZED + +} // namespace cl + +#ifdef _WIN32 +#pragma pop_macro("max") +#endif // _WIN32 + +#endif // CL_HPP_ diff --git a/src/third_party/khronos/CL/cl_d3d10.h b/src/third_party/khronos/CL/cl_d3d10.h new file mode 100644 index 0000000..b6c90b3 --- /dev/null +++ b/src/third_party/khronos/CL/cl_d3d10.h @@ -0,0 +1,126 @@ +/********************************************************************************** + * Copyright (c) 2008-2012 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_CL_D3D10_H +#define __OPENCL_CL_D3D10_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d10_sharing */ +#define cl_khr_d3d10_sharing 1 + +typedef cl_uint cl_d3d10_device_source_khr; +typedef cl_uint cl_d3d10_device_set_khr; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_D3D10_DEVICE_KHR -1002 +#define CL_INVALID_D3D10_RESOURCE_KHR -1003 +#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 +#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 + +/* cl_d3d10_device_source_nv */ +#define CL_D3D10_DEVICE_KHR 0x4010 +#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 + +/* cl_d3d10_device_set_nv */ +#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 +#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 + +/* cl_context_info */ +#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 +#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C + +/* cl_mem_info */ +#define CL_MEM_D3D10_RESOURCE_KHR 0x4015 + +/* cl_image_info */ +#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 +#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + 
ID3D10Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_D3D10_H */ + diff --git a/src/third_party/khronos/CL/cl_d3d11.h b/src/third_party/khronos/CL/cl_d3d11.h new file mode 100644 index 0000000..2e0a63f --- /dev/null +++ b/src/third_party/khronos/CL/cl_d3d11.h @@ -0,0 +1,126 @@ +/********************************************************************************** + * Copyright (c) 2008-2012 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_CL_D3D11_H +#define __OPENCL_CL_D3D11_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d11_sharing */ +#define cl_khr_d3d11_sharing 1 + +typedef cl_uint cl_d3d11_device_source_khr; +typedef cl_uint cl_d3d11_device_set_khr; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_D3D11_DEVICE_KHR -1006 +#define CL_INVALID_D3D11_RESOURCE_KHR -1007 +#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 +#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 + +/* cl_d3d11_device_source */ +#define CL_D3D11_DEVICE_KHR 0x4019 +#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A + +/* cl_d3d11_device_set */ +#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B +#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C + +/* cl_context_info */ +#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D +#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D + +/* cl_mem_info */ +#define CL_MEM_D3D11_RESOURCE_KHR 0x401E + +/* cl_image_info */ +#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 +#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( + cl_platform_id platform, + cl_d3d11_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + 
ID3D11Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_D3D11_H */ + diff --git a/src/third_party/khronos/CL/cl_dx9_media_sharing.h b/src/third_party/khronos/CL/cl_dx9_media_sharing.h new file mode 100644 index 0000000..23f1631 --- /dev/null +++ b/src/third_party/khronos/CL/cl_dx9_media_sharing.h @@ -0,0 +1,127 @@ +/********************************************************************************** + * Copyright (c) 2008-2012 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H +#define __OPENCL_CL_DX9_MEDIA_SHARING_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** +/* cl_khr_dx9_media_sharing */ +#define cl_khr_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_media_adapter_type_khr; +typedef cl_uint cl_dx9_media_adapter_set_khr; + +#if defined(_WIN32) +#include +typedef struct _cl_dx9_surface_info_khr +{ + IDirect3DSurface9 *resource; + HANDLE shared_handle; +} cl_dx9_surface_info_khr; +#endif + + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 +#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 +#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 +#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 + +/* cl_media_adapter_type_khr */ +#define CL_ADAPTER_D3D9_KHR 0x2020 +#define CL_ADAPTER_D3D9EX_KHR 0x2021 +#define CL_ADAPTER_DXVA_KHR 0x2022 + +/* cl_media_adapter_set_khr */ +#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 +#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 + +/* cl_context_info */ +#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 +#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 +#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 + +/* cl_mem_info */ +#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 +#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 + +/* cl_image_info */ +#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B +#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr * media_adapter_type, + void * media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void * surface_info, + cl_uint plane, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */ + diff --git a/src/third_party/khronos/CL/cl_egl.h b/src/third_party/khronos/CL/cl_egl.h new file mode 100644 index 0000000..93e6c9c --- /dev/null +++ b/src/third_party/khronos/CL/cl_egl.h @@ -0,0 +1,133 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#ifndef __OPENCL_CL_EGL_H +#define __OPENCL_CL_EGL_H + +#ifdef __APPLE__ + +#else +#include +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ +#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F +#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D +#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E + +/* Error type for clCreateFromEGLImageKHR */ +#define CL_INVALID_EGL_OBJECT_KHR -1093 +#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 + +/* CLeglImageKHR is an opaque handle to an EGLImage */ +typedef void* CLeglImageKHR; + +/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ +typedef void* CLeglDisplayKHR; + +/* CLeglSyncKHR is an opaque handle to an EGLSync object */ +typedef void* CLeglSyncKHR; + +/* properties passed to clCreateFromEGLImageKHR */ +typedef intptr_t cl_egl_image_properties_khr; + + +#define cl_khr_egl_image 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromEGLImageKHR(cl_context /* context */, + CLeglDisplayKHR /* egldisplay */, + CLeglImageKHR /* eglimage */, + cl_mem_flags /* flags */, + const cl_egl_image_properties_khr * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( + cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( + cl_command_queue 
command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#define cl_khr_egl_event 1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromEGLSyncKHR(cl_context /* context */, + CLeglSyncKHR /* sync */, + CLeglDisplayKHR /* display */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( + cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_EGL_H */ diff --git a/src/third_party/khronos/CL/cl_ext.h b/src/third_party/khronos/CL/cl_ext.h new file mode 100644 index 0000000..710bea8 --- /dev/null +++ b/src/third_party/khronos/CL/cl_ext.h @@ -0,0 +1,316 @@ +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. */ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + #include + #include +#else + #include +#endif + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. 
Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, + void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). + * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. 
+ * + * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ +extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ +extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */, + const void * /* private_info */, + size_t /* cb */, + void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */); + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( + cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */); + + +/* Extension: cl_khr_image2D_buffer + * + * This extension allows a 2D image to be created from a cl_mem buffer without a copy. + * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t. + * Both the sampler and sampler-less read_image built-in functions are supported for 2D images + * and 2D images created from a buffer. Similarly, the write_image built-ins are also supported + * for 2D images created from a buffer. 
+ * + * When the 2D image from buffer is created, the client must specify the width, + * height, image format (i.e. channel order and channel data type) and optionally the row pitch + * + * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels. + * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels. + */ + +/************************************* + * cl_khr_initalize_memory extension * + *************************************/ + +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E + + +/************************************** + * cl_khr_terminate_context extension * + **************************************/ + +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F +#define CL_CONTEXT_TERMINATE_KHR 0x2010 + +#define cl_khr_terminate_context 1 +extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; + + +/* + * Extension: cl_khr_spir + * + * This extension adds support to create an OpenCL program object from a + * Standard Portable Intermediate Representation (SPIR) instance + */ + +#define CL_DEVICE_SPIR_VERSIONS 0x40E0 +#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + +/********************************* +* cl_amd_device_attribute_query * 
+*********************************/ +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 + +/********************************* +* cl_arm_printf extension +*********************************/ +#define CL_PRINTF_CALLBACK_ARM 0x40B0 +#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 + +#ifdef CL_VERSION_1_1 + /*********************************** + * cl_ext_device_fission extension * + ***********************************/ + #define cl_ext_device_fission 1 + + extern CL_API_ENTRY cl_int CL_API_CALL + clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + extern CL_API_ENTRY cl_int CL_API_CALL + clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef cl_ulong cl_device_partition_property_ext; + extern CL_API_ENTRY cl_int CL_API_CALL + clCreateSubDevicesEXT( cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + /* cl_device_partition_property_ext */ + #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 + #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 + #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 + #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + + /* clDeviceGetInfo selectors */ + #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 + #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 + #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 + #define 
CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 + #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + + /* error codes */ + #define CL_DEVICE_PARTITION_FAILED_EXT -1057 + #define CL_INVALID_PARTITION_COUNT_EXT -1058 + #define CL_INVALID_PARTITION_NAME_EXT -1059 + + /* CL_AFFINITY_DOMAINs */ + #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 + #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 + #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 + #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 + #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 + #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + + /* cl_device_partition_property_ext list terminators */ + #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) + #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) + #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + +/********************************* +* cl_qcom_ext_host_ptr extension +*********************************/ + +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +typedef cl_uint cl_image_pitch_info_qcom; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceImageInfoQCOM(cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef struct _cl_mem_ext_host_ptr +{ + /* Type of external memory allocation. */ + /* Legal values will be defined in layered extensions. */ + cl_uint allocation_type; + + /* Host cache policy for this external memory allocation. 
*/ + cl_uint host_cache_policy; + +} cl_mem_ext_host_ptr; + +/********************************* +* cl_qcom_ion_host_ptr extension +*********************************/ + +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +typedef struct _cl_mem_ion_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* ION file descriptor */ + int ion_filedesc; + + /* Host pointer to the ION allocated memory */ + void* ion_hostptr; + +} cl_mem_ion_host_ptr; + +#endif /* CL_VERSION_1_1 */ + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/src/third_party/khronos/CL/cl_gl.h b/src/third_party/khronos/CL/cl_gl.h new file mode 100644 index 0000000..e52c1b6 --- /dev/null +++ b/src/third_party/khronos/CL/cl_gl.h @@ -0,0 +1,162 @@ +/********************************************************************************** + * Copyright (c) 2008 - 2012 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#define CL_GL_NUM_SAMPLES 0x2012 + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* bufobj */, + int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* renderbuffer */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem /* memobj */, + cl_gl_object_type * /* gl_object_type 
*/, + cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem /* memobj */, + cl_gl_texture_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define 
CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/src/third_party/khronos/CL/cl_gl_ext.h b/src/third_party/khronos/CL/cl_gl_ext.h new file mode 100644 index 0000000..77d5353 --- /dev/null +++ b/src/third_party/khronos/CL/cl_gl_ext.h @@ -0,0 +1,69 @@ +/********************************************************************************** + * Copyright (c) 2008-2012 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ +/* OpenGL dependencies. */ + +#ifndef __OPENCL_CL_GL_EXT_H +#define __OPENCL_CL_GL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + #include +#else + #include +#endif + +/* + * For each extension, follow this template + * cl_VEN_extname extension */ +/* #define cl_VEN_extname 1 + * ... define new types, if any + * ... define new tokens, if any + * ... define new APIs, if any + * + * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header + * This allows us to avoid having to decide whether to include GL headers or GLES here. + */ + +/* + * cl_khr_gl_event extension + * See section 9.9 in the OpenCL 1.1 spec for more information + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context /* context */, + cl_GLsync /* cl_GLsync */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/src/third_party/khronos/CL/cl_platform.h b/src/third_party/khronos/CL/cl_platform.h new file mode 100644 index 0000000..7f6f5e8 --- /dev/null +++ b/src/third_party/khronos/CL/cl_platform.h @@ -0,0 +1,1278 @@ +/********************************************************************************** + * Copyright (c) 2008-2012 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +#ifdef __APPLE__ + #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) + #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 + + #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 + #else + #warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here! 
+ #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #endif +#else + #define CL_EXTENSION_WEAK_LINK + #define CL_API_SUFFIX__VERSION_1_0 + #define CL_EXT_SUFFIX__VERSION_1_0 + #define CL_API_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_1 + #define CL_API_SUFFIX__VERSION_1_2 + #define CL_EXT_SUFFIX__VERSION_1_2 + + #ifdef __GNUC__ + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #endif + #elif _WIN32 + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated) + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated) + #endif + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + 
#endif +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 
2.220446049250313080847e-16 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 
127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 
0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. 
*/ + typedef vector unsigned char __cl_uchar16; + typedef vector signed char __cl_char16; + typedef vector unsigned short __cl_ushort8; + typedef vector signed short __cl_short8; + typedef vector unsigned int __cl_uint4; + typedef vector signed int __cl_int4; + typedef vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if 
defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && (_MSC_VER >= 1500) + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. 
*/ +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. 
*/ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __cl_uchar2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. 
*/ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. 
*/ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. 
*/ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. 
*/ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. 
*/ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. 
*/ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. 
*/ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. 
*/ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. 
*/ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. 
+ */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && (_MSC_VER >= 1500) +#pragma warning( pop ) +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/src/third_party/khronos/CL/opencl.h b/src/third_party/khronos/CL/opencl.h new file mode 100644 index 0000000..3f00524 --- /dev/null +++ b/src/third_party/khronos/CL/opencl.h @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2008-2012 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __APPLE__ + +#include +#include +#include +#include + +#else + +#include +#include +#include +#include + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ + diff --git a/src/v8/include/v8.h b/src/v8/include/v8.h index ef0bda6..415ccdc 100644 --- a/src/v8/include/v8.h +++ b/src/v8/include/v8.h @@ -1527,6 +1527,24 @@ class V8_EXPORT Value : public Data { bool IsFloat32Array() const; /** + * Returns true if this value is a Float32x4Array. + * This is an experimental feature. + */ + bool IsFloat32x4Array() const; + + /** + * Returns true if this value is a Float64x2Array. + * This is an experimental feature. + */ + bool IsFloat64x2Array() const; + + /** + * Returns true if this value is a Int32x4Array. + * This is an experimental feature. + */ + bool IsInt32x4Array() const; + + /** * Returns true if this value is a Float64Array. * This is an experimental feature. 
*/ @@ -2054,8 +2072,11 @@ enum ExternalArrayType { kExternalInt16Array, kExternalUint16Array, kExternalInt32Array, + kExternalInt32x4Array, kExternalUint32Array, kExternalFloat32Array, + kExternalFloat32x4Array, + kExternalFloat64x2Array, kExternalFloat64Array, kExternalUint8ClampedArray, @@ -2967,6 +2988,42 @@ class V8_EXPORT Float32Array : public TypedArray { }; +class V8_EXPORT Float32x4Array : public TypedArray { + public: + static Local New(Handle array_buffer, + size_t byte_offset, size_t length); + V8_INLINE static Float32x4Array* Cast(Value* obj); + + private: + Float32x4Array(); + static void CheckCast(Value* obj); +}; + + +class V8_EXPORT Float64x2Array : public TypedArray { + public: + static Local New(Handle array_buffer, + size_t byte_offset, size_t length); + V8_INLINE static Float64x2Array* Cast(Value* obj); + + private: + Float64x2Array(); + static void CheckCast(Value* obj); +}; + + +class V8_EXPORT Int32x4Array : public TypedArray { + public: + static Local New(Handle array_buffer, + size_t byte_offset, size_t length); + V8_INLINE static Int32x4Array* Cast(Value* obj); + + private: + Int32x4Array(); + static void CheckCast(Value* obj); +}; + + /** * An instance of Float64Array constructor (ES6 draft 15.13.6). * This API is experimental and may change significantly. 
@@ -5593,7 +5650,7 @@ class Internals { static const int kJSObjectHeaderSize = 3 * kApiPointerSize; static const int kFixedArrayHeaderSize = 2 * kApiPointerSize; static const int kContextHeaderSize = 2 * kApiPointerSize; - static const int kContextEmbedderDataIndex = 95; + static const int kContextEmbedderDataIndex = 108; static const int kFullStringRepresentationMask = 0x07; static const int kStringEncodingMask = 0x4; static const int kExternalTwoByteRepresentationTag = 0x02; @@ -5611,7 +5668,7 @@ class Internals { static const int kNullValueRootIndex = 7; static const int kTrueValueRootIndex = 8; static const int kFalseValueRootIndex = 9; - static const int kEmptyStringRootIndex = 164; + static const int kEmptyStringRootIndex = 176; // The external allocation limit should be below 256 MB on all architectures // to avoid that resource-constrained embedders run low on memory. @@ -5626,7 +5683,7 @@ class Internals { static const int kNodeIsIndependentShift = 4; static const int kNodeIsPartiallyDependentShift = 5; - static const int kJSObjectType = 0xbc; + static const int kJSObjectType = 0xc2; static const int kFirstNonstringType = 0x80; static const int kOddballType = 0x83; static const int kForeignType = 0x88; @@ -6522,6 +6579,30 @@ Float32Array* Float32Array::Cast(v8::Value* value) { } +Float32x4Array* Float32x4Array::Cast(v8::Value* value) { +#ifdef V8_ENABLE_CHECKS + CheckCast(value); +#endif + return static_cast(value); +} + + +Float64x2Array* Float64x2Array::Cast(v8::Value* value) { +#ifdef V8_ENABLE_CHECKS + CheckCast(value); +#endif + return static_cast(value); +} + + +Int32x4Array* Int32x4Array::Cast(v8::Value* value) { +#ifdef V8_ENABLE_CHECKS + CheckCast(value); +#endif + return static_cast(value); +} + + Float64Array* Float64Array::Cast(v8::Value* value) { #ifdef V8_ENABLE_CHECKS CheckCast(value); diff --git a/src/v8/src/api.h b/src/v8/src/api.h index c87bd71..3845030 100644 --- a/src/v8/src/api.h +++ b/src/v8/src/api.h @@ -156,6 +156,9 @@ class 
RegisteredExtension { V(Uint32Array, JSTypedArray) \ V(Int32Array, JSTypedArray) \ V(Float32Array, JSTypedArray) \ + V(Float32x4Array, JSTypedArray) \ + V(Float64x2Array, JSTypedArray) \ + V(Int32x4Array, JSTypedArray) \ V(Float64Array, JSTypedArray) \ V(DataView, JSDataView) \ V(String, String) \ @@ -224,6 +227,12 @@ class Utils { v8::internal::Handle obj); static inline Local ToLocalFloat32Array( v8::internal::Handle obj); + static inline Local ToLocalFloat32x4Array( + v8::internal::Handle obj); + static inline Local ToLocalFloat64x2Array( + v8::internal::Handle obj); + static inline Local ToLocalInt32x4Array( + v8::internal::Handle obj); static inline Local ToLocalFloat64Array( v8::internal::Handle obj); diff --git a/src/v8/src/arm/assembler-arm-inl.h b/src/v8/src/arm/assembler-arm-inl.h index 1cfe34b..ec3854e 100644 --- a/src/v8/src/arm/assembler-arm-inl.h +++ b/src/v8/src/arm/assembler-arm-inl.h @@ -48,6 +48,7 @@ namespace internal { bool CpuFeatures::SupportsCrankshaft() { return IsSupported(VFP3); } +bool CpuFeatures::SupportsSIMD128InCrankshaft() { return false; } int Register::NumAllocatableRegisters() { diff --git a/src/v8/src/arm/assembler-arm.h b/src/v8/src/arm/assembler-arm.h index e33f48a..87c0b4d 100644 --- a/src/v8/src/arm/assembler-arm.h +++ b/src/v8/src/arm/assembler-arm.h @@ -302,6 +302,34 @@ struct QwNeonRegister { return r; } + static int ToAllocationIndex(QwNeonRegister reg) { + DCHECK(reg.code() < kMaxNumRegisters); + return reg.code(); + } + + static const char* AllocationIndexToString(int index) { + DCHECK(index >= 0 && index < kMaxNumRegisters); + const char* const names[] = { + "q0", + "q1", + "q2", + "q3", + "q4", + "q5", + "q6", + "q7", + "q8", + "q9", + "q10", + "q11", + "q12", + "q13", + "q14", + "q15", + }; + return names[index]; + } + bool is_valid() const { return (0 <= code_) && (code_ < kMaxNumRegisters); } @@ -322,6 +350,7 @@ struct QwNeonRegister { typedef QwNeonRegister QuadRegister; +typedef QwNeonRegister SIMD128Register; // 
Support for the VFP registers s0 to s31 (d0 to d15). diff --git a/src/v8/src/arm/deoptimizer-arm.cc b/src/v8/src/arm/deoptimizer-arm.cc index df2c098..b3b6d99 100644 --- a/src/v8/src/arm/deoptimizer-arm.cc +++ b/src/v8/src/arm/deoptimizer-arm.cc @@ -111,7 +111,7 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( } -void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { +void Deoptimizer::CopySIMD128Registers(FrameDescription* output_frame) { for (int i = 0; i < DwVfpRegister::kMaxNumRegisters; ++i) { double double_value = input_->GetDoubleRegister(i); output_frame->SetDoubleRegister(i, double_value); @@ -203,7 +203,7 @@ void Deoptimizer::EntryGenerator::Generate() { // Copy VFP registers to // double_registers_[DoubleRegister::kMaxNumAllocatableRegisters] - int double_regs_offset = FrameDescription::double_registers_offset(); + int double_regs_offset = FrameDescription::simd128_registers_offset(); for (int i = 0; i < DwVfpRegister::kMaxNumAllocatableRegisters; ++i) { int dst_offset = i * kDoubleSize + double_regs_offset; int src_offset = i * kDoubleSize + kNumberOfRegisters * kPointerSize; @@ -277,7 +277,7 @@ void Deoptimizer::EntryGenerator::Generate() { __ CheckFor32DRegs(ip); __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset())); - int src_offset = FrameDescription::double_registers_offset(); + int src_offset = FrameDescription::simd128_registers_offset(); for (int i = 0; i < DwVfpRegister::kMaxNumRegisters; ++i) { if (i == kDoubleRegZero.code()) continue; if (i == kScratchDoubleReg.code()) continue; @@ -349,6 +349,18 @@ void FrameDescription::SetCallerConstantPool(unsigned offset, intptr_t value) { } +double FrameDescription::GetDoubleRegister(unsigned n) const { + DCHECK(n < 2 * ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n / 2].d[n % 2]; +} + + +void FrameDescription::SetDoubleRegister(unsigned n, double value) { + DCHECK(n < 2 * ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n / 2].d[n % 2] = value; +} + + 
#undef __ } } // namespace v8::internal diff --git a/src/v8/src/arm/lithium-arm.cc b/src/v8/src/arm/lithium-arm.cc index 6b86088..f13ae44 100644 --- a/src/v8/src/arm/lithium-arm.cc +++ b/src/v8/src/arm/lithium-arm.cc @@ -1211,6 +1211,41 @@ LInstruction* LChunkBuilder::DoMathPowHalf(HUnaryMathOperation* instr) { } +LInstruction* LChunkBuilder::DoNullarySIMDOperation( + HNullarySIMDOperation* instr) { + UNIMPLEMENTED(); + return NULL; +} + + +LInstruction* LChunkBuilder::DoUnarySIMDOperation( + HUnarySIMDOperation* instr) { + UNIMPLEMENTED(); + return NULL; +} + + +LInstruction* LChunkBuilder::DoBinarySIMDOperation( + HBinarySIMDOperation* instr) { + UNIMPLEMENTED(); + return NULL; +} + + +LInstruction* LChunkBuilder::DoTernarySIMDOperation( + HTernarySIMDOperation* instr) { + UNIMPLEMENTED(); + return NULL; +} + + +LInstruction* LChunkBuilder::DoQuarternarySIMDOperation( + HQuarternarySIMDOperation* instr) { + UNIMPLEMENTED(); + return NULL; +} + + LInstruction* LChunkBuilder::DoCallNew(HCallNew* instr) { LOperand* context = UseFixed(instr->context(), cp); LOperand* constructor = UseFixed(instr->constructor(), r1); @@ -2188,7 +2223,10 @@ LInstruction* LChunkBuilder::DoLoadRoot(HLoadRoot* instr) { LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { DCHECK(instr->key()->representation().IsSmiOrInteger32()); ElementsKind elements_kind = instr->elements_kind(); - LOperand* key = UseRegisterOrConstantAtStart(instr->key()); + bool load_128bits_without_neon = IsSIMD128ElementsKind(elements_kind); + LOperand* key = load_128bits_without_neon + ? 
UseRegisterOrConstant(instr->key()) + : UseRegisterOrConstantAtStart(instr->key()); LInstruction* result = NULL; if (!instr->is_typed_elements()) { @@ -2199,15 +2237,25 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { DCHECK(instr->representation().IsSmiOrTagged()); obj = UseRegisterAtStart(instr->elements()); } - result = DefineAsRegister(new(zone()) LLoadKeyed(obj, key)); + result = DefineAsRegister(new(zone()) LLoadKeyed(obj, key, NULL, NULL)); } else { DCHECK( (instr->representation().IsInteger32() && !IsDoubleOrFloatElementsKind(elements_kind)) || (instr->representation().IsDouble() && - IsDoubleOrFloatElementsKind(elements_kind))); + IsDoubleOrFloatElementsKind(elements_kind)) || + (instr->representation().IsTagged() && + (IsSIMD128ElementsKind(elements_kind)))); LOperand* backing_store = UseRegister(instr->elements()); - result = DefineAsRegister(new(zone()) LLoadKeyed(backing_store, key)); + result = load_128bits_without_neon + ? DefineAsRegister(new(zone()) LLoadKeyed( + backing_store, key, TempRegister(), TempRegister())) + : DefineAsRegister(new(zone()) LLoadKeyed( + backing_store, key, NULL, NULL)); + if (load_128bits_without_neon) { + info()->MarkAsDeferredCalling(); + AssignPointerMap(result); + } } if ((instr->is_external() || instr->is_fixed_typed_array()) ? 
@@ -2265,22 +2313,32 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { } } - return new(zone()) LStoreKeyed(object, key, val); + return new(zone()) LStoreKeyed(object, key, val, NULL, NULL); } DCHECK( (instr->value()->representation().IsInteger32() && !IsDoubleOrFloatElementsKind(instr->elements_kind())) || (instr->value()->representation().IsDouble() && - IsDoubleOrFloatElementsKind(instr->elements_kind()))); + IsDoubleOrFloatElementsKind(instr->elements_kind())) || + (instr->value()->representation().IsTagged() && + IsSIMD128ElementsKind(instr->elements_kind()))); DCHECK((instr->is_fixed_typed_array() && instr->elements()->representation().IsTagged()) || (instr->is_external() && instr->elements()->representation().IsExternal())); LOperand* val = UseRegister(instr->value()); - LOperand* key = UseRegisterOrConstantAtStart(instr->key()); LOperand* backing_store = UseRegister(instr->elements()); - return new(zone()) LStoreKeyed(backing_store, key, val); + bool store_128bits_without_neon = + IsSIMD128ElementsKind(instr->elements_kind()); + LOperand* key = store_128bits_without_neon + ? UseRegisterOrConstant(instr->key()) + : UseRegisterOrConstantAtStart(instr->key()); + LStoreKeyed* result = + new(zone()) LStoreKeyed(backing_store, key, val, + store_128bits_without_neon ? TempRegister() : NULL, + store_128bits_without_neon ? TempRegister() : NULL); + return store_128bits_without_neon ? 
AssignEnvironment(result) : result; } diff --git a/src/v8/src/arm/lithium-arm.h b/src/v8/src/arm/lithium-arm.h index 16f522e..e6615de 100644 --- a/src/v8/src/arm/lithium-arm.h +++ b/src/v8/src/arm/lithium-arm.h @@ -1641,15 +1641,20 @@ class LLoadRoot V8_FINAL : public LTemplateInstruction<1, 0, 0> { }; -class LLoadKeyed V8_FINAL : public LTemplateInstruction<1, 2, 0> { +class LLoadKeyed V8_FINAL : public LTemplateInstruction<1, 2, 2> { public: - LLoadKeyed(LOperand* elements, LOperand* key) { + LLoadKeyed(LOperand* elements, LOperand* key, + LOperand* temp, LOperand* temp2) { inputs_[0] = elements; inputs_[1] = key; + temps_[0] = temp; + temps_[1] = temp2; } LOperand* elements() { return inputs_[0]; } LOperand* key() { return inputs_[1]; } + LOperand* temp() { return temps_[0]; } + LOperand* temp2() { return temps_[1]; } ElementsKind elements_kind() const { return hydrogen()->elements_kind(); } @@ -2216,12 +2221,15 @@ class LStoreNamedGeneric V8_FINAL : public LTemplateInstruction<0, 3, 0> { }; -class LStoreKeyed V8_FINAL : public LTemplateInstruction<0, 3, 0> { +class LStoreKeyed V8_FINAL : public LTemplateInstruction<0, 3, 2> { public: - LStoreKeyed(LOperand* object, LOperand* key, LOperand* value) { + LStoreKeyed(LOperand* object, LOperand* key, LOperand* value, + LOperand* temp, LOperand* temp2) { inputs_[0] = object; inputs_[1] = key; inputs_[2] = value; + temps_[0] = temp; + temps_[1] = temp2; } bool is_external() const { return hydrogen()->is_external(); } @@ -2234,6 +2242,8 @@ class LStoreKeyed V8_FINAL : public LTemplateInstruction<0, 3, 0> { LOperand* elements() { return inputs_[0]; } LOperand* key() { return inputs_[1]; } LOperand* value() { return inputs_[2]; } + LOperand* temp() { return temps_[0]; } + LOperand* temp2() { return temps_[1]; } ElementsKind elements_kind() const { return hydrogen()->elements_kind(); } diff --git a/src/v8/src/arm/lithium-codegen-arm.cc b/src/v8/src/arm/lithium-codegen-arm.cc index ff09e28..3812eee 100644 --- 
a/src/v8/src/arm/lithium-codegen-arm.cc +++ b/src/v8/src/arm/lithium-codegen-arm.cc @@ -3207,6 +3207,93 @@ void LCodeGen::DoAccessArgumentsAt(LAccessArgumentsAt* instr) { } +void LCodeGen::DoDeferredSIMD128ToTagged(LInstruction* instr, + Runtime::FunctionId id) { + // TODO(3095996): Get rid of this. For now, we need to make the + // result register contain a valid pointer because it is already + // contained in the register pointer map. + Register reg = ToRegister(instr->result()); + __ mov(reg, Operand::Zero()); + + PushSafepointRegistersScope scope(this); + __ ldr(cp, MemOperand(fp, StandardFrameConstants::kContextOffset)); + __ CallRuntimeSaveDoubles(id); + RecordSafepointWithRegisters( + instr->pointer_map(), 0, Safepoint::kNoLazyDeopt); + __ sub(r0, r0, Operand(kHeapObjectTag)); + __ StoreToSafepointRegisterSlot(r0, reg); +} + + +template +void LCodeGen::DoLoadKeyedSIMD128ExternalArray(LLoadKeyed* instr) { + class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { + public: + DeferredSIMD128ToTagged(LCodeGen* codegen, LInstruction* instr, + Runtime::FunctionId id) + : LDeferredCode(codegen), instr_(instr), id_(id) { } + virtual void Generate() V8_OVERRIDE { + codegen()->DoDeferredSIMD128ToTagged(instr_, id_); + } + virtual LInstruction* instr() V8_OVERRIDE { return instr_; } + private: + LInstruction* instr_; + Runtime::FunctionId id_; + }; + + // Allocate a SIMD128 object on the heap. + Register reg = ToRegister(instr->result()); + Register temp = ToRegister(instr->temp()); + Register temp2 = ToRegister(instr->temp2()); + Register scratch = scratch0(); + + DeferredSIMD128ToTagged* deferred = new(zone()) DeferredSIMD128ToTagged( + this, instr, static_cast(T::kRuntimeAllocatorId())); + __ jmp(deferred->entry()); + __ bind(deferred->exit()); + + // Copy the SIMD128 value from the external array to the heap object. 
+ STATIC_ASSERT(T::kValueSize % kPointerSize == 0); + Register external_pointer = ToRegister(instr->elements()); + Register key = no_reg; + ElementsKind elements_kind = instr->elements_kind(); + bool key_is_constant = instr->key()->IsConstantOperand(); + int constant_key = 0; + if (key_is_constant) { + constant_key = ToInteger32(LConstantOperand::cast(instr->key())); + if (constant_key & 0xF0000000) { + Abort(kArrayIndexConstantValueTooBig); + } + } else { + key = ToRegister(instr->key()); + } + int element_size_shift = ElementsKindToShiftSize(elements_kind); + int shift_size = (instr->hydrogen()->key()->representation().IsSmi()) + ? (element_size_shift - kSmiTagSize) : element_size_shift; + int base_offset = instr->base_offset(); + Operand operand = key_is_constant + ? Operand(constant_key << element_size_shift) + : Operand(key, LSL, shift_size); + + __ add(scratch, external_pointer, operand); + + // Load the inner FixedTypedArray. + __ ldr(temp2, MemOperand(reg, T::kValueOffset)); + + for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { + __ ldr(temp, MemOperand(scratch, base_offset + offset)); + __ str( + temp, + MemOperand( + temp2, + FixedTypedArrayBase::kDataOffset - kHeapObjectTag + offset)); + } + + // Now that we have finished with the object's real address tag it + __ add(reg, reg, Operand(kHeapObjectTag)); +} + + void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { Register external_pointer = ToRegister(instr->elements()); Register key = no_reg; @@ -3243,6 +3330,12 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { } else { // i.e. 
elements_kind == EXTERNAL_DOUBLE_ELEMENTS __ vldr(result, scratch0(), base_offset); } + } else if (IsFloat32x4ElementsKind(elements_kind)) { + DoLoadKeyedSIMD128ExternalArray(instr); + } else if (IsFloat64x2ElementsKind(elements_kind)) { + DoLoadKeyedSIMD128ExternalArray(instr); + } else if (IsInt32x4ElementsKind(elements_kind)) { + DoLoadKeyedSIMD128ExternalArray(instr); } else { Register result = ToRegister(instr->result()); MemOperand mem_operand = PrepareKeyedOperand( @@ -3283,6 +3376,12 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { case FLOAT64_ELEMENTS: case EXTERNAL_FLOAT32_ELEMENTS: case EXTERNAL_FLOAT64_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: + case EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: case FAST_HOLEY_DOUBLE_ELEMENTS: case FAST_HOLEY_ELEMENTS: case FAST_HOLEY_SMI_ELEMENTS: @@ -4234,6 +4333,58 @@ void LCodeGen::DoBoundsCheck(LBoundsCheck* instr) { } +template +void LCodeGen::DoStoreKeyedSIMD128ExternalArray(LStoreKeyed* instr) { + DCHECK(instr->value()->IsRegister()); + Register temp = ToRegister(instr->temp()); + Register temp2 = ToRegister(instr->temp2()); + Register input_reg = ToRegister(instr->value()); + __ SmiTst(input_reg); + DeoptimizeIf(eq, instr->environment()); + __ CompareObjectType(input_reg, temp, no_reg, T::kInstanceType); + DeoptimizeIf(ne, instr->environment()); + + STATIC_ASSERT(T::kValueSize % kPointerSize == 0); + Register external_pointer = ToRegister(instr->elements()); + Register key = no_reg; + ElementsKind elements_kind = instr->elements_kind(); + bool key_is_constant = instr->key()->IsConstantOperand(); + int constant_key = 0; + if (key_is_constant) { + constant_key = ToInteger32(LConstantOperand::cast(instr->key())); + if (constant_key & 0xF0000000) { + Abort(kArrayIndexConstantValueTooBig); + } + } else { + key = ToRegister(instr->key()); + } + int element_size_shift = 
ElementsKindToShiftSize(elements_kind); + int shift_size = (instr->hydrogen()->key()->representation().IsSmi()) + ? (element_size_shift - kSmiTagSize) : element_size_shift; + int base_offset = instr->base_offset(); + Register address = scratch0(); + if (key_is_constant) { + if (constant_key != 0) { + __ add(address, external_pointer, + Operand(constant_key << element_size_shift)); + } else { + address = external_pointer; + } + } else { + __ add(address, external_pointer, Operand(key, LSL, shift_size)); + } + + // Load the inner FixedTypedArray. + __ ldr(temp2, MemOperand(input_reg, T::kValueOffset - kHeapObjectTag)); + + for (int offset = 0; offset < T::kValueSize; offset += kPointerSize) { + __ ldr(temp, MemOperand(temp2, + FixedTypedArrayBase::kDataOffset - kHeapObjectTag + offset)); + __ str(temp, MemOperand(address, base_offset + offset)); + } +} + + void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { Register external_pointer = ToRegister(instr->elements()); Register key = no_reg; @@ -4276,6 +4427,12 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { } else { // Storing doubles, not floats. 
__ vstr(value, address, base_offset); } + } else if (IsFloat32x4ElementsKind(elements_kind)) { + DoStoreKeyedSIMD128ExternalArray(instr); + } else if (IsFloat64x2ElementsKind(elements_kind)) { + DoStoreKeyedSIMD128ExternalArray(instr); + } else if (IsInt32x4ElementsKind(elements_kind)) { + DoStoreKeyedSIMD128ExternalArray(instr); } else { Register value(ToRegister(instr->value())); MemOperand mem_operand = PrepareKeyedOperand( @@ -4307,6 +4464,12 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { case FLOAT64_ELEMENTS: case EXTERNAL_FLOAT32_ELEMENTS: case EXTERNAL_FLOAT64_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: + case EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: case FAST_DOUBLE_ELEMENTS: case FAST_ELEMENTS: case FAST_SMI_ELEMENTS: diff --git a/src/v8/src/arm/lithium-codegen-arm.h b/src/v8/src/arm/lithium-codegen-arm.h index ee5f4e9..12ec783 100644 --- a/src/v8/src/arm/lithium-codegen-arm.h +++ b/src/v8/src/arm/lithium-codegen-arm.h @@ -122,6 +122,7 @@ class LCodeGen: public LCodeGenBase { Register result, Register object, Register index); + void DoDeferredSIMD128ToTagged(LInstruction* instr, Runtime::FunctionId id); // Parallel move support. 
void DoParallelMove(LParallelMove* move); @@ -326,9 +327,13 @@ class LCodeGen: public LCodeGenBase { void EnsureSpaceForLazyDeopt(int space_needed) V8_OVERRIDE; void DoLoadKeyedExternalArray(LLoadKeyed* instr); + template + void DoLoadKeyedSIMD128ExternalArray(LLoadKeyed* instr); void DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr); void DoLoadKeyedFixedArray(LLoadKeyed* instr); void DoStoreKeyedExternalArray(LStoreKeyed* instr); + template + void DoStoreKeyedSIMD128ExternalArray(LStoreKeyed* instr); void DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr); void DoStoreKeyedFixedArray(LStoreKeyed* instr); diff --git a/src/v8/src/arm/macro-assembler-arm.cc b/src/v8/src/arm/macro-assembler-arm.cc index 4b3cb4e..ef6625d 100644 --- a/src/v8/src/arm/macro-assembler-arm.cc +++ b/src/v8/src/arm/macro-assembler-arm.cc @@ -3272,6 +3272,19 @@ void MacroAssembler::AllocateHeapNumberWithValue(Register result, } +// Allocates a simd128 object or jumps to the need_gc label if the young space +// is full and a scavenge is needed. +void MacroAssembler::AllocateSIMDHeapObject(int size, + Register result, + Register scratch1, + Register scratch2, + Register map, + Label* gc_required, + TaggingMode tagging_mode) { + UNREACHABLE(); // NOTIMPLEMENTED +} + + // Copies a fixed number of fields of heap objects from src to dst. void MacroAssembler::CopyFields(Register dst, Register src, diff --git a/src/v8/src/arm/macro-assembler-arm.h b/src/v8/src/arm/macro-assembler-arm.h index d5ca12e..b2a127d 100644 --- a/src/v8/src/arm/macro-assembler-arm.h +++ b/src/v8/src/arm/macro-assembler-arm.h @@ -793,6 +793,13 @@ class MacroAssembler: public Assembler { Register scratch2, Register heap_number_map, Label* gc_required); + void AllocateSIMDHeapObject(int size, + Register result, + Register scratch1, + Register scratch2, + Register map, + Label* gc_required, + TaggingMode tagging_mode = TAG_RESULT); // Copies a fixed number of fields of heap objects from src to dst. 
void CopyFields(Register dst, diff --git a/src/v8/src/assembler.h b/src/v8/src/assembler.h index a128c09..720b6e5 100644 --- a/src/v8/src/assembler.h +++ b/src/v8/src/assembler.h @@ -186,6 +186,7 @@ class CpuFeatures : public AllStatic { } static inline bool SupportsCrankshaft(); + static inline bool SupportsSIMD128InCrankshaft(); static inline unsigned cache_line_size() { DCHECK(cache_line_size_ != 0); diff --git a/src/v8/src/bootstrapper.cc b/src/v8/src/bootstrapper.cc index 361960d..443373a 100644 --- a/src/v8/src/bootstrapper.cc +++ b/src/v8/src/bootstrapper.cc @@ -215,6 +215,7 @@ class Genesis BASE_EMBEDDED { Handle* external_map); bool InstallExperimentalNatives(); void InstallBuiltinFunctionIds(); + void InstallExperimentalSIMDBuiltinFunctionIds(); void InstallJSFunctionResultCaches(); void InitializeNormalizedMapCaches(); @@ -1129,7 +1130,7 @@ void Genesis::InitializeGlobal(Handle global_object, native_context()->set_##type##_array_fun(*fun); \ native_context()->set_##type##_array_external_map(*external_map); \ } - TYPED_ARRAYS(INSTALL_TYPED_ARRAY) + BUILTIN_TYPED_ARRAY(INSTALL_TYPED_ARRAY) #undef INSTALL_TYPED_ARRAY Handle data_view_fun = @@ -1415,6 +1416,65 @@ void Genesis::InitializeExperimentalGlobal() { native_context()->set_generator_object_prototype_map( *generator_object_prototype_map); } + + Handle global = Handle(native_context()->global_object()); + if (FLAG_simd_object) { + // --- S I M D --- + Handle name = factory()->InternalizeUtf8String("SIMD"); + Handle cons = factory()->NewFunction(name); + JSFunction::SetInstancePrototype(cons, + Handle(native_context()->initial_object_prototype(), + isolate())); + cons->SetInstanceClassName(*name); + Handle simd_object = factory()->NewJSObject(cons, TENURED); + DCHECK(simd_object->IsJSObject()); + JSObject::SetOwnPropertyIgnoreAttributes( + global, name, simd_object, DONT_ENUM).Check(); + native_context()->set_simd_object(*simd_object); + // --- f l o a t 3 2 x 4 --- + Handle float32x4_fun = + 
InstallFunction(simd_object, "float32x4", FLOAT32x4_TYPE, + Float32x4::kSize, + isolate()->initial_object_prototype(), + Builtins::kIllegal); + native_context()->set_float32x4_function(*float32x4_fun); + + // --- f l o a t 6 4 x 2 --- + Handle float64x2_fun = + InstallFunction(simd_object, "float64x2", FLOAT64x2_TYPE, + Float64x2::kSize, + isolate()->initial_object_prototype(), + Builtins::kIllegal); + native_context()->set_float64x2_function(*float64x2_fun); + + // --- i n t 3 2 x 4 --- + Handle int32x4_fun = + InstallFunction(simd_object, "int32x4", INT32x4_TYPE, + Int32x4::kSize, + isolate()->initial_object_prototype(), + Builtins::kIllegal); + native_context()->set_int32x4_function(*int32x4_fun); + + // --- F l o a t 3 2 x 4 A r r a y--- + Handle fun; + Handle external_map; + InstallTypedArray( + "Float32x4Array", FLOAT32x4_ELEMENTS, &fun, &external_map); + native_context()->set_float32x4_array_fun(*fun); + native_context()->set_float32x4_array_external_map(*external_map); + + // --- F l o a t 6 4 x 2 A r r a y--- + InstallTypedArray( + "Float64x2Array", FLOAT64x2_ELEMENTS, &fun, &external_map); + native_context()->set_float64x2_array_fun(*fun); + native_context()->set_float64x2_array_external_map(*external_map); + + // --- I n t 3 2 x 4 A r r a y--- + InstallTypedArray( + "Int32x4Array", INT32x4_ELEMENTS, &fun, &external_map); + native_context()->set_int32x4_array_fun(*fun); + native_context()->set_int32x4_array_external_map(*external_map); + } } @@ -2067,6 +2127,26 @@ bool Genesis::InstallExperimentalNatives() { INSTALL_EXPERIMENTAL_NATIVE(i, generators, "generator.js") INSTALL_EXPERIMENTAL_NATIVE(i, strings, "harmony-string.js") INSTALL_EXPERIMENTAL_NATIVE(i, arrays, "harmony-array.js") + if (FLAG_simd_object && + strcmp(ExperimentalNatives::GetScriptName(i).start(), + "native simd128.js") == 0) { + if (!CompileExperimentalBuiltin(isolate(), i)) return false; + // Store the map for the float32x4, float64x2 and int32x4 function + // prototype after the 
float32x4 and int32x4 function has been set up. + InstallExperimentalSIMDBuiltinFunctionIds(); + JSObject* float32x4_function_prototype = JSObject::cast( + native_context()->float32x4_function()->instance_prototype()); + native_context()->set_float32x4_function_prototype_map( + float32x4_function_prototype->map()); + JSObject* float64x2_function_prototype = JSObject::cast( + native_context()->float64x2_function()->instance_prototype()); + native_context()->set_float64x2_function_prototype_map( + float64x2_function_prototype->map()); + JSObject* int32x4_function_prototype = JSObject::cast( + native_context()->int32x4_function()->instance_prototype()); + native_context()->set_int32x4_function_prototype_map( + int32x4_function_prototype->map()); + } } InstallExperimentalNativeFunctions(); @@ -2074,6 +2154,35 @@ bool Genesis::InstallExperimentalNatives() { } +static Handle ResolveBuiltinSIMDIdHolder( + Handle native_context, + const char* holder_expr) { + Isolate* isolate = native_context->GetIsolate(); + Factory* factory = isolate->factory(); + Handle global(native_context->global_object()); + Handle holder = global; + char* name = const_cast(holder_expr); + char* period_pos = strchr(name, '.'); + while (period_pos != NULL) { + Vector property(name, + static_cast(period_pos - name)); + Handle property_string = factory->InternalizeUtf8String(property); + DCHECK(!property_string.is_null()); + holder = Object::GetProperty(holder, property_string).ToHandleChecked(); + if (strcmp(".prototype", period_pos) == 0) { + Handle function = Handle::cast(holder); + return Handle(JSObject::cast(function->prototype())); + } else { + name = period_pos + 1; + period_pos = strchr(name, '.'); + } + } + + return Handle::cast(Object::GetPropertyOrElement( + holder, factory->InternalizeUtf8String(name)).ToHandleChecked()); +} + + static void InstallBuiltinFunctionId(Handle holder, const char* function_name, BuiltinFunctionId id) { @@ -2099,6 +2208,40 @@ void 
Genesis::InstallBuiltinFunctionIds() { } +void Genesis::InstallExperimentalSIMDBuiltinFunctionIds() { + HandleScope scope(isolate()); +#define INSTALL_BUILTIN_ID(holder_expr, fun_name, name) \ + { \ + Handle holder = ResolveBuiltinSIMDIdHolder( \ + native_context(), #holder_expr); \ + BuiltinFunctionId id = k##name; \ + InstallBuiltinFunctionId(holder, #fun_name, id); \ + } + SIMD_ARRAY_OPERATIONS(INSTALL_BUILTIN_ID) +#define INSTALL_SIMD_NULLARY_FUNCTION_ID(p1, p2, p3, p4) \ + INSTALL_BUILTIN_ID(p1, p2, p3) + SIMD_NULLARY_OPERATIONS(INSTALL_SIMD_NULLARY_FUNCTION_ID) +#undef INSTALL_SIMD_NULLARY_FUNCTION_ID +#define INSTALL_SIMD_UNARY_FUNCTION_ID(p1, p2, p3, p4, p5) \ + INSTALL_BUILTIN_ID(p1, p2, p3) + SIMD_UNARY_OPERATIONS(INSTALL_SIMD_UNARY_FUNCTION_ID) +#undef INSTALL_SIMD_UNARY_FUNCTION_ID +#define INSTALL_SIMD_BINARY_FUNCTION_ID(p1, p2, p3, p4, p5, p6) \ + INSTALL_BUILTIN_ID(p1, p2, p3) + SIMD_BINARY_OPERATIONS(INSTALL_SIMD_BINARY_FUNCTION_ID) +#undef INSTALL_SIMD_BINARY_FUNCTION_ID +#define INSTALL_SIMD_TERNARY_FUNCTION_ID(p1, p2, p3, p4, p5, p6, p7) \ + INSTALL_BUILTIN_ID(p1, p2, p3) + SIMD_TERNARY_OPERATIONS(INSTALL_SIMD_TERNARY_FUNCTION_ID) +#undef INSTALL_SIMD_TERNARY_FUNCTION_ID +#define INSTALL_SIMD_QUARTERNARY_FUNCTION_ID(p1, p2, p3, p4, p5, p6, p7, p8) \ + INSTALL_BUILTIN_ID(p1, p2, p3) + SIMD_QUARTERNARY_OPERATIONS(INSTALL_SIMD_QUARTERNARY_FUNCTION_ID) +#undef INSTALL_SIMD_QUARTERNARY_FUNCTION_ID +#undef INSTALL_BUILTIN_ID +} + + // Do not forget to update macros.py with named constant // of cache id. 
#define JSFUNCTION_RESULT_CACHE_LIST(F) \ diff --git a/src/v8/src/contexts.h b/src/v8/src/contexts.h index 63c9955..7087d96 100644 --- a/src/v8/src/contexts.h +++ b/src/v8/src/contexts.h @@ -78,6 +78,15 @@ enum BindingFlags { V(SECURITY_TOKEN_INDEX, Object, security_token) \ V(BOOLEAN_FUNCTION_INDEX, JSFunction, boolean_function) \ V(NUMBER_FUNCTION_INDEX, JSFunction, number_function) \ + V(FLOAT32x4_FUNCTION_INDEX, JSFunction, float32x4_function) \ + V(FLOAT32x4_FUNCTION_PROTOTYPE_MAP_INDEX, Map, \ + float32x4_function_prototype_map) \ + V(FLOAT64x2_FUNCTION_INDEX, JSFunction, float64x2_function) \ + V(FLOAT64x2_FUNCTION_PROTOTYPE_MAP_INDEX, Map, \ + float64x2_function_prototype_map) \ + V(INT32x4_FUNCTION_INDEX, JSFunction, int32x4_function) \ + V(INT32x4_FUNCTION_PROTOTYPE_MAP_INDEX, Map, \ + int32x4_function_prototype_map) \ V(STRING_FUNCTION_INDEX, JSFunction, string_function) \ V(STRING_FUNCTION_PROTOTYPE_MAP_INDEX, Map, string_function_prototype_map) \ V(SYMBOL_FUNCTION_INDEX, JSFunction, symbol_function) \ @@ -87,6 +96,7 @@ enum BindingFlags { V(JS_ARRAY_MAPS_INDEX, Object, js_array_maps) \ V(DATE_FUNCTION_INDEX, JSFunction, date_function) \ V(JSON_OBJECT_INDEX, JSObject, json_object) \ + V(SIMD_OBJECT_INDEX, JSObject, simd_object) \ V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \ V(INITIAL_OBJECT_PROTOTYPE_INDEX, JSObject, initial_object_prototype) \ V(INITIAL_ARRAY_PROTOTYPE_INDEX, JSObject, initial_array_prototype) \ @@ -129,6 +139,9 @@ enum BindingFlags { V(INT32_ARRAY_FUN_INDEX, JSFunction, int32_array_fun) \ V(FLOAT32_ARRAY_FUN_INDEX, JSFunction, float32_array_fun) \ V(FLOAT64_ARRAY_FUN_INDEX, JSFunction, float64_array_fun) \ + V(FLOAT32x4_ARRAY_FUN_INDEX, JSFunction, float32x4_array_fun) \ + V(FLOAT64x2_ARRAY_FUN_INDEX, JSFunction, float64x2_array_fun) \ + V(INT32x4_ARRAY_FUN_INDEX, JSFunction, int32x4_array_fun) \ V(UINT8_CLAMPED_ARRAY_FUN_INDEX, JSFunction, uint8_clamped_array_fun) \ V(INT8_ARRAY_EXTERNAL_MAP_INDEX, Map, 
int8_array_external_map) \ V(UINT8_ARRAY_EXTERNAL_MAP_INDEX, Map, uint8_array_external_map) \ @@ -138,6 +151,9 @@ enum BindingFlags { V(UINT32_ARRAY_EXTERNAL_MAP_INDEX, Map, uint32_array_external_map) \ V(FLOAT32_ARRAY_EXTERNAL_MAP_INDEX, Map, float32_array_external_map) \ V(FLOAT64_ARRAY_EXTERNAL_MAP_INDEX, Map, float64_array_external_map) \ + V(FLOAT32x4_ARRAY_EXTERNAL_MAP_INDEX, Map, float32x4_array_external_map) \ + V(FLOAT64x2_ARRAY_EXTERNAL_MAP_INDEX, Map, float64x2_array_external_map) \ + V(INT32x4_ARRAY_EXTERNAL_MAP_INDEX, Map, int32x4_array_external_map) \ V(UINT8_CLAMPED_ARRAY_EXTERNAL_MAP_INDEX, Map, \ uint8_clamped_array_external_map) \ V(DATA_VIEW_FUN_INDEX, JSFunction, data_view_fun) \ @@ -289,6 +305,12 @@ class Context: public FixedArray { INITIAL_ARRAY_PROTOTYPE_INDEX, BOOLEAN_FUNCTION_INDEX, NUMBER_FUNCTION_INDEX, + FLOAT32x4_FUNCTION_INDEX, + FLOAT32x4_FUNCTION_PROTOTYPE_MAP_INDEX, + FLOAT64x2_FUNCTION_INDEX, + FLOAT64x2_FUNCTION_PROTOTYPE_MAP_INDEX, + INT32x4_FUNCTION_INDEX, + INT32x4_FUNCTION_PROTOTYPE_MAP_INDEX, STRING_FUNCTION_INDEX, STRING_FUNCTION_PROTOTYPE_MAP_INDEX, SYMBOL_FUNCTION_INDEX, @@ -298,6 +320,7 @@ class Context: public FixedArray { JS_ARRAY_MAPS_INDEX, DATE_FUNCTION_INDEX, JSON_OBJECT_INDEX, + SIMD_OBJECT_INDEX, REGEXP_FUNCTION_INDEX, CREATE_DATE_FUN_INDEX, TO_NUMBER_FUN_INDEX, @@ -338,6 +361,9 @@ class Context: public FixedArray { UINT32_ARRAY_FUN_INDEX, INT32_ARRAY_FUN_INDEX, FLOAT32_ARRAY_FUN_INDEX, + FLOAT32x4_ARRAY_FUN_INDEX, + FLOAT64x2_ARRAY_FUN_INDEX, + INT32x4_ARRAY_FUN_INDEX, FLOAT64_ARRAY_FUN_INDEX, UINT8_CLAMPED_ARRAY_FUN_INDEX, INT8_ARRAY_EXTERNAL_MAP_INDEX, @@ -347,6 +373,9 @@ class Context: public FixedArray { INT32_ARRAY_EXTERNAL_MAP_INDEX, UINT32_ARRAY_EXTERNAL_MAP_INDEX, FLOAT32_ARRAY_EXTERNAL_MAP_INDEX, + FLOAT32x4_ARRAY_EXTERNAL_MAP_INDEX, + FLOAT64x2_ARRAY_EXTERNAL_MAP_INDEX, + INT32x4_ARRAY_EXTERNAL_MAP_INDEX, FLOAT64_ARRAY_EXTERNAL_MAP_INDEX, UINT8_CLAMPED_ARRAY_EXTERNAL_MAP_INDEX, DATA_VIEW_FUN_INDEX, 
diff --git a/src/v8/src/d8.h b/src/v8/src/d8.h index 991e5a5..35e5eef 100644 --- a/src/v8/src/d8.h +++ b/src/v8/src/d8.h @@ -316,6 +316,9 @@ class Shell : public i::AllStatic { static void Int32Array(const v8::FunctionCallbackInfo& args); static void Uint32Array(const v8::FunctionCallbackInfo& args); static void Float32Array(const v8::FunctionCallbackInfo& args); + static void Float32x4Array(const v8::FunctionCallbackInfo& args); + static void Float64x2Array(const v8::FunctionCallbackInfo& args); + static void Int32x4Array(const v8::FunctionCallbackInfo& args); static void Float64Array(const v8::FunctionCallbackInfo& args); static void Uint8ClampedArray( const v8::FunctionCallbackInfo& args); diff --git a/src/v8/src/deoptimizer.cc b/src/v8/src/deoptimizer.cc index 1df7df8..b3ae6b1 100644 --- a/src/v8/src/deoptimizer.cc +++ b/src/v8/src/deoptimizer.cc @@ -1822,7 +1822,7 @@ void Deoptimizer::DoComputeCompiledStubFrame(TranslationIterator* iterator, } // Copy the double registers from the input into the output frame. - CopyDoubleRegisters(output_frame); + CopySIMD128Registers(output_frame); // Fill registers containing handler and number of parameters. SetPlatformCompiledStubRegisters(output_frame, descriptor); @@ -2011,6 +2011,61 @@ void Deoptimizer::MaterializeHeapObjects(JavaScriptFrameIterator* it) { Memory::Object_at(d.destination()) = *num; } + // Materialize all float32x4 before looking at arguments because when the + // output frames are used to materialize arguments objects later on they need + // to already contain valid float32x4 values. + for (int i = 0; i < deferred_float32x4s_.length(); i++) { + SIMD128MaterializationDescriptor
d = deferred_float32x4s_[i]; + float32x4_value_t x4 = d.value().f4; + Handle float32x4 = isolate_->factory()->NewFloat32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new float32x4 %p " + "[float32x4(%e, %e, %e, %e)] in slot %p\n", + reinterpret_cast(*float32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + Memory::Object_at(d.destination()) = *float32x4; + } + + // Materialize all float64x2 before looking at arguments because when the + // output frames are used to materialize arguments objects later on they need + // to already contain valid float64x2 values. + for (int i = 0; i < deferred_float64x2s_.length(); i++) { + SIMD128MaterializationDescriptor
d = deferred_float64x2s_[i]; + float64x2_value_t x2 = d.value().d2; + Handle float64x2 = isolate_->factory()->NewFloat64x2(x2); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new float64x2 %p " + "[float64x2(%e, %e)] in slot %p\n", + reinterpret_cast(*float64x2), + x2.storage[0], x2.storage[1], + d.destination()); + } + Memory::Object_at(d.destination()) = *float64x2; + } + + // Materialize all int32x4 before looking at arguments because when the + // output frames are used to materialize arguments objects later on they need + // to already contain valid int32x4 values. + for (int i = 0; i < deferred_int32x4s_.length(); i++) { + SIMD128MaterializationDescriptor
d = deferred_int32x4s_[i]; + int32x4_value_t x4 = d.value().i4; + Handle int32x4 = isolate_->factory()->NewInt32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new int32x4 %p " + "[int32x4(%u, %u, %u, %u)] in slot %p\n", + reinterpret_cast(*int32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + Memory::Object_at(d.destination()) = *int32x4; + } + + // Materialize all heap numbers required for arguments/captured objects. for (int i = 0; i < deferred_objects_double_values_.length(); i++) { HeapNumberMaterializationDescriptor d = @@ -2030,6 +2085,69 @@ void Deoptimizer::MaterializeHeapObjects(JavaScriptFrameIterator* it) { // Play it safe and clear all object double values before we continue. deferred_objects_double_values_.Clear(); + // Materialize all float32x4 values required for arguments/captured objects. + for (int i = 0; i < deferred_objects_float32x4_values_.length(); i++) { + SIMD128MaterializationDescriptor d = + deferred_objects_float32x4_values_[i]; + float32x4_value_t x4 = d.value().f4; + Handle float32x4 = isolate_->factory()->NewFloat32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new float32x4 %p " + "[float32x4(%e, %e, %e, %e)] for object at %d\n", + reinterpret_cast(*float32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + DCHECK(values.at(d.destination())->IsTheHole()); + values.Set(d.destination(), float32x4); + } + + // Play it safe and clear all object float32x4 values before we continue. + deferred_objects_float32x4_values_.Clear(); + + // Materialize all float64x2 values required for arguments/captured objects. 
+ for (int i = 0; i < deferred_objects_float64x2_values_.length(); i++) { + SIMD128MaterializationDescriptor d = + deferred_objects_float64x2_values_[i]; + float64x2_value_t x2 = d.value().d2; + Handle float64x2 = isolate_->factory()->NewFloat64x2(x2); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new float64x2 %p " + "[float64x2(%e, %e)] for object at %d\n", + reinterpret_cast(*float64x2), + x2.storage[0], x2.storage[1], + d.destination()); + } + DCHECK(values.at(d.destination())->IsTheHole()); + values.Set(d.destination(), float64x2); + } + + // Play it safe and clear all object float64x2 values before we continue. + deferred_objects_float64x2_values_.Clear(); + + // Materialize all int32x4 values required for arguments/captured objects. + for (int i = 0; i < deferred_objects_int32x4_values_.length(); i++) { + SIMD128MaterializationDescriptor d = + deferred_objects_int32x4_values_[i]; + int32x4_value_t x4 = d.value().i4; + Handle int32x4 = isolate_->factory()->NewInt32x4(x4); + if (trace_scope_ != NULL) { + PrintF(trace_scope_->file(), + "Materialized a new int32x4 %p " + "[int32x4(%u, %u, %u, %u)] for object at %d\n", + reinterpret_cast(*int32x4), + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + d.destination()); + } + DCHECK(values.at(d.destination())->IsTheHole()); + values.Set(d.destination(), int32x4); + } + + // Play it safe and clear all object int32x4 values before we continue. + deferred_objects_int32x4_values_.Clear(); + // Materialize arguments/captured objects. 
if (!deferred_objects_.is_empty()) { List > materialized_objects(deferred_objects_.length()); @@ -2159,10 +2277,16 @@ void Deoptimizer::DoTranslateObjectAndSkip(TranslationIterator* iterator) { case Translation::INT32_REGISTER: case Translation::UINT32_REGISTER: case Translation::DOUBLE_REGISTER: + case Translation::FLOAT32x4_REGISTER: + case Translation::FLOAT64x2_REGISTER: + case Translation::INT32x4_REGISTER: case Translation::STACK_SLOT: case Translation::INT32_STACK_SLOT: case Translation::UINT32_STACK_SLOT: case Translation::DOUBLE_STACK_SLOT: + case Translation::FLOAT32x4_STACK_SLOT: + case Translation::FLOAT64x2_STACK_SLOT: + case Translation::INT32x4_STACK_SLOT: case Translation::LITERAL: { // The value is not part of any materialized object, so we can ignore it. iterator->Skip(Translation::NumberOfOperandsFor(opcode)); @@ -2312,6 +2436,49 @@ void Deoptimizer::DoTranslateObject(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_REGISTER: + case Translation::FLOAT64x2_REGISTER: + case Translation::INT32x4_REGISTER: { + int input_reg = iterator->Next(); + simd128_value_t value = input_->GetSIMD128Register(input_reg); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_REGISTER) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast(object_slot), + field_index); + PrintF(trace_scope_->file(), + "float32x4(%e, %e, %e, %e) ; %s\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } else if (opcode == Translation::FLOAT64x2_REGISTER) { + float64x2_value_t x2 = value.d2; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast(object_slot), + field_index); + PrintF(trace_scope_->file(), + "float64x2(%e, %e) ; %s\n", + x2.storage[0], x2.storage[1], + SIMD128Register::AllocationIndexToString(input_reg)); + } else { + DCHECK(opcode == 
Translation::INT32x4_REGISTER); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast(object_slot), + field_index); + PrintF(trace_scope_->file(), + "int32x4(%u, %u, %u, %u) ; %s\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } + } + AddObjectSIMD128Value(value, opcode); + return; + } + case Translation::STACK_SLOT: { int input_slot_index = iterator->Next(); unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); @@ -2399,6 +2566,50 @@ void Deoptimizer::DoTranslateObject(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_STACK_SLOT: + case Translation::FLOAT64x2_STACK_SLOT: + case Translation::INT32x4_STACK_SLOT: { + int input_slot_index = iterator->Next(); + unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); + simd128_value_t value = input_->GetSIMD128FrameSlot(input_offset); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_STACK_SLOT) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast(object_slot), + field_index); + PrintF(trace_scope_->file(), + "float32x4(%e, %e, %e, %e) ; [sp + %d]\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } else if (opcode == Translation::FLOAT64x2_STACK_SLOT) { + float64x2_value_t x2 = value.d2; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast(object_slot), + field_index); + PrintF(trace_scope_->file(), + "float64x2(%e, %e) ; [sp + %d]\n", + x2.storage[0], x2.storage[1], + input_offset); + } else { + DCHECK(opcode == Translation::INT32x4_STACK_SLOT); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " object @0x%08" V8PRIxPTR ": [field #%d] <- ", + reinterpret_cast(object_slot), + field_index); + 
PrintF(trace_scope_->file(), + "int32x4(%u, %u, %u, %u) ; [sp + %d]\n", + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } + } + AddObjectSIMD128Value(value, opcode); + return; + } + case Translation::LITERAL: { Object* literal = ComputeLiteral(iterator->Next()); if (trace_scope_ != NULL) { @@ -2583,6 +2794,50 @@ void Deoptimizer::DoTranslateCommand(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_REGISTER: + case Translation::FLOAT64x2_REGISTER: + case Translation::INT32x4_REGISTER: { + int input_reg = iterator->Next(); + simd128_value_t value = input_->GetSIMD128Register(input_reg); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_REGISTER) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ":" + " [top + %d] <- float32x4(%e, %e, %e, %e) ; %s\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } else if (opcode == Translation::FLOAT64x2_REGISTER) { + float64x2_value_t x2 = value.d2; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ":" + " [top + %d] <- float64x2(%e, %e) ; %s\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x2.storage[0], x2.storage[1], + SIMD128Register::AllocationIndexToString(input_reg)); + } else { + DCHECK(opcode == Translation::INT32x4_REGISTER); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ":" + " [top + %d] <- int32x4(%u, %u, %u, %u) ; %s\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + SIMD128Register::AllocationIndexToString(input_reg)); + } + } + // We save the untagged value on the side and store a GC-safe + // temporary placeholder in the frame. 
+ AddSIMD128Value(output_[frame_index]->GetTop() + output_offset, value, + opcode); + output_[frame_index]->SetFrameSlot(output_offset, kPlaceholder); + return; + } + case Translation::STACK_SLOT: { int input_slot_index = iterator->Next(); unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); @@ -2684,6 +2939,51 @@ void Deoptimizer::DoTranslateCommand(TranslationIterator* iterator, return; } + case Translation::FLOAT32x4_STACK_SLOT: + case Translation::FLOAT64x2_STACK_SLOT: + case Translation::INT32x4_STACK_SLOT: { + int input_slot_index = iterator->Next(); + unsigned input_offset = input_->GetOffsetFromSlotIndex(input_slot_index); + simd128_value_t value = input_->GetSIMD128FrameSlot(input_offset); + if (trace_scope_ != NULL) { + if (opcode == Translation::FLOAT32x4_STACK_SLOT) { + float32x4_value_t x4 = value.f4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ": " + "[top + %d] <- float32x4(%e, %e, %e, %e) ; [sp + %d]\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } else if (opcode == Translation::FLOAT64x2_STACK_SLOT) { + float64x2_value_t x2 = value.d2; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ": " + "[top + %d] <- float64x2(%e, %e) ; [sp + %d]\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x2.storage[0], x2.storage[1], + input_offset); + } else { + DCHECK(opcode == Translation::INT32x4_STACK_SLOT); + int32x4_value_t x4 = value.i4; + PrintF(trace_scope_->file(), + " 0x%08" V8PRIxPTR ": " + "[top + %d] <- int32x4(%u, %u, %u, %u) ; [sp + %d]\n", + output_[frame_index]->GetTop() + output_offset, + output_offset, + x4.storage[0], x4.storage[1], x4.storage[2], x4.storage[3], + input_offset); + } + } + // We save the untagged value on the side and store a GC-safe + // temporary placeholder in the frame. 
+ AddSIMD128Value(output_[frame_index]->GetTop() + output_offset, value, + opcode); + output_[frame_index]->SetFrameSlot(output_offset, kPlaceholder); + return; + } + case Translation::LITERAL: { Object* literal = ComputeLiteral(iterator->Next()); if (trace_scope_ != NULL) { @@ -2830,6 +3130,27 @@ void Deoptimizer::AddObjectDoubleValue(double value) { } +void Deoptimizer::AddObjectSIMD128Value(simd128_value_t value, + int translation_opcode) { + deferred_objects_tagged_values_.Add(isolate()->heap()->the_hole_value()); + SIMD128MaterializationDescriptor value_desc( + deferred_objects_tagged_values_.length() - 1, value); + Translation::Opcode opcode = + static_cast(translation_opcode); + if (opcode == Translation::FLOAT32x4_REGISTER || + opcode == Translation::FLOAT32x4_STACK_SLOT) { + deferred_objects_float32x4_values_.Add(value_desc); + } else if (opcode == Translation::FLOAT64x2_REGISTER || + opcode == Translation::FLOAT64x2_STACK_SLOT) { + deferred_objects_float64x2_values_.Add(value_desc); + } else { + DCHECK(opcode == Translation::INT32x4_REGISTER || + opcode == Translation::INT32x4_STACK_SLOT); + deferred_objects_int32x4_values_.Add(value_desc); + } +} + + void Deoptimizer::AddDoubleValue(intptr_t slot_address, double value) { HeapNumberMaterializationDescriptor
value_desc( reinterpret_cast
(slot_address), value); @@ -2837,6 +3158,27 @@ void Deoptimizer::AddDoubleValue(intptr_t slot_address, double value) { } +void Deoptimizer::AddSIMD128Value(intptr_t slot_address, + simd128_value_t value, + int translation_opcode) { + SIMD128MaterializationDescriptor
value_desc( + reinterpret_cast
(slot_address), value); + Translation::Opcode opcode = + static_cast(translation_opcode); + if (opcode == Translation::FLOAT32x4_REGISTER || + opcode == Translation::FLOAT32x4_STACK_SLOT) { + deferred_float32x4s_.Add(value_desc); + } else if (opcode == Translation::FLOAT64x2_REGISTER || + opcode == Translation::FLOAT64x2_STACK_SLOT) { + deferred_float64x2s_.Add(value_desc); + } else { + DCHECK(opcode == Translation::INT32x4_REGISTER || + opcode == Translation::INT32x4_STACK_SLOT); + deferred_int32x4s_.Add(value_desc); + } +} + + void Deoptimizer::EnsureCodeForDeoptimizationEntry(Isolate* isolate, BailoutType type, int max_entry_id) { @@ -3080,6 +3422,12 @@ void Translation::StoreDoubleRegister(DoubleRegister reg) { } +void Translation::StoreSIMD128Register(SIMD128Register reg, Opcode opcode) { + buffer_->Add(opcode, zone()); + buffer_->Add(SIMD128Register::ToAllocationIndex(reg), zone()); +} + + void Translation::StoreStackSlot(int index) { buffer_->Add(STACK_SLOT, zone()); buffer_->Add(index, zone()); @@ -3104,6 +3452,12 @@ void Translation::StoreDoubleStackSlot(int index) { } +void Translation::StoreSIMD128StackSlot(int index, Opcode opcode) { + buffer_->Add(opcode, zone()); + buffer_->Add(index, zone()); +} + + void Translation::StoreLiteral(int literal_id) { buffer_->Add(LITERAL, zone()); buffer_->Add(literal_id, zone()); @@ -3131,10 +3485,16 @@ int Translation::NumberOfOperandsFor(Opcode opcode) { case INT32_REGISTER: case UINT32_REGISTER: case DOUBLE_REGISTER: + case FLOAT32x4_REGISTER: + case FLOAT64x2_REGISTER: + case INT32x4_REGISTER: case STACK_SLOT: case INT32_STACK_SLOT: case UINT32_STACK_SLOT: case DOUBLE_STACK_SLOT: + case FLOAT32x4_STACK_SLOT: + case FLOAT64x2_STACK_SLOT: + case INT32x4_STACK_SLOT: case LITERAL: case COMPILED_STUB_FRAME: return 1; @@ -3198,6 +3558,9 @@ SlotRef SlotRefValueBuilder::ComputeSlotForNextArgument( case Translation::INT32_REGISTER: case Translation::UINT32_REGISTER: case Translation::DOUBLE_REGISTER: + case 
Translation::FLOAT32x4_REGISTER: + case Translation::FLOAT64x2_REGISTER: + case Translation::INT32x4_REGISTER: // We are at safepoint which corresponds to call. All registers are // saved by caller so there would be no live registers at this // point. Thus these translation commands should not be used. @@ -3227,6 +3590,24 @@ SlotRef SlotRefValueBuilder::ComputeSlotForNextArgument( return SlotRef(slot_addr, SlotRef::DOUBLE); } + case Translation::FLOAT32x4_STACK_SLOT: { + int slot_index = iterator->Next(); + Address slot_addr = SlotAddress(frame, slot_index); + return SlotRef(slot_addr, SlotRef::FLOAT32x4); + } + + case Translation::FLOAT64x2_STACK_SLOT: { + int slot_index = iterator->Next(); + Address slot_addr = SlotAddress(frame, slot_index); + return SlotRef(slot_addr, SlotRef::FLOAT64x2); + } + + case Translation::INT32x4_STACK_SLOT: { + int slot_index = iterator->Next(); + Address slot_addr = SlotAddress(frame, slot_index); + return SlotRef(slot_addr, SlotRef::INT32x4); + } + case Translation::LITERAL: { int literal_index = iterator->Next(); return SlotRef(data->GetIsolate(), @@ -3376,6 +3757,15 @@ Handle SlotRef::GetValue(Isolate* isolate) { return isolate->factory()->NewNumber(value); } + case FLOAT32x4: + return isolate->factory()->NewFloat32x4(read_simd128_value(addr_).f4); + + case FLOAT64x2: + return isolate->factory()->NewFloat64x2(read_simd128_value(addr_).d2); + + case INT32x4: + return isolate->factory()->NewInt32x4(read_simd128_value(addr_).i4); + case LITERAL: return literal_; diff --git a/src/v8/src/deoptimizer.h b/src/v8/src/deoptimizer.h index a0cc697..64bff3e 100644 --- a/src/v8/src/deoptimizer.h +++ b/src/v8/src/deoptimizer.h @@ -32,6 +32,9 @@ static inline double read_double_value(Address p) { #endif // V8_HOST_CAN_READ_UNALIGNED } +static inline simd128_value_t read_simd128_value(Address p) { + return *reinterpret_cast(p); +} class FrameDescription; class TranslationIterator; @@ -52,6 +55,21 @@ class HeapNumberMaterializationDescriptor 
BASE_EMBEDDED { }; +template +class SIMD128MaterializationDescriptor BASE_EMBEDDED { + public: + SIMD128MaterializationDescriptor(T destination, simd128_value_t value) + : destination_(destination), value_(value) { } + + T destination() const { return destination_; } + simd128_value_t value() const { return value_; } + + private: + T destination_; + simd128_value_t value_; +}; + + class ObjectMaterializationDescriptor BASE_EMBEDDED { public: ObjectMaterializationDescriptor( @@ -332,7 +350,10 @@ class Deoptimizer : public Malloced { void AddObjectDuplication(intptr_t slot, int object_index); void AddObjectTaggedValue(intptr_t value); void AddObjectDoubleValue(double value); + void AddObjectSIMD128Value(simd128_value_t value, int translation_opcode); void AddDoubleValue(intptr_t slot_address, double value); + void AddSIMD128Value(intptr_t slot_address, simd128_value_t value, + int translation_opcode); bool ArgumentsObjectIsAdapted(int object_index) { ObjectMaterializationDescriptor desc = deferred_objects_.at(object_index); @@ -381,9 +402,9 @@ class Deoptimizer : public Malloced { void SetPlatformCompiledStubRegisters(FrameDescription* output_frame, CodeStubInterfaceDescriptor* desc); - // Fill the given output frame's double registers with the original values - // from the input frame's double registers. - void CopyDoubleRegisters(FrameDescription* output_frame); + // Fill the given output frame's simd128 registers with the original values + // from the input frame's simd128 registers. + void CopySIMD128Registers(FrameDescription* output_frame); // Determines whether the input frame contains alignment padding by looking // at the dynamic alignment state slot inside the frame. 
@@ -411,8 +432,17 @@ class Deoptimizer : public Malloced { List deferred_objects_tagged_values_; List > deferred_objects_double_values_; + List > + deferred_objects_float32x4_values_; + List > + deferred_objects_float64x2_values_; + List > + deferred_objects_int32x4_values_; List deferred_objects_; List > deferred_heap_numbers_; + List > deferred_float32x4s_; + List > deferred_float64x2s_; + List > deferred_int32x4s_; // Key for lookup of previously materialized objects Address stack_fp_; @@ -479,6 +509,11 @@ class FrameDescription { return read_double_value(reinterpret_cast
(ptr)); } + simd128_value_t GetSIMD128FrameSlot(unsigned offset) { + intptr_t* ptr = GetFrameSlotPointer(offset); + return read_simd128_value(reinterpret_cast
(ptr)); + } + void SetFrameSlot(unsigned offset, intptr_t value) { *GetFrameSlotPointer(offset) = value; } @@ -502,9 +537,11 @@ class FrameDescription { return registers_[n]; } - double GetDoubleRegister(unsigned n) const { - DCHECK(n < ARRAY_SIZE(double_registers_)); - return double_registers_[n]; + double GetDoubleRegister(unsigned n) const; + + simd128_value_t GetSIMD128Register(unsigned n) const { + DCHECK(n < ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n]; } void SetRegister(unsigned n, intptr_t value) { @@ -512,9 +549,11 @@ class FrameDescription { registers_[n] = value; } - void SetDoubleRegister(unsigned n, double value) { - DCHECK(n < ARRAY_SIZE(double_registers_)); - double_registers_[n] = value; + void SetDoubleRegister(unsigned n, double value); + + void SetSIMD128Register(unsigned n, simd128_value_t value) { + DCHECK(n < ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n] = value; } intptr_t GetTop() const { return top_; } @@ -558,8 +597,8 @@ class FrameDescription { return OFFSET_OF(FrameDescription, registers_); } - static int double_registers_offset() { - return OFFSET_OF(FrameDescription, double_registers_); + static int simd128_registers_offset() { + return OFFSET_OF(FrameDescription, simd128_registers_); } static int frame_size_offset() { @@ -591,7 +630,7 @@ class FrameDescription { uintptr_t frame_size_; // Number of bytes. 
JSFunction* function_; intptr_t registers_[Register::kNumRegisters]; - double double_registers_[DoubleRegister::kMaxNumRegisters]; + simd128_value_t simd128_registers_[SIMD128Register::kMaxNumRegisters]; intptr_t top_; intptr_t pc_; intptr_t fp_; @@ -690,10 +729,16 @@ class TranslationIterator BASE_EMBEDDED { V(INT32_REGISTER) \ V(UINT32_REGISTER) \ V(DOUBLE_REGISTER) \ + V(FLOAT32x4_REGISTER) \ + V(FLOAT64x2_REGISTER) \ + V(INT32x4_REGISTER) \ V(STACK_SLOT) \ V(INT32_STACK_SLOT) \ V(UINT32_STACK_SLOT) \ V(DOUBLE_STACK_SLOT) \ + V(FLOAT32x4_STACK_SLOT) \ + V(FLOAT64x2_STACK_SLOT) \ + V(INT32x4_STACK_SLOT) \ V(LITERAL) @@ -732,10 +777,12 @@ class Translation BASE_EMBEDDED { void StoreInt32Register(Register reg); void StoreUint32Register(Register reg); void StoreDoubleRegister(DoubleRegister reg); + void StoreSIMD128Register(SIMD128Register reg, Opcode opcode); void StoreStackSlot(int index); void StoreInt32StackSlot(int index); void StoreUint32StackSlot(int index); void StoreDoubleStackSlot(int index); + void StoreSIMD128StackSlot(int index, Opcode opcode); void StoreLiteral(int literal_id); void StoreArgumentsObject(bool args_known, int args_index, int args_length); @@ -765,6 +812,9 @@ class SlotRef BASE_EMBEDDED { INT32, UINT32, DOUBLE, + FLOAT32x4, + FLOAT64x2, + INT32x4, LITERAL, DEFERRED_OBJECT, // Object captured by the escape analysis. 
// The number of nested objects can be obtained diff --git a/src/v8/src/elements-kind.cc b/src/v8/src/elements-kind.cc index 0ebc6dc..894043e 100644 --- a/src/v8/src/elements-kind.cc +++ b/src/v8/src/elements-kind.cc @@ -39,6 +39,13 @@ int ElementsKindToShiftSize(ElementsKind elements_kind) { case FAST_HOLEY_DOUBLE_ELEMENTS: case FLOAT64_ELEMENTS: return 3; + case EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: + return 4; case FAST_SMI_ELEMENTS: case FAST_ELEMENTS: case FAST_HOLEY_SMI_ELEMENTS: diff --git a/src/v8/src/elements-kind.h b/src/v8/src/elements-kind.h index b48a5df..4bc1f60 100644 --- a/src/v8/src/elements-kind.h +++ b/src/v8/src/elements-kind.h @@ -35,9 +35,12 @@ enum ElementsKind { EXTERNAL_INT16_ELEMENTS, EXTERNAL_UINT16_ELEMENTS, EXTERNAL_INT32_ELEMENTS, + EXTERNAL_INT32x4_ELEMENTS, EXTERNAL_UINT32_ELEMENTS, EXTERNAL_FLOAT32_ELEMENTS, + EXTERNAL_FLOAT32x4_ELEMENTS, EXTERNAL_FLOAT64_ELEMENTS, + EXTERNAL_FLOAT64x2_ELEMENTS, EXTERNAL_UINT8_CLAMPED_ELEMENTS, // Fixed typed arrays @@ -47,8 +50,11 @@ enum ElementsKind { INT16_ELEMENTS, UINT32_ELEMENTS, INT32_ELEMENTS, + INT32x4_ELEMENTS, FLOAT32_ELEMENTS, + FLOAT32x4_ELEMENTS, FLOAT64_ELEMENTS, + FLOAT64x2_ELEMENTS, UINT8_CLAMPED_ELEMENTS, // Derived constants from ElementsKind @@ -128,11 +134,41 @@ inline bool IsExternalFloatOrDoubleElementsKind(ElementsKind kind) { } +inline bool IsExternalFloat32x4ElementsKind(ElementsKind kind) { + return kind == EXTERNAL_FLOAT32x4_ELEMENTS; +} + + +inline bool IsExternalFloat64x2ElementsKind(ElementsKind kind) { + return kind == EXTERNAL_FLOAT64x2_ELEMENTS; +} + + +inline bool IsExternalInt32x4ElementsKind(ElementsKind kind) { + return kind == EXTERNAL_INT32x4_ELEMENTS; +} + + inline bool IsFixedFloatElementsKind(ElementsKind kind) { return kind == FLOAT32_ELEMENTS || kind == FLOAT64_ELEMENTS; } +inline bool 
IsFixedFloat32x4ElementsKind(ElementsKind kind) { + return kind == FLOAT32x4_ELEMENTS; +} + + +inline bool IsFixedFloat64x2ElementsKind(ElementsKind kind) { + return kind == FLOAT64x2_ELEMENTS; +} + + +inline bool IsFixedInt32x4ElementsKind(ElementsKind kind) { + return kind == INT32x4_ELEMENTS; +} + + inline bool IsDoubleOrFloatElementsKind(ElementsKind kind) { return IsFastDoubleElementsKind(kind) || IsExternalFloatOrDoubleElementsKind(kind) || @@ -140,6 +176,30 @@ inline bool IsDoubleOrFloatElementsKind(ElementsKind kind) { } +inline bool IsFloat32x4ElementsKind(ElementsKind kind) { + return IsExternalFloat32x4ElementsKind(kind) || + IsFixedFloat32x4ElementsKind(kind); +} + + +inline bool IsFloat64x2ElementsKind(ElementsKind kind) { + return IsExternalFloat64x2ElementsKind(kind) || + IsFixedFloat64x2ElementsKind(kind); +} + + +inline bool IsInt32x4ElementsKind(ElementsKind kind) { + return IsExternalInt32x4ElementsKind(kind) || + IsFixedInt32x4ElementsKind(kind); +} + + +inline bool IsSIMD128ElementsKind(ElementsKind kind) { + return IsFloat32x4ElementsKind(kind) || IsFloat64x2ElementsKind(kind) || + IsInt32x4ElementsKind(kind); +} + + inline bool IsFastSmiOrObjectElementsKind(ElementsKind kind) { return kind == FAST_SMI_ELEMENTS || kind == FAST_HOLEY_SMI_ELEMENTS || diff --git a/src/v8/src/elements.cc b/src/v8/src/elements.cc index 945a9e7..4b583d7 100644 --- a/src/v8/src/elements.cc +++ b/src/v8/src/elements.cc @@ -31,9 +31,12 @@ // - ExternalInt16ElementsAccessor // - ExternalUint16ElementsAccessor // - ExternalInt32ElementsAccessor +// - ExternalInt32x4ElementsAccessor // - ExternalUint32ElementsAccessor // - ExternalFloat32ElementsAccessor +// - ExternalFloat32x4ElementsAccessor // - ExternalFloat64ElementsAccessor +// - ExternalFloat64x2ElementsAccessor // - ExternalUint8ClampedElementsAccessor // - FixedUint8ElementsAccessor // - FixedInt8ElementsAccessor @@ -41,8 +44,11 @@ // - FixedInt16ElementsAccessor // - FixedUint32ElementsAccessor // - 
FixedInt32ElementsAccessor +// - FixedInt32x4ElementsAccessor // - FixedFloat32ElementsAccessor +// - FixedFloat32x4ElementsAccessor // - FixedFloat64ElementsAccessor +// - FixedFloat64x2ElementsAccessor // - FixedUint8ClampedElementsAccessor // - DictionaryElementsAccessor // - SloppyArgumentsElementsAccessor @@ -84,12 +90,18 @@ static const int kPackedSizeNotKnown = -1; EXTERNAL_UINT16_ELEMENTS, ExternalUint16Array) \ V(ExternalInt32ElementsAccessor, EXTERNAL_INT32_ELEMENTS, \ ExternalInt32Array) \ + V(ExternalInt32x4ElementsAccessor, EXTERNAL_INT32x4_ELEMENTS, \ + ExternalInt32x4Array) \ V(ExternalUint32ElementsAccessor, \ EXTERNAL_UINT32_ELEMENTS, ExternalUint32Array) \ V(ExternalFloat32ElementsAccessor, \ EXTERNAL_FLOAT32_ELEMENTS, ExternalFloat32Array) \ + V(ExternalFloat32x4ElementsAccessor, \ + EXTERNAL_FLOAT32x4_ELEMENTS, ExternalFloat32x4Array) \ V(ExternalFloat64ElementsAccessor, \ EXTERNAL_FLOAT64_ELEMENTS, ExternalFloat64Array) \ + V(ExternalFloat64x2ElementsAccessor, \ + EXTERNAL_FLOAT64x2_ELEMENTS, ExternalFloat64x2Array) \ V(ExternalUint8ClampedElementsAccessor, \ EXTERNAL_UINT8_CLAMPED_ELEMENTS, \ ExternalUint8ClampedArray) \ @@ -99,8 +111,13 @@ static const int kPackedSizeNotKnown = -1; V(FixedInt16ElementsAccessor, INT16_ELEMENTS, FixedInt16Array) \ V(FixedUint32ElementsAccessor, UINT32_ELEMENTS, FixedUint32Array) \ V(FixedInt32ElementsAccessor, INT32_ELEMENTS, FixedInt32Array) \ + V(FixedInt32x4ElementsAccessor, INT32x4_ELEMENTS, FixedInt32x4Array) \ V(FixedFloat32ElementsAccessor, FLOAT32_ELEMENTS, FixedFloat32Array) \ + V(FixedFloat32x4ElementsAccessor, FLOAT32x4_ELEMENTS, \ + FixedFloat32x4Array) \ V(FixedFloat64ElementsAccessor, FLOAT64_ELEMENTS, FixedFloat64Array) \ + V(FixedFloat64x2ElementsAccessor, FLOAT64x2_ELEMENTS, \ + FixedFloat64x2Array) \ V(FixedUint8ClampedElementsAccessor, UINT8_CLAMPED_ELEMENTS, \ FixedUint8ClampedArray) diff --git a/src/v8/src/factory.cc b/src/v8/src/factory.cc index 934ab25..166347a 100644 --- 
a/src/v8/src/factory.cc +++ b/src/v8/src/factory.cc @@ -1037,6 +1037,30 @@ Handle Factory::NewHeapNumber(double value, } +Handle Factory::NewFloat32x4(float32x4_value_t value, + PretenureFlag pretenure) { + CALL_HEAP_FUNCTION( + isolate(), + isolate()->heap()->AllocateFloat32x4(value, pretenure), Float32x4); +} + + +Handle Factory::NewFloat64x2(float64x2_value_t value, + PretenureFlag pretenure) { + CALL_HEAP_FUNCTION( + isolate(), + isolate()->heap()->AllocateFloat64x2(value, pretenure), Float64x2); +} + + +Handle Factory::NewInt32x4(int32x4_value_t value, + PretenureFlag pretenure) { + CALL_HEAP_FUNCTION( + isolate(), + isolate()->heap()->AllocateInt32x4(value, pretenure), Int32x4); +} + + Handle Factory::NewTypeError(const char* message, Vector< Handle > args) { return NewError("MakeTypeError", message, args); diff --git a/src/v8/src/factory.h b/src/v8/src/factory.h index f09d247..a158be7 100644 --- a/src/v8/src/factory.h +++ b/src/v8/src/factory.h @@ -351,6 +351,15 @@ class Factory V8_FINAL { MutableMode mode = IMMUTABLE, PretenureFlag pretenure = NOT_TENURED); + Handle NewFloat32x4(float32x4_value_t value, + PretenureFlag pretenure = NOT_TENURED); + + Handle NewFloat64x2(float64x2_value_t value, + PretenureFlag pretenure = NOT_TENURED); + + Handle NewInt32x4(int32x4_value_t value, + PretenureFlag pretenure = NOT_TENURED); + // These objects are used by the api to create env-independent data // structures in the heap. inline Handle NewNeanderObject() { diff --git a/src/v8/src/flag-definitions.h b/src/v8/src/flag-definitions.h index 55af2e6..9bb59d9 100644 --- a/src/v8/src/flag-definitions.h +++ b/src/v8/src/flag-definitions.h @@ -147,6 +147,7 @@ struct MaybeBoolFlag { #define FLAG FLAG_FULL // Flags for language modes and experimental language features. 
+DEFINE_BOOL(simd_object, false, "enable SIMD object and operations") DEFINE_BOOL(use_strict, false, "enforce strict mode") DEFINE_BOOL(es_staging, false, "enable upcoming ES6+ features") diff --git a/src/v8/src/globals.h b/src/v8/src/globals.h index 889822f..b9049ee 100644 --- a/src/v8/src/globals.h +++ b/src/v8/src/globals.h @@ -92,6 +92,16 @@ typedef byte* Address; // ----------------------------------------------------------------------------- // Constants +struct float32x4_value_t { float storage[4]; }; +struct float64x2_value_t { double storage[2]; }; +struct int32x4_value_t { int32_t storage[4]; }; +union simd128_value_t { + double d[2]; + float32x4_value_t f4; + float64x2_value_t d2; + int32x4_value_t i4; +}; + const int KB = 1024; const int MB = KB * KB; const int GB = KB * KB * KB; @@ -108,14 +118,19 @@ const int kMinUInt16 = 0; const uint32_t kMaxUInt32 = 0xFFFFFFFFu; -const int kCharSize = sizeof(char); // NOLINT -const int kShortSize = sizeof(short); // NOLINT -const int kIntSize = sizeof(int); // NOLINT -const int kInt32Size = sizeof(int32_t); // NOLINT -const int kInt64Size = sizeof(int64_t); // NOLINT -const int kDoubleSize = sizeof(double); // NOLINT -const int kIntptrSize = sizeof(intptr_t); // NOLINT -const int kPointerSize = sizeof(void*); // NOLINT +const int kCharSize = sizeof(char); // NOLINT +const int kShortSize = sizeof(short); // NOLINT +const int kIntSize = sizeof(int); // NOLINT +const int kInt32Size = sizeof(int32_t); // NOLINT +const int kInt64Size = sizeof(int64_t); // NOLINT +const int kDoubleSize = sizeof(double); // NOLINT +const int kFloatSize = sizeof(float); // NOLINT +const int kFloat32x4Size = sizeof(float32x4_value_t); // NOLINT +const int kFloat64x2Size = sizeof(float64x2_value_t); // NOLINT +const int kInt32x4Size = sizeof(int32x4_value_t); // NOLINT +const int kSIMD128Size = sizeof(simd128_value_t); // NOLINT +const int kIntptrSize = sizeof(intptr_t); // NOLINT +const int kPointerSize = sizeof(void*); // NOLINT #if 
V8_TARGET_ARCH_X64 && V8_TARGET_ARCH_32_BIT const int kRegisterSize = kPointerSize + kPointerSize; #else diff --git a/src/v8/src/heap/heap.cc b/src/v8/src/heap/heap.cc index fa94181..08d159a 100644 --- a/src/v8/src/heap/heap.cc +++ b/src/v8/src/heap/heap.cc @@ -2643,6 +2643,54 @@ AllocationResult Heap::AllocateHeapNumber(double value, MutableMode mode, } +#define SIMD128_HEAP_ALLOCATE_FUNCTIONS(V) \ + V(Float32x4, float32x4) \ + V(Float64x2, float64x2) \ + V(Int32x4, int32x4) + + +#define DECLARE_SIMD_HEAP_ALLOCATE_FUNCTION(TYPE, type) \ +AllocationResult Heap::Allocate##TYPE(type##_value_t value, \ + PretenureFlag pretenure) { \ + STATIC_ASSERT(TYPE::kSize <= Page::kMaxRegularHeapObjectSize); \ + \ + AllocationSpace space = \ + SelectSpace(TYPE::kSize, OLD_DATA_SPACE, pretenure); \ + \ + HeapObject* result; \ + { AllocationResult allocation = \ + AllocateRaw(TYPE::kSize, space, OLD_DATA_SPACE); \ + if (!allocation.To(&result)) return allocation; \ + } \ + \ + result->set_map_no_write_barrier( \ + isolate()->native_context()->type##_function()->initial_map()); \ + JSObject::cast(result)->set_properties(empty_fixed_array()); \ + JSObject::cast(result)->set_elements(empty_fixed_array()); \ + \ + HeapObject* storage; \ + int storage_size = \ + FixedTypedArrayBase::kDataOffset + k##TYPE##Size; \ + space = SelectSpace(storage_size, OLD_DATA_SPACE, pretenure); \ + { AllocationResult allocation = \ + AllocateRaw(storage_size, space, OLD_DATA_SPACE); \ + if (!allocation.To(&storage)) return allocation; \ + } \ + \ + storage->set_map( \ + *isolate()->factory()->fixed_##type##_array_map()); \ + FixedTypedArrayBase* elements = FixedTypedArrayBase::cast(storage); \ + elements->set_length(static_cast(1)); \ + memset(elements->DataPtr(), 0, elements->DataSize()); \ + Fixed##TYPE##Array::cast(storage)->set(0, value); \ + TYPE::cast(result)->set_value(storage); \ + return result; \ +} + + +SIMD128_HEAP_ALLOCATE_FUNCTIONS(DECLARE_SIMD_HEAP_ALLOCATE_FUNCTION) + + AllocationResult 
Heap::AllocateCell(Object* value) { int size = Cell::kSize; STATIC_ASSERT(Cell::kSize <= Page::kMaxRegularHeapObjectSize); diff --git a/src/v8/src/heap/heap.h b/src/v8/src/heap/heap.h index c313333..3bc896f 100644 --- a/src/v8/src/heap/heap.h +++ b/src/v8/src/heap/heap.h @@ -106,8 +106,11 @@ namespace internal { V(Map, external_int16_array_map, ExternalInt16ArrayMap) \ V(Map, external_uint16_array_map, ExternalUint16ArrayMap) \ V(Map, external_int32_array_map, ExternalInt32ArrayMap) \ + V(Map, external_int32x4_array_map, ExternalInt32x4ArrayMap) \ V(Map, external_uint32_array_map, ExternalUint32ArrayMap) \ V(Map, external_float32_array_map, ExternalFloat32ArrayMap) \ + V(Map, external_float32x4_array_map, ExternalFloat32x4ArrayMap) \ + V(Map, external_float64x2_array_map, ExternalFloat64x2ArrayMap) \ V(Map, external_float64_array_map, ExternalFloat64ArrayMap) \ V(Map, external_uint8_clamped_array_map, ExternalUint8ClampedArrayMap) \ V(ExternalArray, empty_external_int8_array, EmptyExternalInt8Array) \ @@ -115,8 +118,11 @@ namespace internal { V(ExternalArray, empty_external_int16_array, EmptyExternalInt16Array) \ V(ExternalArray, empty_external_uint16_array, EmptyExternalUint16Array) \ V(ExternalArray, empty_external_int32_array, EmptyExternalInt32Array) \ + V(ExternalArray, empty_external_int32x4_array, EmptyExternalInt32x4Array) \ V(ExternalArray, empty_external_uint32_array, EmptyExternalUint32Array) \ V(ExternalArray, empty_external_float32_array, EmptyExternalFloat32Array) \ + V(ExternalArray, empty_external_float32x4_array, EmptyExternalFloat32x4Array)\ + V(ExternalArray, empty_external_float64x2_array, EmptyExternalFloat64x2Array)\ V(ExternalArray, empty_external_float64_array, EmptyExternalFloat64Array) \ V(ExternalArray, empty_external_uint8_clamped_array, \ EmptyExternalUint8ClampedArray) \ @@ -126,7 +132,10 @@ namespace internal { V(Map, fixed_int16_array_map, FixedInt16ArrayMap) \ V(Map, fixed_uint32_array_map, FixedUint32ArrayMap) \ V(Map, 
fixed_int32_array_map, FixedInt32ArrayMap) \ + V(Map, fixed_int32x4_array_map, FixedInt32x4ArrayMap) \ V(Map, fixed_float32_array_map, FixedFloat32ArrayMap) \ + V(Map, fixed_float32x4_array_map, FixedFloat32x4ArrayMap) \ + V(Map, fixed_float64x2_array_map, FixedFloat64x2ArrayMap) \ V(Map, fixed_float64_array_map, FixedFloat64ArrayMap) \ V(Map, fixed_uint8_clamped_array_map, FixedUint8ClampedArrayMap) \ V(FixedTypedArrayBase, empty_fixed_uint8_array, EmptyFixedUint8Array) \ @@ -136,6 +145,11 @@ namespace internal { V(FixedTypedArrayBase, empty_fixed_uint32_array, EmptyFixedUint32Array) \ V(FixedTypedArrayBase, empty_fixed_int32_array, EmptyFixedInt32Array) \ V(FixedTypedArrayBase, empty_fixed_float32_array, EmptyFixedFloat32Array) \ + V(FixedTypedArrayBase, empty_fixed_float32x4_array, \ + EmptyFixedFloat32x4Array) \ + V(FixedTypedArrayBase, empty_fixed_float64x2_array, \ + EmptyFixedFloat64x2Array) \ + V(FixedTypedArrayBase, empty_fixed_int32x4_array, EmptyFixedInt32x4Array) \ V(FixedTypedArrayBase, empty_fixed_float64_array, EmptyFixedFloat64Array) \ V(FixedTypedArrayBase, empty_fixed_uint8_clamped_array, \ EmptyFixedUint8ClampedArray) \ @@ -278,6 +292,9 @@ namespace internal { V(null_string, "null") \ V(number_string, "number") \ V(Number_string, "Number") \ + V(float32x4_string, "float32x4") \ + V(float64x2_string, "float64x2") \ + V(int32x4_string, "int32x4") \ V(nan_string, "NaN") \ V(RegExp_string, "RegExp") \ V(source_string, "source") \ @@ -338,6 +355,16 @@ namespace internal { V(throw_string, "throw") \ V(done_string, "done") \ V(value_string, "value") \ + V(signMask, "signMask") \ + V(x, "x") \ + V(y, "y") \ + V(z, "z") \ + V(w, "w") \ + V(flagX, "flagX") \ + V(flagY, "flagY") \ + V(flagZ, "flagZ") \ + V(flagW, "flagW") \ + V(simd, "SIMD") \ V(next_string, "next") \ V(byte_length_string, "byteLength") \ V(byte_offset_string, "byteOffset") \ @@ -1414,6 +1441,21 @@ class Heap { AllocateHeapNumber(double value, MutableMode mode = IMMUTABLE, PretenureFlag 
pretenure = NOT_TENURED); + // Allocated a Float32x4 from value. + MUST_USE_RESULT AllocationResult AllocateFloat32x4( + float32x4_value_t value, + PretenureFlag pretenure = NOT_TENURED); + + // Allocated a Float64x2 from value. + MUST_USE_RESULT AllocationResult AllocateFloat64x2( + float64x2_value_t value, + PretenureFlag pretenure = NOT_TENURED); + + // Allocated a Int32x4 from value. + MUST_USE_RESULT AllocationResult AllocateInt32x4( + int32x4_value_t value, + PretenureFlag pretenure = NOT_TENURED); + // Allocate a byte array of the specified length MUST_USE_RESULT AllocationResult AllocateByteArray(int length, PretenureFlag pretenure = NOT_TENURED); diff --git a/src/v8/src/heap/mark-compact.cc b/src/v8/src/heap/mark-compact.cc index abb4e1b..f73de5d 100644 --- a/src/v8/src/heap/mark-compact.cc +++ b/src/v8/src/heap/mark-compact.cc @@ -253,6 +253,9 @@ class VerifyNativeContextSeparationVisitor : public ObjectVisitor { case JS_ARRAY_TYPE: case JS_DATE_TYPE: case JS_OBJECT_TYPE: + case FLOAT32x4_TYPE: + case FLOAT64x2_TYPE: + case INT32x4_TYPE: case JS_REGEXP_TYPE: VisitPointer(HeapObject::RawField(object, JSObject::kMapOffset)); break; diff --git a/src/v8/src/heap/objects-visiting.cc b/src/v8/src/heap/objects-visiting.cc index a316d12..5c438e5 100644 --- a/src/v8/src/heap/objects-visiting.cc +++ b/src/v8/src/heap/objects-visiting.cc @@ -128,9 +128,11 @@ StaticVisitorBase::VisitorId StaticVisitorBase::GetVisitorId( case JS_MESSAGE_OBJECT_TYPE: case JS_SET_ITERATOR_TYPE: case JS_MAP_ITERATOR_TYPE: + case FLOAT32x4_TYPE: + case FLOAT64x2_TYPE: + case INT32x4_TYPE: return GetVisitorIdForSize(kVisitJSObject, kVisitJSObjectGeneric, instance_size); - case JS_FUNCTION_TYPE: return kVisitJSFunction; @@ -151,6 +153,9 @@ StaticVisitorBase::VisitorId StaticVisitorBase::GetVisitorId( case FIXED_UINT32_ARRAY_TYPE: case FIXED_INT32_ARRAY_TYPE: case FIXED_FLOAT32_ARRAY_TYPE: + case FIXED_INT32x4_ARRAY_TYPE: + case FIXED_FLOAT32x4_ARRAY_TYPE: + case FIXED_FLOAT64x2_ARRAY_TYPE: 
case FIXED_UINT8_CLAMPED_ARRAY_TYPE: return kVisitFixedTypedArray; diff --git a/src/v8/src/hydrogen-instructions.cc b/src/v8/src/hydrogen-instructions.cc index b75bec0..fcbd299 100644 --- a/src/v8/src/hydrogen-instructions.cc +++ b/src/v8/src/hydrogen-instructions.cc @@ -840,7 +840,6 @@ bool HInstruction::CanDeoptimize() { case HValue::kSeqStringGetChar: case HValue::kStoreCodeEntry: case HValue::kStoreFrameContext: - case HValue::kStoreKeyed: case HValue::kStoreNamedField: case HValue::kStoreNamedGeneric: case HValue::kStringCharCodeAt: @@ -849,8 +848,13 @@ bool HInstruction::CanDeoptimize() { case HValue::kTypeofIsAndBranch: case HValue::kUnknownOSRValue: case HValue::kUseConst: + case HValue::kNullarySIMDOperation: return false; + case HValue::kStoreKeyed: + return !CpuFeatures::SupportsSIMD128InCrankshaft() && + IsSIMD128ElementsKind(HStoreKeyed::cast(this)->elements_kind()); + case HValue::kAdd: case HValue::kAllocateBlockContext: case HValue::kApplyArguments: @@ -903,6 +907,10 @@ bool HInstruction::CanDeoptimize() { case HValue::kTypeof: case HValue::kUnaryMathOperation: case HValue::kWrapReceiver: + case HValue::kUnarySIMDOperation: + case HValue::kBinarySIMDOperation: + case HValue::kTernarySIMDOperation: + case HValue::kQuarternarySIMDOperation: return true; } UNREACHABLE(); @@ -1332,7 +1340,23 @@ bool HTypeofIsAndBranch::KnownSuccessorBlock(HBasicBlock** block) { type_literal_.IsKnownGlobal(isolate()->heap()->number_string()); *block = number_type ? FirstSuccessor() : SecondSuccessor(); return true; + } else if (value()->representation().IsFloat32x4()) { + bool float32x4_type = + type_literal_.IsKnownGlobal(isolate()->heap()->float32x4_string()); + *block = float32x4_type ? FirstSuccessor() : SecondSuccessor(); + return true; + } else if (value()->representation().IsFloat64x2()) { + bool float64x2_type = + type_literal_.IsKnownGlobal(isolate()->heap()->float64x2_string()); + *block = float64x2_type ? 
FirstSuccessor() : SecondSuccessor(); + return true; + } else if (value()->representation().IsInt32x4()) { + bool int32x4_type = + type_literal_.IsKnownGlobal(isolate()->heap()->int32x4_string()); + *block = int32x4_type ? FirstSuccessor() : SecondSuccessor(); + return true; } + *block = NULL; return false; } @@ -4789,4 +4813,140 @@ OStream& operator<<(OStream& os, const HObjectAccess& access) { return os << "@" << access.offset(); } + +HInstruction* HNullarySIMDOperation::New( + Zone* zone, HValue* context, BuiltinFunctionId op) { + return new(zone) HNullarySIMDOperation(context, op); +} + + +HInstruction* HUnarySIMDOperation::New( + Zone* zone, HValue* context, HValue* value, BuiltinFunctionId op, + Representation to) { + return new(zone) HUnarySIMDOperation(context, value, op, to); +} + + +HInstruction* HBinarySIMDOperation::New( + Zone* zone, HValue* context, HValue* left, HValue* right, + BuiltinFunctionId op) { + return new(zone) HBinarySIMDOperation(context, left, right, op); +} + + +HInstruction* HTernarySIMDOperation::New( + Zone* zone, HValue* context, HValue* mask, HValue* left, HValue* right, + BuiltinFunctionId op) { + return new(zone) HTernarySIMDOperation(context, mask, left, right, op); +} + + +HInstruction* HQuarternarySIMDOperation::New( + Zone* zone, HValue* context, HValue* x, HValue* y, HValue* z, HValue* w, + BuiltinFunctionId op) { + return new(zone) HQuarternarySIMDOperation(context, x, y, z, w, op); +} + + +const char* HNullarySIMDOperation::OpName() const { + switch (op()) { +#define SIMD_NULLARY_OPERATION_CASE_ITEM(module, function, name, p4) \ + case k##name: \ + return #module "." 
#function; +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +OStream& HNullarySIMDOperation::PrintDataTo(OStream& os) const { + return os << OpName(); +} + + +const char* HUnarySIMDOperation::OpName() const { + switch (op()) { +#define SIMD_UNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5) \ + case k##name: \ + return #module "." #function; +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +OStream& HUnarySIMDOperation::PrintDataTo(OStream& os) const { + return os << OpName() << " " << NameOf(value()); +} + + +const char* HBinarySIMDOperation::OpName() const { + switch (op()) { +#define SIMD_BINARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, p6) \ + case k##name: \ + return #module "." #function; +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +OStream& HBinarySIMDOperation::PrintDataTo(OStream& os) const { + return os << OpName() << " " << NameOf(left()) << " " + << NameOf(right()); +} + + +const char* HTernarySIMDOperation::OpName() const { + switch (op()) { +#define SIMD_TERNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, p6, \ + p7) \ + case k##name: \ + return #module "." 
#function; +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +OStream& HTernarySIMDOperation::PrintDataTo(OStream& os) const { + return os << OpName() << " " << NameOf(first()) << " " + << NameOf(second()) << " " << NameOf(third()); +} + + +const char* HQuarternarySIMDOperation::OpName() const { + switch (op()) { +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, \ + p6, p7, p8) \ + case k##name: \ + return #module "." #function; +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +OStream& HQuarternarySIMDOperation::PrintDataTo(OStream& os) const { + return os << OpName() << " " << NameOf(x()) << " " << NameOf(y()) << " " + << NameOf(z()) << " " << NameOf(w()); +} + + } } // namespace v8::internal diff --git a/src/v8/src/hydrogen-instructions.h b/src/v8/src/hydrogen-instructions.h index ed12435..c37aa1a 100644 --- a/src/v8/src/hydrogen-instructions.h +++ b/src/v8/src/hydrogen-instructions.h @@ -161,6 +161,11 @@ class OStream; V(Typeof) \ V(TypeofIsAndBranch) \ V(UnaryMathOperation) \ + V(NullarySIMDOperation) \ + V(UnarySIMDOperation) \ + V(BinarySIMDOperation) \ + V(TernarySIMDOperation) \ + V(QuarternarySIMDOperation) \ V(UnknownOSRValue) \ V(UseConst) \ V(WrapReceiver) @@ -602,6 +607,9 @@ class HValue : public ZoneObject { HType t = type(); if (t.IsSmi()) return Representation::Smi(); if (t.IsHeapNumber()) return Representation::Double(); + if (t.IsFloat32x4()) return Representation::Float32x4(); + if (t.IsFloat64x2()) return Representation::Float64x2(); + if (t.IsInt32x4()) return Representation::Int32x4(); if (t.IsHeapObject()) return r; return Representation::None(); } @@ -610,7 +618,9 @@ class HValue : public ZoneObject { HType type() const { return type_; } void set_type(HType new_type) { - 
DCHECK(new_type.IsSubtypeOf(type_)); + // TODO(ningxin): for SIMD ops, the initial type is None which + // hit the following ASSERT. + // DCHECK(new_type.IsSubtypeOf(type_)); type_ = new_type; } @@ -1672,7 +1682,15 @@ class HChange V8_FINAL : public HUnaryOperation { if (value->representation().IsSmi() || value->type().IsSmi()) { set_type(HType::Smi()); } else { - set_type(HType::TaggedNumber()); + if (to.IsFloat32x4()) { + set_type(HType::Float32x4()); + } else if (to.IsFloat64x2()) { + set_type(HType::Float64x2()); + } else if (to.IsInt32x4()) { + set_type(HType::Int32x4()); + } else { + set_type(HType::TaggedNumber()); + } if (to.IsTagged()) SetChangesFlag(kNewSpacePromotion); } } @@ -6001,6 +6019,17 @@ class HObjectAccess V8_FINAL { Representation::Integer32()); } + static HObjectAccess ForSIMD128Double0() { + return HObjectAccess( + kDouble, Float32x4::kValueOffset, Representation::Double()); + } + + static HObjectAccess ForSIMD128Double1() { + return HObjectAccess(kDouble, + Float32x4::kValueOffset + kDoubleSize, + Representation::Double()); + } + static HObjectAccess ForElementsPointer() { return HObjectAccess(kElementsPointer, JSObject::kElementsOffset); } @@ -6141,6 +6170,10 @@ class HObjectAccess V8_FINAL { Representation::UInteger16()); } + static HObjectAccess ForMapPrototype() { + return HObjectAccess(kInobject, Map::kPrototypeOffset); + } + static HObjectAccess ForPropertyCellValue() { return HObjectAccess(kInobject, PropertyCell::kValueOffset); } @@ -6658,6 +6691,15 @@ class HLoadKeyed V8_FINAL elements_kind == FLOAT32_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { set_representation(Representation::Double()); + } else if (IsFloat32x4ElementsKind(elements_kind)) { + set_representation(CpuFeatures::SupportsSIMD128InCrankshaft() ? + Representation::Float32x4() : Representation::Tagged()); + } else if (IsFloat64x2ElementsKind(elements_kind)) { + set_representation(CpuFeatures::SupportsSIMD128InCrankshaft() ? 
+ Representation::Float64x2() : Representation::Tagged()); + } else if (IsInt32x4ElementsKind(elements_kind)) { + set_representation(CpuFeatures::SupportsSIMD128InCrankshaft() ? + Representation::Int32x4() : Representation::Tagged()); } else { set_representation(Representation::Integer32()); } @@ -6990,6 +7032,19 @@ class HStoreKeyed V8_FINAL return Representation::Integer32(); } + if (IsFloat32x4ElementsKind(kind)) { + return CpuFeatures::SupportsSIMD128InCrankshaft() ? + Representation::Float32x4() : Representation::Tagged(); + } + if (IsFloat64x2ElementsKind(kind)) { + return CpuFeatures::SupportsSIMD128InCrankshaft() ? + Representation::Float64x2() : Representation::Tagged(); + } + if (IsInt32x4ElementsKind(kind)) { + return CpuFeatures::SupportsSIMD128InCrankshaft() ? + Representation::Int32x4() : Representation::Tagged(); + } + if (IsFastSmiElementsKind(kind)) { return Representation::Smi(); } @@ -7859,6 +7914,386 @@ class HAllocateBlockContext: public HTemplateInstruction<2> { }; +class HNullarySIMDOperation V8_FINAL : public HTemplateInstruction<1> { + public: + static HInstruction* New(Zone* zone, + HValue* context, + BuiltinFunctionId op); + + HValue* context() { return OperandAt(0); } + + virtual OStream& PrintDataTo(OStream& os) const V8_OVERRIDE; + + virtual Representation RequiredInputRepresentation(int index) V8_OVERRIDE { + return Representation::Tagged(); + } + + BuiltinFunctionId op() const { return op_; } + const char* OpName() const; + + DECLARE_CONCRETE_INSTRUCTION(NullarySIMDOperation) + + protected: + virtual bool DataEquals(HValue* other) V8_OVERRIDE { + HNullarySIMDOperation* b = HNullarySIMDOperation::cast(other); + return op_ == b->op(); + } + + private: + HNullarySIMDOperation(HValue* context, BuiltinFunctionId op) + : HTemplateInstruction<1>(HType::None()), op_(op) { + SetOperandAt(0, context); + switch (op) { +#define SIMD_NULLARY_OPERATION_CASE_ITEM(p1, p2, name, representation) \ + case k##name: \ + 
set_representation(Representation::representation()); \ + set_type(HType::FromRepresentation(representation_)); \ + break; +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + } + SetFlag(kUseGVN); + } + + virtual bool IsDeletable() const V8_OVERRIDE { return true; } + + BuiltinFunctionId op_; +}; + + +class HUnarySIMDOperation V8_FINAL : public HTemplateInstruction<2> { + public: + static HInstruction* New(Zone* zone, + HValue* context, + HValue* value, + BuiltinFunctionId op, + Representation to = Representation::Float32x4()); + + HValue* context() { return OperandAt(0); } + HValue* value() const { return OperandAt(1); } + + virtual OStream& PrintDataTo(OStream& os) const V8_OVERRIDE; + + virtual Representation RequiredInputRepresentation(int index) V8_OVERRIDE { + if (index == 0) { + return Representation::Tagged(); + } else if (op_ == kSIMD128Change) { + return value()->representation(); + } else { + switch (op_) { +#define SIMD_UNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, representation) \ + case k##name: \ + return Representation::representation(); +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return Representation::None(); + } + } + } + + BuiltinFunctionId op() const { return op_; } + const char* OpName() const; + + DECLARE_CONCRETE_INSTRUCTION(UnarySIMDOperation) + + protected: + virtual bool DataEquals(HValue* other) V8_OVERRIDE { + HUnarySIMDOperation* b = HUnarySIMDOperation::cast(other); + return op_ == b->op(); + } + + private: + HUnarySIMDOperation(HValue* context, HValue* value, BuiltinFunctionId op, + Representation to = Representation::Float32x4()) + : HTemplateInstruction<2>(HType::None()), op_(op) { + SetOperandAt(0, context); + SetOperandAt(1, value); + switch (op) { + case kSIMD128Change: + set_representation(to); + 
set_type(HType::FromRepresentation(to)); + break; +#define SIMD_UNARY_OPERATION_CASE_ITEM(p1, p2, name, representation, p5) \ + case k##name: \ + set_representation(Representation::representation()); \ + set_type(HType::FromRepresentation(representation_)); \ + if (Representation::p5().IsInteger32()) { \ + SetFlag(kTruncatingToInt32); \ + } \ + break; +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + } + SetFlag(kUseGVN); + } + + virtual bool IsDeletable() const V8_OVERRIDE { return true; } + + BuiltinFunctionId op_; +}; + + +class HBinarySIMDOperation V8_FINAL : public HTemplateInstruction<3> { + public: + static HInstruction* New(Zone* zone, + HValue* context, + HValue* left, + HValue* right, + BuiltinFunctionId op); + + HValue* context() { return OperandAt(0); } + HValue* left() const { return OperandAt(1); } + HValue* right() const { return OperandAt(2); } + + virtual OStream& PrintDataTo(OStream& os) const V8_OVERRIDE; + + virtual Representation RequiredInputRepresentation(int index) V8_OVERRIDE { + if (index == 0) { + return Representation::Tagged(); + } else { + switch (op_) { +#define SIMD_BINARY_OPERATION_CASE_ITEM(p1, p2, name, p4, left_representation, \ + right_representation) \ + case k##name: \ + return index == 1 ? 
Representation::left_representation() \ + : Representation::right_representation(); \ + break; +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return Representation::None(); + } + } + } + + BuiltinFunctionId op() const { return op_; } + const char* OpName() const; + + DECLARE_CONCRETE_INSTRUCTION(BinarySIMDOperation) + + protected: + virtual bool DataEquals(HValue* other) V8_OVERRIDE { + HBinarySIMDOperation* b = HBinarySIMDOperation::cast(other); + return op_ == b->op(); + } + + private: + HBinarySIMDOperation(HValue* context, HValue* left, HValue* right, + BuiltinFunctionId op) + : HTemplateInstruction<3>(HType::None()), op_(op) { + SetOperandAt(0, context); + SetOperandAt(1, left); + SetOperandAt(2, right); + switch (op) { +#define SIMD_BINARY_OPERATION_CASE_ITEM(p1, p2, name, representation, p5, p6) \ + case k##name: \ + set_representation(Representation::representation()); \ + set_type(HType::FromRepresentation(representation_)); \ + if (Representation::p5().IsInteger32() || \ + Representation::p6().IsInteger32()) { \ + SetFlag(kTruncatingToInt32); \ + } \ + break; +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + } + SetFlag(kUseGVN); + } + + virtual bool IsDeletable() const V8_OVERRIDE { return true; } + + BuiltinFunctionId op_; +}; + + +class HTernarySIMDOperation V8_FINAL : public HTemplateInstruction<4> { + public: + static HInstruction* New(Zone* zone, + HValue* context, + HValue* first, + HValue* second, + HValue* third, + BuiltinFunctionId op); + + HValue* context() { return OperandAt(0); } + HValue* first() const { return OperandAt(1); } + HValue* second() const { return OperandAt(2); } + HValue* third() const { return OperandAt(3); } + + virtual OStream& PrintDataTo(OStream& os) const V8_OVERRIDE; + + virtual Representation RequiredInputRepresentation(int index) V8_OVERRIDE { + if (index == 0) { + 
return Representation::Tagged(); + } else { + switch (op_) { +#define SIMD_TERNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, \ + first_representation, second_representation, third_representation) \ + case k##name: \ + switch (index) { \ + case 1: return Representation::first_representation(); \ + case 2: return Representation::second_representation(); \ + case 3: return Representation::third_representation(); \ + default: \ + UNREACHABLE(); \ + return Representation::None(); \ + } +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return Representation::None(); + } + } + } + + BuiltinFunctionId op() const { return op_; } + const char* OpName() const; + + DECLARE_CONCRETE_INSTRUCTION(TernarySIMDOperation) + + protected: + virtual bool DataEquals(HValue* other) V8_OVERRIDE { + HTernarySIMDOperation* b = HTernarySIMDOperation::cast(other); + return op_ == b->op(); + } + + private: + HTernarySIMDOperation(HValue* context, HValue* first, HValue* second, + HValue* third, BuiltinFunctionId op) + : HTemplateInstruction<4>(HType::None()), op_(op) { + SetOperandAt(0, context); + SetOperandAt(1, first); + SetOperandAt(2, second); + SetOperandAt(3, third); + switch (op) { +#define SIMD_TERNARY_OPERATION_CASE_ITEM(p1, p2, name, representation, p5, \ + p6, p7) \ + case k##name: \ + set_representation(Representation::representation()); \ + set_type(HType::FromRepresentation(representation_)); \ + if (Representation::p5().IsInteger32() || \ + Representation::p6().IsInteger32() || \ + Representation::p7().IsInteger32()) { \ + SetFlag(kTruncatingToInt32); \ + } \ + break; +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + } + SetFlag(kUseGVN); + } + + virtual bool IsDeletable() const V8_OVERRIDE { return true; } + + BuiltinFunctionId op_; +}; + + +class HQuarternarySIMDOperation V8_FINAL : public HTemplateInstruction<5> { + public: 
+ static HInstruction* New(Zone* zone, + HValue* context, + HValue* x, + HValue* y, + HValue* z, + HValue* w, + BuiltinFunctionId op); + + HValue* context() { return OperandAt(0); } + HValue* x() const { return OperandAt(1); } + HValue* y() const { return OperandAt(2); } + HValue* z() const { return OperandAt(3); } + HValue* w() const { return OperandAt(4); } + + virtual OStream& PrintDataTo(OStream& os) const V8_OVERRIDE; + + virtual Representation RequiredInputRepresentation(int index) V8_OVERRIDE { + if (index == 0) { + return Representation::Tagged(); + } else { + switch (op_) { +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, \ + first_representation, second_representation, third_representation, \ + fourth_representation) \ + case k##name: \ + switch (index) { \ + case 1: return Representation::first_representation(); \ + case 2: return Representation::second_representation(); \ + case 3: return Representation::third_representation(); \ + case 4: return Representation::fourth_representation(); \ + default: \ + UNREACHABLE(); \ + return Representation::None(); \ + } +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return Representation::None(); + } + } + } + + BuiltinFunctionId op() const { return op_; } + const char* OpName() const; + + DECLARE_CONCRETE_INSTRUCTION(QuarternarySIMDOperation) + + protected: + virtual bool DataEquals(HValue* other) V8_OVERRIDE { + HQuarternarySIMDOperation* b = HQuarternarySIMDOperation::cast(other); + return op_ == b->op(); + } + + private: + HQuarternarySIMDOperation(HValue* context, HValue* x, HValue* y, HValue* z, + HValue* w, BuiltinFunctionId op) + : HTemplateInstruction<5>(HType::None()), op_(op) { + SetOperandAt(0, context); + SetOperandAt(1, x); + SetOperandAt(2, y); + SetOperandAt(3, z); + SetOperandAt(4, w); + switch (op) { +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(p1, p2, name, representation, p5, \ + p6, 
p7, p8) \ + case k##name: \ + set_representation(Representation::representation()); \ + set_type(HType::FromRepresentation(representation_)); \ + if (Representation::p5().IsInteger32() || \ + Representation::p6().IsInteger32() || \ + Representation::p7().IsInteger32() || \ + Representation::p8().IsInteger32()) { \ + SetFlag(kTruncatingToInt32); \ + } \ + break; +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + } + SetFlag(kUseGVN); + } + + virtual bool IsDeletable() const V8_OVERRIDE { return true; } + + BuiltinFunctionId op_; +}; + #undef DECLARE_INSTRUCTION #undef DECLARE_CONCRETE_INSTRUCTION diff --git a/src/v8/src/hydrogen-representation-changes.cc b/src/v8/src/hydrogen-representation-changes.cc index ebb03b5..83a9abe 100644 --- a/src/v8/src/hydrogen-representation-changes.cc +++ b/src/v8/src/hydrogen-representation-changes.cc @@ -36,8 +36,19 @@ void HRepresentationChangesPhase::InsertRepresentationChangeForUse( } if (new_value == NULL) { - new_value = new(graph()->zone()) HChange( - value, to, is_truncating_to_smi, is_truncating_to_int); + if (((to.IsFloat32x4() || to.IsFloat64x2() || to.IsInt32x4()) && + !value->representation().IsTagged()) || + ((value->representation().IsFloat32x4() || + value->representation().IsFloat64x2() || + value->representation().IsInt32x4()) && + !to.IsTagged())) { + new_value = HUnarySIMDOperation::New(graph()->zone(), + graph()->entry_block()->last_environment()->context(), + value, kSIMD128Change, to); + } else { + new_value = new(graph()->zone()) HChange( + value, to, is_truncating_to_smi, is_truncating_to_int); + } if (!use_value->operand_position(use_index).IsUnknown()) { new_value->set_position(use_value->operand_position(use_index)); } else { diff --git a/src/v8/src/hydrogen-types.cc b/src/v8/src/hydrogen-types.cc index c83ff3c..a14a523 100644 --- a/src/v8/src/hydrogen-types.cc +++ b/src/v8/src/hydrogen-types.cc @@ -5,6 +5,7 @@ 
#include "src/hydrogen-types.h" #include "src/ostreams.h" +#include "src/property-details.h" #include "src/types-inl.h" @@ -43,6 +44,9 @@ HType HType::FromValue(Handle value) { if (value->IsSmi()) return HType::Smi(); if (value->IsNull()) return HType::Null(); if (value->IsHeapNumber()) return HType::HeapNumber(); + if (value->IsFloat32x4()) return HType::Float32x4(); + if (value->IsFloat64x2()) return HType::Float64x2(); + if (value->IsInt32x4()) return HType::Int32x4(); if (value->IsString()) return HType::String(); if (value->IsBoolean()) return HType::Boolean(); if (value->IsUndefined()) return HType::Undefined(); @@ -53,6 +57,24 @@ HType HType::FromValue(Handle value) { } +// static +HType HType::FromRepresentation(Representation representation) { + HType result = HType::Tagged(); + if (representation.IsSmi()) { + result = HType::Smi(); + } else if (representation.IsDouble()) { + result = HType::HeapNumber(); + } else if (representation.IsFloat32x4()) { + result = HType::Float32x4(); + } else if (representation.IsFloat64x2()) { + result = HType::Float64x2(); + } else if (representation.IsInt32x4()) { + result = HType::Int32x4(); + } + return result; +} + + OStream& operator<<(OStream& os, const HType& t) { // Note: The c1visualizer syntax for locals allows only a sequence of the // following characters: A-Za-z0-9_-|: diff --git a/src/v8/src/hydrogen-types.h b/src/v8/src/hydrogen-types.h index d662a16..c226084 100644 --- a/src/v8/src/hydrogen-types.h +++ b/src/v8/src/hydrogen-types.h @@ -16,6 +16,7 @@ namespace internal { template class Handle; class Object; class OStream; +class Representation; #define HTYPE_LIST(V) \ V(Any, 0x0) /* 0000 0000 0000 0000 */ \ @@ -27,12 +28,15 @@ class OStream; V(HeapPrimitive, 0x25) /* 0000 0000 0010 0101 */ \ V(Null, 0x27) /* 0000 0000 0010 0111 */ \ V(HeapNumber, 0x2d) /* 0000 0000 0010 1101 */ \ - V(String, 0x65) /* 0000 0000 0110 0101 */ \ - V(Boolean, 0xa5) /* 0000 0000 1010 0101 */ \ - V(Undefined, 0x125) /* 0000 0001 0010 
0101 */ \ - V(JSObject, 0x221) /* 0000 0010 0010 0001 */ \ - V(JSArray, 0x621) /* 0000 0110 0010 0001 */ \ - V(None, 0x7ff) /* 0000 0111 1111 1111 */ + V(Float32x4, 0x65) /* 0000 0000 0110 0101 */ \ + V(Float64x2, 0xa5) /* 0000 0000 1010 0101 */ \ + V(Int32x4, 0x125) /* 0000 0001 0010 0101 */ \ + V(String, 0x225) /* 0000 0010 0010 0101 */ \ + V(Boolean, 0x425) /* 0000 0100 0010 0101 */ \ + V(Undefined, 0x825) /* 0000 1000 0010 0101 */ \ + V(JSObject, 0x1021) /* 0001 0000 0010 0001 */ \ + V(JSArray, 0x3021) /* 0011 0000 0010 0001 */ \ + V(None, 0x3fff) /* 0011 1111 1111 1111 */ class HType V8_FINAL { public: @@ -63,7 +67,8 @@ class HType V8_FINAL { template static HType FromType(typename T::TypeHandle type) V8_WARN_UNUSED_RESULT; - static HType FromValue(Handle value) V8_WARN_UNUSED_RESULT; + static HType FromValue(Handle value) ; + static HType FromRepresentation(Representation representation); friend OStream& operator<<(OStream& os, const HType& t); diff --git a/src/v8/src/hydrogen.cc b/src/v8/src/hydrogen.cc index 3ddd7cc..ca9adec 100644 --- a/src/v8/src/hydrogen.cc +++ b/src/v8/src/hydrogen.cc @@ -5600,7 +5600,8 @@ void HOptimizedGraphBuilder::VisitObjectLiteral(ObjectLiteral* expr) { CHECK_ALIVE(store = BuildNamedGeneric( STORE, NULL, literal, name, value)); } else { - PropertyAccessInfo info(this, STORE, ToType(map), name); + PropertyAccessInfo info( + this, STORE, ToType(map), name, map->instance_type()); if (info.CanAccessMonomorphic()) { HValue* checked_literal = Add(literal, map); DCHECK(!info.lookup()->IsPropertyCallbacks()); @@ -6070,6 +6071,10 @@ bool HOptimizedGraphBuilder::PropertyAccessInfo::LookupInPrototypes() { bool HOptimizedGraphBuilder::PropertyAccessInfo::CanAccessMonomorphic() { + if (IsSIMD128PropertyCallback() && + CpuFeatures::SupportsSIMD128InCrankshaft()) { + return true; + } if (!CanInlinePropertyAccess(type_)) return false; if (IsJSObjectFieldAccessor()) return IsLoad(); if (this->map()->function_with_prototype() && @@ -6107,11 
+6112,22 @@ bool HOptimizedGraphBuilder::PropertyAccessInfo::CanAccessAsMonomorphic( STATIC_ASSERT(kMaxLoadPolymorphism == kMaxStorePolymorphism); if (types->length() > kMaxLoadPolymorphism) return false; + if (IsSIMD128PropertyCallback() && + CpuFeatures::SupportsSIMD128InCrankshaft()) { + for (int i = 1; i < types->length(); ++i) { + if (types->at(i)->instance_type() == types->first()->instance_type()) { + return false; + } + } + return true; + } + HObjectAccess access = HObjectAccess::ForMap(); // bogus default if (GetJSObjectFieldAccess(&access)) { for (int i = 1; i < types->length(); ++i) { PropertyAccessInfo test_info( - builder_, access_type_, ToType(types->at(i)), name_); + builder_, access_type_, ToType(types->at(i)), name_, + types->at(i)->instance_type()); HObjectAccess test_access = HObjectAccess::ForMap(); // bogus default if (!test_info.GetJSObjectFieldAccess(&test_access)) return false; if (!access.Equals(test_access)) return false; @@ -6130,7 +6146,8 @@ bool HOptimizedGraphBuilder::PropertyAccessInfo::CanAccessAsMonomorphic( for (int i = 1; i < types->length(); ++i) { PropertyAccessInfo test_info( - builder_, access_type_, ToType(types->at(i)), name_); + builder_, access_type_, ToType(types->at(i)), name_, + types->at(i)->instance_type()); if (!test_info.IsCompatible(this)) return false; } @@ -6153,6 +6170,31 @@ static bool NeedsWrappingFor(Type* type, Handle target) { } +static bool IsSIMDProperty(Handle name, uint8_t* mask) { + SmartArrayPointer cstring = name->ToCString(); + int i = 0; + while (i <= 3) { + int shift = 0; + switch (cstring[i]) { + case 'W': + shift++; + case 'Z': + shift++; + case 'Y': + shift++; + case 'X': + break; + default: + return false; + } + *mask |= (shift << 2*i); + i++; + } + + return true; +} + + HInstruction* HOptimizedGraphBuilder::BuildMonomorphicAccess( PropertyAccessInfo* info, HValue* object, @@ -6187,6 +6229,17 @@ HInstruction* HOptimizedGraphBuilder::BuildMonomorphicAccess( if (info->lookup()->IsField()) { if 
(info->IsLoad()) { + if (info->map()->constructor()->IsJSFunction()) { + JSFunction* constructor = JSFunction::cast(info->map()->constructor()); + String* class_name = + String::cast(constructor->shared()->instance_class_name()); + uint8_t mask = 0; + if (class_name->Equals(isolate()->heap()->simd()) && + IsSIMDProperty(info->name(), &mask) && + CpuFeatures::SupportsSIMD128InCrankshaft()) { + return New(mask); + } + } return BuildLoadNamedField(info, checked_holder); } else { return BuildStoreNamedField(info, checked_object, value); @@ -6249,7 +6302,9 @@ void HOptimizedGraphBuilder::HandlePolymorphicNamedFieldAccess( bool handle_smi = false; STATIC_ASSERT(kMaxLoadPolymorphism == kMaxStorePolymorphism); for (int i = 0; i < types->length() && count < kMaxLoadPolymorphism; ++i) { - PropertyAccessInfo info(this, access_type, ToType(types->at(i)), name); + PropertyAccessInfo info( + this, access_type, ToType(types->at(i)), name, + types->at(i)->instance_type()); if (info.type()->Is(Type::String())) { if (handled_string) continue; handled_string = true; @@ -6268,7 +6323,9 @@ void HOptimizedGraphBuilder::HandlePolymorphicNamedFieldAccess( handled_string = false; for (int i = 0; i < types->length() && count < kMaxLoadPolymorphism; ++i) { - PropertyAccessInfo info(this, access_type, ToType(types->at(i)), name); + PropertyAccessInfo info( + this, access_type, ToType(types->at(i)), name, + types->at(i)->instance_type()); if (info.type()->Is(Type::String())) { if (handled_string) continue; handled_string = true; @@ -6397,6 +6454,88 @@ static bool AreStringTypes(SmallMapList* types) { } +static bool AreInt32x4Types(SmallMapList* types) { + if (types == NULL || types->length() == 0) return false; + for (int i = 0; i < types->length(); i++) { + if (types->at(i)->instance_type() != INT32x4_TYPE) return false; + } + return true; +} + + +static bool AreFloat32x4Types(SmallMapList* types) { + if (types == NULL || types->length() == 0) return false; + for (int i = 0; i < 
types->length(); i++) { + if (types->at(i)->instance_type() != FLOAT32x4_TYPE) return false; + } + return true; +} + + +static bool AreFloat64x2Types(SmallMapList* types) { + if (types == NULL || types->length() == 0) return false; + for (int i = 0; i < types->length(); i++) { + if (types->at(i)->instance_type() != FLOAT64x2_TYPE) return false; + } + return true; +} + + +static BuiltinFunctionId NameToId(Isolate* isolate, Handle name, + InstanceType type) { + BuiltinFunctionId id; + if (name->Equals(isolate->heap()->signMask())) { + if (type == FLOAT32x4_TYPE) { + id = kFloat32x4GetSignMask; + } else if (type == FLOAT64x2_TYPE) { + id = kFloat64x2GetSignMask; + } else { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetSignMask; + } + } else if (name->Equals(isolate->heap()->x())) { + if (type == FLOAT32x4_TYPE) { + id = kFloat32x4GetX; + } else if (type == FLOAT64x2_TYPE) { + id = kFloat64x2GetX; + } else { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetX; + } + } else if (name->Equals(isolate->heap()->y())) { + if (type == FLOAT32x4_TYPE) { + id = kFloat32x4GetY; + } else if (type == FLOAT64x2_TYPE) { + id = kFloat64x2GetY; + } else { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetY; + } + } else if (name->Equals(isolate->heap()->z())) { + id = type == FLOAT32x4_TYPE ? kFloat32x4GetZ : kInt32x4GetZ; + } else if (name->Equals(isolate->heap()->w())) { + id = type == FLOAT32x4_TYPE ? 
kFloat32x4GetW : kInt32x4GetW; + } else if (name->Equals(isolate->heap()->flagX())) { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetFlagX; + } else if (name->Equals(isolate->heap()->flagY())) { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetFlagY; + } else if (name->Equals(isolate->heap()->flagZ())) { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetFlagZ; + } else if (name->Equals(isolate->heap()->flagW())) { + DCHECK(type == INT32x4_TYPE); + id = kInt32x4GetFlagW; + } else { + UNREACHABLE(); + id = kSIMD128Unreachable; + } + + return id; +} + + void HOptimizedGraphBuilder::BuildStore(Expression* expr, Property* prop, BailoutId ast_id, @@ -7253,7 +7392,9 @@ HInstruction* HOptimizedGraphBuilder::BuildNamedAccess( DCHECK(types != NULL); if (types->length() > 0) { - PropertyAccessInfo info(this, access, ToType(types->first()), name); + PropertyAccessInfo info( + this, access, ToType(types->first()), name, + types->first()->instance_type()); if (!info.CanAccessAsMonomorphic(types)) { HandlePolymorphicNamedFieldAccess( access, expr, ast_id, return_id, object, value, types, name); @@ -7264,9 +7405,61 @@ HInstruction* HOptimizedGraphBuilder::BuildNamedAccess( // Type::Number() is only supported by polymorphic load/call handling. 
DCHECK(!info.type()->Is(Type::Number())); BuildCheckHeapObject(object); + if (AreStringTypes(types)) { checked_object = Add(object, HCheckInstanceType::IS_STRING); + } else if (info.IsSIMD128PropertyCallback() && + AreFloat32x4Types(types) && + CpuFeatures::SupportsSIMD128InCrankshaft()) { + Handle function( + isolate()->native_context()->float32x4_function()); + HInstruction* constant_function = Add(function); + HObjectAccess map_access = HObjectAccess::ForPrototypeOrInitialMap(); + HInstruction* map = Add( + constant_function, static_cast(NULL), map_access); + HObjectAccess prototype_access = HObjectAccess::ForMapPrototype(); + HInstruction* prototype = Add( + map, static_cast(NULL), prototype_access); + Handle initial_function_prototype_map( + isolate()->native_context()->float32x4_function_prototype_map()); + Add(prototype, initial_function_prototype_map); + BuiltinFunctionId id = NameToId(isolate(), name, FLOAT32x4_TYPE); + return NewUncasted(object, id); + } else if (info.IsSIMD128PropertyCallback() && + AreFloat64x2Types(types) && + CpuFeatures::SupportsSIMD128InCrankshaft()) { + Handle function( + isolate()->native_context()->float64x2_function()); + HInstruction* constant_function = Add(function); + HObjectAccess map_access = HObjectAccess::ForPrototypeOrInitialMap(); + HInstruction* map = Add( + constant_function, static_cast(NULL), map_access); + HObjectAccess prototype_access = HObjectAccess::ForMapPrototype(); + HInstruction* prototype = Add( + map, static_cast(NULL), prototype_access); + Handle initial_function_prototype_map( + isolate()->native_context()->float64x2_function_prototype_map()); + Add(prototype, initial_function_prototype_map); + BuiltinFunctionId id = NameToId(isolate(), name, FLOAT64x2_TYPE); + return NewUncasted(object, id); + } else if (info.IsSIMD128PropertyCallback() && + AreInt32x4Types(types) && + CpuFeatures::SupportsSIMD128InCrankshaft()) { + Handle function( + isolate()->native_context()->int32x4_function()); + HInstruction* 
constant_function = Add(function); + HObjectAccess map_access = HObjectAccess::ForPrototypeOrInitialMap(); + HInstruction* map = Add( + constant_function, static_cast(NULL), map_access); + HObjectAccess prototype_access = HObjectAccess::ForMapPrototype(); + HInstruction* prototype = Add( + map, static_cast(NULL), prototype_access); + Handle initial_function_prototype_map( + isolate()->native_context()->int32x4_function_prototype_map()); + Add(prototype, initial_function_prototype_map); + BuiltinFunctionId id = NameToId(isolate(), name, INT32x4_TYPE); + return NewUncasted(object, id); } else { checked_object = Add(object, types); } @@ -7475,7 +7668,9 @@ void HOptimizedGraphBuilder::HandlePolymorphicCallNamed( for (int i = 0; i < types->length() && ordered_functions < kMaxCallPolymorphism; ++i) { - PropertyAccessInfo info(this, LOAD, ToType(types->at(i)), name); + PropertyAccessInfo info( + this, LOAD, ToType(types->at(i)), name, + types->at(i)->instance_type()); if (info.CanAccessMonomorphic() && info.lookup()->IsConstant() && info.constant()->IsJSFunction()) { @@ -7502,7 +7697,8 @@ void HOptimizedGraphBuilder::HandlePolymorphicCallNamed( for (int fn = 0; fn < ordered_functions; ++fn) { int i = order[fn].index(); - PropertyAccessInfo info(this, LOAD, ToType(types->at(i)), name); + PropertyAccessInfo info(this, LOAD, ToType(types->at(i)), name, + types->at(i)->instance_type()); if (info.type()->Is(Type::String())) { if (handled_string) continue; handled_string = true; @@ -8096,6 +8292,78 @@ bool HOptimizedGraphBuilder::TryInlineBuiltinFunctionCall(Call* expr) { return true; } break; +#define SIMD_NULLARY_OPERATION_CASE_ITEM(p1, p2, name, p4) \ + case k##name: +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && + expr->arguments()->length() == 0) { + Drop(2); // Receiver and function. 
+ HInstruction* op = NewUncasted(id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_UNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5) \ + case k##name: +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && + expr->arguments()->length() == 1) { + HValue* argument = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = NewUncasted(argument, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_BINARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5, p6) \ + case k##name: +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && + expr->arguments()->length() == 2) { + HValue* right = Pop(); + HValue* left = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = NewUncasted(left, right, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_TERNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5, p6, p7) \ + case k##name: +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && + expr->arguments()->length() == 3) { + HValue* right = Pop(); + HValue* left = Pop(); + HValue* value = Pop(); + Drop(2); // Receiver and function. 
+ HInstruction* op = + NewUncasted(value, left, right, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5, p6, p7, p8) \ + case k##name: +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && + expr->arguments()->length() == 4) { + HValue* w = Pop(); + HValue* z = Pop(); + HValue* y = Pop(); + HValue* x = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = + NewUncasted(x, y, z, w, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; default: // Not supported for inlining yet. break; @@ -8479,6 +8747,143 @@ bool HOptimizedGraphBuilder::TryInlineBuiltinMethodCall( ast_context()->ReturnValue(index); return true; } +#define SIMD_NULLARY_OPERATION_CASE_ITEM(p1, p2, name, p4) \ + case k##name: +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 1) { + Drop(2); // Receiver and function. + HInstruction* op = NewUncasted(id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_UNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5) \ + case k##name: +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 2) { + HValue* argument = Pop(); + Drop(2); // Receiver and function. 
+ HInstruction* op = NewUncasted(argument, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_BINARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5, p6) \ + case k##name: +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 3) { + HValue* right = Pop(); + HValue* left = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = NewUncasted(left, right, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_TERNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5, p6, p7) \ + case k##name: +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 4) { + HValue* right = Pop(); + HValue* left = Pop(); + HValue* value = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = + NewUncasted(value, left, right, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } + break; +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(p1, p2, name, p4, p5, p6, p7, p8) \ + case k##name: +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 5) { + HValue* w = Pop(); + HValue* z = Pop(); + HValue* y = Pop(); + HValue* x = Pop(); + Drop(2); // Receiver and function. + HValue* context = environment()->context(); + HInstruction* op = + HQuarternarySIMDOperation::New(zone(), context, x, y, z, w, id); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } else if (CpuFeatures::SupportsSIMD128InCrankshaft() && + argument_count == 2) { + if (id == kFloat32x4Constructor) { + HValue* argument = Pop(); + Drop(2); // Receiver and function. 
+ HInstruction* op = + NewUncasted(argument, kFloat32x4Coercion); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } else if (id == kInt32x4Constructor) { + HValue* argument = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = + NewUncasted(argument, kInt32x4Coercion); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } else if (id == kFloat64x2Constructor) { + HValue* argument = Pop(); + Drop(2); // Receiver and function. + HInstruction* op = + NewUncasted(argument, kFloat64x2Coercion); + ast_context()->ReturnInstruction(op, expr->id()); + return true; + } else { + return false; + } + } + break; + case kFloat32x4ArrayGetAt: + case kFloat64x2ArrayGetAt: + case kInt32x4ArrayGetAt: + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 2) { + HValue* key = Pop(); + HValue* typed32x4_array = Pop(); + DCHECK(typed32x4_array == receiver); + Drop(1); // Drop function. + HInstruction* instr = BuildUncheckedMonomorphicElementAccess( + typed32x4_array, key, NULL, + receiver_map->instance_type() == JS_ARRAY_TYPE, + receiver_map->elements_kind(), + LOAD, // is_store. + NEVER_RETURN_HOLE, // load_mode. + STANDARD_STORE); + ast_context()->ReturnValue(instr); + return true; + } + break; + case kFloat32x4ArraySetAt: + case kFloat64x2ArraySetAt: + case kInt32x4ArraySetAt: + if (CpuFeatures::SupportsSIMD128InCrankshaft() && argument_count == 3) { + HValue* value = Pop(); + HValue* key = Pop(); + HValue* typed32x4_array = Pop(); + DCHECK(typed32x4_array == receiver); + Drop(1); // Drop function. + // TODO(haitao): add STORE_NO_TRANSITION_IGNORE_OUT_OF_BOUNDS. + KeyedAccessStoreMode store_mode = STANDARD_STORE; + BuildUncheckedMonomorphicElementAccess( + typed32x4_array, key, value, + receiver_map->instance_type() == JS_ARRAY_TYPE, + receiver_map->elements_kind(), + STORE, // is_store. + NEVER_RETURN_HOLE, // load_mode. 
+ store_mode); + Push(value); + Add(expr->id(), REMOVABLE_SIMULATE); + ast_context()->ReturnValue(Pop()); + return true; + } + break; default: // Not yet supported for inlining. break; @@ -8967,7 +9372,8 @@ void HOptimizedGraphBuilder::VisitCall(Call* expr) { if (prop->key()->IsPropertyName() && types->length() > 0) { Handle name = prop->key()->AsLiteral()->AsPropertyName(); - PropertyAccessInfo info(this, LOAD, ToType(types->first()), name); + PropertyAccessInfo info(this, LOAD, ToType(types->first()), name, + types->first()->instance_type()); if (!info.CanAccessAsMonomorphic(types)) { HandlePolymorphicCallNamed(expr, receiver, types, name); return; @@ -9534,6 +9940,13 @@ HValue* HOptimizedGraphBuilder::BuildAllocateFixedTypedArray( length); HValue* filler = Add(static_cast(0)); + if (IsFixedFloat32x4ElementsKind(fixed_elements_kind)) { + filler = AddUncasted(kFloat32x4Zero); + } else if (IsFixedFloat64x2ElementsKind(fixed_elements_kind)) { + filler = AddUncasted(kFloat64x2Zero); + } else if (IsFixedInt32x4ElementsKind(fixed_elements_kind)) { + filler = AddUncasted(kInt32x4Zero); + } { LoopBuilder builder(this, context(), LoopBuilder::kPostIncrement); @@ -12347,6 +12760,15 @@ void HTracer::TraceLiveRange(LiveRange* range, const char* type, if (op->IsDoubleRegister()) { trace_.Add(" \"%s\"", DoubleRegister::AllocationIndexToString(assigned_reg)); + } else if (op->IsFloat32x4Register()) { + trace_.Add(" \"%s\"", + SIMD128Register::AllocationIndexToString(assigned_reg)); + } else if (op->IsFloat64x2Register()) { + trace_.Add(" \"%s\"", + SIMD128Register::AllocationIndexToString(assigned_reg)); + } else if (op->IsInt32x4Register()) { + trace_.Add(" \"%s\"", + SIMD128Register::AllocationIndexToString(assigned_reg)); } else { DCHECK(op->IsRegister()); trace_.Add(" \"%s\"", Register::AllocationIndexToString(assigned_reg)); @@ -12355,6 +12777,12 @@ void HTracer::TraceLiveRange(LiveRange* range, const char* type, LOperand* op = range->TopLevel()->GetSpillOperand(); if 
(op->IsDoubleStackSlot()) { trace_.Add(" \"double_stack:%d\"", op->index()); + } else if (op->IsFloat32x4StackSlot()) { + trace_.Add(" \"float32x4_stack:%d\"", op->index()); + } else if (op->IsFloat64x2StackSlot()) { + trace_.Add(" \"float64x2_stack:%d\"", op->index()); + } else if (op->IsInt32x4StackSlot()) { + trace_.Add(" \"int32x4_stack:%d\"", op->index()); } else { DCHECK(op->IsStackSlot()); trace_.Add(" \"stack:%d\"", op->index()); diff --git a/src/v8/src/hydrogen.h b/src/v8/src/hydrogen.h index bc91e19..11f5950 100644 --- a/src/v8/src/hydrogen.h +++ b/src/v8/src/hydrogen.h @@ -2445,14 +2445,16 @@ class HOptimizedGraphBuilder : public HGraphBuilder, public AstVisitor { PropertyAccessInfo(HOptimizedGraphBuilder* builder, PropertyAccessType access_type, Type* type, - Handle name) + Handle name, + InstanceType instance_type) : lookup_(builder->isolate()), builder_(builder), access_type_(access_type), type_(type), name_(name), field_type_(HType::Tagged()), - access_(HObjectAccess::ForMap()) { } + access_(HObjectAccess::ForMap()), + instance_type_(instance_type) { } // Checkes whether this PropertyAccessInfo can be handled as a monomorphic // load named. 
It additionally fills in the fields necessary to generate the @@ -2497,6 +2499,7 @@ class HOptimizedGraphBuilder : public HGraphBuilder, public AstVisitor { bool IsLoad() const { return access_type_ == LOAD; } LookupResult* lookup() { return &lookup_; } + Handle name() { return name_; } Handle holder() { return holder_; } Handle accessor() { return accessor_; } Handle constant() { return constant_; } @@ -2505,6 +2508,25 @@ class HOptimizedGraphBuilder : public HGraphBuilder, public AstVisitor { HType field_type() const { return field_type_; } HObjectAccess access() { return access_; } + bool IsSIMD128PropertyCallback() { + return (((instance_type_ == Float32x4::kInstanceType || + instance_type_ == Int32x4::kInstanceType) && + (name_->Equals(isolate()->heap()->signMask()) || + name_->Equals(isolate()->heap()->x()) || + name_->Equals(isolate()->heap()->y()) || + name_->Equals(isolate()->heap()->z()) || + name_->Equals(isolate()->heap()->w()))) || + (instance_type_ == Int32x4::kInstanceType && + (name_->Equals(isolate()->heap()->flagX()) || + name_->Equals(isolate()->heap()->flagY()) || + name_->Equals(isolate()->heap()->flagZ()) || + name_->Equals(isolate()->heap()->flagW()))) || + (instance_type_ == Float64x2::kInstanceType && + (name_->Equals(isolate()->heap()->signMask()) || + name_->Equals(isolate()->heap()->x()) || + name_->Equals(isolate()->heap()->y())))); + } + private: Type* ToType(Handle map) { return builder_->ToType(map); } Zone* zone() { return builder_->zone(); } @@ -2535,6 +2557,7 @@ class HOptimizedGraphBuilder : public HGraphBuilder, public AstVisitor { SmallMapList field_maps_; HType field_type_; HObjectAccess access_; + InstanceType instance_type_; }; HInstruction* BuildMonomorphicAccess(PropertyAccessInfo* info, diff --git a/src/v8/src/ia32/assembler-ia32-inl.h b/src/v8/src/ia32/assembler-ia32-inl.h index c7ec6d9..f90820b 100644 --- a/src/v8/src/ia32/assembler-ia32-inl.h +++ b/src/v8/src/ia32/assembler-ia32-inl.h @@ -46,6 +46,7 @@ namespace v8 { 
namespace internal { bool CpuFeatures::SupportsCrankshaft() { return true; } +bool CpuFeatures::SupportsSIMD128InCrankshaft() { return true; } static const byte kCallOpcode = 0xE8; diff --git a/src/v8/src/ia32/assembler-ia32.cc b/src/v8/src/ia32/assembler-ia32.cc index d8cd59c..4515b17 100644 --- a/src/v8/src/ia32/assembler-ia32.cc +++ b/src/v8/src/ia32/assembler-ia32.cc @@ -208,6 +208,52 @@ Operand::Operand(Register index, } +Operand::Operand(const Operand& operand, int32_t offset) { + DCHECK(operand.len_ >= 1); + // Operand encodes REX ModR/M [SIB] [Disp]. + byte modrm = operand.buf_[0]; + DCHECK(modrm < 0xC0); // Disallow mode 3 (register target). + bool has_sib = ((modrm & 0x07) == 0x04); + byte mode = modrm & 0xC0; + int disp_offset = has_sib ? 2 : 1; + int base_reg = (has_sib ? operand.buf_[1] : modrm) & 0x07; + // Mode 0 with rbp/r13 as ModR/M or SIB base register always has a 32-bit + // displacement. + bool is_baseless = (mode == 0) && (base_reg == 0x05); // No base or RIP base. + int32_t disp_value = 0; + if (mode == 0x80 || is_baseless) { + // Mode 2 or mode 0 with rbp/r13 as base: Word displacement. + disp_value = *BitCast(&operand.buf_[disp_offset]); + } else if (mode == 0x40) { + // Mode 1: Byte displacement. + disp_value = static_cast(operand.buf_[disp_offset]); + } + + // Write new operand with same registers, but with modified displacement. + DCHECK(offset >= 0 ? disp_value + offset >= disp_value + : disp_value + offset < disp_value); // No overflow. + disp_value += offset; + if (!is_int8(disp_value) || is_baseless) { + // Need 32 bits of displacement, mode 2 or mode 1 with register rbp/r13. + buf_[0] = (modrm & 0x3f) | (is_baseless ? 0x00 : 0x80); + len_ = disp_offset + 4; + Memory::int32_at(&buf_[disp_offset]) = disp_value; + } else if (disp_value != 0 || (base_reg == 0x05)) { + // Need 8 bits of displacement. + buf_[0] = (modrm & 0x3f) | 0x40; // Mode 1. 
+ len_ = disp_offset + 1; + buf_[disp_offset] = static_cast(disp_value); + } else { + // Need no displacement. + buf_[0] = (modrm & 0x3f); // Mode 0. + len_ = disp_offset; + } + if (has_sib) { + buf_[1] = operand.buf_[1]; + } +} + + bool Operand::is_reg(Register reg) const { return ((buf_[0] & 0xF8) == 0xC0) // addressing mode is register only. && ((buf_[0] & 0x07) == reg.code()); // register codes match. @@ -2031,6 +2077,15 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) { } +void Assembler::xorpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x57); + emit_sse_operand(dst, src); +} + + void Assembler::andps(XMMRegister dst, const Operand& src) { EnsureSpace ensure_space(this); EMIT(0x0F); @@ -2087,6 +2142,42 @@ void Assembler::divps(XMMRegister dst, const Operand& src) { } +void Assembler::addpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x58); + emit_sse_operand(dst, src); +} + + +void Assembler::subpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x5C); + emit_sse_operand(dst, src); +} + + +void Assembler::mulpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x59); + emit_sse_operand(dst, src); +} + + +void Assembler::divpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x5E); + emit_sse_operand(dst, src); +} + + void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { EnsureSpace ensure_space(this); EMIT(0xF2); @@ -2114,6 +2205,15 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) { } +void Assembler::andpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x54); + emit_sse_operand(dst, src); +} + + void Assembler::orpd(XMMRegister dst, XMMRegister src) { EnsureSpace 
ensure_space(this); EMIT(0x66); @@ -2171,6 +2271,15 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { } +void Assembler::pcmpgtd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x66); + emit_sse_operand(dst, src); +} + + void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) { EnsureSpace ensure_space(this); EMIT(0xF2); @@ -2189,6 +2298,22 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { } +void Assembler::movups(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x10); + emit_sse_operand(dst, src); +} + + +void Assembler::movups(const Operand& dst, XMMRegister src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x11); + emit_sse_operand(src, dst); +} + + void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { DCHECK(is_uint8(imm8)); EnsureSpace ensure_space(this); @@ -2199,6 +2324,17 @@ void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { } +void Assembler::shufpd(XMMRegister dst, XMMRegister src, byte imm8) { + DCHECK(is_uint8(imm8)); + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xC6); + emit_sse_operand(dst, src); + EMIT(imm8); +} + + void Assembler::movdqa(const Operand& dst, XMMRegister src) { EnsureSpace ensure_space(this); EMIT(0x66); @@ -2390,6 +2526,63 @@ void Assembler::psllq(XMMRegister dst, XMMRegister src) { } +void Assembler::pslld(XMMRegister reg, int8_t shift) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x72); + emit_sse_operand(esi, reg); // esi == 6 + EMIT(shift); +} + + +void Assembler::pslld(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xF2); + emit_sse_operand(dst, src); +} + + +void Assembler::psrld(XMMRegister reg, int8_t shift) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x72); + emit_sse_operand(edx, reg); // edx == 2 + EMIT(shift); +} + 
+ +void Assembler::psrld(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xD2); + emit_sse_operand(dst, src); +} + + +void Assembler::psrad(XMMRegister reg, int8_t shift) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x72); + emit_sse_operand(esp, reg); // esp == 4 + EMIT(shift); +} + + +void Assembler::psrad(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xE2); + emit_sse_operand(dst, src); +} + + void Assembler::psrlq(XMMRegister reg, int8_t shift) { EnsureSpace ensure_space(this); EMIT(0x66); @@ -2409,6 +2602,16 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) { } +void Assembler::psrldq(XMMRegister dst, int8_t shift) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x73); + emit_sse_operand(ebx, dst); // ebx == 3 + EMIT(shift); +} + + void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) { EnsureSpace ensure_space(this); EMIT(0x66); @@ -2443,6 +2646,189 @@ void Assembler::pinsrd(XMMRegister dst, const Operand& src, int8_t offset) { } +void Assembler::minps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x5D); + emit_sse_operand(dst, src); +} + + +void Assembler::maxps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x5F); + emit_sse_operand(dst, src); +} + + +void Assembler::minpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x5D); + emit_sse_operand(dst, src); +} + + +void Assembler::maxpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x5F); + emit_sse_operand(dst, src); +} + + +void Assembler::rcpps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x53); + emit_sse_operand(dst, src); +} + + +void 
Assembler::rsqrtps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x52); + emit_sse_operand(dst, src); +} + + +void Assembler::sqrtps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x51); + emit_sse_operand(dst, src); +} + + +void Assembler::sqrtpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x51); + emit_sse_operand(dst, src); +} + + +void Assembler::cvtdq2ps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0x5B); + emit_sse_operand(dst, src); +} + + +void Assembler::paddd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xFE); + emit_sse_operand(dst, src); +} + + +void Assembler::psubd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xFA); + emit_sse_operand(dst, src); +} + + +void Assembler::pmulld(XMMRegister dst, const Operand& src) { + DCHECK(IsEnabled(SSE4_1)); + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x38); + EMIT(0x40); + emit_sse_operand(dst, src); +} + + +void Assembler::pmuludq(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0xF4); + emit_sse_operand(dst, src); +} + + +void Assembler::punpackldq(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x62); + emit_sse_operand(dst, src); +} + + +void Assembler::cvtps2dq(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x5B); + emit_sse_operand(dst, src); +} + + +void Assembler::cmpps(XMMRegister dst, XMMRegister src, int8_t cmp) { + EnsureSpace ensure_space(this); + EMIT(0x0F); + EMIT(0xC2); + emit_sse_operand(dst, src); + EMIT(cmp); +} + + +void Assembler::cmpeqps(XMMRegister dst, XMMRegister 
src) { + cmpps(dst, src, 0x0); +} + + +void Assembler::cmpltps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x1); +} + + +void Assembler::cmpleps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x2); +} + + +void Assembler::cmpneqps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x4); +} + + +void Assembler::cmpnltps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x5); +} + + +void Assembler::cmpnleps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x6); +} + + +void Assembler::insertps(XMMRegister dst, XMMRegister src, byte imm8) { + DCHECK(CpuFeatures::IsSupported(SSE4_1)); + DCHECK(is_uint8(imm8)); + EnsureSpace ensure_space(this); + EMIT(0x66); + EMIT(0x0F); + EMIT(0x3A); + EMIT(0x21); + emit_sse_operand(dst, src); + EMIT(imm8); +} + + void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) { Register ireg = { reg.code() }; emit_operand(ireg, adr); @@ -2485,7 +2871,7 @@ void Assembler::RecordDebugBreakSlot() { void Assembler::RecordComment(const char* msg, bool force) { if (FLAG_code_comments || force) { - EnsureSpace ensure_space(this); + EnsureSpace ensure_space(this); RecordRelocInfo(RelocInfo::COMMENT, reinterpret_cast(msg)); } } diff --git a/src/v8/src/ia32/assembler-ia32.h b/src/v8/src/ia32/assembler-ia32.h index 5febffd..32a728a 100644 --- a/src/v8/src/ia32/assembler-ia32.h +++ b/src/v8/src/ia32/assembler-ia32.h @@ -193,7 +193,7 @@ struct XMMRegister { typedef XMMRegister DoubleRegister; - +typedef XMMRegister SIMD128Register; const XMMRegister xmm0 = { 0 }; const XMMRegister xmm1 = { 1 }; @@ -314,6 +314,7 @@ enum ScaleFactor { times_2 = 1, times_4 = 2, times_8 = 3, + maximal_scale_factor = times_8, times_int_size = times_4, times_half_pointer_size = times_2, times_pointer_size = times_4, @@ -352,6 +353,11 @@ class Operand BASE_EMBEDDED { int32_t disp, RelocInfo::Mode rmode = RelocInfo::NONE32); + // Offset from existing memory operand. 
+ // Offset is added to existing displacement as 32-bit signed values and + // this must not overflow. + Operand(const Operand& base, int32_t offset); + static Operand StaticVariable(const ExternalReference& ext) { return Operand(reinterpret_cast(ext.address()), RelocInfo::EXTERNAL_REFERENCE); @@ -919,7 +925,10 @@ class Assembler : public AssemblerBase { // SSE instructions void movaps(XMMRegister dst, XMMRegister src); + void movups(XMMRegister dst, const Operand& src); + void movups(const Operand& dst, XMMRegister src); void shufps(XMMRegister dst, XMMRegister src, byte imm8); + void shufpd(XMMRegister dst, XMMRegister src, byte imm8); void andps(XMMRegister dst, const Operand& src); void andps(XMMRegister dst, XMMRegister src) { andps(dst, Operand(src)); } @@ -936,6 +945,63 @@ class Assembler : public AssemblerBase { void mulps(XMMRegister dst, XMMRegister src) { mulps(dst, Operand(src)); } void divps(XMMRegister dst, const Operand& src); void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); } + void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); } + void minps(XMMRegister dst, const Operand& src); + void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); } + void maxps(XMMRegister dst, const Operand& src); + void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); } + void rcpps(XMMRegister dst, const Operand& src); + void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); } + void rsqrtps(XMMRegister dst, const Operand& src); + void sqrtps(XMMRegister dst, XMMRegister src) { sqrtps(dst, Operand(src)); } + void sqrtps(XMMRegister dst, const Operand& src); + void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); } + void sqrtpd(XMMRegister dst, const Operand& src); + + void addpd(XMMRegister dst, const Operand& src); + void addpd(XMMRegister dst, XMMRegister src) { addpd(dst, Operand(src)); } + void subpd(XMMRegister dst, const Operand& src); + void 
subpd(XMMRegister dst, XMMRegister src) { subpd(dst, Operand(src)); } + void mulpd(XMMRegister dst, const Operand& src); + void mulpd(XMMRegister dst, XMMRegister src) { mulpd(dst, Operand(src)); } + void divpd(XMMRegister dst, const Operand& src); + void divpd(XMMRegister dst, XMMRegister src) { divpd(dst, Operand(src)); } + void minpd(XMMRegister dst, XMMRegister src) { minpd(dst, Operand(src)); } + void minpd(XMMRegister dst, const Operand& src); + void maxpd(XMMRegister dst, XMMRegister src) { maxpd(dst, Operand(src)); } + void maxpd(XMMRegister dst, const Operand& src); + + void cvtdq2ps(XMMRegister dst, const Operand& src); + void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp); + void cmpeqps(XMMRegister dst, XMMRegister src); + void cmpltps(XMMRegister dst, XMMRegister src); + void cmpleps(XMMRegister dst, XMMRegister src); + void cmpneqps(XMMRegister dst, XMMRegister src); + void cmpnltps(XMMRegister dst, XMMRegister src); + void cmpnleps(XMMRegister dst, XMMRegister src); + + // SSE 2, introduced by SIMD + void paddd(XMMRegister dst, XMMRegister src) { paddd(dst, Operand(src)); } + void paddd(XMMRegister dst, const Operand& src); + void psubd(XMMRegister dst, XMMRegister src) { psubd(dst, Operand(src)); } + void psubd(XMMRegister dst, const Operand& src); + void pmuludq(XMMRegister dst, XMMRegister src) { pmuludq(dst, Operand(src)); } + void pmuludq(XMMRegister dst, const Operand& src); + void punpackldq(XMMRegister dst, XMMRegister src) { + punpackldq(dst, Operand(src)); + } + void punpackldq(XMMRegister dst, const Operand& src); + void cvtps2dq(XMMRegister dst, XMMRegister src) { + cvtps2dq(dst, Operand(src)); + } + void cvtps2dq(XMMRegister dst, const Operand& src); + void cvtdq2ps(XMMRegister dst, XMMRegister src) { + cvtdq2ps(dst, Operand(src)); + } + // SSE 4.1, introduced by SIMD + void insertps(XMMRegister dst, XMMRegister src, byte imm8); + void pmulld(XMMRegister dst, XMMRegister src) { pmulld(dst, Operand(src)); } + void pmulld(XMMRegister 
dst, const Operand& src); // SSE2 instructions void cvttss2si(Register dst, const Operand& src); @@ -960,10 +1026,12 @@ class Assembler : public AssemblerBase { void mulsd(XMMRegister dst, const Operand& src); void divsd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src); + void xorpd(XMMRegister dst, const Operand& src); void sqrtsd(XMMRegister dst, XMMRegister src); void sqrtsd(XMMRegister dst, const Operand& src); void andpd(XMMRegister dst, XMMRegister src); + void andpd(XMMRegister dst, const Operand& src); void orpd(XMMRegister dst, XMMRegister src); void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); } @@ -983,6 +1051,7 @@ class Assembler : public AssemblerBase { void cmpltsd(XMMRegister dst, XMMRegister src); void pcmpeqd(XMMRegister dst, XMMRegister src); + void pcmpgtd(XMMRegister dst, XMMRegister src); void movdqa(XMMRegister dst, const Operand& src); void movdqa(const Operand& dst, XMMRegister src); @@ -1017,8 +1086,15 @@ class Assembler : public AssemblerBase { void psllq(XMMRegister reg, int8_t shift); void psllq(XMMRegister dst, XMMRegister src); + void pslld(XMMRegister reg, int8_t shift); + void pslld(XMMRegister dst, XMMRegister src); + void psrld(XMMRegister reg, int8_t shift); + void psrld(XMMRegister dst, XMMRegister src); + void psrad(XMMRegister reg, int8_t shift); + void psrad(XMMRegister dst, XMMRegister src); void psrlq(XMMRegister reg, int8_t shift); void psrlq(XMMRegister dst, XMMRegister src); + void psrldq(XMMRegister dst, int8_t shift); void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle); void pextrd(Register dst, XMMRegister src, int8_t offset) { pextrd(Operand(dst), src, offset); diff --git a/src/v8/src/ia32/deoptimizer-ia32.cc b/src/v8/src/ia32/deoptimizer-ia32.cc index 5fac885..8e300e5 100644 --- a/src/v8/src/ia32/deoptimizer-ia32.cc +++ b/src/v8/src/ia32/deoptimizer-ia32.cc @@ -182,8 +182,9 @@ void Deoptimizer::FillInputFrame(Address tos, JavaScriptFrame* frame) { } 
input_->SetRegister(esp.code(), reinterpret_cast(frame->sp())); input_->SetRegister(ebp.code(), reinterpret_cast(frame->fp())); + simd128_value_t zero = {{0.0, 0.0}}; for (int i = 0; i < XMMRegister::kMaxNumAllocatableRegisters; i++) { - input_->SetDoubleRegister(i, 0.0); + input_->SetSIMD128Register(i, zero); } // Fill the frame content from the actual data on the frame. @@ -203,10 +204,10 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( } -void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { +void Deoptimizer::CopySIMD128Registers(FrameDescription* output_frame) { for (int i = 0; i < XMMRegister::kMaxNumAllocatableRegisters; ++i) { - double double_value = input_->GetDoubleRegister(i); - output_frame->SetDoubleRegister(i, double_value); + simd128_value_t xmm_value = input_->GetSIMD128Register(i); + output_frame->SetSIMD128Register(i, xmm_value); } } @@ -233,19 +234,19 @@ void Deoptimizer::EntryGenerator::Generate() { // Save all general purpose registers before messing with them. const int kNumberOfRegisters = Register::kNumRegisters; - const int kDoubleRegsSize = kDoubleSize * - XMMRegister::kMaxNumAllocatableRegisters; - __ sub(esp, Immediate(kDoubleRegsSize)); + const int kXMMRegsSize = kSIMD128Size * + XMMRegister::kMaxNumAllocatableRegisters; + __ sub(esp, Immediate(kXMMRegsSize)); for (int i = 0; i < XMMRegister::kMaxNumAllocatableRegisters; ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int offset = i * kDoubleSize; - __ movsd(Operand(esp, offset), xmm_reg); + int offset = i * kSIMD128Size; + __ movups(Operand(esp, offset), xmm_reg); } __ pushad(); const int kSavedRegistersAreaSize = kNumberOfRegisters * kPointerSize + - kDoubleRegsSize; + kXMMRegsSize; // Get the bailout id from the stack. 
__ mov(ebx, Operand(esp, kSavedRegistersAreaSize)); @@ -283,13 +284,13 @@ void Deoptimizer::EntryGenerator::Generate() { __ pop(Operand(ebx, offset)); } - int double_regs_offset = FrameDescription::double_registers_offset(); + int xmm_regs_offset = FrameDescription::simd128_registers_offset(); // Fill in the double input registers. for (int i = 0; i < XMMRegister::kMaxNumAllocatableRegisters; ++i) { - int dst_offset = i * kDoubleSize + double_regs_offset; - int src_offset = i * kDoubleSize; - __ movsd(xmm0, Operand(esp, src_offset)); - __ movsd(Operand(ebx, dst_offset), xmm0); + int dst_offset = i * kSIMD128Size + xmm_regs_offset; + int src_offset = i * kSIMD128Size; + __ movups(xmm0, Operand(esp, src_offset)); + __ movups(Operand(ebx, dst_offset), xmm0); } // Clear FPU all exceptions. @@ -298,7 +299,7 @@ void Deoptimizer::EntryGenerator::Generate() { __ fnclex(); // Remove the bailout id, return address and the double registers. - __ add(esp, Immediate(kDoubleRegsSize + 2 * kPointerSize)); + __ add(esp, Immediate(kXMMRegsSize + 2 * kPointerSize)); // Compute a pointer to the unwinding limit in register ecx; that is // the first stack slot not part of the input frame. @@ -370,8 +371,8 @@ void Deoptimizer::EntryGenerator::Generate() { // In case of a failed STUB, we have to restore the XMM registers. for (int i = 0; i < XMMRegister::kMaxNumAllocatableRegisters; ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int src_offset = i * kDoubleSize + double_regs_offset; - __ movsd(xmm_reg, Operand(ebx, src_offset)); + int src_offset = i * kSIMD128Size + xmm_regs_offset; + __ movups(xmm_reg, Operand(ebx, src_offset)); } // Push state, pc, and continuation from the last output frame. 
@@ -424,6 +425,18 @@ void FrameDescription::SetCallerConstantPool(unsigned offset, intptr_t value) { } +double FrameDescription::GetDoubleRegister(unsigned n) const { + DCHECK(n < ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n].d[0]; +} + + +void FrameDescription::SetDoubleRegister(unsigned n, double value) { + DCHECK(n < ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n].d[0] = value; +} + + #undef __ diff --git a/src/v8/src/ia32/disasm-ia32.cc b/src/v8/src/ia32/disasm-ia32.cc index 22c2a55..e36b390 100644 --- a/src/v8/src/ia32/disasm-ia32.cc +++ b/src/v8/src/ia32/disasm-ia32.cc @@ -1047,8 +1047,23 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(regop), NameOfXMMRegister(rm)); data++; - } else if (f0byte >= 0x53 && f0byte <= 0x5F) { + } else if (f0byte == 0x10) { + data += 2; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("movups %s,", NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (f0byte == 0x11) { + AppendToBuffer("movups "); + data += 2; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + data += PrintRightXMMOperand(data); + AppendToBuffer(",%s", NameOfXMMRegister(regop)); + } else if (f0byte >= 0x51 && f0byte <= 0x5F) { const char* const pseudo_op[] = { + "sqrtps", + "rsqrtps", "rcpps", "andps", "andnps", @@ -1061,14 +1076,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, "subps", "minps", "divps", - "maxps", + "maxps" }; data += 2; int mod, regop, rm; get_modrm(*data, &mod, ®op, &rm); AppendToBuffer("%s %s,", - pseudo_op[f0byte - 0x53], + pseudo_op[f0byte - 0x51], NameOfXMMRegister(regop)); data += PrintRightXMMOperand(data); } else if (f0byte == 0x50) { @@ -1079,6 +1094,26 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfCPURegister(regop), NameOfXMMRegister(rm)); data++; + } else if (f0byte == 0xC2) { + // Intel manual 2A, Table 3-11. 
+ data += 2; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + const char* const pseudo_op[] = { + "cmpeqps", + "cmpltps", + "cmpleps", + "cmpunordps", + "cmpneqps", + "cmpnltps", + "cmpnleps", + "cmpordps" + }; + AppendToBuffer("%s %s,%s", + pseudo_op[data[1]], + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + data += 2; } else if (f0byte== 0xC6) { // shufps xmm, xmm/m128, imm8 data += 2; @@ -1090,6 +1125,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(regop), static_cast(imm8)); data += 2; + } else if (f0byte== 0x5B) { + data += 2; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("cvtdq2ps %s,", + NameOfXMMRegister(rm)); + data += PrintRightXMMOperand(data); } else if ((f0byte & 0xF0) == 0x80) { data += JumpConditional(data, branch_hint); } else if (f0byte == 0xBE || f0byte == 0xBF || f0byte == 0xB6 || @@ -1244,6 +1286,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(regop), NameOfXMMRegister(rm)); data++; + } else if (*data == 0x40) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("pmulld %s,%s", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); } else if (*data == 0x2A) { // movntdqa data++; @@ -1276,6 +1325,16 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(rm), static_cast(imm8)); data += 2; + } else if (*data == 0x21) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + int8_t imm8 = static_cast(data[1]); + AppendToBuffer("insertps %s,%s,%d", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm), + static_cast(imm8)); + data += 2; } else if (*data == 0x17) { data++; int mod, regop, rm; @@ -1321,6 +1380,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfCPURegister(regop), NameOfXMMRegister(rm)); data++; + } else if (*data == 0x51) { + data++; + int mod, regop, rm; + 
get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("sqrtpd %s,%s", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + data++; } else if (*data == 0x54) { data++; int mod, regop, rm; @@ -1341,10 +1408,86 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, data++; int mod, regop, rm; get_modrm(*data, &mod, ®op, &rm); - AppendToBuffer("xorpd %s,%s", - NameOfXMMRegister(regop), - NameOfXMMRegister(rm)); + AppendToBuffer("xorpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x58) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("addpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x59) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("mulpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x5B) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("cvtps2dq %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x5C) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("subpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x5D) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("minpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x5E) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("divpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x5F) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("maxpd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0x62) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + 
AppendToBuffer("punpackldq %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0xF4) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("pmuludq %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0xFA) { data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("psubd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); + } else if (*data == 0xFE) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("paddd %s,", + NameOfXMMRegister(regop)); + data += PrintRightXMMOperand(data); } else if (*data == 0x6E) { data++; int mod, regop, rm; @@ -1367,6 +1510,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(rm), static_cast(imm8)); data += 2; + } else if (*data == 0x66) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("pcmpgtd %s,%s", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + data++; } else if (*data == 0x76) { data++; int mod, regop, rm; @@ -1397,6 +1548,44 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(rm), static_cast(imm8)); data += 2; + } else if (*data == 0xF2) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("pslld %s,%s", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + data++; + } else if (*data == 0x72) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + int8_t imm8 = static_cast(data[1]); + DCHECK(regop == esi || regop == edx); + AppendToBuffer("%s %s,%d", + (regop == esi) ? "pslld" + : ((regop == edx) ? 
"psrld" : "psrad"), + NameOfXMMRegister(rm), + static_cast(imm8)); + data += 2; + } else if (*data == 0xC6) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + int8_t imm8 = static_cast(data[1]); + AppendToBuffer("shufpd %s,%s,%d", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm), + static_cast(imm8)); + data += 2; + } else if (*data == 0xD2) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("psrld %s,%s", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + data++; } else if (*data == 0xD3) { data++; int mod, regop, rm; @@ -1405,6 +1594,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector out_buffer, NameOfXMMRegister(regop), NameOfXMMRegister(rm)); data++; + } else if (*data == 0xE2) { + data++; + int mod, regop, rm; + get_modrm(*data, &mod, ®op, &rm); + AppendToBuffer("psrad %s,%s", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + data++; } else if (*data == 0x7F) { AppendToBuffer("movdqa "); data++; diff --git a/src/v8/src/ia32/lithium-codegen-ia32.cc b/src/v8/src/ia32/lithium-codegen-ia32.cc index 245dcdc..287f6d7 100644 --- a/src/v8/src/ia32/lithium-codegen-ia32.cc +++ b/src/v8/src/ia32/lithium-codegen-ia32.cc @@ -501,6 +501,11 @@ XMMRegister LCodeGen::ToDoubleRegister(int index) const { } +XMMRegister LCodeGen::ToSIMD128Register(int index) const { + return XMMRegister::FromAllocationIndex(index); +} + + Register LCodeGen::ToRegister(LOperand* op) const { DCHECK(op->IsRegister()); return ToRegister(op->index()); @@ -513,6 +518,31 @@ XMMRegister LCodeGen::ToDoubleRegister(LOperand* op) const { } +XMMRegister LCodeGen::ToFloat32x4Register(LOperand* op) const { + DCHECK(op->IsFloat32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToFloat64x2Register(LOperand* op) const { + DCHECK(op->IsFloat64x2Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToInt32x4Register(LOperand* op) const { + 
DCHECK(op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToSIMD128Register(LOperand* op) const { + DCHECK(op->IsFloat32x4Register() || op->IsFloat64x2Register() || + op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + int32_t LCodeGen::ToInteger32(LConstantOperand* op) const { return ToRepresentation(op, Representation::Integer32()); } @@ -568,7 +598,12 @@ static int ArgumentsOffsetWithoutFrame(int index) { Operand LCodeGen::ToOperand(LOperand* op) const { if (op->IsRegister()) return Operand(ToRegister(op)); if (op->IsDoubleRegister()) return Operand(ToDoubleRegister(op)); - DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot()); + if (op->IsFloat32x4Register()) return Operand(ToFloat32x4Register(op)); + if (op->IsFloat64x2Register()) return Operand(ToFloat64x2Register(op)); + if (op->IsInt32x4Register()) return Operand(ToInt32x4Register(op)); + DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot() || + op->IsFloat32x4StackSlot() || op->IsFloat64x2StackSlot() || + op->IsInt32x4StackSlot()); if (NeedsEagerFrame()) { return Operand(ebp, StackSlotOffset(op->index())); } else { @@ -695,6 +730,15 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } } else if (op->IsDoubleStackSlot()) { translation->StoreDoubleStackSlot(op->index()); + } else if (op->IsFloat32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::FLOAT32x4_STACK_SLOT); + } else if (op->IsFloat64x2StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::FLOAT64x2_STACK_SLOT); + } else if (op->IsInt32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::INT32x4_STACK_SLOT); } else if (op->IsRegister()) { Register reg = ToRegister(op); if (is_tagged) { @@ -707,6 +751,15 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } else if (op->IsDoubleRegister()) { XMMRegister reg = ToDoubleRegister(op); translation->StoreDoubleRegister(reg); + } else 
if (op->IsFloat32x4Register()) { + XMMRegister reg = ToFloat32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::FLOAT32x4_REGISTER); + } else if (op->IsFloat64x2Register()) { + XMMRegister reg = ToFloat64x2Register(op); + translation->StoreSIMD128Register(reg, Translation::FLOAT64x2_REGISTER); + } else if (op->IsInt32x4Register()) { + XMMRegister reg = ToInt32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::INT32x4_REGISTER); } else if (op->IsConstantOperand()) { HConstant* constant = chunk()->LookupConstant(LConstantOperand::cast(op)); int src_index = DefineDeoptimizationLiteral(constant->handle(isolate())); @@ -2082,6 +2135,9 @@ void LCodeGen::DoBranch(LBranch* instr) { __ xorps(xmm_scratch, xmm_scratch); __ ucomisd(reg, xmm_scratch); EmitBranch(instr, not_equal); + } else if (r.IsSIMD128()) { + DCHECK(!info()->IsStub()); + EmitBranch(instr, no_condition); } else { DCHECK(r.IsTagged()); Register reg = ToRegister(instr->value()); @@ -3045,14 +3101,51 @@ void LCodeGen::DoAccessArgumentsAt(LAccessArgumentsAt* instr) { } +void LCodeGen::DoDeferredSIMD128ToTagged(LInstruction* instr, + Runtime::FunctionId id) { + // TODO(3095996): Get rid of this. For now, we need to make the + // result register contain a valid pointer because it is already + // contained in the register pointer map. 
+ Register reg = ToRegister(instr->result()); + __ Move(reg, Immediate(0)); + + PushSafepointRegistersScope scope(this); + __ mov(esi, Operand(ebp, StandardFrameConstants::kContextOffset)); + __ CallRuntimeSaveDoubles(id); + RecordSafepointWithRegisters( + instr->pointer_map(), 0, Safepoint::kNoLazyDeopt); + __ StoreToSafepointRegisterSlot(reg, eax); +} + + +void LCodeGen::HandleExternalArrayOpRequiresTemp( + LOperand* key, + Representation key_representation, + ElementsKind elements_kind) { + if (ExternalArrayOpRequiresPreScale(key_representation, elements_kind)) { + int pre_shift_size = ElementsKindToShiftSize(elements_kind) - + static_cast(maximal_scale_factor); + if (key_representation.IsSmi()) { + pre_shift_size -= kSmiTagSize; + } + DCHECK(pre_shift_size > 0); + __ shl(ToRegister(key), pre_shift_size); + } else { + __ SmiUntag(ToRegister(key)); + } +} + + void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { ElementsKind elements_kind = instr->elements_kind(); LOperand* key = instr->key(); if (!key->IsConstantOperand() && - ExternalArrayOpRequiresTemp(instr->hydrogen()->key()->representation(), - elements_kind)) { - __ SmiUntag(ToRegister(key)); + ExternalArrayOpRequiresTemp( + instr->hydrogen()->key()->representation(), elements_kind)) { + HandleExternalArrayOpRequiresTemp( + key, instr->hydrogen()->key()->representation(), elements_kind); } + Operand operand(BuildFastArrayOperand( instr->elements(), key, @@ -3067,6 +3160,8 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { __ movsd(ToDoubleRegister(instr->result()), operand); + } else if (IsSIMD128ElementsKind(elements_kind)) { + __ movups(ToSIMD128Register(instr->result()), operand); } else { Register result(ToRegister(instr->result())); switch (elements_kind) { @@ -3102,8 +3197,14 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { break; case EXTERNAL_FLOAT32_ELEMENTS: case 
EXTERNAL_FLOAT64_ELEMENTS: + case EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: case FLOAT32_ELEMENTS: case FLOAT64_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: case FAST_SMI_ELEMENTS: case FAST_ELEMENTS: case FAST_DOUBLE_ELEMENTS: @@ -3194,8 +3295,11 @@ Operand LCodeGen::BuildFastArrayOperand( ((constant_value) << shift_size) + base_offset); } else { - // Take the tag bit into account while computing the shift size. - if (key_representation.IsSmi() && (shift_size >= 1)) { + if (ExternalArrayOpRequiresPreScale(key_representation, elements_kind)) { + // Make sure the key is pre-scaled against maximal_scale_factor. + shift_size = static_cast(maximal_scale_factor); + } else if (key_representation.IsSmi() && (shift_size >= 1)) { + // Take the tag bit into account while computing the shift size. shift_size -= kSmiTagSize; } ScaleFactor scale_factor = static_cast(shift_size); @@ -4114,8 +4218,10 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { if (!key->IsConstantOperand() && ExternalArrayOpRequiresTemp(instr->hydrogen()->key()->representation(), elements_kind)) { - __ SmiUntag(ToRegister(key)); + HandleExternalArrayOpRequiresTemp( + key, instr->hydrogen()->key()->representation(), elements_kind); } + Operand operand(BuildFastArrayOperand( instr->elements(), key, @@ -4130,6 +4236,8 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { __ movsd(operand, ToDoubleRegister(instr->value())); + } else if (IsSIMD128ElementsKind(elements_kind)) { + __ movups(operand, ToSIMD128Register(instr->value())); } else { Register value = ToRegister(instr->value()); switch (elements_kind) { @@ -4155,8 +4263,14 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { break; case EXTERNAL_FLOAT32_ELEMENTS: case EXTERNAL_FLOAT64_ELEMENTS: + case 
EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: case FLOAT32_ELEMENTS: case FLOAT64_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: case FAST_SMI_ELEMENTS: case FAST_ELEMENTS: case FAST_DOUBLE_ELEMENTS: @@ -5677,6 +5791,1109 @@ void LCodeGen::DoAllocateBlockContext(LAllocateBlockContext* instr) { } +template +void LCodeGen::HandleSIMD128ToTagged(LSIMD128ToTagged* instr) { + class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { + public: + DeferredSIMD128ToTagged(LCodeGen* codegen, + LInstruction* instr, + Runtime::FunctionId id) + : LDeferredCode(codegen), instr_(instr), id_(id) { } + virtual void Generate() V8_OVERRIDE { + codegen()->DoDeferredSIMD128ToTagged(instr_, id_); + } + virtual LInstruction* instr() V8_OVERRIDE { return instr_; } + private: + LInstruction* instr_; + Runtime::FunctionId id_; + }; + + XMMRegister input_reg = ToSIMD128Register(instr->value()); + Register reg = ToRegister(instr->result()); + Register tmp = ToRegister(instr->temp()); + Register tmp2 = ToRegister(instr->temp2()); + + DeferredSIMD128ToTagged* deferred = new(zone()) DeferredSIMD128ToTagged( + this, instr, static_cast(T::kRuntimeAllocatorId())); + + if (FLAG_inline_new) { + if (T::kInstanceType == FLOAT32x4_TYPE) { + __ AllocateFloat32x4(reg, tmp, tmp2, deferred->entry()); + } else if (T::kInstanceType == INT32x4_TYPE) { + __ AllocateInt32x4(reg, tmp, tmp2, deferred->entry()); + } else if (T::kInstanceType == FLOAT64x2_TYPE) { + __ AllocateFloat64x2(reg, tmp, tmp2, deferred->entry()); + } + } else { + __ jmp(deferred->entry()); + } + __ bind(deferred->exit()); + + // Load the inner FixedTypedArray object. 
+ __ mov(tmp, FieldOperand(reg, T::kValueOffset)); + + __ movups(FieldOperand(tmp, FixedTypedArrayBase::kDataOffset), input_reg); +} + + +void LCodeGen::DoSIMD128ToTagged(LSIMD128ToTagged* instr) { + if (instr->value()->IsFloat32x4Register()) { + HandleSIMD128ToTagged(instr); + } else if (instr->value()->IsFloat64x2Register()) { + HandleSIMD128ToTagged(instr); + } else { + DCHECK(instr->value()->IsInt32x4Register()); + HandleSIMD128ToTagged(instr); + } +} + + +template +void LCodeGen::HandleTaggedToSIMD128(LTaggedToSIMD128* instr) { + LOperand* input = instr->value(); + DCHECK(input->IsRegister()); + LOperand* result = instr->result(); + DCHECK(result->IsSIMD128Register()); + + Register input_reg = ToRegister(input); + Register temp_reg = ToRegister(instr->temp()); + XMMRegister result_reg = ToSIMD128Register(result); + + __ test(input_reg, Immediate(kSmiTagMask)); + DeoptimizeIf(zero, instr->environment()); + __ CmpObjectType(input_reg, T::kInstanceType, temp_reg); + DeoptimizeIf(not_equal, instr->environment()); + + // Load the inner FixedTypedArray object. 
+ __ mov(temp_reg, FieldOperand(input_reg, T::kValueOffset)); + + __ movups( + result_reg, FieldOperand(temp_reg, FixedTypedArrayBase::kDataOffset)); +} + + +void LCodeGen::DoTaggedToSIMD128(LTaggedToSIMD128* instr) { + if (instr->representation().IsFloat32x4()) { + HandleTaggedToSIMD128(instr); + } else if (instr->representation().IsFloat64x2()) { + HandleTaggedToSIMD128(instr); + } else { + DCHECK(instr->representation().IsInt32x4()); + HandleTaggedToSIMD128(instr); + } +} + + +void LCodeGen::DoNullarySIMDOperation(LNullarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Zero: { + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + __ xorps(result_reg, result_reg); + return; + } + case kFloat64x2Zero: { + XMMRegister result_reg = ToFloat64x2Register(instr->result()); + __ xorpd(result_reg, result_reg); + return; + } + case kInt32x4Zero: { + XMMRegister result_reg = ToInt32x4Register(instr->result()); + __ xorps(result_reg, result_reg); + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoUnarySIMDOperation(LUnarySIMDOperation* instr) { + uint8_t select = 0; + switch (instr->op()) { + case kFloat32x4Coercion: { + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + return; + } + case kFloat64x2Coercion: { + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + XMMRegister result_reg = ToFloat64x2Register(instr->result()); + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + return; + } + case kInt32x4Coercion: { + XMMRegister input_reg = ToInt32x4Register(instr->value()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + return; + } + case kSIMD128Change: { + Comment(";;; deoptimize: can not perform representation change" + "for float32x4 
or int32x4"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + case kFloat32x4Abs: + case kFloat32x4Neg: + case kFloat32x4Reciprocal: + case kFloat32x4ReciprocalSqrt: + case kFloat32x4Sqrt: { + DCHECK(instr->value()->Equals(instr->result())); + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + switch (instr->op()) { + case kFloat32x4Abs: + __ absps(input_reg); + break; + case kFloat32x4Neg: + __ negateps(input_reg); + break; + case kFloat32x4Reciprocal: + __ rcpps(input_reg, input_reg); + break; + case kFloat32x4ReciprocalSqrt: + __ rsqrtps(input_reg, input_reg); + break; + case kFloat32x4Sqrt: + __ sqrtps(input_reg, input_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat64x2Abs: + case kFloat64x2Neg: + case kFloat64x2Sqrt: { + DCHECK(instr->value()->Equals(instr->result())); + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + switch (instr->op()) { + case kFloat64x2Abs: + __ abspd(input_reg); + break; + case kFloat64x2Neg: + __ negatepd(input_reg); + break; + case kFloat64x2Sqrt: + __ sqrtpd(input_reg, input_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kInt32x4Not: + case kInt32x4Neg: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + XMMRegister input_reg = ToInt32x4Register(instr->value()); + switch (instr->op()) { + case kInt32x4Not: + __ notps(input_reg); + break; + case kInt32x4Neg: + __ pnegd(input_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat32x4BitsToInt32x4: + case kFloat32x4ToInt32x4: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + if (instr->op() == kFloat32x4BitsToInt32x4) { + if 
(!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + } else { + DCHECK(instr->op() == kFloat32x4ToInt32x4); + __ cvtps2dq(result_reg, input_reg); + } + return; + } + case kInt32x4BitsToFloat32x4: + case kInt32x4ToFloat32x4: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + XMMRegister input_reg = ToInt32x4Register(instr->value()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + if (instr->op() == kInt32x4BitsToFloat32x4) { + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + } else { + DCHECK(instr->op() == kInt32x4ToFloat32x4); + __ cvtdq2ps(result_reg, input_reg); + } + return; + } + case kFloat32x4Splat: { + DCHECK(instr->hydrogen()->value()->representation().IsDouble()); + XMMRegister input_reg = ToDoubleRegister(instr->value()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + XMMRegister xmm_scratch = xmm0; + __ xorps(xmm_scratch, xmm_scratch); + __ cvtsd2ss(xmm_scratch, input_reg); + __ shufps(xmm_scratch, xmm_scratch, 0x0); + __ movaps(result_reg, xmm_scratch); + return; + } + case kInt32x4Splat: { + DCHECK(instr->hydrogen()->value()->representation().IsInteger32()); + Register input_reg = ToRegister(instr->value()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + __ movd(result_reg, input_reg); + __ shufps(result_reg, result_reg, 0x0); + return; + } + case kInt32x4GetSignMask: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + XMMRegister input_reg = ToInt32x4Register(instr->value()); + Register result = ToRegister(instr->result()); + __ movmskps(result, input_reg); + return; + } + case kFloat32x4GetSignMask: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + Register result = ToRegister(instr->result()); + __ movmskps(result, input_reg); + return; + } + case kFloat32x4GetW: + select++; + case kFloat32x4GetZ: + select++; + case 
kFloat32x4GetY: + select++; + case kFloat32x4GetX: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + XMMRegister result = ToDoubleRegister(instr->result()); + XMMRegister xmm_scratch = result.is(input_reg) ? xmm0 : result; + + if (select == 0x0) { + __ xorps(xmm_scratch, xmm_scratch); + __ cvtss2sd(xmm_scratch, input_reg); + if (!xmm_scratch.is(result)) { + __ movaps(result, xmm_scratch); + } + } else { + __ pshufd(xmm_scratch, input_reg, select); + if (!xmm_scratch.is(result)) { + __ xorps(result, result); + } + __ cvtss2sd(result, xmm_scratch); + } + return; + } + case kFloat64x2GetSignMask: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + Register result = ToRegister(instr->result()); + __ movmskpd(result, input_reg); + return; + } + case kFloat64x2GetX: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + XMMRegister result = ToDoubleRegister(instr->result()); + + if (!input_reg.is(result)) { + __ movaps(result, input_reg); + } + return; + } + case kFloat64x2GetY: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + XMMRegister result = ToDoubleRegister(instr->result()); + + if (!input_reg.is(result)) { + __ movaps(result, input_reg); + } + __ shufpd(result, input_reg, 0x1); + return; + } + case kInt32x4GetX: + case kInt32x4GetY: + case kInt32x4GetZ: + case kInt32x4GetW: + case kInt32x4GetFlagX: + case kInt32x4GetFlagY: + case kInt32x4GetFlagZ: + case kInt32x4GetFlagW: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + bool flag = false; + switch (instr->op()) { + case kInt32x4GetFlagX: + flag = true; + case kInt32x4GetX: + break; + case kInt32x4GetFlagY: + flag = true; + case kInt32x4GetY: + select = 
0x1; + break; + case kInt32x4GetFlagZ: + flag = true; + case kInt32x4GetZ: + select = 0x2; + break; + case kInt32x4GetFlagW: + flag = true; + case kInt32x4GetW: + select = 0x3; + break; + default: + UNREACHABLE(); + } + + XMMRegister input_reg = ToInt32x4Register(instr->value()); + Register result = ToRegister(instr->result()); + if (select == 0x0) { + __ movd(result, input_reg); + } else { + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ extractps(result, input_reg, select); + } else { + XMMRegister xmm_scratch = xmm0; + __ pshufd(xmm_scratch, input_reg, select); + __ movd(result, xmm_scratch); + } + } + + if (flag) { + Label false_value, done; + __ test(result, result); + __ j(zero, &false_value, Label::kNear); + __ LoadRoot(result, Heap::kTrueValueRootIndex); + __ jmp(&done, Label::kNear); + __ bind(&false_value); + __ LoadRoot(result, Heap::kFalseValueRootIndex); + __ bind(&done); + } + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoBinarySIMDOperation(LBinarySIMDOperation* instr) { + uint8_t imm8 = 0; // for with operation + switch (instr->op()) { + case kFloat32x4Add: + case kFloat32x4Sub: + case kFloat32x4Mul: + case kFloat32x4Div: + case kFloat32x4Min: + case kFloat32x4Max: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsFloat32x4()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToFloat32x4Register(instr->right()); + switch (instr->op()) { + case kFloat32x4Add: + __ addps(left_reg, right_reg); + break; + case kFloat32x4Sub: + __ subps(left_reg, right_reg); + break; + case kFloat32x4Mul: + __ mulps(left_reg, right_reg); + break; + case kFloat32x4Div: + __ divps(left_reg, right_reg); + break; + case kFloat32x4Min: + __ minps(left_reg, right_reg); + break; + case kFloat32x4Max: + __ maxps(left_reg, right_reg); + break; + 
default: + UNREACHABLE(); + break; + } + return; + } + case kFloat32x4Scale: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + XMMRegister scratch_reg = xmm0; + __ xorps(scratch_reg, scratch_reg); + __ cvtsd2ss(scratch_reg, right_reg); + __ shufps(scratch_reg, scratch_reg, 0x0); + __ mulps(left_reg, scratch_reg); + return; + } + case kFloat64x2Add: + case kFloat64x2Sub: + case kFloat64x2Mul: + case kFloat64x2Div: + case kFloat64x2Min: + case kFloat64x2Max: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsFloat64x2()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToFloat64x2Register(instr->right()); + switch (instr->op()) { + case kFloat64x2Add: + __ addpd(left_reg, right_reg); + break; + case kFloat64x2Sub: + __ subpd(left_reg, right_reg); + break; + case kFloat64x2Mul: + __ mulpd(left_reg, right_reg); + break; + case kFloat64x2Div: + __ divpd(left_reg, right_reg); + break; + case kFloat64x2Min: + __ minpd(left_reg, right_reg); + break; + case kFloat64x2Max: + __ maxpd(left_reg, right_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat64x2Scale: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + __ shufpd(right_reg, right_reg, 0x0); + __ mulpd(left_reg, right_reg); + return; + } + case kFloat32x4Shuffle: { + DCHECK(instr->left()->Equals(instr->result())); 
+ DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + if (instr->hydrogen()->right()->IsConstant() && + HConstant::cast(instr->hydrogen()->right())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->right())); + uint8_t select = static_cast(value & 0xFF); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + __ shufps(left_reg, left_reg, select); + return; + } else { + Comment(";;; deoptimize: non-constant selector for shuffle"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + } + case kInt32x4Shuffle: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + if (instr->hydrogen()->right()->IsConstant() && + HConstant::cast(instr->hydrogen()->right())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->right())); + uint8_t select = static_cast(value & 0xFF); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + __ pshufd(left_reg, left_reg, select); + return; + } else { + Comment(";;; deoptimize: non-constant selector for shuffle"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + } + case kInt32x4ShiftLeft: + case kInt32x4ShiftRight: + case kInt32x4ShiftRightArithmetic: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + if (instr->hydrogen()->right()->IsConstant() && + HConstant::cast(instr->hydrogen()->right())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->right())); + uint8_t shift = static_cast(value & 0xFF); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + switch (instr->op()) { + case kInt32x4ShiftLeft: + __ pslld(left_reg, shift); + break; + case kInt32x4ShiftRight: + __ psrld(left_reg, shift); + break; + case kInt32x4ShiftRightArithmetic: + __ psrad(left_reg, shift); + break; + default: + UNREACHABLE(); + } + return; + } else { + XMMRegister 
left_reg = ToInt32x4Register(instr->left()); + Register shift = ToRegister(instr->right()); + XMMRegister xmm_scratch = double_scratch0(); + __ movd(xmm_scratch, shift); + switch (instr->op()) { + case kInt32x4ShiftLeft: + __ pslld(left_reg, xmm_scratch); + break; + case kInt32x4ShiftRight: + __ psrld(left_reg, xmm_scratch); + break; + case kInt32x4ShiftRightArithmetic: + __ psrad(left_reg, xmm_scratch); + break; + default: + UNREACHABLE(); + } + return; + } + } + case kFloat32x4LessThan: + case kFloat32x4LessThanOrEqual: + case kFloat32x4Equal: + case kFloat32x4NotEqual: + case kFloat32x4GreaterThanOrEqual: + case kFloat32x4GreaterThan: { + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsFloat32x4()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToFloat32x4Register(instr->right()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + switch (instr->op()) { + case kFloat32x4LessThan: + if (result_reg.is(left_reg)) { + __ cmpltps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpnltps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpltps(result_reg, right_reg); + } + break; + case kFloat32x4LessThanOrEqual: + if (result_reg.is(left_reg)) { + __ cmpleps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpnleps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpleps(result_reg, right_reg); + } + break; + case kFloat32x4Equal: + if (result_reg.is(left_reg)) { + __ cmpeqps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpeqps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpeqps(result_reg, right_reg); + } + break; + case kFloat32x4NotEqual: + if (result_reg.is(left_reg)) { + __ cmpneqps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpneqps(result_reg, left_reg); + } else 
{ + __ movaps(result_reg, left_reg); + __ cmpneqps(result_reg, right_reg); + } + break; + case kFloat32x4GreaterThanOrEqual: + if (result_reg.is(left_reg)) { + __ cmpnltps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpltps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpnltps(result_reg, right_reg); + } + break; + case kFloat32x4GreaterThan: + if (result_reg.is(left_reg)) { + __ cmpnleps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpleps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpnleps(result_reg, right_reg); + } + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kInt32x4And: + case kInt32x4Or: + case kInt32x4Xor: + case kInt32x4Add: + case kInt32x4Sub: + case kInt32x4Mul: + case kInt32x4GreaterThan: + case kInt32x4Equal: + case kInt32x4LessThan: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsInt32x4()); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + XMMRegister right_reg = ToInt32x4Register(instr->right()); + switch (instr->op()) { + case kInt32x4And: + __ andps(left_reg, right_reg); + break; + case kInt32x4Or: + __ orps(left_reg, right_reg); + break; + case kInt32x4Xor: + __ xorps(left_reg, right_reg); + break; + case kInt32x4Add: + __ paddd(left_reg, right_reg); + break; + case kInt32x4Sub: + __ psubd(left_reg, right_reg); + break; + case kInt32x4Mul: + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ pmulld(left_reg, right_reg); + } else { + // The algorithm is from http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers + XMMRegister xmm_scratch = xmm0; + __ movaps(xmm_scratch, left_reg); + __ pmuludq(left_reg, right_reg); + __ psrldq(xmm_scratch, 4); + __ psrldq(right_reg, 4); + __ pmuludq(xmm_scratch, right_reg); + __ 
pshufd(left_reg, left_reg, 8); + __ pshufd(xmm_scratch, xmm_scratch, 8); + __ punpackldq(left_reg, xmm_scratch); + } + break; + case kInt32x4GreaterThan: + __ pcmpgtd(left_reg, right_reg); + break; + case kInt32x4Equal: + __ pcmpeqd(left_reg, right_reg); + break; + case kInt32x4LessThan: { + XMMRegister xmm_scratch = xmm0; + __ movaps(xmm_scratch, right_reg); + __ pcmpgtd(xmm_scratch, left_reg); + __ movaps(left_reg, xmm_scratch); + break; + } + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat32x4WithW: + imm8++; + case kFloat32x4WithZ: + imm8++; + case kFloat32x4WithY: + imm8++; + case kFloat32x4WithX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + XMMRegister xmm_scratch = xmm0; + __ xorps(xmm_scratch, xmm_scratch); + __ cvtsd2ss(xmm_scratch, right_reg); + if (CpuFeatures::IsSupported(SSE4_1)) { + imm8 = imm8 << 4; + CpuFeatureScope scope(masm(), SSE4_1); + __ insertps(left_reg, xmm_scratch, imm8); + } else { + __ sub(esp, Immediate(kFloat32x4Size)); + __ movups(Operand(esp, 0), left_reg); + __ movss(Operand(esp, imm8 * kFloatSize), xmm_scratch); + __ movups(left_reg, Operand(esp, 0)); + __ add(esp, Immediate(kFloat32x4Size)); + } + return; + } + case kFloat64x2WithX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + __ sub(esp, Immediate(kFloat64x2Size)); + __ movups(Operand(esp, 0), left_reg); + __ movsd(Operand(esp, 0 * kDoubleSize), right_reg); + __ movups(left_reg, Operand(esp, 0)); + __ add(esp, Immediate(kFloat64x2Size)); + 
return; + } + case kFloat64x2WithY: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + __ sub(esp, Immediate(kFloat64x2Size)); + __ movups(Operand(esp, 0), left_reg); + __ movsd(Operand(esp, 1 * kDoubleSize), right_reg); + __ movups(left_reg, Operand(esp, 0)); + __ add(esp, Immediate(kFloat64x2Size)); + return; + } + case kFloat64x2Constructor: { + DCHECK(instr->hydrogen()->left()->representation().IsDouble()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToDoubleRegister(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + XMMRegister result_reg = ToFloat64x2Register(instr->result()); + __ sub(esp, Immediate(kFloat64x2Size)); + __ movsd(Operand(esp, 0 * kDoubleSize), left_reg); + __ movsd(Operand(esp, 1 * kDoubleSize), right_reg); + __ movups(result_reg, Operand(esp, 0)); + __ add(esp, Immediate(kFloat64x2Size)); + return; + } + case kInt32x4WithW: + imm8++; + case kInt32x4WithZ: + imm8++; + case kInt32x4WithY: + imm8++; + case kInt32x4WithX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsInteger32()); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + Register right_reg = ToRegister(instr->right()); + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ pinsrd(left_reg, right_reg, imm8); + } else { + __ sub(esp, Immediate(kInt32x4Size)); + __ movdqu(Operand(esp, 0), left_reg); + __ mov(Operand(esp, imm8 * kFloatSize), right_reg); + __ movdqu(left_reg, Operand(esp, 0)); + __ add(esp, Immediate(kInt32x4Size)); + } + return; + } + case kInt32x4WithFlagW: + imm8++; + case 
kInt32x4WithFlagZ: + imm8++; + case kInt32x4WithFlagY: + imm8++; + case kInt32x4WithFlagX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsTagged()); + HType type = instr->hydrogen()->right()->type(); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + Register right_reg = ToRegister(instr->right()); + Label load_false_value, done; + if (type.IsBoolean()) { + __ sub(esp, Immediate(kInt32x4Size)); + __ movups(Operand(esp, 0), left_reg); + __ CompareRoot(right_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_value, Label::kNear); + } else { + Comment(";;; deoptimize: other types for int32x4.withFlagX/Y/Z/W."); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + // load true value. + __ mov(Operand(esp, imm8 * kFloatSize), Immediate(0xFFFFFFFF)); + __ jmp(&done, Label::kNear); + __ bind(&load_false_value); + __ mov(Operand(esp, imm8 * kFloatSize), Immediate(0x0)); + __ bind(&done); + __ movups(left_reg, Operand(esp, 0)); + __ add(esp, Immediate(kInt32x4Size)); + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoTernarySIMDOperation(LTernarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Select: { + DCHECK(instr->hydrogen()->first()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsFloat32x4()); + + XMMRegister mask_reg = ToInt32x4Register(instr->first()); + XMMRegister left_reg = ToFloat32x4Register(instr->second()); + XMMRegister right_reg = ToFloat32x4Register(instr->third()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + XMMRegister temp_reg = xmm0; + + // Copy mask. + __ movaps(temp_reg, mask_reg); + // Invert it. + __ notps(temp_reg); + // temp_reg = temp_reg & falseValue. 
+ __ andps(temp_reg, right_reg); + + if (!result_reg.is(mask_reg)) { + if (result_reg.is(left_reg)) { + // result_reg = result_reg & trueValue. + __ andps(result_reg, mask_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } else { + __ movaps(result_reg, mask_reg); + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } + } else { + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } + return; + } + case kInt32x4Select: { + DCHECK(instr->hydrogen()->first()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsInt32x4()); + + XMMRegister mask_reg = ToInt32x4Register(instr->first()); + XMMRegister left_reg = ToInt32x4Register(instr->second()); + XMMRegister right_reg = ToInt32x4Register(instr->third()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + XMMRegister temp_reg = xmm0; + + // Copy mask. + __ movaps(temp_reg, mask_reg); + // Invert it. + __ notps(temp_reg); + // temp_reg = temp_reg & falseValue. + __ andps(temp_reg, right_reg); + + if (!result_reg.is(mask_reg)) { + if (result_reg.is(left_reg)) { + // result_reg = result_reg & trueValue. + __ andps(result_reg, mask_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } else { + __ movaps(result_reg, mask_reg); + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } + } else { + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. 
+ __ orps(result_reg, temp_reg); + } + return; + } + case kFloat32x4ShuffleMix: { + DCHECK(instr->first()->Equals(instr->result())); + DCHECK(instr->hydrogen()->first()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsInteger32()); + if (instr->hydrogen()->third()->IsConstant() && + HConstant::cast(instr->hydrogen()->third())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->third())); + uint8_t select = static_cast(value & 0xFF); + XMMRegister first_reg = ToFloat32x4Register(instr->first()); + XMMRegister second_reg = ToFloat32x4Register(instr->second()); + __ shufps(first_reg, second_reg, select); + return; + } else { + Comment(";;; deoptimize: non-constant selector for shuffle"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + } + case kFloat32x4Clamp: { + DCHECK(instr->first()->Equals(instr->result())); + DCHECK(instr->hydrogen()->first()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsFloat32x4()); + + XMMRegister value_reg = ToFloat32x4Register(instr->first()); + XMMRegister lower_reg = ToFloat32x4Register(instr->second()); + XMMRegister upper_reg = ToFloat32x4Register(instr->third()); + __ minps(value_reg, upper_reg); + __ maxps(value_reg, lower_reg); + return; + } + case kFloat64x2Clamp: { + DCHECK(instr->first()->Equals(instr->result())); + DCHECK(instr->hydrogen()->first()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->third()->representation().IsFloat64x2()); + + XMMRegister value_reg = ToFloat64x2Register(instr->first()); + XMMRegister lower_reg = ToFloat64x2Register(instr->second()); + XMMRegister upper_reg = ToFloat64x2Register(instr->third()); + __ minpd(value_reg, upper_reg); + __ 
maxpd(value_reg, lower_reg); + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoQuarternarySIMDOperation(LQuarternarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Constructor: { + DCHECK(instr->hydrogen()->x()->representation().IsDouble()); + DCHECK(instr->hydrogen()->y()->representation().IsDouble()); + DCHECK(instr->hydrogen()->z()->representation().IsDouble()); + DCHECK(instr->hydrogen()->w()->representation().IsDouble()); + XMMRegister x_reg = ToDoubleRegister(instr->x()); + XMMRegister y_reg = ToDoubleRegister(instr->y()); + XMMRegister z_reg = ToDoubleRegister(instr->z()); + XMMRegister w_reg = ToDoubleRegister(instr->w()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + __ sub(esp, Immediate(kFloat32x4Size)); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, x_reg); + __ movss(Operand(esp, 0 * kFloatSize), xmm0); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, y_reg); + __ movss(Operand(esp, 1 * kFloatSize), xmm0); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, z_reg); + __ movss(Operand(esp, 2 * kFloatSize), xmm0); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, w_reg); + __ movss(Operand(esp, 3 * kFloatSize), xmm0); + __ movups(result_reg, Operand(esp, 0 * kFloatSize)); + __ add(esp, Immediate(kFloat32x4Size)); + return; + } + case kInt32x4Constructor: { + DCHECK(instr->hydrogen()->x()->representation().IsInteger32()); + DCHECK(instr->hydrogen()->y()->representation().IsInteger32()); + DCHECK(instr->hydrogen()->z()->representation().IsInteger32()); + DCHECK(instr->hydrogen()->w()->representation().IsInteger32()); + Register x_reg = ToRegister(instr->x()); + Register y_reg = ToRegister(instr->y()); + Register z_reg = ToRegister(instr->z()); + Register w_reg = ToRegister(instr->w()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + __ sub(esp, Immediate(kInt32x4Size)); + __ mov(Operand(esp, 0 * kInt32Size), x_reg); + __ mov(Operand(esp, 1 * kInt32Size), y_reg); + __ mov(Operand(esp, 2 * 
kInt32Size), z_reg); + __ mov(Operand(esp, 3 * kInt32Size), w_reg); + __ movups(result_reg, Operand(esp, 0 * kInt32Size)); + __ add(esp, Immediate(kInt32x4Size)); + return; + } + case kInt32x4Bool: { + DCHECK(instr->hydrogen()->x()->representation().IsTagged()); + DCHECK(instr->hydrogen()->y()->representation().IsTagged()); + DCHECK(instr->hydrogen()->z()->representation().IsTagged()); + DCHECK(instr->hydrogen()->w()->representation().IsTagged()); + HType x_type = instr->hydrogen()->x()->type(); + HType y_type = instr->hydrogen()->y()->type(); + HType z_type = instr->hydrogen()->z()->type(); + HType w_type = instr->hydrogen()->w()->type(); + if (!x_type.IsBoolean() || !y_type.IsBoolean() || + !z_type.IsBoolean() || !w_type.IsBoolean()) { + Comment(";;; deoptimize: other types for int32x4.bool."); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + XMMRegister result_reg = ToInt32x4Register(instr->result()); + Register x_reg = ToRegister(instr->x()); + Register y_reg = ToRegister(instr->y()); + Register z_reg = ToRegister(instr->z()); + Register w_reg = ToRegister(instr->w()); + Label load_false_x, done_x, load_false_y, done_y, + load_false_z, done_z, load_false_w, done_w; + __ sub(esp, Immediate(kInt32x4Size)); + + __ CompareRoot(x_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_x, Label::kNear); + __ mov(Operand(esp, 0 * kInt32Size), Immediate(-1)); + __ jmp(&done_x, Label::kNear); + __ bind(&load_false_x); + __ mov(Operand(esp, 0 * kInt32Size), Immediate(0x0)); + __ bind(&done_x); + + __ CompareRoot(y_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_y, Label::kNear); + __ mov(Operand(esp, 1 * kInt32Size), Immediate(-1)); + __ jmp(&done_y, Label::kNear); + __ bind(&load_false_y); + __ mov(Operand(esp, 1 * kInt32Size), Immediate(0x0)); + __ bind(&done_y); + + __ CompareRoot(z_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_z, Label::kNear); + __ mov(Operand(esp, 2 * kInt32Size), Immediate(-1)); + __ 
jmp(&done_z, Label::kNear); + __ bind(&load_false_z); + __ mov(Operand(esp, 2 * kInt32Size), Immediate(0x0)); + __ bind(&done_z); + + __ CompareRoot(w_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_w, Label::kNear); + __ mov(Operand(esp, 3 * kInt32Size), Immediate(-1)); + __ jmp(&done_w, Label::kNear); + __ bind(&load_false_w); + __ mov(Operand(esp, 3 * kInt32Size), Immediate(0x0)); + __ bind(&done_w); + + __ movups(result_reg, Operand(esp, 0)); + __ add(esp, Immediate(kInt32x4Size)); + return; + } + default: + UNREACHABLE(); + return; + } +} + + #undef __ } } // namespace v8::internal diff --git a/src/v8/src/ia32/lithium-codegen-ia32.h b/src/v8/src/ia32/lithium-codegen-ia32.h index d2f85f1..dea88a1 100644 --- a/src/v8/src/ia32/lithium-codegen-ia32.h +++ b/src/v8/src/ia32/lithium-codegen-ia32.h @@ -66,6 +66,10 @@ class LCodeGen: public LCodeGenBase { Operand ToOperand(LOperand* op) const; Register ToRegister(LOperand* op) const; XMMRegister ToDoubleRegister(LOperand* op) const; + XMMRegister ToFloat32x4Register(LOperand* op) const; + XMMRegister ToFloat64x2Register(LOperand* op) const; + XMMRegister ToInt32x4Register(LOperand* op) const; + XMMRegister ToSIMD128Register(LOperand* op) const; bool IsInteger32(LConstantOperand* op) const; bool IsSmi(LConstantOperand* op) const; @@ -99,6 +103,8 @@ class LCodeGen: public LCodeGenBase { IntegerSignedness signedness); void DoDeferredTaggedToI(LTaggedToI* instr, Label* done); + void DoDeferredFloat32x4ToTagged(LInstruction* instr); + void DoDeferredInt32x4ToTagged(LInstruction* instr); void DoDeferredMathAbsTaggedHeapNumber(LMathAbs* instr); void DoDeferredStackCheck(LStackCheck* instr); void DoDeferredStringCharCodeAt(LStringCharCodeAt* instr); @@ -110,6 +116,12 @@ class LCodeGen: public LCodeGenBase { void DoDeferredLoadMutableDouble(LLoadFieldByIndex* instr, Register object, Register index); + void DoDeferredSIMD128ToTagged(LInstruction* instr, Runtime::FunctionId id); + + template + void 
HandleTaggedToSIMD128(LTaggedToSIMD128* instr); + template + void HandleSIMD128ToTagged(LSIMD128ToTagged* instr); // Parallel move support. void DoParallelMove(LParallelMove* move); @@ -232,6 +244,10 @@ class LCodeGen: public LCodeGenBase { Register ToRegister(int index) const; XMMRegister ToDoubleRegister(int index) const; + XMMRegister ToFloat32x4Register(int index) const; + XMMRegister ToFloat64x2Register(int index) const; + XMMRegister ToInt32x4Register(int index) const; + XMMRegister ToSIMD128Register(int index) const; int32_t ToRepresentation(LConstantOperand* op, const Representation& r) const; int32_t ToInteger32(LConstantOperand* op) const; ExternalReference ToExternalReference(LConstantOperand* op) const; @@ -313,9 +329,16 @@ class LCodeGen: public LCodeGenBase { void EnsureSpaceForLazyDeopt(int space_needed) V8_OVERRIDE; void DoLoadKeyedExternalArray(LLoadKeyed* instr); + void HandleExternalArrayOpRequiresTemp(LOperand* key, + Representation key_representation, + ElementsKind elements_kind); + template + void DoLoadKeyedSIMD128ExternalArray(LLoadKeyed* instr); void DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr); void DoLoadKeyedFixedArray(LLoadKeyed* instr); void DoStoreKeyedExternalArray(LStoreKeyed* instr); + template + void DoStoreKeyedSIMD128ExternalArray(LStoreKeyed* instr); void DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr); void DoStoreKeyedFixedArray(LStoreKeyed* instr); diff --git a/src/v8/src/ia32/lithium-gap-resolver-ia32.cc b/src/v8/src/ia32/lithium-gap-resolver-ia32.cc index 1e590fd..a4beeb3 100644 --- a/src/v8/src/ia32/lithium-gap-resolver-ia32.cc +++ b/src/v8/src/ia32/lithium-gap-resolver-ia32.cc @@ -341,6 +341,23 @@ void LGapResolver::EmitMove(int index) { __ movsd(xmm0, src); __ movsd(dst, xmm0); } + } else if (source->IsSIMD128Register()) { + XMMRegister src = cgen_->ToSIMD128Register(source); + if (destination->IsSIMD128Register()) { + __ movaps(cgen_->ToSIMD128Register(destination), src); + } else { + 
DCHECK(destination->IsSIMD128StackSlot()); + __ movups(cgen_->ToOperand(destination), src); + } + } else if (source->IsSIMD128StackSlot()) { + Operand src = cgen_->ToOperand(source); + if (destination->IsSIMD128Register()) { + __ movups(cgen_->ToSIMD128Register(destination), src); + } else { + DCHECK(destination->IsSIMD128StackSlot()); + __ movups(xmm0, src); + __ movups(cgen_->ToOperand(destination), xmm0); + } } else { UNREACHABLE(); } @@ -439,6 +456,43 @@ void LGapResolver::EmitSwap(int index) { __ mov(dst1, tmp); __ movsd(src0, xmm0); + } else if ((source->IsSIMD128StackSlot() && + destination->IsSIMD128StackSlot())) { + // Swap two XMM stack slots. + Operand src = cgen_->ToOperand(source); + Operand dst = cgen_->ToOperand(destination); + Register tmp = EnsureTempRegister(); + __ movups(xmm0, src); + for (int offset = 0; offset < kSIMD128Size; offset += kPointerSize) { + __ mov(tmp, Operand(dst, offset)); + __ mov(Operand(src, offset), tmp); + } + __ movups(dst, xmm0); + + } else if (source->IsSIMD128Register() && destination->IsSIMD128Register()) { + // Swap two XMM registers. + XMMRegister source_reg = cgen_->ToSIMD128Register(source); + XMMRegister destination_reg = cgen_->ToSIMD128Register(destination); + __ movaps(xmm0, source_reg); + __ movaps(source_reg, destination_reg); + __ movaps(destination_reg, xmm0); + + } else if (source->IsSIMD128Register() || destination->IsSIMD128Register()) { + // Swap a xmm register and a xmm stack slot. + DCHECK((source->IsSIMD128Register() && + destination->IsSIMD128StackSlot()) || + (source->IsSIMD128StackSlot() && + destination->IsSIMD128Register())); + XMMRegister reg = cgen_->ToSIMD128Register(source->IsSIMD128Register() + ? source + : destination); + LOperand* other = source->IsSIMD128Register() ? 
destination : source; + DCHECK(other->IsSIMD128StackSlot()); + Operand other_operand = cgen_->ToOperand(other); + __ movups(xmm0, other_operand); + __ movups(other_operand, reg); + __ movaps(reg, xmm0); + } else { // No other combinations are possible. UNREACHABLE(); diff --git a/src/v8/src/ia32/lithium-ia32.cc b/src/v8/src/ia32/lithium-ia32.cc index e02b65e..188f493 100644 --- a/src/v8/src/ia32/lithium-ia32.cc +++ b/src/v8/src/ia32/lithium-ia32.cc @@ -342,23 +342,41 @@ void LAccessArgumentsAt::PrintDataTo(StringStream* stream) { int LPlatformChunk::GetNextSpillIndex(RegisterKind kind) { - // Skip a slot if for a double-width slot. - if (kind == DOUBLE_REGISTERS) { - spill_slot_count_++; - spill_slot_count_ |= 1; - num_double_slots_++; + switch (kind) { + case GENERAL_REGISTERS: return spill_slot_count_++; + case DOUBLE_REGISTERS: { + // Skip a slot if for a double-width slot. + spill_slot_count_++; + spill_slot_count_ |= 1; + num_double_slots_++; + return spill_slot_count_++; + } + case FLOAT32x4_REGISTERS: + case FLOAT64x2_REGISTERS: + case INT32x4_REGISTERS: { + // Skip three slots if for a quad-width slot. 
+ spill_slot_count_ += 3; + num_double_slots_ += 2; // for dynamic frame alignment + return spill_slot_count_++; + } + default: + UNREACHABLE(); + return -1; } - return spill_slot_count_++; } LOperand* LPlatformChunk::GetNextSpillSlot(RegisterKind kind) { int index = GetNextSpillIndex(kind); - if (kind == DOUBLE_REGISTERS) { - return LDoubleStackSlot::Create(index, zone()); - } else { - DCHECK(kind == GENERAL_REGISTERS); - return LStackSlot::Create(index, zone()); + switch (kind) { + case GENERAL_REGISTERS: return LStackSlot::Create(index, zone()); + case DOUBLE_REGISTERS: return LDoubleStackSlot::Create(index, zone()); + case FLOAT32x4_REGISTERS: return LFloat32x4StackSlot::Create(index, zone()); + case FLOAT64x2_REGISTERS: return LFloat64x2StackSlot::Create(index, zone()); + case INT32x4_REGISTERS: return LInt32x4StackSlot::Create(index, zone()); + default: + UNREACHABLE(); + return NULL; } } @@ -983,6 +1001,7 @@ LInstruction* LChunkBuilder::DoBranch(HBranch* instr) { bool easy_case = !r.IsTagged() || type.IsBoolean() || type.IsSmi() || type.IsJSArray() || type.IsHeapNumber() || type.IsString(); + LOperand* temp = !easy_case && expected.NeedsMap() ? 
TempRegister() : NULL; LInstruction* branch = new(zone()) LBranch(UseRegister(value), temp); if (!easy_case && @@ -1905,6 +1924,11 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { DefineAsRegister(new(zone()) LNumberUntagD(value, temp)); if (!val->representation().IsSmi()) result = AssignEnvironment(result); return result; + } else if (to.IsSIMD128()) { + LOperand* value = UseRegister(instr->value()); + LOperand* temp = TempRegister(); + LTaggedToSIMD128* res = new(zone()) LTaggedToSIMD128(value, temp, to); + return AssignEnvironment(DefineAsRegister(res)); } else if (to.IsSmi()) { LOperand* value = UseRegister(val); if (val->type().IsSmi()) { @@ -1979,6 +2003,17 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { return DefineAsRegister(new(zone()) LInteger32ToDouble(Use(val))); } } + } else if (from.IsSIMD128()) { + DCHECK(to.IsTagged()); + info()->MarkAsDeferredCalling(); + LOperand* value = UseRegister(instr->value()); + LOperand* temp = TempRegister(); + LOperand* temp2 = TempRegister(); + + // Make sure that temp and result_temp are different registers. + LUnallocated* result_temp = TempRegister(); + LSIMD128ToTagged* result = new(zone()) LSIMD128ToTagged(value, temp, temp2); + return AssignPointerMap(Define(result, result_temp)); } UNREACHABLE(); return NULL; @@ -2210,7 +2245,19 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { (instr->representation().IsInteger32() && !(IsDoubleOrFloatElementsKind(instr->elements_kind()))) || (instr->representation().IsDouble() && - (IsDoubleOrFloatElementsKind(instr->elements_kind())))); + (IsDoubleOrFloatElementsKind(instr->elements_kind()))) || + (CpuFeatures::SupportsSIMD128InCrankshaft() + ? instr->representation().IsFloat32x4() + : instr->representation().IsTagged() && + (IsFloat32x4ElementsKind(instr->elements_kind()))) || + (CpuFeatures::SupportsSIMD128InCrankshaft() + ? 
instr->representation().IsFloat64x2() + : instr->representation().IsTagged() && + (IsFloat64x2ElementsKind(instr->elements_kind()))) || + (CpuFeatures::SupportsSIMD128InCrankshaft() + ? instr->representation().IsInt32x4() + : instr->representation().IsTagged() && + (IsInt32x4ElementsKind(instr->elements_kind())))); LOperand* backing_store = UseRegister(instr->elements()); result = DefineAsRegister(new(zone()) LLoadKeyed(backing_store, key)); } @@ -2297,7 +2344,19 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { (instr->value()->representation().IsInteger32() && !IsDoubleOrFloatElementsKind(elements_kind)) || (instr->value()->representation().IsDouble() && - IsDoubleOrFloatElementsKind(elements_kind))); + IsDoubleOrFloatElementsKind(elements_kind)) || + (CpuFeatures::SupportsSIMD128InCrankshaft() + ? instr->value()->representation().IsFloat32x4() + : instr->value()->representation().IsTagged() && + IsFloat32x4ElementsKind(elements_kind)) || + (CpuFeatures::SupportsSIMD128InCrankshaft() + ? instr->value()->representation().IsFloat64x2() + : instr->value()->representation().IsTagged() && + IsFloat64x2ElementsKind(elements_kind)) || + (CpuFeatures::SupportsSIMD128InCrankshaft() + ? 
instr->value()->representation().IsInt32x4() + : instr->value()->representation().IsTagged() && + IsInt32x4ElementsKind(elements_kind))); DCHECK((instr->is_fixed_typed_array() && instr->elements()->representation().IsTagged()) || (instr->is_external() && @@ -2702,6 +2761,283 @@ LInstruction* LChunkBuilder::DoAllocateBlockContext( } +const char* LNullarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_NULLARY_OPERATION_CASE_ITEM(module, function, name, p4) \ + case k##name: \ + return #module "-" #function; +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoNullarySIMDOperation( + HNullarySIMDOperation* instr) { + LNullarySIMDOperation* result = + new(zone()) LNullarySIMDOperation(instr->op()); + switch (instr->op()) { +#define SIMD_NULLARY_OPERATION_CASE_ITEM(module, function, name, p4) \ + case k##name: +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + return DefineAsRegister(result); + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LUnarySIMDOperation::Mnemonic() const { + switch (op()) { + case kSIMD128Change: return "SIMD128-change"; +#define SIMD_UNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5) \ + case k##name: \ + return #module "-" #function; +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoUnarySIMDOperation(HUnarySIMDOperation* instr) { + LOperand* input = UseRegisterAtStart(instr->value()); + LUnarySIMDOperation* result = + new(zone()) LUnarySIMDOperation(input, instr->op()); + switch (instr->op()) { + case kSIMD128Change: + return AssignEnvironment(DefineAsRegister(result)); + case kFloat32x4Abs: + case kFloat32x4Neg: + case 
kFloat32x4Reciprocal: + case kFloat32x4ReciprocalSqrt: + case kFloat32x4Sqrt: + case kFloat64x2Abs: + case kFloat64x2Neg: + case kFloat64x2Sqrt: + case kInt32x4Neg: + case kInt32x4Not: + return DefineSameAsFirst(result); + case kFloat32x4Coercion: + case kFloat64x2Coercion: + case kInt32x4Coercion: + case kFloat32x4BitsToInt32x4: + case kFloat32x4ToInt32x4: + case kInt32x4BitsToFloat32x4: + case kInt32x4ToFloat32x4: + case kFloat32x4Splat: + case kInt32x4Splat: + case kFloat32x4GetSignMask: + case kFloat32x4GetX: + case kFloat32x4GetY: + case kFloat32x4GetZ: + case kFloat32x4GetW: + case kFloat64x2GetSignMask: + case kFloat64x2GetX: + case kFloat64x2GetY: + case kInt32x4GetSignMask: + case kInt32x4GetX: + case kInt32x4GetY: + case kInt32x4GetZ: + case kInt32x4GetW: + case kInt32x4GetFlagX: + case kInt32x4GetFlagY: + case kInt32x4GetFlagZ: + case kInt32x4GetFlagW: + return DefineAsRegister(result); + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LBinarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_BINARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, p6) \ + case k##name: \ + return #module "-" #function; +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoBinarySIMDOperation( + HBinarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Add: + case kFloat32x4Div: + case kFloat32x4Max: + case kFloat32x4Min: + case kFloat32x4Mul: + case kFloat32x4Sub: + case kFloat32x4Scale: + case kFloat32x4WithX: + case kFloat32x4WithY: + case kFloat32x4WithZ: + case kFloat32x4WithW: + case kFloat64x2Add: + case kFloat64x2Div: + case kFloat64x2Max: + case kFloat64x2Min: + case kFloat64x2Mul: + case kFloat64x2Sub: + case kFloat64x2Scale: + case kFloat64x2WithX: + case kFloat64x2WithY: + case kInt32x4Add: + case kInt32x4And: + case kInt32x4Mul: + case kInt32x4Or: + case kInt32x4Sub: + case 
kInt32x4Xor: + case kInt32x4WithX: + case kInt32x4WithY: + case kInt32x4WithZ: + case kInt32x4WithW: + case kInt32x4WithFlagX: + case kInt32x4WithFlagY: + case kInt32x4WithFlagZ: + case kInt32x4WithFlagW: + case kInt32x4GreaterThan: + case kInt32x4Equal: + case kInt32x4LessThan: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseRegisterAtStart(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + if (instr->op() == kInt32x4WithFlagX || + instr->op() == kInt32x4WithFlagY || + instr->op() == kInt32x4WithFlagZ || + instr->op() == kInt32x4WithFlagW) { + return AssignEnvironment(DefineSameAsFirst(result)); + } else { + return DefineSameAsFirst(result); + } + } + case kFloat64x2Constructor: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseRegisterAtStart(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + return DefineAsRegister(result); + } + case kFloat32x4Shuffle: + case kInt32x4Shuffle: + case kInt32x4ShiftLeft: + case kInt32x4ShiftRight: + case kInt32x4ShiftRightArithmetic: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseOrConstant(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + return AssignEnvironment(DefineSameAsFirst(result)); + } + case kFloat32x4LessThan: + case kFloat32x4LessThanOrEqual: + case kFloat32x4Equal: + case kFloat32x4NotEqual: + case kFloat32x4GreaterThanOrEqual: + case kFloat32x4GreaterThan: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseRegisterAtStart(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + return DefineAsRegister(result); + } + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LTernarySIMDOperation::Mnemonic() const { + switch (op()) { +#define 
SIMD_TERNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, p6, \ + p7) \ + case k##name: \ + return #module "-" #function; +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoTernarySIMDOperation( + HTernarySIMDOperation* instr) { + LOperand* first = UseRegisterAtStart(instr->first()); + LOperand* second = UseRegisterAtStart(instr->second()); + LOperand* third = instr->op() == kFloat32x4ShuffleMix + ? UseOrConstant(instr->third()) + : UseRegisterAtStart(instr->third()); + LTernarySIMDOperation* result = + new(zone()) LTernarySIMDOperation(first, second, third, instr->op()); + switch (instr->op()) { + case kInt32x4Select: + case kFloat32x4Select: { + return DefineAsRegister(result); + } + case kFloat32x4ShuffleMix: { + return AssignEnvironment(DefineSameAsFirst(result)); + } + case kFloat32x4Clamp: + case kFloat64x2Clamp: { + return DefineSameAsFirst(result); + } + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LQuarternarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, \ + p6, p7, p8) \ + case k##name: \ + return #module "-" #function; +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoQuarternarySIMDOperation( + HQuarternarySIMDOperation* instr) { + LOperand* x = UseRegisterAtStart(instr->x()); + LOperand* y = UseRegisterAtStart(instr->y()); + LOperand* z = UseRegisterAtStart(instr->z()); + LOperand* w = UseRegisterAtStart(instr->w()); + LQuarternarySIMDOperation* result = + new(zone()) LQuarternarySIMDOperation(x, y, z, w, instr->op()); + if (instr->op() == kInt32x4Bool) { + return AssignEnvironment(DefineAsRegister(result)); + } else { + return DefineAsRegister(result); + } +} + + } 
} // namespace v8::internal #endif // V8_TARGET_ARCH_IA32 diff --git a/src/v8/src/ia32/lithium-ia32.h b/src/v8/src/ia32/lithium-ia32.h index 4206482..d9713c9 100644 --- a/src/v8/src/ia32/lithium-ia32.h +++ b/src/v8/src/ia32/lithium-ia32.h @@ -123,12 +123,19 @@ class LCodeGen; V(MathSqrt) \ V(ModByConstI) \ V(ModByPowerOf2I) \ + V(NullarySIMDOperation) \ + V(UnarySIMDOperation) \ + V(BinarySIMDOperation) \ + V(TernarySIMDOperation) \ + V(QuarternarySIMDOperation) \ V(ModI) \ V(MulI) \ V(NumberTagD) \ V(NumberTagI) \ V(NumberTagU) \ V(NumberUntagD) \ + V(SIMD128ToTagged) \ + V(TaggedToSIMD128) \ V(OsrEntry) \ V(Parameter) \ V(Power) \ @@ -961,6 +968,154 @@ class LMathPowHalf V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; +class LNullarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 0, 0> { + public: + explicit LNullarySIMDOperation(BuiltinFunctionId op) + : op_(op) { + } + + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kNullarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LNullarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsNullarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(NullarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LUnarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 1, 0> { + public: + LUnarySIMDOperation(LOperand* value, BuiltinFunctionId op) + : op_(op) { + inputs_[0] = value; + } + + LOperand* value() { return inputs_[0]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kUnarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LUnarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsUnarySIMDOperation()); + 
return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(UnarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LBinarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 2, 0> { + public: + LBinarySIMDOperation(LOperand* left, LOperand* right, BuiltinFunctionId op) + : op_(op) { + inputs_[0] = left; + inputs_[1] = right; + } + + LOperand* left() { return inputs_[0]; } + LOperand* right() { return inputs_[1]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kBinarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LBinarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsBinarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(BinarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LTernarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 3, 0> { + public: + LTernarySIMDOperation(LOperand* first, LOperand* second, LOperand* third, + BuiltinFunctionId op) + : op_(op) { + inputs_[0] = first; + inputs_[1] = second; + inputs_[2] = third; + } + + LOperand* first() { return inputs_[0]; } + LOperand* second() { return inputs_[1]; } + LOperand* third() { return inputs_[2]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kTernarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LTernarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsTernarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(TernarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LQuarternarySIMDOperation V8_FINAL + : public LTemplateInstruction<1, 4, 0> { + public: + LQuarternarySIMDOperation(LOperand* x, LOperand* y, 
LOperand* z, + LOperand* w, BuiltinFunctionId op) + : op_(op) { + inputs_[0] = x; + inputs_[1] = y; + inputs_[2] = z; + inputs_[3] = w; + } + + LOperand* x() { return inputs_[0]; } + LOperand* y() { return inputs_[1]; } + LOperand* z() { return inputs_[2]; } + LOperand* w() { return inputs_[3]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kQuarternarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LQuarternarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsQuarternarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(QuarternarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + class LCmpObjectEqAndBranch V8_FINAL : public LControlInstruction<2, 0> { public: LCmpObjectEqAndBranch(LOperand* left, LOperand* right) { @@ -1672,19 +1827,30 @@ class LLoadKeyed V8_FINAL : public LTemplateInstruction<1, 2, 0> { }; +inline static bool ExternalArrayOpRequiresPreScale( + Representation key_representation, + ElementsKind kind) { + int shift_size = ElementsKindToShiftSize(kind); + return key_representation.IsSmi() + ? shift_size > static_cast(maximal_scale_factor) + kSmiTagSize + : shift_size > static_cast(maximal_scale_factor); +} + + inline static bool ExternalArrayOpRequiresTemp( Representation key_representation, ElementsKind elements_kind) { - // Operations that require the key to be divided by two to be converted into - // an index cannot fold the scale operation into a load and need an extra - // temp register to do the work. 
- return key_representation.IsSmi() && - (elements_kind == EXTERNAL_INT8_ELEMENTS || - elements_kind == EXTERNAL_UINT8_ELEMENTS || - elements_kind == EXTERNAL_UINT8_CLAMPED_ELEMENTS || - elements_kind == UINT8_ELEMENTS || - elements_kind == INT8_ELEMENTS || - elements_kind == UINT8_CLAMPED_ELEMENTS); + // Operations that require the key to be scaled by a factor or divided by two + // to be converted into an index cannot fold the scale operation into a load + // and need an extra temp register to do the work. + return ExternalArrayOpRequiresPreScale(key_representation, elements_kind) || + (key_representation.IsSmi() && + (elements_kind == EXTERNAL_INT8_ELEMENTS || + elements_kind == EXTERNAL_UINT8_ELEMENTS || + elements_kind == EXTERNAL_UINT8_CLAMPED_ELEMENTS || + elements_kind == UINT8_ELEMENTS || + elements_kind == INT8_ELEMENTS || + elements_kind == UINT8_CLAMPED_ELEMENTS)); } @@ -2082,6 +2248,23 @@ class LNumberTagD V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; +class LSIMD128ToTagged V8_FINAL : public LTemplateInstruction<1, 1, 2> { + public: + explicit LSIMD128ToTagged(LOperand* value, LOperand* temp, LOperand* temp2) { + inputs_[0] = value; + temps_[0] = temp; + temps_[1] = temp2; + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp() { return temps_[0]; } + LOperand* temp2() { return temps_[1]; } + + DECLARE_CONCRETE_INSTRUCTION(SIMD128ToTagged, "simd128-tag") + DECLARE_HYDROGEN_ACCESSOR(Change) +}; + + // Sometimes truncating conversion from a tagged value to an int32. 
class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 1> { public: @@ -2159,6 +2342,26 @@ class LNumberUntagD V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; +class LTaggedToSIMD128 V8_FINAL : public LTemplateInstruction<1, 1, 1> { + public: + explicit LTaggedToSIMD128(LOperand* value, LOperand* temp, + Representation representation) + : representation_(representation) { + inputs_[0] = value; + temps_[0] = temp; + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp() { return temps_[0]; } + Representation representation() const { return representation_; } + + DECLARE_CONCRETE_INSTRUCTION(TaggedToSIMD128, "simd128-untag") + DECLARE_HYDROGEN_ACCESSOR(Change); + private: + Representation representation_; +}; + + class LSmiUntag V8_FINAL : public LTemplateInstruction<1, 1, 0> { public: LSmiUntag(LOperand* value, bool needs_check) diff --git a/src/v8/src/ia32/macro-assembler-ia32.cc b/src/v8/src/ia32/macro-assembler-ia32.cc index 7e05e67..b34295a 100644 --- a/src/v8/src/ia32/macro-assembler-ia32.cc +++ b/src/v8/src/ia32/macro-assembler-ia32.cc @@ -979,13 +979,13 @@ void MacroAssembler::EnterExitFramePrologue() { void MacroAssembler::EnterExitFrameEpilogue(int argc, bool save_doubles) { // Optionally save all XMM registers. 
if (save_doubles) { - int space = XMMRegister::kMaxNumRegisters * kDoubleSize + + int space = XMMRegister::kMaxNumRegisters * kSIMD128Size + argc * kPointerSize; sub(esp, Immediate(space)); const int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(Operand(ebp, offset - ((i + 1) * kDoubleSize)), reg); + movups(Operand(ebp, offset - ((i + 1) * kSIMD128Size)), reg); } } else { sub(esp, Immediate(argc * kPointerSize)); @@ -1028,7 +1028,7 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles) { const int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(reg, Operand(ebp, offset - ((i + 1) * kDoubleSize))); + movups(reg, Operand(ebp, offset - ((i + 1) * kSIMD128Size))); } } @@ -1698,6 +1698,52 @@ void MacroAssembler::AllocateHeapNumber(Register result, } +#define SIMD128_HEAP_ALLOCATE_FUNCTIONS(V) \ + V(Float32x4, float32x4, FLOAT32x4) \ + V(Float64x2, float64x2, FLOAT64x2) \ + V(Int32x4, int32x4, INT32x4) + +#define DECLARE_SIMD_HEAP_ALLOCATE_FUNCTION(Type, type, TYPE) \ +void MacroAssembler::Allocate##Type(Register result, \ + Register scratch1, \ + Register scratch2, \ + Label* gc_required) { \ + /* Allocate SIMD128 object */ \ + Allocate(Type::kSize, result, scratch1, no_reg, gc_required, TAG_OBJECT);\ + /* Load the initial map and assign to new allocated object. */ \ + mov(scratch1, Operand(ebp, StandardFrameConstants::kContextOffset)); \ + mov(scratch1, \ + Operand(scratch1, \ + Context::SlotOffset(Context::GLOBAL_OBJECT_INDEX))); \ + mov(scratch1, \ + FieldOperand(scratch1, GlobalObject::kNativeContextOffset)); \ + mov(scratch1, \ + Operand(scratch1, \ + Context::SlotOffset(Context::TYPE##_FUNCTION_INDEX))); \ + LoadGlobalFunctionInitialMap(scratch1, scratch1); \ + mov(FieldOperand(result, JSObject::kMapOffset), scratch1); \ + /* Initialize properties and elements. 
*/ \ + mov(FieldOperand(result, JSObject::kPropertiesOffset), \ + Immediate(isolate()->factory()->empty_fixed_array())); \ + mov(FieldOperand(result, JSObject::kElementsOffset), \ + Immediate(isolate()->factory()->empty_fixed_array())); \ + /* Allocate FixedTypedArray object */ \ + Allocate(FixedTypedArrayBase::kDataOffset + k##Type##Size, \ + scratch1, scratch2, no_reg, gc_required, TAG_OBJECT); \ + \ + mov(FieldOperand(scratch1, FixedTypedArrayBase::kMapOffset), \ + Immediate(isolate()->factory()->fixed_##type##_array_map())); \ + mov(scratch2, Immediate(1)); \ + SmiTag(scratch2); \ + mov(FieldOperand(scratch1, FixedTypedArrayBase::kLengthOffset), \ + scratch2); \ + /* Assign TifxedTypedArray object to SIMD128 object */ \ + mov(FieldOperand(result, Type::kValueOffset), scratch1); \ +} + +SIMD128_HEAP_ALLOCATE_FUNCTIONS(DECLARE_SIMD_HEAP_ALLOCATE_FUNCTION) + + void MacroAssembler::AllocateTwoByteString(Register result, Register length, Register scratch1, @@ -3435,6 +3481,90 @@ void MacroAssembler::TruncatingDiv(Register dividend, int32_t divisor) { } +void MacroAssembler::absps(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } float_absolute_constant = + { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + andps(dst, + Operand(reinterpret_cast(&float_absolute_constant), + RelocInfo::NONE32)); +} + + +void MacroAssembler::abspd(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } double_absolute_constant = + { 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF }; + andps(dst, + Operand(reinterpret_cast(&double_absolute_constant), + RelocInfo::NONE32)); +} + + +void MacroAssembler::notps(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } float_not_constant = + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + xorps(dst, + Operand(reinterpret_cast(&float_not_constant), 
+ RelocInfo::NONE32)); +} + + +void MacroAssembler::negateps(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } float_negate_constant = + { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + xorps(dst, + Operand(reinterpret_cast(&float_negate_constant), + RelocInfo::NONE32)); +} + + +void MacroAssembler::negatepd(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } double_negate_constant = + { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; + xorpd(dst, + Operand(reinterpret_cast(&double_negate_constant), + RelocInfo::NONE32)); +} + + +void MacroAssembler::pnegd(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } int32_one_constant = { 0x1, 0x1, 0x1, 0x1 }; + notps(dst); + paddd(dst, + Operand(reinterpret_cast(&int32_one_constant), + RelocInfo::NONE32)); +} + + } } // namespace v8::internal #endif // V8_TARGET_ARCH_IA32 diff --git a/src/v8/src/ia32/macro-assembler-ia32.h b/src/v8/src/ia32/macro-assembler-ia32.h index 3b2051f..5230f02 100644 --- a/src/v8/src/ia32/macro-assembler-ia32.h +++ b/src/v8/src/ia32/macro-assembler-ia32.h @@ -650,6 +650,25 @@ class MacroAssembler: public Assembler { Label* gc_required, MutableMode mode = IMMUTABLE); + // Allocate a float32x4, float64x2 and int32x4 object in new space with + // undefined value. + // Returns tagged pointer in result register, or jumps to gc_required if new + // space is full. + void AllocateFloat32x4(Register result, + Register scratch1, + Register scratch2, + Label* gc_required); + + void AllocateFloat64x2(Register result, + Register scratch1, + Register scratch2, + Label* gc_required); + + void AllocateInt32x4(Register result, + Register scratch1, + Register scratch2, + Label* gc_required); + // Allocate a sequential string. All the header fields of the string object // are initialized. 
void AllocateTwoByteString(Register result, @@ -901,6 +920,15 @@ class MacroAssembler: public Assembler { inline bool AllowThisStubCall(CodeStub* stub); // --------------------------------------------------------------------------- + // SIMD macros. + void absps(XMMRegister dst); + void abspd(XMMRegister dst); + void negateps(XMMRegister dst); + void negatepd(XMMRegister dst); + void notps(XMMRegister dst); + void pnegd(XMMRegister dst); + + // --------------------------------------------------------------------------- // String utilities. // Generate code to do a lookup in the number string cache. If the number in diff --git a/src/v8/src/lithium-allocator-inl.h b/src/v8/src/lithium-allocator-inl.h index bafa00f..36e0bf1 100644 --- a/src/v8/src/lithium-allocator-inl.h +++ b/src/v8/src/lithium-allocator-inl.h @@ -42,7 +42,8 @@ LGap* LAllocator::GapAt(int index) { void LAllocator::SetLiveRangeAssignedRegister(LiveRange* range, int reg) { - if (range->Kind() == DOUBLE_REGISTERS) { + if (range->Kind() == DOUBLE_REGISTERS || + IsSIMD128RegisterKind(range->Kind())) { assigned_double_registers_->Add(reg); } else { DCHECK(range->Kind() == GENERAL_REGISTERS); diff --git a/src/v8/src/lithium-allocator.cc b/src/v8/src/lithium-allocator.cc index 8350c80..fcde247 100644 --- a/src/v8/src/lithium-allocator.cc +++ b/src/v8/src/lithium-allocator.cc @@ -205,6 +205,15 @@ LOperand* LiveRange::CreateAssignedOperand(Zone* zone) { case DOUBLE_REGISTERS: op = LDoubleRegister::Create(assigned_register(), zone); break; + case FLOAT32x4_REGISTERS: + op = LFloat32x4Register::Create(assigned_register(), zone); + break; + case FLOAT64x2_REGISTERS: + op = LFloat64x2Register::Create(assigned_register(), zone); + break; + case INT32x4_REGISTERS: + op = LInt32x4Register::Create(assigned_register(), zone); + break; default: UNREACHABLE(); } @@ -455,7 +464,7 @@ void LiveRange::ConvertOperands(Zone* zone) { if (use_pos->HasOperand()) { DCHECK(op->IsRegister() || op->IsDoubleRegister() || - 
!use_pos->RequiresRegister()); + op->IsSIMD128Register() || !use_pos->RequiresRegister()); use_pos->operand()->ConvertTo(op->kind(), op->index()); } use_pos = use_pos->next(); @@ -521,6 +530,7 @@ LAllocator::LAllocator(int num_values, HGraph* graph) active_live_ranges_(8, zone()), inactive_live_ranges_(8, zone()), reusable_slots_(8, zone()), + reusable_simd128_slots_(8, zone()), next_virtual_register_(num_values), first_artificial_register_(num_values), mode_(UNALLOCATED_REGISTERS), @@ -840,6 +850,21 @@ void LAllocator::MeetConstraintsBetween(LInstruction* first, double_artificial_registers_.Add( cur_input->virtual_register() - first_artificial_register_, zone()); + } else if (RequiredRegisterKind(input_copy->virtual_register()) == + FLOAT32x4_REGISTERS) { + float32x4_artificial_registers_.Add( + cur_input->virtual_register() - first_artificial_register_, + zone()); + } else if (RequiredRegisterKind(input_copy->virtual_register()) == + FLOAT64x2_REGISTERS) { + float64x2_artificial_registers_.Add( + cur_input->virtual_register() - first_artificial_register_, + zone()); + } else if (RequiredRegisterKind(input_copy->virtual_register()) == + INT32x4_REGISTERS) { + int32x4_artificial_registers_.Add( + cur_input->virtual_register() - first_artificial_register_, + zone()); } AddConstraintsGapMove(gap_index, input_copy, cur_input); @@ -1160,8 +1185,10 @@ void LAllocator::ResolveControlFlow(LiveRange* range, if (branch->HasPointerMap()) { if (HasTaggedValue(range->id())) { branch->pointer_map()->RecordPointer(cur_op, chunk()->zone()); - } else if (!cur_op->IsDoubleStackSlot() && - !cur_op->IsDoubleRegister()) { + } else if (!cur_op->IsDoubleStackSlot() && + !cur_op->IsDoubleRegister() && + !cur_op->IsSIMD128StackSlot() && + !cur_op->IsSIMD128Register()) { branch->pointer_map()->RemovePointer(cur_op); } } @@ -1487,6 +1514,9 @@ void LAllocator::AllocateRegisters() { if (live_ranges_[i] != NULL) { if (live_ranges_[i]->Kind() == mode_) { AddToUnhandledUnsorted(live_ranges_[i]); 
+ } else if (mode_ == DOUBLE_REGISTERS && + IsSIMD128RegisterKind(live_ranges_[i]->Kind())) { + AddToUnhandledUnsorted(live_ranges_[i]); } } } @@ -1494,6 +1524,7 @@ void LAllocator::AllocateRegisters() { DCHECK(UnhandledIsSorted()); DCHECK(reusable_slots_.is_empty()); + DCHECK(reusable_simd128_slots_.is_empty()); DCHECK(active_live_ranges_.is_empty()); DCHECK(inactive_live_ranges_.is_empty()); @@ -1585,6 +1616,7 @@ void LAllocator::AllocateRegisters() { } reusable_slots_.Rewind(0); + reusable_simd128_slots_.Rewind(0); active_live_ranges_.Rewind(0); inactive_live_ranges_.Rewind(0); } @@ -1621,10 +1653,25 @@ RegisterKind LAllocator::RequiredRegisterKind(int virtual_register) const { HValue* value = graph_->LookupValue(virtual_register); if (value != NULL && value->representation().IsDouble()) { return DOUBLE_REGISTERS; + } else if (value != NULL && (value->representation().IsFloat32x4())) { + return FLOAT32x4_REGISTERS; + } else if (value != NULL && (value->representation().IsFloat64x2())) { + return FLOAT64x2_REGISTERS; + } else if (value != NULL && (value->representation().IsInt32x4())) { + return INT32x4_REGISTERS; } } else if (double_artificial_registers_.Contains( virtual_register - first_artificial_register_)) { return DOUBLE_REGISTERS; + } else if (float32x4_artificial_registers_.Contains( + virtual_register - first_artificial_register_)) { + return FLOAT32x4_REGISTERS; + } else if (float64x2_artificial_registers_.Contains( + virtual_register - first_artificial_register_)) { + return FLOAT64x2_REGISTERS; + } else if (int32x4_artificial_registers_.Contains( + virtual_register - first_artificial_register_)) { + return INT32x4_REGISTERS; } return GENERAL_REGISTERS; @@ -1707,19 +1754,26 @@ void LAllocator::FreeSpillSlot(LiveRange* range) { int index = range->TopLevel()->GetSpillOperand()->index(); if (index >= 0) { - reusable_slots_.Add(range, zone()); + if (IsSIMD128RegisterKind(range->Kind())) { + reusable_simd128_slots_.Add(range, zone()); + } else { + 
reusable_slots_.Add(range, zone()); + } } } LOperand* LAllocator::TryReuseSpillSlot(LiveRange* range) { - if (reusable_slots_.is_empty()) return NULL; - if (reusable_slots_.first()->End().Value() > + ZoneList* reusable_slots = IsSIMD128RegisterKind(range->Kind()) + ? &reusable_simd128_slots_ + : &reusable_slots_; + if (reusable_slots->is_empty()) return NULL; + if (reusable_slots->first()->End().Value() > range->TopLevel()->Start().Value()) { return NULL; } - LOperand* result = reusable_slots_.first()->TopLevel()->GetSpillOperand(); - reusable_slots_.Remove(0); + LOperand* result = reusable_slots->first()->TopLevel()->GetSpillOperand(); + reusable_slots->Remove(0); return result; } @@ -1786,7 +1840,8 @@ bool LAllocator::TryAllocateFreeReg(LiveRange* current) { } LOperand* hint = current->FirstHint(); - if (hint != NULL && (hint->IsRegister() || hint->IsDoubleRegister())) { + if (hint != NULL && (hint->IsRegister() || hint->IsDoubleRegister() || + hint->IsSIMD128Register())) { int register_index = hint->index(); TraceAlloc( "Found reg hint %s (free until [%d) for live range %d (end %d[).\n", @@ -2137,7 +2192,21 @@ void LAllocator::Spill(LiveRange* range) { if (!first->HasAllocatedSpillOperand()) { LOperand* op = TryReuseSpillSlot(range); - if (op == NULL) op = chunk_->GetNextSpillSlot(range->Kind()); + if (op == NULL) { + op = chunk_->GetNextSpillSlot(range->Kind()); + } else if (range->Kind() == FLOAT32x4_REGISTERS && + op->kind() != LOperand::FLOAT32x4_STACK_SLOT) { + // Convert to Float32x4StackSlot. + op = LFloat32x4StackSlot::Create(op->index(), zone()); + } else if (range->Kind() == FLOAT64x2_REGISTERS && + op->kind() != LOperand::FLOAT64x2_STACK_SLOT) { + // Convert to Float64x2StackSlot. + op = LFloat64x2StackSlot::Create(op->index(), zone()); + } else if (range->Kind() == INT32x4_REGISTERS && + op->kind() != LOperand::INT32x4_STACK_SLOT) { + // Convert to Int32x4StackSlot. 
+ op = LInt32x4StackSlot::Create(op->index(), zone()); + } first->SetSpillOperand(op); } range->MakeSpilled(chunk()->zone()); diff --git a/src/v8/src/lithium-allocator.h b/src/v8/src/lithium-allocator.h index f63077e..93cc546 100644 --- a/src/v8/src/lithium-allocator.h +++ b/src/v8/src/lithium-allocator.h @@ -117,6 +117,12 @@ class LifetimePosition { }; +inline bool IsSIMD128RegisterKind(RegisterKind kind) { + return kind == FLOAT32x4_REGISTERS || kind == FLOAT64x2_REGISTERS || + kind == INT32x4_REGISTERS; +} + + // Representation of the non-empty interval [start,end[. class UseInterval: public ZoneObject { public: @@ -530,11 +536,16 @@ class LAllocator BASE_EMBEDDED { ZoneList active_live_ranges_; ZoneList inactive_live_ranges_; ZoneList reusable_slots_; + // Slots reusable for float32x4, float64x2 and int32x4 register spilling. + ZoneList reusable_simd128_slots_; // Next virtual register number to be assigned to temporaries. int next_virtual_register_; int first_artificial_register_; GrowableBitVector double_artificial_registers_; + GrowableBitVector float32x4_artificial_registers_; + GrowableBitVector float64x2_artificial_registers_; + GrowableBitVector int32x4_artificial_registers_; RegisterKind mode_; int num_registers_; diff --git a/src/v8/src/lithium.cc b/src/v8/src/lithium.cc index a8d4d22..5a42975 100644 --- a/src/v8/src/lithium.cc +++ b/src/v8/src/lithium.cc @@ -112,6 +112,15 @@ void LOperand::PrintTo(StringStream* stream) { } break; } + case FLOAT32x4_STACK_SLOT: + stream->Add("[float32x4_stack:%d]", index()); + break; + case FLOAT64x2_STACK_SLOT: + stream->Add("[float64x2_stack:%d]", index()); + break; + case INT32x4_STACK_SLOT: + stream->Add("[int32x4_stack:%d]", index()); + break; case DOUBLE_REGISTER: { int reg_index = index(); if (reg_index < 0 || @@ -123,6 +132,18 @@ void LOperand::PrintTo(StringStream* stream) { } break; } + case FLOAT32x4_REGISTER: + stream->Add("[%s|R]", + SIMD128Register::AllocationIndexToString(index())); + break; + case 
FLOAT64x2_REGISTER: + stream->Add("[%s|R]", + SIMD128Register::AllocationIndexToString(index())); + break; + case INT32x4_REGISTER: + stream->Add("[%s|R]", + SIMD128Register::AllocationIndexToString(index())); + break; } } @@ -213,7 +234,10 @@ void LEnvironment::PrintTo(StringStream* stream) { void LPointerMap::RecordPointer(LOperand* op, Zone* zone) { // Do not record arguments as pointers. if (op->IsStackSlot() && op->index() < 0) return; - DCHECK(!op->IsDoubleRegister() && !op->IsDoubleStackSlot()); + DCHECK(!op->IsDoubleRegister() && !op->IsDoubleStackSlot() && + !op->IsFloat32x4Register() && !op->IsFloat32x4StackSlot() && + !op->IsFloat64x2Register() && !op->IsFloat64x2StackSlot() && + !op->IsInt32x4Register() && !op->IsInt32x4StackSlot()); pointer_operands_.Add(op, zone); } @@ -221,7 +245,10 @@ void LPointerMap::RecordPointer(LOperand* op, Zone* zone) { void LPointerMap::RemovePointer(LOperand* op) { // Do not record arguments as pointers. if (op->IsStackSlot() && op->index() < 0) return; - DCHECK(!op->IsDoubleRegister() && !op->IsDoubleStackSlot()); + DCHECK(!op->IsDoubleRegister() && !op->IsDoubleStackSlot() && + !op->IsFloat32x4Register() && !op->IsFloat32x4StackSlot() && + !op->IsFloat64x2Register() && !op->IsFloat64x2StackSlot() && + !op->IsInt32x4Register() && !op->IsInt32x4StackSlot()); for (int i = 0; i < pointer_operands_.length(); ++i) { if (pointer_operands_[i]->Equals(op)) { pointer_operands_.Remove(i); @@ -234,7 +261,10 @@ void LPointerMap::RemovePointer(LOperand* op) { void LPointerMap::RecordUntagged(LOperand* op, Zone* zone) { // Do not record arguments as pointers. 
if (op->IsStackSlot() && op->index() < 0) return; - DCHECK(!op->IsDoubleRegister() && !op->IsDoubleStackSlot()); + DCHECK(!op->IsDoubleRegister() && !op->IsDoubleStackSlot() && + !op->IsFloat32x4Register() && !op->IsFloat32x4StackSlot() && + !op->IsFloat64x2Register() && !op->IsFloat64x2StackSlot() && + !op->IsInt32x4Register() && !op->IsInt32x4StackSlot()); untagged_operands_.Add(op, zone); } diff --git a/src/v8/src/lithium.h b/src/v8/src/lithium.h index 032c1d4..2dec72d 100644 --- a/src/v8/src/lithium.h +++ b/src/v8/src/lithium.h @@ -15,12 +15,18 @@ namespace v8 { namespace internal { -#define LITHIUM_OPERAND_LIST(V) \ - V(ConstantOperand, CONSTANT_OPERAND, 128) \ - V(StackSlot, STACK_SLOT, 128) \ - V(DoubleStackSlot, DOUBLE_STACK_SLOT, 128) \ - V(Register, REGISTER, 16) \ - V(DoubleRegister, DOUBLE_REGISTER, 16) +#define LITHIUM_OPERAND_LIST(V) \ + V(ConstantOperand, CONSTANT_OPERAND, 128) \ + V(StackSlot, STACK_SLOT, 128) \ + V(DoubleStackSlot, DOUBLE_STACK_SLOT, 128) \ + V(Float32x4StackSlot, FLOAT32x4_STACK_SLOT, 128) \ + V(Float64x2StackSlot, FLOAT64x2_STACK_SLOT, 128) \ + V(Int32x4StackSlot, INT32x4_STACK_SLOT, 128) \ + V(Register, REGISTER, 16) \ + V(DoubleRegister, DOUBLE_REGISTER, 16) \ + V(Float32x4Register, FLOAT32x4_REGISTER, 16) \ + V(Float64x2Register, FLOAT64x2_REGISTER, 16) \ + V(Int32x4Register, INT32x4_REGISTER, 16) class LOperand : public ZoneObject { public: @@ -30,8 +36,14 @@ class LOperand : public ZoneObject { CONSTANT_OPERAND, STACK_SLOT, DOUBLE_STACK_SLOT, + FLOAT32x4_STACK_SLOT, + FLOAT64x2_STACK_SLOT, + INT32x4_STACK_SLOT, REGISTER, - DOUBLE_REGISTER + DOUBLE_REGISTER, + FLOAT32x4_REGISTER, + FLOAT64x2_REGISTER, + INT32x4_REGISTER }; LOperand() : value_(KindField::encode(INVALID)) { } @@ -44,7 +56,19 @@ class LOperand : public ZoneObject { LITHIUM_OPERAND_PREDICATE(Unallocated, UNALLOCATED, 0) LITHIUM_OPERAND_PREDICATE(Ignored, INVALID, 0) #undef LITHIUM_OPERAND_PREDICATE - bool Equals(LOperand* other) const { return value_ == 
other->value_; } + bool IsSIMD128Register() const { + return kind() == FLOAT32x4_REGISTER || kind() == FLOAT64x2_REGISTER || + kind() == INT32x4_REGISTER; + } + bool IsSIMD128StackSlot() const { + return kind() == FLOAT32x4_STACK_SLOT || kind() == FLOAT64x2_STACK_SLOT || + kind() == INT32x4_STACK_SLOT; + } + bool Equals(LOperand* other) const { + return value_ == other->value_ || (index() == other->index() && + ((IsSIMD128Register() && other->IsSIMD128Register()) || + (IsSIMD128StackSlot() && other->IsSIMD128StackSlot()))); + } void PrintTo(StringStream* stream); void ConvertTo(Kind kind, int index) { @@ -59,7 +83,7 @@ class LOperand : public ZoneObject { static void TearDownCaches(); protected: - static const int kKindFieldWidth = 3; + static const int kKindFieldWidth = 4; class KindField : public BitField { }; LOperand(Kind kind, int index) { ConvertTo(kind, index); } @@ -144,32 +168,32 @@ class LUnallocated : public LOperand { // because it accommodates a larger pay-load. // // For FIXED_SLOT policy: - // +------------------------------------------+ - // | slot_index | vreg | 0 | 001 | - // +------------------------------------------+ + // +-------------------------------------------+ + // | slot_index | vreg | 0 | 0001 | + // +-------------------------------------------+ // // For all other (extended) policies: - // +------------------------------------------+ - // | reg_index | L | PPP | vreg | 1 | 001 | L ... Lifetime - // +------------------------------------------+ P ... Policy + // +-------------------------------------------+ + // | reg_index | L | PPP | vreg | 1 | 0001 | L ... Lifetime + // +-------------------------------------------+ P ... Policy // // The slot index is a signed value which requires us to decode it manually // instead of using the BitField utility class. // The superclass has a KindField. - STATIC_ASSERT(kKindFieldWidth == 3); + STATIC_ASSERT(kKindFieldWidth == 4); // BitFields for all unallocated operands. 
- class BasicPolicyField : public BitField {}; - class VirtualRegisterField : public BitField {}; + class BasicPolicyField : public BitField {}; + class VirtualRegisterField : public BitField {}; // BitFields specific to BasicPolicy::FIXED_SLOT. - class FixedSlotIndexField : public BitField {}; + class FixedSlotIndexField : public BitField {}; // BitFields specific to BasicPolicy::EXTENDED_POLICY. - class ExtendedPolicyField : public BitField {}; - class LifetimeField : public BitField {}; - class FixedRegisterField : public BitField {}; + class ExtendedPolicyField : public BitField {}; + class LifetimeField : public BitField {}; + class FixedRegisterField : public BitField {}; static const int kMaxVirtualRegisters = VirtualRegisterField::kMax + 1; static const int kFixedSlotIndexWidth = FixedSlotIndexField::kSize; @@ -748,11 +772,13 @@ class LPhase : public CompilationPhase { // A register-allocator view of a Lithium instruction. It contains the id of // the output operand and a list of input operand uses. - enum RegisterKind { UNALLOCATED_REGISTERS, GENERAL_REGISTERS, - DOUBLE_REGISTERS + DOUBLE_REGISTERS, + FLOAT32x4_REGISTERS, + FLOAT64x2_REGISTERS, + INT32x4_REGISTERS }; // Iterator for non-null temp operands. 
diff --git a/src/v8/src/macros.py b/src/v8/src/macros.py index 131df87..2a90f2f 100644 --- a/src/v8/src/macros.py +++ b/src/v8/src/macros.py @@ -115,6 +115,9 @@ macro IS_MAP(arg) = (%_ClassOf(arg) === 'Map'); macro IS_WEAKMAP(arg) = (%_ClassOf(arg) === 'WeakMap'); macro IS_WEAKSET(arg) = (%_ClassOf(arg) === 'WeakSet'); macro IS_DATE(arg) = (%_ClassOf(arg) === 'Date'); +macro IsFloat32x4(arg) = (%_ClassOf(arg) === 'float32x4'); +macro IsFloat64x2(arg) = (%_ClassOf(arg) === 'float64x2'); +macro IsInt32x4(arg) = (%_ClassOf(arg) === 'int32x4'); macro IS_NUMBER_WRAPPER(arg) = (%_ClassOf(arg) === 'Number'); macro IS_STRING_WRAPPER(arg) = (%_ClassOf(arg) === 'String'); macro IS_SYMBOL_WRAPPER(arg) = (%_ClassOf(arg) === 'Symbol'); diff --git a/src/v8/src/objects-debug.cc b/src/v8/src/objects-debug.cc index 4834ef2..e8e5ed9 100644 --- a/src/v8/src/objects-debug.cc +++ b/src/v8/src/objects-debug.cc @@ -58,6 +58,15 @@ void HeapObject::HeapObjectVerify() { case MUTABLE_HEAP_NUMBER_TYPE: HeapNumber::cast(this)->HeapNumberVerify(); break; + case FLOAT32x4_TYPE: + Float32x4::cast(this)->Float32x4Verify(); + break; + case FLOAT64x2_TYPE: + Float64x2::cast(this)->Float64x2Verify(); + break; + case INT32x4_TYPE: + Int32x4::cast(this)->Int32x4Verify(); + break; case FIXED_ARRAY_TYPE: FixedArray::cast(this)->FixedArrayVerify(); break; @@ -211,6 +220,21 @@ void HeapNumber::HeapNumberVerify() { } +void Float32x4::Float32x4Verify() { + CHECK(IsFloat32x4()); +} + + +void Float64x2::Float64x2Verify() { + CHECK(IsFloat64x2()); +} + + +void Int32x4::Int32x4Verify() { + CHECK(IsInt32x4()); +} + + void ByteArray::ByteArrayVerify() { CHECK(IsByteArray()); } diff --git a/src/v8/src/objects-inl.h b/src/v8/src/objects-inl.h index 432d613..fa0f114 100644 --- a/src/v8/src/objects-inl.h +++ b/src/v8/src/objects-inl.h @@ -832,6 +832,9 @@ TYPE_CHECKER(JSArrayBuffer, JS_ARRAY_BUFFER_TYPE) TYPE_CHECKER(JSTypedArray, JS_TYPED_ARRAY_TYPE) TYPE_CHECKER(JSDataView, JS_DATA_VIEW_TYPE) +TYPE_CHECKER(Float32x4, 
FLOAT32x4_TYPE) +TYPE_CHECKER(Float64x2, FLOAT64x2_TYPE) +TYPE_CHECKER(Int32x4, INT32x4_TYPE) bool Object::IsJSArrayBufferView() const { return IsJSDataView() || IsJSTypedArray(); @@ -1275,6 +1278,29 @@ Maybe JSProxy::HasElementWithHandler(Handle proxy, write_double_field(p, offset, value) #endif // V8_TARGET_ARCH_MIPS +#define READ_FLOAT32x4_FIELD(p, offset) \ + (*reinterpret_cast(FIELD_ADDR(p, offset))) + +#define WRITE_FLOAT32x4_FIELD(p, offset, value) \ + (*reinterpret_cast(FIELD_ADDR(p, offset)) = value) + +#define READ_FLOAT64x2_FIELD(p, offset) \ + (*reinterpret_cast(FIELD_ADDR(p, offset))) + +#define WRITE_FLOAT64x2_FIELD(p, offset, value) \ + (*reinterpret_cast(FIELD_ADDR(p, offset)) = value) + +#define READ_INT32x4_FIELD(p, offset) \ + (*reinterpret_cast(FIELD_ADDR(p, offset))) + +#define WRITE_INT32x4_FIELD(p, offset, value) \ + (*reinterpret_cast(FIELD_ADDR(p, offset)) = value) + +#define READ_FLOAT_FIELD(p, offset) \ + (*reinterpret_cast(FIELD_ADDR(p, offset))) + +#define WRITE_FLOAT_FIELD(p, offset, value) \ + (*reinterpret_cast(FIELD_ADDR(p, offset)) = value) #define READ_INT_FIELD(p, offset) \ (*reinterpret_cast(FIELD_ADDR_CONST(p, offset))) @@ -1551,6 +1577,88 @@ int HeapNumber::get_sign() { } +ACCESSORS(Float32x4, value, Object, kValueOffset) +ACCESSORS(Float64x2, value, Object, kValueOffset) +ACCESSORS(Int32x4, value, Object, kValueOffset) + + +const char* Float32x4::Name() { + return "float32x4"; +} + + +int Float32x4::kRuntimeAllocatorId() { + return Runtime::kAllocateFloat32x4; +} + + +float Float32x4::getAt(int index) { + DCHECK(index >= 0 && index < kLanes); + return get().storage[index]; +} + + +float32x4_value_t Float32x4::get() { + return FixedFloat32x4Array::cast(value())->get_scalar(0); +} + + +void Float32x4::set(float32x4_value_t f32x4) { + FixedFloat32x4Array::cast(value())->set(0, f32x4); +} + + +const char* Float64x2::Name() { + return "float64x2"; +} + + +int Float64x2::kRuntimeAllocatorId() { + return Runtime::kAllocateFloat64x2; 
+} + + +double Float64x2::getAt(int index) { + DCHECK(index >= 0 && index < kLanes); + return get().storage[index]; +} + +float64x2_value_t Float64x2::get() { + return FixedFloat64x2Array::cast(value())->get_scalar(0); +} + + +void Float64x2::set(float64x2_value_t f64x2) { + FixedFloat64x2Array::cast(value())->set(0, f64x2); +} + + +const char* Int32x4::Name() { + return "int32x4"; +} + + +int Int32x4::kRuntimeAllocatorId() { + return Runtime::kAllocateInt32x4; +} + + +int32_t Int32x4::getAt(int index) { + DCHECK(index >= 0 && index < kLanes); + return get().storage[index];; +} + + +int32x4_value_t Int32x4::get() { + return FixedInt32x4Array::cast(value())->get_scalar(0); +} + + +void Int32x4::set(int32x4_value_t i32x4) { + FixedInt32x4Array::cast(value())->set(0, i32x4); +} + + ACCESSORS(JSObject, properties, FixedArray, kPropertiesOffset) @@ -1965,6 +2073,12 @@ int JSObject::GetHeaderSize() { return JSTypedArray::kSize; case JS_DATA_VIEW_TYPE: return JSDataView::kSize; + case FLOAT32x4_TYPE: + return Float32x4::kSize; + case FLOAT64x2_TYPE: + return Float64x2::kSize; + case INT32x4_TYPE: + return Int32x4::kSize; case JS_SET_TYPE: return JSSet::kSize; case JS_MAP_TYPE: @@ -3213,9 +3327,12 @@ CAST_ACCESSOR(DescriptorArray) CAST_ACCESSOR(ExternalArray) CAST_ACCESSOR(ExternalAsciiString) CAST_ACCESSOR(ExternalFloat32Array) +CAST_ACCESSOR(ExternalFloat32x4Array) CAST_ACCESSOR(ExternalFloat64Array) +CAST_ACCESSOR(ExternalFloat64x2Array) CAST_ACCESSOR(ExternalInt16Array) CAST_ACCESSOR(ExternalInt32Array) +CAST_ACCESSOR(ExternalInt32x4Array) CAST_ACCESSOR(ExternalInt8Array) CAST_ACCESSOR(ExternalString) CAST_ACCESSOR(ExternalTwoByteString) @@ -3231,6 +3348,9 @@ CAST_ACCESSOR(Foreign) CAST_ACCESSOR(FreeSpace) CAST_ACCESSOR(GlobalObject) CAST_ACCESSOR(HeapObject) +CAST_ACCESSOR(Float32x4) +CAST_ACCESSOR(Float64x2) +CAST_ACCESSOR(Int32x4) CAST_ACCESSOR(JSArray) CAST_ACCESSOR(JSArrayBuffer) CAST_ACCESSOR(JSArrayBufferView) @@ -4022,6 +4142,89 @@ void 
ExternalFloat32Array::set(int index, float value) { } +float32x4_value_t ExternalFloat32x4Array::get_scalar(int index) { + DCHECK((index >= 0) && (index < this->length())); + float* ptr = static_cast(external_pointer()); + float32x4_value_t value; + value.storage[0] = ptr[index * 4 + 0]; + value.storage[1] = ptr[index * 4 + 1]; + value.storage[2] = ptr[index * 4 + 2]; + value.storage[3] = ptr[index * 4 + 3]; + return value; +} + + +Handle ExternalFloat32x4Array::get(Handle array, + int index) { + float32x4_value_t value = array->get_scalar(index); + return array->GetIsolate()->factory()->NewFloat32x4(value); +} + + +void ExternalFloat32x4Array::set(int index, const float32x4_value_t& value) { + DCHECK((index >= 0) && (index < this->length())); + float* ptr = static_cast(external_pointer()); + ptr[index * 4 + 0] = value.storage[0]; + ptr[index * 4 + 1] = value.storage[1]; + ptr[index * 4 + 2] = value.storage[2]; + ptr[index * 4 + 3] = value.storage[3]; +} + + +float64x2_value_t ExternalFloat64x2Array::get_scalar(int index) { + DCHECK((index >= 0) && (index < this->length())); + double* ptr = static_cast(external_pointer()); + float64x2_value_t value; + value.storage[0] = ptr[index * 2 + 0]; + value.storage[1] = ptr[index * 2 + 1]; + return value; +} + + +Handle ExternalFloat64x2Array::get(Handle array, + int index) { + float64x2_value_t value = array->get_scalar(index); + return array->GetIsolate()->factory()->NewFloat64x2(value); +} + + +void ExternalFloat64x2Array::set(int index, const float64x2_value_t& value) { + DCHECK((index >= 0) && (index < this->length())); + double* ptr = static_cast(external_pointer()); + ptr[index * 2 + 0] = value.storage[0]; + ptr[index * 2 + 1] = value.storage[1]; +} + + +int32x4_value_t ExternalInt32x4Array::get_scalar(int index) { + DCHECK((index >= 0) && (index < this->length())); + int32_t* ptr = static_cast(external_pointer()); + int32x4_value_t value; + value.storage[0] = ptr[index * 4 + 0]; + value.storage[1] = ptr[index * 4 + 
1]; + value.storage[2] = ptr[index * 4 + 2]; + value.storage[3] = ptr[index * 4 + 3]; + return value; +} + + +Handle ExternalInt32x4Array::get(Handle array, + int index) { + int32x4_value_t value = array->get_scalar(index); + return array->GetIsolate()->factory()->NewInt32x4(value); +} + + +void ExternalInt32x4Array::set(int index, const int32x4_value_t& value) { + DCHECK((index >= 0) && (index < this->length())); + int32_t* ptr = static_cast(external_pointer()); + ptr[index * 4 + 0] = value.storage[0]; + ptr[index * 4 + 1] = value.storage[1]; + ptr[index * 4 + 2] = value.storage[2]; + ptr[index * 4 + 3] = value.storage[3]; +} + + double ExternalFloat64Array::get_scalar(int index) { DCHECK((index >= 0) && (index < this->length())); double* ptr = static_cast(external_pointer()); @@ -4215,6 +4418,72 @@ Handle FixedTypedArray::SetValue( return Traits::ToHandle(array->GetIsolate(), cast_value); } +template<> inline +Handle FixedTypedArray::SetValue( + Handle > array, + uint32_t index, Handle value) { + float32x4_value_t cast_value; + cast_value.storage[0] = static_cast(base::OS::nan_value()); + cast_value.storage[1] = static_cast(base::OS::nan_value()); + cast_value.storage[2] = static_cast(base::OS::nan_value()); + cast_value.storage[3] = static_cast(base::OS::nan_value()); + if (index < static_cast(array->length())) { + if (value->IsFloat32x4()) { + cast_value = Handle::cast(value)->get(); + } else { + // Clamp undefined to NaN (default). All other types have been + // converted to a number type further up in the call chain. 
+ DCHECK(value->IsUndefined()); + } + array->set(index, cast_value); + } + return Float32x4ArrayTraits::ToHandle(array->GetIsolate(), cast_value); +} + + +template<> inline +Handle FixedTypedArray::SetValue( + Handle > array, + uint32_t index, Handle value) { + float64x2_value_t cast_value; + cast_value.storage[0] = base::OS::nan_value(); + cast_value.storage[1] = base::OS::nan_value(); + if (index < static_cast(array->length())) { + if (value->IsFloat64x2()) { + cast_value = Handle::cast(value)->get(); + } else { + // Clamp undefined to NaN (default). All other types have been + // converted to a number type further up in the call chain. + DCHECK(value->IsUndefined()); + } + array->set(index, cast_value); + } + return Float64x2ArrayTraits::ToHandle(array->GetIsolate(), cast_value); +} + + +template<> inline +Handle FixedTypedArray::SetValue( + Handle > array, + uint32_t index, Handle value) { + int32x4_value_t cast_value; + cast_value.storage[0] = 0; + cast_value.storage[1] = 0; + cast_value.storage[2] = 0; + cast_value.storage[3] = 0; + if (index < static_cast(array->length())) { + if (value->IsInt32x4()) { + cast_value = Handle::cast(value)->get(); + } else { + // Clamp undefined to zero (default). All other types have been + // converted to a number type further up in the call chain. 
+ DCHECK(value->IsUndefined()); + } + array->set(index, cast_value); + } + return Int32x4ArrayTraits::ToHandle(array->GetIsolate(), cast_value); +} + Handle Uint8ArrayTraits::ToHandle(Isolate* isolate, uint8_t scalar) { return handle(Smi::FromInt(scalar), isolate); @@ -4257,6 +4526,24 @@ Handle Float32ArrayTraits::ToHandle(Isolate* isolate, float scalar) { } +Handle Int32x4ArrayTraits::ToHandle( + Isolate* isolate, int32x4_value_t scalar) { + return isolate->factory()->NewInt32x4(scalar); +} + + +Handle Float32x4ArrayTraits::ToHandle( + Isolate* isolate, float32x4_value_t scalar) { + return isolate->factory()->NewFloat32x4(scalar); +} + + +Handle Float64x2ArrayTraits::ToHandle( + Isolate* isolate, float64x2_value_t scalar) { + return isolate->factory()->NewFloat64x2(scalar); +} + + Handle Float64ArrayTraits::ToHandle(Isolate* isolate, double scalar) { return isolate->factory()->NewNumber(scalar); } diff --git a/src/v8/src/objects-printer.cc b/src/v8/src/objects-printer.cc index 5acd5e5..e19f538 100644 --- a/src/v8/src/objects-printer.cc +++ b/src/v8/src/objects-printer.cc @@ -60,6 +60,21 @@ void HeapObject::HeapObjectPrint(OStream& os) { // NOLINT HeapNumber::cast(this)->HeapNumberPrint(os); os << ">"; break; + case FLOAT32x4_TYPE: + os << "Float32x4Print(os); + os << '>'; + break; + case FLOAT64x2_TYPE: + os << "Float64x2Print(os); + os << '>'; + break; + case INT32x4_TYPE: + os << "Int32x4Print(os); + os << '>'; + break; case FIXED_DOUBLE_ARRAY_TYPE: FixedDoubleArray::cast(this)->FixedDoubleArrayPrint(os); break; @@ -263,6 +278,41 @@ static void DoPrintElements(OStream& os, Object* object) { // NOLINT } +template +static void DoPrintFloat32x4Elements(OStream& os, Object* object) { + T* p = T::cast(object); + for (int i = 0; i < p->length(); i++) { + float32x4_value_t value = p->get_scalar(i); + os << " " << i << ": (" << value.storage[0] << ", " + << value.storage[1] << ", " << value.storage[2] << ", " + << value.storage[3] << ")\n"; + } +} + + +template +static 
void DoPrintFloat64x2Elements(OStream& os, Object* object) { + T* p = T::cast(object); + for (int i = 0; i < p->length(); i++) { + float64x2_value_t value = p->get_scalar(i); + os << " " << i << ": (" << value.storage[0] << ", " + << value.storage[1] << ")\n"; + } +} + + +template +static void DoPrintInt32x4Elements(OStream& os, Object* object) { + T* p = T::cast(object); + for (int i = 0; i < p->length(); i++) { + int32x4_value_t value = p->get_scalar(i); + os << " " << i << ": (" << value.storage[0] << ", " + << value.storage[1] << ", " << value.storage[2] << ", " + << value.storage[3] << ")\n"; + } +} + + void JSObject::PrintElements(OStream& os) { // NOLINT // Don't call GetElementsKind, its validation code can cause the printer to // fail when debugging. @@ -303,6 +353,24 @@ void JSObject::PrintElements(OStream& os) { // NOLINT break; \ } +#define PRINT_FLOAT32x4_ELEMENTS(Kind, Type) \ + case Kind: { \ + DoPrintFloat32x4Elements(os, elements()); \ + break; \ + } + +#define PRINT_FLOAT64x2_ELEMENTS(Kind, Type) \ + case Kind: { \ + DoPrintFloat64x2Elements(os, elements()); \ + break; \ + } + +#define PRINT_INT32x4_ELEMENTS(Kind, Type) \ + case Kind: { \ + DoPrintInt32x4Elements(os, elements()); \ + break; \ + } + PRINT_ELEMENTS(EXTERNAL_UINT8_CLAMPED_ELEMENTS, ExternalUint8ClampedArray) PRINT_ELEMENTS(EXTERNAL_INT8_ELEMENTS, ExternalInt8Array) PRINT_ELEMENTS(EXTERNAL_UINT8_ELEMENTS, @@ -315,6 +383,11 @@ void JSObject::PrintElements(OStream& os) { // NOLINT ExternalUint32Array) PRINT_ELEMENTS(EXTERNAL_FLOAT32_ELEMENTS, ExternalFloat32Array) PRINT_ELEMENTS(EXTERNAL_FLOAT64_ELEMENTS, ExternalFloat64Array) + PRINT_FLOAT32x4_ELEMENTS(EXTERNAL_FLOAT32x4_ELEMENTS, + ExternalFloat32x4Array) + PRINT_FLOAT64x2_ELEMENTS(EXTERNAL_FLOAT64x2_ELEMENTS, + ExternalFloat64x2Array) + PRINT_INT32x4_ELEMENTS(EXTERNAL_INT32x4_ELEMENTS, ExternalInt32x4Array) PRINT_ELEMENTS(UINT8_ELEMENTS, FixedUint8Array) PRINT_ELEMENTS(UINT8_CLAMPED_ELEMENTS, FixedUint8ClampedArray) @@ -325,6 +398,9 
@@ void JSObject::PrintElements(OStream& os) { // NOLINT PRINT_ELEMENTS(INT32_ELEMENTS, FixedInt32Array) PRINT_ELEMENTS(FLOAT32_ELEMENTS, FixedFloat32Array) PRINT_ELEMENTS(FLOAT64_ELEMENTS, FixedFloat64Array) + PRINT_FLOAT32x4_ELEMENTS(FLOAT32x4_ELEMENTS, FixedFloat32x4Array) + PRINT_FLOAT64x2_ELEMENTS(FLOAT64x2_ELEMENTS, FixedFloat64x2Array) + PRINT_INT32x4_ELEMENTS(INT32x4_ELEMENTS, FixedInt32x4Array) #undef PRINT_ELEMENTS diff --git a/src/v8/src/objects.cc b/src/v8/src/objects.cc index b668916..889a6f2 100644 --- a/src/v8/src/objects.cc +++ b/src/v8/src/objects.cc @@ -1558,6 +1558,21 @@ void HeapObject::HeapObjectShortPrint(OStream& os) { // NOLINT os << '>'; break; } + case FLOAT32x4_TYPE: + os << "Float32x4Print(os); + os << '>'; + break; + case FLOAT64x2_TYPE: + os << "Float64x2Print(os); + os << '>'; + break; + case INT32x4_TYPE: + os << "Int32x4Print(os); + os << '>'; + break; case JS_PROXY_TYPE: os << ""; break; @@ -1656,6 +1671,9 @@ void HeapObject::IterateBody(InstanceType type, int object_size, case JS_GLOBAL_OBJECT_TYPE: case JS_BUILTINS_OBJECT_TYPE: case JS_MESSAGE_OBJECT_TYPE: + case FLOAT32x4_TYPE: + case FLOAT64x2_TYPE: + case INT32x4_TYPE: JSObject::BodyDescriptor::IterateBody(this, object_size, v); break; case JS_FUNCTION_TYPE: @@ -1737,6 +1755,45 @@ void HeapNumber::HeapNumberPrint(OStream& os) { // NOLINT } +void Float32x4::Float32x4Print(OStream& os) { + // The Windows version of vsnprintf can allocate when printing a %g string + // into a buffer that may not be big enough. We don't want random memory + // allocation when producing post-crash stack traces, so we print into a + // buffer that is plenty big enough for any floating point number, then + // print that using vsnprintf (which may truncate but never allocate if + // there is no more space in the buffer). 
+ EmbeddedVector buffer; + SNPrintF(buffer, "%.16g %.16g %.16g %.16g", x(), y(), z(), w()); + os << buffer.start(); +} + + +void Int32x4::Int32x4Print(OStream& os) { + // The Windows version of vsnprintf can allocate when printing a %g string + // into a buffer that may not be big enough. We don't want random memory + // allocation when producing post-crash stack traces, so we print into a + // buffer that is plenty big enough for any floating point number, then + // print that using vsnprintf (which may truncate but never allocate if + // there is no more space in the buffer). + EmbeddedVector buffer; + SNPrintF(buffer, "%u %u %u %u", x(), y(), z(), w()); + os << buffer.start(); +} + + +void Float64x2::Float64x2Print(OStream& os) { + // The Windows version of vsnprintf can allocate when printing a %g string + // into a buffer that may not be big enough. We don't want random memory + // allocation when producing post-crash stack traces, so we print into a + // buffer that is plenty big enough for any floating point number, then + // print that using vsnprintf (which may truncate but never allocate if + // there is no more space in the buffer). 
+ EmbeddedVector buffer; + SNPrintF(buffer, "%.16g %.16g", x(), y()); + os << buffer.start(); +} + + String* JSReceiver::class_name() { if (IsJSFunction() || IsJSFunctionProxy()) { return GetHeap()->function_class_string(); @@ -1982,6 +2039,9 @@ const char* Representation::Mnemonic() const { case kTagged: return "t"; case kSmi: return "s"; case kDouble: return "d"; + case kFloat32x4: return "float32x4"; + case kFloat64x2: return "float64x2"; + case kInt32x4: return "int32x44"; case kInteger32: return "i"; case kHeapObject: return "h"; case kExternal: return "x"; @@ -11270,6 +11330,27 @@ void DeoptimizationInputData::DeoptimizationInputDataPrint( break; } + case Translation::FLOAT32x4_REGISTER: { + int reg_code = iterator.Next(); + os << "{input=" << SIMD128Register::AllocationIndexToString(reg_code) + << "}"; + break; + } + + case Translation::FLOAT64x2_REGISTER: { + int reg_code = iterator.Next(); + os << "{input=" << SIMD128Register::AllocationIndexToString(reg_code) + << "}"; + break; + } + + case Translation::INT32x4_REGISTER: { + int reg_code = iterator.Next(); + os << "{input=" << SIMD128Register::AllocationIndexToString(reg_code) + << "}"; + break; + } + case Translation::STACK_SLOT: { int input_slot_index = iterator.Next(); os << "{input=" << input_slot_index << "}"; @@ -11294,6 +11375,24 @@ void DeoptimizationInputData::DeoptimizationInputDataPrint( break; } + case Translation::FLOAT32x4_STACK_SLOT: { + int input_slot_index = iterator.Next(); + os << "{input=" << input_slot_index << "}"; + break; + } + + case Translation::FLOAT64x2_STACK_SLOT: { + int input_slot_index = iterator.Next(); + os << "{input=" << input_slot_index << "}"; + break; + } + + case Translation::INT32x4_STACK_SLOT: { + int input_slot_index = iterator.Next(); + os << "{input=" << input_slot_index << "}"; + break; + } + case Translation::LITERAL: { unsigned literal_index = iterator.Next(); os << "{literal_id=" << literal_index << "}"; @@ -12826,7 +12925,8 @@ MaybeHandle 
JSObject::SetElement(Handle object, if (object->HasExternalArrayElements() || object->HasFixedTypedArrayElements()) { - if (!value->IsNumber() && !value->IsUndefined()) { + if (!value->IsNumber() && !value->IsFloat32x4() && !value->IsFloat64x2() && + !value->IsInt32x4() && !value->IsUndefined()) { ASSIGN_RETURN_ON_EXCEPTION( isolate, value, Execution::ToNumber(isolate, value), Object); @@ -15111,6 +15211,71 @@ Handle ExternalFloat64Array::SetValue( } +Handle ExternalFloat32x4Array::SetValue( + Handle array, + uint32_t index, + Handle value) { + float32x4_value_t cast_value; + cast_value.storage[0] = static_cast(base::OS::nan_value()); + cast_value.storage[1] = static_cast(base::OS::nan_value()); + cast_value.storage[2] = static_cast(base::OS::nan_value()); + cast_value.storage[3] = static_cast(base::OS::nan_value()); + if (index < static_cast(array->length())) { + if (value->IsFloat32x4()) { + cast_value = Handle::cast(value)->get(); + } else { + // Clamp undefined to NaN (default). All other types have been + // converted to a number type further up in the call chain. + DCHECK(value->IsUndefined()); + } + array->set(index, cast_value); + } + return array->GetIsolate()->factory()->NewFloat32x4(cast_value); +} + + +Handle ExternalInt32x4Array::SetValue( + Handle array, uint32_t index, Handle value) { + int32x4_value_t cast_value; + cast_value.storage[0] = 0; + cast_value.storage[1] = 0; + cast_value.storage[2] = 0; + cast_value.storage[3] = 0; + if (index < static_cast(array->length())) { + if (value->IsInt32x4()) { + cast_value = Handle::cast(value)->get(); + } else { + // Clamp undefined to zero (default). All other types have been + // converted to a number type further up in the call chain. 
+ DCHECK(value->IsUndefined()); + } + array->set(index, cast_value); + } + return array->GetIsolate()->factory()->NewInt32x4(cast_value); +} + + +Handle ExternalFloat64x2Array::SetValue( + Handle array, + uint32_t index, + Handle value) { + float64x2_value_t cast_value; + cast_value.storage[0] = base::OS::nan_value(); + cast_value.storage[1] = base::OS::nan_value(); + if (index < static_cast(array->length())) { + if (value->IsFloat64x2()) { + cast_value = Handle::cast(value)->get(); + } else { + // Clamp undefined to NaN (default). All other types have been + // converted to a number type further up in the call chain. + DCHECK(value->IsUndefined()); + } + array->set(index, cast_value); + } + return array->GetIsolate()->factory()->NewFloat64x2(cast_value); +} + + PropertyCell* GlobalObject::GetPropertyCell(LookupResult* result) { DCHECK(!HasFastProperties()); Object* value = property_dictionary()->ValueAt(result->GetDictionaryEntry()); diff --git a/src/v8/src/objects.h b/src/v8/src/objects.h index 80442b4..fb4470e 100644 --- a/src/v8/src/objects.h +++ b/src/v8/src/objects.h @@ -44,6 +44,9 @@ // - JSTypedArray // - JSDataView // - JSCollection +// - Float32x4 +// - Float64x2 +// - Int32x4 // - JSSet // - JSMap // - JSSetIterator @@ -91,6 +94,9 @@ // - ExternalInt32Array // - ExternalUint32Array // - ExternalFloat32Array +// - ExternalFloat32x4Array +// - ExternalFloat64x2Array +// - ExternalInt32x4Array // - Name // - String // - SeqString @@ -379,6 +385,9 @@ const int kStubMinorKeyBits = kSmiValueSize - kStubMajorKeyBits - 1; V(EXTERNAL_INT32_ARRAY_TYPE) \ V(EXTERNAL_UINT32_ARRAY_TYPE) \ V(EXTERNAL_FLOAT32_ARRAY_TYPE) \ + V(EXTERNAL_FLOAT32x4_ARRAY_TYPE) \ + V(EXTERNAL_FLOAT64x2_ARRAY_TYPE) \ + V(EXTERNAL_INT32x4_ARRAY_TYPE) \ V(EXTERNAL_FLOAT64_ARRAY_TYPE) \ V(EXTERNAL_UINT8_CLAMPED_ARRAY_TYPE) \ \ @@ -387,9 +396,12 @@ const int kStubMinorKeyBits = kSmiValueSize - kStubMajorKeyBits - 1; V(FIXED_INT16_ARRAY_TYPE) \ V(FIXED_UINT16_ARRAY_TYPE) \ 
V(FIXED_INT32_ARRAY_TYPE) \ + V(FIXED_INT32x4_ARRAY_TYPE) \ V(FIXED_UINT32_ARRAY_TYPE) \ V(FIXED_FLOAT32_ARRAY_TYPE) \ + V(FIXED_FLOAT32x4_ARRAY_TYPE) \ V(FIXED_FLOAT64_ARRAY_TYPE) \ + V(FIXED_FLOAT64x2_ARRAY_TYPE) \ V(FIXED_UINT8_CLAMPED_ARRAY_TYPE) \ \ V(FILLER_TYPE) \ @@ -434,6 +446,9 @@ const int kStubMinorKeyBits = kSmiValueSize - kStubMajorKeyBits - 1; V(JS_ARRAY_BUFFER_TYPE) \ V(JS_TYPED_ARRAY_TYPE) \ V(JS_DATA_VIEW_TYPE) \ + V(FLOAT32x4_TYPE) \ + V(FLOAT64x2_TYPE) \ + V(INT32x4_TYPE) \ V(JS_PROXY_TYPE) \ V(JS_SET_TYPE) \ V(JS_MAP_TYPE) \ @@ -711,6 +726,9 @@ enum InstanceType { EXTERNAL_INT32_ARRAY_TYPE, EXTERNAL_UINT32_ARRAY_TYPE, EXTERNAL_FLOAT32_ARRAY_TYPE, + EXTERNAL_FLOAT32x4_ARRAY_TYPE, + EXTERNAL_FLOAT64x2_ARRAY_TYPE, + EXTERNAL_INT32x4_ARRAY_TYPE, EXTERNAL_FLOAT64_ARRAY_TYPE, EXTERNAL_UINT8_CLAMPED_ARRAY_TYPE, // LAST_EXTERNAL_ARRAY_TYPE @@ -719,8 +737,11 @@ enum InstanceType { FIXED_INT16_ARRAY_TYPE, FIXED_UINT16_ARRAY_TYPE, FIXED_INT32_ARRAY_TYPE, + FIXED_INT32x4_ARRAY_TYPE, FIXED_UINT32_ARRAY_TYPE, FIXED_FLOAT32_ARRAY_TYPE, + FIXED_FLOAT32x4_ARRAY_TYPE, + FIXED_FLOAT64x2_ARRAY_TYPE, FIXED_FLOAT64_ARRAY_TYPE, FIXED_UINT8_CLAMPED_ARRAY_TYPE, // LAST_FIXED_TYPED_ARRAY_TYPE @@ -776,6 +797,9 @@ enum InstanceType { JS_ARRAY_BUFFER_TYPE, JS_TYPED_ARRAY_TYPE, JS_DATA_VIEW_TYPE, + FLOAT32x4_TYPE, + FLOAT64x2_TYPE, + INT32x4_TYPE, JS_SET_TYPE, JS_MAP_TYPE, JS_SET_ITERATOR_TYPE, @@ -943,6 +967,9 @@ template inline bool Is(Object* obj); V(ExternalInt32Array) \ V(ExternalUint32Array) \ V(ExternalFloat32Array) \ + V(ExternalFloat32x4Array) \ + V(ExternalFloat64x2Array) \ + V(ExternalInt32x4Array) \ V(ExternalFloat64Array) \ V(ExternalUint8ClampedArray) \ V(FixedTypedArrayBase) \ @@ -953,6 +980,9 @@ template inline bool Is(Object* obj); V(FixedUint32Array) \ V(FixedInt32Array) \ V(FixedFloat32Array) \ + V(FixedFloat32x4Array) \ + V(FixedFloat64x2Array) \ + V(FixedInt32x4Array) \ V(FixedFloat64Array) \ V(FixedUint8ClampedArray) \ V(ByteArray) \ @@ -989,6 +1019,9 
@@ template inline bool Is(Object* obj); V(JSArrayBufferView) \ V(JSTypedArray) \ V(JSDataView) \ + V(Float32x4) \ + V(Float64x2) \ + V(Int32x4) \ V(JSProxy) \ V(JSFunctionProxy) \ V(JSSet) \ @@ -2109,6 +2142,9 @@ class JSObject: public JSReceiver { inline bool HasExternalInt32Elements(); inline bool HasExternalUint32Elements(); inline bool HasExternalFloat32Elements(); + inline bool HasExternalFloat32x4Elements(); + inline bool HasExternalFloat64x2Elements(); + inline bool HasExternalInt32x4Elements(); inline bool HasExternalFloat64Elements(); inline bool HasFixedTypedArrayElements(); @@ -2123,6 +2159,9 @@ class JSObject: public JSReceiver { inline bool HasFixedUint32Elements(); inline bool HasFixedFloat32Elements(); inline bool HasFixedFloat64Elements(); + inline bool HasFixedFloat32x4Elements(); + inline bool HasFixedFloat64x2Elements(); + inline bool HasFixedInt32x4Elements(); bool HasFastArgumentsElements(); bool HasDictionaryArgumentsElements(); @@ -4883,7 +4922,7 @@ class FreeSpace: public HeapObject { // V has parameters (Type, type, TYPE, C type, element_size) -#define TYPED_ARRAYS(V) \ +#define BUILTIN_TYPED_ARRAY(V) \ V(Uint8, uint8, UINT8, uint8_t, 1) \ V(Int8, int8, INT8, int8_t, 1) \ V(Uint16, uint16, UINT16, uint16_t, 2) \ @@ -4895,6 +4934,16 @@ class FreeSpace: public HeapObject { V(Uint8Clamped, uint8_clamped, UINT8_CLAMPED, uint8_t, 1) +#define SIMD128_TYPED_ARRAY(V) \ + V(Float32x4, float32x4, FLOAT32x4, v8::internal::float32x4_value_t, 16) \ + V(Float64x2, float64x2, FLOAT64x2, v8::internal::float64x2_value_t, 16) \ + V(Int32x4, int32x4, INT32x4, v8::internal::int32x4_value_t, 16) + + +#define TYPED_ARRAYS(V) \ + BUILTIN_TYPED_ARRAY(V) \ + SIMD128_TYPED_ARRAY(V) + // An ExternalArray represents a fixed-size array of primitive values // which live outside the JavaScript heap. 
Its subclasses are used to @@ -5137,6 +5186,84 @@ class ExternalFloat32Array: public ExternalArray { }; +class ExternalFloat32x4Array: public ExternalArray { + public: + // Setter and getter. + inline float32x4_value_t get_scalar(int index); + static inline Handle get(Handle array, + int index); + inline void set(int index, const float32x4_value_t& value); + + // This accessor applies the correct conversion from Smi, HeapNumber + // and undefined. + static Handle SetValue(Handle array, + uint32_t index, + Handle value); + + // Casting. + DECLARE_CAST(ExternalFloat32x4Array) + + // Dispatched behavior. + DECLARE_PRINTER(ExternalFloat32x4Array) + DECLARE_VERIFIER(ExternalFloat32x4Array) + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalFloat32x4Array); +}; + + +class ExternalFloat64x2Array: public ExternalArray { + public: + // Setter and getter. + inline float64x2_value_t get_scalar(int index); + static inline Handle get(Handle array, + int index); + inline void set(int index, const float64x2_value_t& value); + + // This accessor applies the correct conversion from Smi, HeapNumber + // and undefined. + static Handle SetValue(Handle array, + uint32_t index, + Handle value); + + // Casting. + DECLARE_CAST(ExternalFloat64x2Array) + + // Dispatched behavior. + DECLARE_PRINTER(ExternalFloat64x2Array) + DECLARE_VERIFIER(ExternalFloat64x2Array) + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalFloat64x2Array); +}; + + +class ExternalInt32x4Array: public ExternalArray { + public: + // Setter and getter. + inline int32x4_value_t get_scalar(int index); + static inline Handle get(Handle array, + int index); + inline void set(int index, const int32x4_value_t& value); + + // This accessor applies the correct conversion from Smi, HeapNumber + // and undefined. + static Handle SetValue(Handle array, + uint32_t index, + Handle value); + + // Casting. + DECLARE_CAST(ExternalInt32x4Array) + + // Dispatched behavior. 
+ DECLARE_PRINTER(ExternalInt32x4Array) + DECLARE_VERIFIER(ExternalInt32x4Array) + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalInt32x4Array); +}; + + class ExternalFloat64Array: public ExternalArray { public: // Setter and getter. @@ -7047,15 +7174,179 @@ class Script: public Struct { V(Math, clz32, MathClz32) \ V(Math, fround, MathFround) +#define SIMD_NULLARY_OPERATIONS(V) \ + V(SIMD.float32x4, zero, Float32x4Zero, Float32x4) \ + V(SIMD.float64x2, zero, Float64x2Zero, Float64x2) \ + V(SIMD.int32x4, zero, Int32x4Zero, Int32x4) + +#define SIMD_UNARY_OPERATIONS(V) \ + V(SIMD, float32x4, Float32x4Coercion, Float32x4, Float32x4) \ + V(SIMD, float64x2, Float64x2Coercion, Float64x2, Float64x2) \ + V(SIMD, int32x4, Int32x4Coercion, Int32x4, Int32x4) \ + V(SIMD.float32x4, abs, Float32x4Abs, Float32x4, Float32x4) \ + V(SIMD.float32x4, fromInt32x4, Int32x4ToFloat32x4, Float32x4, Int32x4) \ + V(SIMD.float32x4, fromInt32x4Bits, Int32x4BitsToFloat32x4, Float32x4, \ + Int32x4) \ + V(SIMD.float32x4, neg, Float32x4Neg, Float32x4, Float32x4) \ + V(SIMD.float32x4, reciprocal, Float32x4Reciprocal, Float32x4, Float32x4) \ + V(SIMD.float32x4, reciprocalSqrt, Float32x4ReciprocalSqrt, \ + Float32x4, Float32x4) \ + V(SIMD.float32x4, splat, Float32x4Splat, Float32x4, Double) \ + V(SIMD.float32x4, sqrt, Float32x4Sqrt, Float32x4, Float32x4) \ + V(SIMD.float64x2, abs, Float64x2Abs, Float64x2, Float64x2) \ + V(SIMD.float64x2, neg, Float64x2Neg, Float64x2, Float64x2) \ + V(SIMD.float64x2, sqrt, Float64x2Sqrt, Float64x2, Float64x2) \ + V(SIMD.int32x4, fromFloat32x4, Float32x4ToInt32x4, Int32x4, Float32x4) \ + V(SIMD.int32x4, fromFloat32x4Bits, Float32x4BitsToInt32x4, Int32x4, \ + Float32x4) \ + V(SIMD.int32x4, neg, Int32x4Neg, Int32x4, Int32x4) \ + V(SIMD.int32x4, not, Int32x4Not, Int32x4, Int32x4) \ + V(SIMD.int32x4, splat, Int32x4Splat, Int32x4, Integer32) + +// Do not need to install them in InstallExperimentalSIMDBuiltinFunctionIds. 
+#define SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(V) \ + V(SIMD.float32x4.prototype, signMask, Float32x4GetSignMask, Integer32, \ + Float32x4) \ + V(SIMD.float32x4.prototype, x, Float32x4GetX, Double, Float32x4) \ + V(SIMD.float32x4.prototype, y, Float32x4GetY, Double, Float32x4) \ + V(SIMD.float32x4.prototype, z, Float32x4GetZ, Double, Float32x4) \ + V(SIMD.float32x4.prototype, w, Float32x4GetW, Double, Float32x4) \ + V(SIMD.float64x2.prototype, signMask, Float64x2GetSignMask, Integer32, \ + Float64x2) \ + V(SIMD.float64x2.prototype, x, Float64x2GetX, Double, Float64x2) \ + V(SIMD.float64x2.prototype, y, Float64x2GetY, Double, Float64x2) \ + V(SIMD.int32x4.prototype, signMask, Int32x4GetSignMask, Integer32, Int32x4) \ + V(SIMD.int32x4.prototype, x, Int32x4GetX, Integer32, Int32x4) \ + V(SIMD.int32x4.prototype, y, Int32x4GetY, Integer32, Int32x4) \ + V(SIMD.int32x4.prototype, z, Int32x4GetZ, Integer32, Int32x4) \ + V(SIMD.int32x4.prototype, w, Int32x4GetW, Integer32, Int32x4) \ + V(SIMD.int32x4.prototype, flagX, Int32x4GetFlagX, Tagged, Int32x4) \ + V(SIMD.int32x4.prototype, flagY, Int32x4GetFlagY, Tagged, Int32x4) \ + V(SIMD.int32x4.prototype, flagZ, Int32x4GetFlagZ, Tagged, Int32x4) \ + V(SIMD.int32x4.prototype, flagW, Int32x4GetFlagW, Tagged, Int32x4) + +#define SIMD_BINARY_OPERATIONS(V) \ + V(SIMD.float32x4, add, Float32x4Add, Float32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, div, Float32x4Div, Float32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, max, Float32x4Max, Float32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, min, Float32x4Min, Float32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, mul, Float32x4Mul, Float32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, sub, Float32x4Sub, Float32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, equal, Float32x4Equal, Int32x4, Float32x4, Float32x4) \ + V(SIMD.float32x4, notEqual, Float32x4NotEqual, Int32x4, Float32x4, \ + Float32x4) \ + V(SIMD.float32x4, greaterThan, Float32x4GreaterThan, Int32x4, Float32x4, \ + 
Float32x4) \ + V(SIMD.float32x4, greaterThanOrEqual, Float32x4GreaterThanOrEqual, Int32x4, \ + Float32x4, Float32x4) \ + V(SIMD.float32x4, lessThan, Float32x4LessThan, Int32x4, Float32x4, \ + Float32x4) \ + V(SIMD.float32x4, lessThanOrEqual, Float32x4LessThanOrEqual, Int32x4, \ + Float32x4, Float32x4) \ + V(SIMD.float32x4, shuffle, Float32x4Shuffle, Float32x4, Float32x4, \ + Integer32) \ + V(SIMD.float32x4, scale, Float32x4Scale, Float32x4, Float32x4, Double) \ + V(SIMD.float32x4, withX, Float32x4WithX, Float32x4, Float32x4, Double) \ + V(SIMD.float32x4, withY, Float32x4WithY, Float32x4, Float32x4, Double) \ + V(SIMD.float32x4, withZ, Float32x4WithZ, Float32x4, Float32x4, Double) \ + V(SIMD.float32x4, withW, Float32x4WithW, Float32x4, Float32x4, Double) \ + V(SIMD.float64x2, add, Float64x2Add, Float64x2, Float64x2, Float64x2) \ + V(SIMD.float64x2, div, Float64x2Div, Float64x2, Float64x2, Float64x2) \ + V(SIMD.float64x2, max, Float64x2Max, Float64x2, Float64x2, Float64x2) \ + V(SIMD.float64x2, min, Float64x2Min, Float64x2, Float64x2, Float64x2) \ + V(SIMD.float64x2, mul, Float64x2Mul, Float64x2, Float64x2, Float64x2) \ + V(SIMD.float64x2, sub, Float64x2Sub, Float64x2, Float64x2, Float64x2) \ + V(SIMD.float64x2, scale, Float64x2Scale, Float64x2, Float64x2, Double) \ + V(SIMD.float64x2, withX, Float64x2WithX, Float64x2, Float64x2, Double) \ + V(SIMD.float64x2, withY, Float64x2WithY, Float64x2, Float64x2, Double) \ + V(SIMD, float64x2, Float64x2Constructor, Float64x2, Double, Double) \ + V(SIMD.int32x4, add, Int32x4Add, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, and, Int32x4And, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, mul, Int32x4Mul, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, or, Int32x4Or, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, sub, Int32x4Sub, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, xor, Int32x4Xor, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, shuffle, Int32x4Shuffle, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, withX, 
Int32x4WithX, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, withY, Int32x4WithY, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, withZ, Int32x4WithZ, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, withW, Int32x4WithW, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, withFlagX, Int32x4WithFlagX, Int32x4, Int32x4, Tagged) \ + V(SIMD.int32x4, withFlagY, Int32x4WithFlagY, Int32x4, Int32x4, Tagged) \ + V(SIMD.int32x4, withFlagZ, Int32x4WithFlagZ, Int32x4, Int32x4, Tagged) \ + V(SIMD.int32x4, withFlagW, Int32x4WithFlagW, Int32x4, Int32x4, Tagged) \ + V(SIMD.int32x4, greaterThan, Int32x4GreaterThan, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, equal, Int32x4Equal, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, lessThan, Int32x4LessThan, Int32x4, Int32x4, Int32x4) \ + V(SIMD.int32x4, shiftLeft, Int32x4ShiftLeft, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, shiftRight, Int32x4ShiftRight, Int32x4, Int32x4, Integer32) \ + V(SIMD.int32x4, shiftRightArithmetic, Int32x4ShiftRightArithmetic, Int32x4, \ + Int32x4, Integer32) + +#define SIMD_TERNARY_OPERATIONS(V) \ + V(SIMD.float32x4, clamp, Float32x4Clamp, Float32x4, Float32x4, Float32x4, \ + Float32x4) \ + V(SIMD.float32x4, shuffleMix, Float32x4ShuffleMix, Float32x4, Float32x4, \ + Float32x4, Integer32) \ + V(SIMD.float32x4, select, Float32x4Select, Float32x4, Int32x4, Float32x4, \ + Float32x4) \ + V(SIMD.float64x2, clamp, Float64x2Clamp, Float64x2, Float64x2, Float64x2, \ + Float64x2) \ + V(SIMD.int32x4, select, Int32x4Select, Int32x4, Int32x4, Int32x4, Int32x4) + +#define SIMD_QUARTERNARY_OPERATIONS(V) \ + V(SIMD, float32x4, Float32x4Constructor, Float32x4, Double, Double, Double, \ + Double) \ + V(SIMD, int32x4, Int32x4Constructor, Int32x4, Integer32, Integer32, \ + Integer32, Integer32) \ + V(SIMD.int32x4, bool, Int32x4Bool, Int32x4, Tagged, Tagged, Tagged, Tagged) + +#define SIMD_ARRAY_OPERATIONS(V) \ + V(Float32x4Array.prototype, getAt, Float32x4ArrayGetAt) \ + V(Float32x4Array.prototype, setAt, 
Float32x4ArraySetAt) \ + V(Float64x2Array.prototype, getAt, Float64x2ArrayGetAt) \ + V(Float64x2Array.prototype, setAt, Float64x2ArraySetAt) \ + V(Int32x4Array.prototype, getAt, Int32x4ArrayGetAt) \ + V(Int32x4Array.prototype, setAt, Int32x4ArraySetAt) + +// Do not need to install them in InstallExperimentalSIMDBuiltinFunctionIds. +#define SIMD_FAKE_ID_LISTS(V) \ + V(SIMD, unreachable, SIMD128Unreachable) \ + V(SIMD, change, SIMD128Change) + enum BuiltinFunctionId { kArrayCode, #define DECLARE_FUNCTION_ID(ignored1, ignore2, name) \ k##name, FUNCTIONS_WITH_ID_LIST(DECLARE_FUNCTION_ID) -#undef DECLARE_FUNCTION_ID // Fake id for a special case of Math.pow. Note, it continues the // list of math functions. - kMathPowHalf + kMathPowHalf, + SIMD_FAKE_ID_LISTS(DECLARE_FUNCTION_ID) + SIMD_ARRAY_OPERATIONS(DECLARE_FUNCTION_ID) +#undef DECLARE_FUNCTION_ID +#define DECLARE_SIMD_NULLARY_FUNCTION_ID(i1, i2, name, i3) \ + k##name, + SIMD_NULLARY_OPERATIONS(DECLARE_SIMD_NULLARY_FUNCTION_ID) +#undef DECLARE_SIMD_NULLARY_FUNCTION_ID +#define DECLARE_SIMD_UNARY_FUNCTION_ID(i1, i2, name, i3, i4) \ + k##name, + SIMD_UNARY_OPERATIONS(DECLARE_SIMD_UNARY_FUNCTION_ID) + SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(DECLARE_SIMD_UNARY_FUNCTION_ID) +#undef DECLARE_SIMD_UNARY_FUNCTION_ID +#define DECLARE_SIMD_BINARY_FUNCTION_ID(i1, i2, name, i3, i4, i5) \ + k##name, + SIMD_BINARY_OPERATIONS(DECLARE_SIMD_BINARY_FUNCTION_ID) +#undef DECLARE_SIMD_BINARY_FUNCTION_ID +#define DECLARE_SIMD_TERNARY_FUNCTION_ID(i1, i2, name, i3, i4, i5, i6) \ + k##name, + SIMD_TERNARY_OPERATIONS(DECLARE_SIMD_TERNARY_FUNCTION_ID) +#undef DECLARE_SIMD_TERNARY_FUNCTION_ID +#define DECLARE_SIMD_QUARTERNARY_FUNCTION_ID(i1, i2, name, i3, i4, i5, i6, i7) \ + k##name, + SIMD_QUARTERNARY_OPERATIONS(DECLARE_SIMD_QUARTERNARY_FUNCTION_ID) +#undef DECLARE_SIMD_QUARTERNARY_FUNCTION_ID + kNumberOfBuiltinFunction }; @@ -10437,6 +10728,115 @@ class JSDataView: public JSArrayBufferView { }; +class Float32x4: public JSObject { + public: + 
typedef float32x4_value_t value_t; + static const int kValueSize = kFloat32x4Size; + static const InstanceType kInstanceType = FLOAT32x4_TYPE; + static inline const char* Name(); + static inline int kRuntimeAllocatorId(); + + // [value]: the FixedFloat32x4Array with length 1. + DECL_ACCESSORS(value, Object) + + // Casting. + DECLARE_CAST(Float32x4) + + // Dispatched behavior. + void Float32x4Print(OStream& os); + DECLARE_VERIFIER(Float32x4) + + // Helpers. + static const int kLanes = 4; + inline float getAt(int index); + inline float x() { return getAt(0); } + inline float y() { return getAt(1); } + inline float z() { return getAt(2); } + inline float w() { return getAt(3); } + inline float32x4_value_t get(); + inline void set(float32x4_value_t f32x4); + + // Layout description. + static const int kValueOffset = JSObject::kHeaderSize; + static const int kSize = kValueOffset + kPointerSize; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Float32x4); +}; + + +class Float64x2: public JSObject { + public: + typedef float64x2_value_t value_t; + static const int kValueSize = kFloat64x2Size; + static const InstanceType kInstanceType = FLOAT64x2_TYPE; + static inline const char* Name(); + static inline int kRuntimeAllocatorId(); + + // [value]: the FixedFloat64x2Array with length 1. + DECL_ACCESSORS(value, Object) + + // Casting. + DECLARE_CAST(Float64x2) + + // Dispatched behavior. + void Float64x2Print(OStream& os); + DECLARE_VERIFIER(Float64x2) + + // Helpers. + static const int kLanes = 2; + inline double getAt(int index); + inline double x() { return getAt(0); } + inline double y() { return getAt(1); } + inline float64x2_value_t get(); + inline void set(float64x2_value_t f64x2); + + // Layout description. 
+ static const int kValueOffset = JSObject::kHeaderSize; + static const int kSize = kValueOffset + kPointerSize; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Float64x2); +}; + + +class Int32x4: public JSObject { + public: + typedef int32x4_value_t value_t; + static const int kValueSize = kInt32x4Size; + static const InstanceType kInstanceType = INT32x4_TYPE; + static inline const char* Name(); + static inline int kRuntimeAllocatorId(); + + // [value]: the FixedInt32x4Array with length 1. + DECL_ACCESSORS(value, Object) + + // Casting. + DECLARE_CAST(Int32x4) + + // Dispatched behavior. + void Int32x4Print(OStream& os); + DECLARE_VERIFIER(Int32x4) + + // Helpers. + static const int kLanes = 4; + inline int32_t getAt(int32_t index); + inline int32_t x() { return getAt(0); } + inline int32_t y() { return getAt(1); } + inline int32_t z() { return getAt(2); } + inline int32_t w() { return getAt(3); } + inline int32x4_value_t get(); + inline void set(int32x4_value_t i32x4); + + // Layout description. + static const int kValueOffset = JSObject::kHeaderSize; + static const int kSize = kValueOffset + kPointerSize; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Int32x4); +}; + + // Foreign describes objects pointing from JavaScript to C structures. // Since they cannot contain references to JS HeapObjects they can be // placed in old_data_space. 
diff --git a/src/v8/src/property-details.h b/src/v8/src/property-details.h index 7eb2e4e..36a817c 100644 --- a/src/v8/src/property-details.h +++ b/src/v8/src/property-details.h @@ -70,6 +70,9 @@ class Representation { kSmi, kInteger32, kDouble, + kFloat32x4, + kFloat64x2, + kInt32x4, kHeapObject, kTagged, kExternal, @@ -87,6 +90,9 @@ class Representation { static Representation Smi() { return Representation(kSmi); } static Representation Integer32() { return Representation(kInteger32); } static Representation Double() { return Representation(kDouble); } + static Representation Float32x4() { return Representation(kFloat32x4); } + static Representation Float64x2() { return Representation(kFloat64x2); } + static Representation Int32x4() { return Representation(kInt32x4); } static Representation HeapObject() { return Representation(kHeapObject); } static Representation External() { return Representation(kExternal); } @@ -117,6 +123,7 @@ class Representation { if (IsHeapObject()) return other.IsNone(); if (kind_ == kUInteger8 && other.kind_ == kInteger8) return false; if (kind_ == kUInteger16 && other.kind_ == kInteger16) return false; + if (IsSIMD128() && other.IsSIMD128()) return false; return kind_ > other.kind_; } @@ -158,6 +165,12 @@ class Representation { bool IsInteger32() const { return kind_ == kInteger32; } bool IsSmiOrInteger32() const { return IsSmi() || IsInteger32(); } bool IsDouble() const { return kind_ == kDouble; } + bool IsFloat32x4() const { return kind_ == kFloat32x4; } + bool IsFloat64x2() const { return kind_ == kFloat64x2; } + bool IsInt32x4() const { return kind_ == kInt32x4; } + bool IsSIMD128() const { + return IsFloat32x4() || IsFloat64x2() || IsInt32x4(); + } bool IsHeapObject() const { return kind_ == kHeapObject; } bool IsExternal() const { return kind_ == kExternal; } bool IsSpecialization() const { diff --git a/src/v8/src/runtime.cc b/src/v8/src/runtime.cc index 24c9655..f55f223 100644 --- a/src/v8/src/runtime.cc +++ 
b/src/v8/src/runtime.cc @@ -5104,7 +5104,9 @@ MaybeHandle Runtime::SetObjectProperty(Isolate* isolate, JSObject::ValidateElements(js_object); if (js_object->HasExternalArrayElements() || js_object->HasFixedTypedArrayElements()) { - if (!value->IsNumber() && !value->IsUndefined()) { + if (!value->IsNumber() && !value->IsFloat32x4() && + !value->IsFloat64x2() && !value->IsInt32x4() && + !value->IsUndefined()) { ASSIGN_RETURN_ON_EXCEPTION( isolate, value, Execution::ToNumber(isolate, value), Object); } @@ -5124,7 +5126,9 @@ MaybeHandle Runtime::SetObjectProperty(Isolate* isolate, if (!object->IsJSObject()) return value; Handle js_object = Handle::cast(object); if (js_object->HasExternalArrayElements()) { - if (!value->IsNumber() && !value->IsUndefined()) { + if (!value->IsNumber() && !value->IsFloat32x4() && + !value->IsFloat64x2() && !value->IsInt32x4() && + !value->IsUndefined()) { ASSIGN_RETURN_ON_EXCEPTION( isolate, value, Execution::ToNumber(isolate, value), Object); } @@ -6929,6 +6933,33 @@ RUNTIME_FUNCTION(Runtime_AllocateHeapNumber) { } +RUNTIME_FUNCTION(Runtime_AllocateFloat32x4) { + HandleScope scope(isolate); + DCHECK(args.length() == 0); + + float32x4_value_t zero = {{0, 0, 0, 0}}; + return *isolate->factory()->NewFloat32x4(zero); +} + + +RUNTIME_FUNCTION(Runtime_AllocateFloat64x2) { + HandleScope scope(isolate); + DCHECK(args.length() == 0); + + float64x2_value_t zero = {{0, 0}}; + return *isolate->factory()->NewFloat64x2(zero); +} + + +RUNTIME_FUNCTION(Runtime_AllocateInt32x4) { + HandleScope scope(isolate); + DCHECK(args.length() == 0); + + int32x4_value_t zero = {{0, 0, 0, 0}}; + return *isolate->factory()->NewInt32x4(zero); +} + + RUNTIME_FUNCTION(Runtime_NumberAdd) { HandleScope scope(isolate); DCHECK(args.length() == 2); @@ -10194,6 +10225,54 @@ static void IterateExternalArrayElements(Isolate* isolate, } +static void IterateExternalFloat32x4ArrayElements(Isolate* isolate, + Handle receiver, + ArrayConcatVisitor* visitor) { + Handle array( + 
ExternalFloat32x4Array::cast(receiver->elements())); + uint32_t len = static_cast(array->length()); + + DCHECK(visitor != NULL); + for (uint32_t j = 0; j < len; j++) { + HandleScope loop_scope(isolate); + Handle e = isolate->factory()->NewFloat32x4(array->get_scalar(j)); + visitor->visit(j, e); + } +} + + +static void IterateExternalFloat64x2ArrayElements(Isolate* isolate, + Handle receiver, + ArrayConcatVisitor* visitor) { + Handle array( + ExternalFloat64x2Array::cast(receiver->elements())); + uint32_t len = static_cast(array->length()); + + DCHECK(visitor != NULL); + for (uint32_t j = 0; j < len; j++) { + HandleScope loop_scope(isolate); + Handle e = isolate->factory()->NewFloat64x2(array->get_scalar(j)); + visitor->visit(j, e); + } +} + + +static void IterateExternalInt32x4ArrayElements(Isolate* isolate, + Handle receiver, + ArrayConcatVisitor* visitor) { + Handle array( + ExternalInt32x4Array::cast(receiver->elements())); + uint32_t len = static_cast(array->length()); + + DCHECK(visitor != NULL); + for (uint32_t j = 0; j < len; j++) { + HandleScope loop_scope(isolate); + Handle e = isolate->factory()->NewInt32x4(array->get_scalar(j)); + visitor->visit(j, e); + } +} + + // Used for sorting indices in a List. 
static int compareUInt32(const uint32_t* ap, const uint32_t* bp) { uint32_t a = *ap; @@ -10439,6 +10518,18 @@ static bool IterateElements(Isolate* isolate, isolate, receiver, false, false, visitor); break; } + case EXTERNAL_FLOAT32x4_ELEMENTS: { + IterateExternalFloat32x4ArrayElements(isolate, receiver, visitor); + break; + } + case EXTERNAL_FLOAT64x2_ELEMENTS: { + IterateExternalFloat64x2ArrayElements(isolate, receiver, visitor); + break; + } + case EXTERNAL_INT32x4_ELEMENTS: { + IterateExternalInt32x4ArrayElements(isolate, receiver, visitor); + break; + } case EXTERNAL_FLOAT64_ELEMENTS: { IterateExternalArrayElements( isolate, receiver, false, false, visitor); @@ -15227,6 +15318,684 @@ RUNTIME_FUNCTION_RETURN_PAIR(Runtime_ForInNext) { } +#define RETURN_Float32x4_RESULT(value) \ + return *isolate->factory()->NewFloat32x4(value); + + +#define RETURN_Float64x2_RESULT(value) \ + return *isolate->factory()->NewFloat64x2(value); + + +#define RETURN_Int32x4_RESULT(value) \ + return *isolate->factory()->NewInt32x4(value); + + +RUNTIME_FUNCTION(Runtime_CreateFloat32x4) { + HandleScope scope(isolate); + DCHECK(args.length() == 4); + RUNTIME_ASSERT(args[0]->IsNumber()); + RUNTIME_ASSERT(args[1]->IsNumber()); + RUNTIME_ASSERT(args[2]->IsNumber()); + RUNTIME_ASSERT(args[3]->IsNumber()); + + float32x4_value_t value; + value.storage[0] = static_cast(args.number_at(0)); + value.storage[1] = static_cast(args.number_at(1)); + value.storage[2] = static_cast(args.number_at(2)); + value.storage[3] = static_cast(args.number_at(3)); + + RETURN_Float32x4_RESULT(value); +} + + +RUNTIME_FUNCTION(Runtime_CreateFloat64x2) { + HandleScope scope(isolate); + DCHECK(args.length() == 2); + RUNTIME_ASSERT(args[0]->IsNumber()); + RUNTIME_ASSERT(args[1]->IsNumber()); + + float64x2_value_t value; + value.storage[0] = args.number_at(0); + value.storage[1] = args.number_at(1); + + RETURN_Float64x2_RESULT(value); +} + + +RUNTIME_FUNCTION(Runtime_CreateInt32x4) { + HandleScope scope(isolate); + 
DCHECK(args.length() == 4); + RUNTIME_ASSERT(args[0]->IsNumber()); + RUNTIME_ASSERT(args[1]->IsNumber()); + RUNTIME_ASSERT(args[2]->IsNumber()); + RUNTIME_ASSERT(args[3]->IsNumber()); + + int32x4_value_t value; + value.storage[0] = NumberToInt32(args[0]); + value.storage[1] = NumberToInt32(args[1]); + value.storage[2] = NumberToInt32(args[2]); + value.storage[3] = NumberToInt32(args[3]); + + RETURN_Int32x4_RESULT(value); +} + + +// Used to convert between uint32_t and float32 without breaking strict +// aliasing rules. +union float32_uint32 { + float f; + uint32_t u; + float32_uint32(float v) { + f = v; + } + float32_uint32(uint32_t v) { + u = v; + } +}; + + +union float64_uint64 { + double f; + uint64_t u; + float64_uint64(double v) { + f = v; + } + float64_uint64(uint64_t v) { + u = v; + } +}; + + +RUNTIME_FUNCTION(Runtime_Float32x4GetSignMask) { + HandleScope scope(isolate); + DCHECK(args.length() == 1); + CONVERT_ARG_CHECKED(Float32x4, self, 0); + float32_uint32 x(self->x()); + float32_uint32 y(self->y()); + float32_uint32 z(self->z()); + float32_uint32 w(self->w()); + uint32_t mx = (x.u & 0x80000000) >> 31; + uint32_t my = (y.u & 0x80000000) >> 31; + uint32_t mz = (z.u & 0x80000000) >> 31; + uint32_t mw = (w.u & 0x80000000) >> 31; + uint32_t value = mx | (my << 1) | (mz << 2) | (mw << 3); + return *isolate->factory()->NewNumberFromUint(value); +} + + +RUNTIME_FUNCTION(Runtime_Float64x2GetSignMask) { + HandleScope scope(isolate); + DCHECK(args.length() == 1); + CONVERT_ARG_CHECKED(Float64x2, self, 0); + float64_uint64 x(self->x()); + float64_uint64 y(self->y()); + uint64_t mx = x.u >> 63; + uint64_t my = y.u >> 63; + uint32_t value = static_cast(mx | (my << 1)); + return *isolate->factory()->NewNumberFromUint(value); +} + + +RUNTIME_FUNCTION(Runtime_Int32x4GetSignMask) { + HandleScope scope(isolate); + DCHECK(args.length() == 1); + CONVERT_ARG_CHECKED(Int32x4, self, 0); + uint32_t mx = (self->x() & 0x80000000) >> 31; + uint32_t my = (self->y() & 0x80000000) >> 
31; + uint32_t mz = (self->z() & 0x80000000) >> 31; + uint32_t mw = (self->w() & 0x80000000) >> 31; + uint32_t value = mx | (my << 1) | (mz << 2) | (mw << 3); + return *isolate->factory()->NewNumberFromUint(value); +} + + +#define LANE_VALUE(VALUE, LANE) \ + VALUE->LANE() + + +#define LANE_FLAG(VALUE, LANE) \ + VALUE->LANE() != 0 + + +#define SIMD128_LANE_ACCESS_FUNCTIONS(V) \ + V(Float32x4, GetX, NewNumber, x, LANE_VALUE) \ + V(Float32x4, GetY, NewNumber, y, LANE_VALUE) \ + V(Float32x4, GetZ, NewNumber, z, LANE_VALUE) \ + V(Float32x4, GetW, NewNumber, w, LANE_VALUE) \ + V(Float64x2, GetX, NewNumber, x, LANE_VALUE) \ + V(Float64x2, GetY, NewNumber, y, LANE_VALUE) \ + V(Int32x4, GetX, NewNumberFromInt, x, LANE_VALUE) \ + V(Int32x4, GetY, NewNumberFromInt, y, LANE_VALUE) \ + V(Int32x4, GetZ, NewNumberFromInt, z, LANE_VALUE) \ + V(Int32x4, GetW, NewNumberFromInt, w, LANE_VALUE) \ + V(Int32x4, GetFlagX, ToBoolean, x, LANE_FLAG) \ + V(Int32x4, GetFlagY, ToBoolean, y, LANE_FLAG) \ + V(Int32x4, GetFlagZ, ToBoolean, z, LANE_FLAG) \ + V(Int32x4, GetFlagW, ToBoolean, w, LANE_FLAG) + + +#define DECLARE_SIMD_LANE_ACCESS_FUNCTION( \ + TYPE, NAME, HEAP_FUNCTION, LANE, ACCESS_FUNCTION) \ +RUNTIME_FUNCTION(Runtime_##TYPE##NAME) { \ + HandleScope scope(isolate); \ + DCHECK(args.length() == 1); \ + \ + CONVERT_ARG_CHECKED(TYPE, a, 0); \ + \ + return *isolate->factory()->HEAP_FUNCTION( \ + ACCESS_FUNCTION(a, LANE)); \ +} + + +SIMD128_LANE_ACCESS_FUNCTIONS(DECLARE_SIMD_LANE_ACCESS_FUNCTION) + + +template +static inline T Neg(T a) { + return -a; +} + + +template +static inline T Not(T a) { + return ~a; +} + + +template +static inline T Reciprocal(T a) { + UNIMPLEMENTED(); +} + + +template<> +inline float Reciprocal(float a) { + return 1.0f / a; +} + + +template +static inline T ReciprocalSqrt(T a) { + UNIMPLEMENTED(); +} + + +template<> +inline float ReciprocalSqrt(float a) { + return sqrtf(1.0f / a); +} + + +template +static inline T Sqrt(T a) { + UNIMPLEMENTED(); +} + + +template<> 
+inline float Sqrt(float a) { + return sqrtf(a); +} + + +template<> +inline double Sqrt(double a) { + return sqrt(a); +} + + +#define SIMD128_UNARY_FUNCTIONS(V) \ + V(Float32x4, Abs) \ + V(Float32x4, Neg) \ + V(Float32x4, Reciprocal) \ + V(Float32x4, ReciprocalSqrt) \ + V(Float32x4, Sqrt) \ + V(Float64x2, Abs) \ + V(Float64x2, Neg) \ + V(Float64x2, Sqrt) \ + V(Int32x4, Neg) \ + V(Int32x4, Not) + + +#define DECLARE_SIMD_UNARY_FUNCTION(TYPE, FUNCTION) \ +RUNTIME_FUNCTION(Runtime_##TYPE##FUNCTION) { \ + HandleScope scope(isolate); \ + DCHECK(args.length() == 1); \ + \ + CONVERT_ARG_CHECKED(TYPE, a, 0); \ + \ + TYPE::value_t result; \ + for (int i = 0; i < TYPE::kLanes; i++) { \ + result.storage[i] = FUNCTION(a->getAt(i)); \ + } \ + \ + RETURN_##TYPE##_RESULT(result); \ +} + + +SIMD128_UNARY_FUNCTIONS(DECLARE_SIMD_UNARY_FUNCTION) + + +template +inline void BitsTo(T1 s, T2* t) { + memcpy(t, &s, sizeof(T2)); +} + + +template +inline void To(T1 s, T2* t) { +} + + +template<> +inline void To(int32_t s, float* t) { + *t = static_cast(s); +} + + +template<> +inline void To(float s, int32_t* t) { + *t = DoubleToInt32(static_cast(s)); +} + + +#define SIMD128_CONVERSION_FUNCTIONS(V) \ + V(Float32x4, BitsTo, Int32x4) \ + V(Float32x4, To, Int32x4) \ + V(Int32x4, BitsTo, Float32x4) \ + V(Int32x4, To, Float32x4) + + +#define DECLARE_SIMD_CONVERSION_FUNCTION( \ + SOURCE_TYPE, FUNCTION, TARGET_TYPE) \ +RUNTIME_FUNCTION( \ + Runtime_##SOURCE_TYPE##FUNCTION##TARGET_TYPE) { \ + HandleScope scope(isolate); \ + DCHECK(args.length() == 1); \ + \ + CONVERT_ARG_CHECKED(SOURCE_TYPE, a, 0); \ + \ + TARGET_TYPE::value_t result; \ + for (int i = 0; i < SOURCE_TYPE::kLanes; i++) { \ + FUNCTION(a->getAt(i), &result.storage[i]); \ + } \ + \ + RETURN_##TARGET_TYPE##_RESULT(result); \ +} + + +SIMD128_CONVERSION_FUNCTIONS(DECLARE_SIMD_CONVERSION_FUNCTION) + + +template +static inline T Add(T a, T b) { + return a + b; +} + + +template +static inline T Div(T a, T b) { + return a / b; +} + + +template 
+static inline T Mul(T a, T b) { + return a * b; +} + + +template +static inline T Sub(T a, T b) { + return a - b; +} + + +template +static inline int32_t Equal(T a, T b) { + return a == b ? -1 : 0; +} + + +template +static inline int32_t NotEqual(T a, T b) { + return a != b ? -1 : 0; +} + + +template +static inline int32_t GreaterThanOrEqual(T a, T b) { + return a >= b ? -1 : 0; +} + + +template +static inline int32_t GreaterThan(T a, T b) { + return a > b ? -1 : 0; +} + + +template +static inline int32_t LessThan(T a, T b) { + return a < b ? -1 : 0; +} + + +template +static inline int32_t LessThanOrEqual(T a, T b) { + return a <= b ? -1 : 0; +} + + +template +static inline T And(T a, T b) { + return a & b; +} + + +template +static inline T Or(T a, T b) { + return a | b; +} + + +template +static inline T Xor(T a, T b) { + return a ^ b; +} + + +#define SIMD128_BINARY_FUNCTIONS(V) \ + V(Float32x4, Add, Float32x4) \ + V(Float32x4, Div, Float32x4) \ + V(Float32x4, Max, Float32x4) \ + V(Float32x4, Min, Float32x4) \ + V(Float32x4, Mul, Float32x4) \ + V(Float32x4, Sub, Float32x4) \ + V(Float32x4, Equal, Int32x4) \ + V(Float32x4, NotEqual, Int32x4) \ + V(Float32x4, GreaterThanOrEqual, Int32x4) \ + V(Float32x4, GreaterThan, Int32x4) \ + V(Float32x4, LessThan, Int32x4) \ + V(Float32x4, LessThanOrEqual, Int32x4) \ + V(Float64x2, Add, Float64x2) \ + V(Float64x2, Div, Float64x2) \ + V(Float64x2, Max, Float64x2) \ + V(Float64x2, Min, Float64x2) \ + V(Float64x2, Mul, Float64x2) \ + V(Float64x2, Sub, Float64x2) \ + V(Int32x4, Add, Int32x4) \ + V(Int32x4, And, Int32x4) \ + V(Int32x4, Mul, Int32x4) \ + V(Int32x4, Or, Int32x4) \ + V(Int32x4, Sub, Int32x4) \ + V(Int32x4, Xor, Int32x4) \ + V(Int32x4, Equal, Int32x4) \ + V(Int32x4, GreaterThan, Int32x4) \ + V(Int32x4, LessThan, Int32x4) + + +#define DECLARE_SIMD_BINARY_FUNCTION( \ + TYPE, FUNCTION, RETURN_TYPE) \ +RUNTIME_FUNCTION(Runtime_##TYPE##FUNCTION) { \ + HandleScope scope(isolate); \ + DCHECK(args.length() == 2); \ + \ + 
CONVERT_ARG_CHECKED(TYPE, a, 0); \ + CONVERT_ARG_CHECKED(TYPE, b, 1); \ + \ + RETURN_TYPE::value_t result; \ + for (int i = 0; i < TYPE::kLanes; i++) { \ + result.storage[i] = FUNCTION(a->getAt(i), b->getAt(i)); \ + } \ + \ + RETURN_##RETURN_TYPE##_RESULT(result); \ +} + + +SIMD128_BINARY_FUNCTIONS(DECLARE_SIMD_BINARY_FUNCTION) + + +#define SIMD128_SHUFFLE_FUNCTIONS(V) \ + V(Float32x4) \ + V(Int32x4) + + +#define DECLARE_SIMD_SHUFFLE_FUNCTION(TYPE) \ +RUNTIME_FUNCTION(Runtime_##TYPE##Shuffle) { \ + HandleScope scope(isolate); \ + DCHECK(args.length() == 2); \ + \ + CONVERT_ARG_CHECKED(TYPE, a, 0); \ + RUNTIME_ASSERT(args[1]->IsNumber()); \ + uint32_t m = NumberToUint32(args[1]); \ + \ + TYPE::value_t result; \ + for (int i = 0; i < TYPE::kLanes; i++) { \ + result.storage[i] = a->getAt((m >> (i * 2)) & 0x3); \ + } \ + \ + RETURN_##TYPE##_RESULT(result); \ +} + + +SIMD128_SHUFFLE_FUNCTIONS(DECLARE_SIMD_SHUFFLE_FUNCTION) + + +RUNTIME_FUNCTION(Runtime_Float32x4Scale) { + HandleScope scope(isolate); + DCHECK(args.length() == 2); + + CONVERT_ARG_CHECKED(Float32x4, self, 0); + RUNTIME_ASSERT(args[1]->IsNumber()); + + float _s = static_cast(args.number_at(1)); + float32x4_value_t result; + result.storage[0] = self->x() * _s; + result.storage[1] = self->y() * _s; + result.storage[2] = self->z() * _s; + result.storage[3] = self->w() * _s; + + RETURN_Float32x4_RESULT(result); +} + + +RUNTIME_FUNCTION(Runtime_Float64x2Scale) { + HandleScope scope(isolate); + DCHECK(args.length() == 2); + + CONVERT_ARG_CHECKED(Float64x2, self, 0); + RUNTIME_ASSERT(args[1]->IsNumber()); + + double _s = args.number_at(1); + float64x2_value_t result; + result.storage[0] = self->x() * _s; + result.storage[1] = self->y() * _s; + + RETURN_Float64x2_RESULT(result); +} + + +#define ARG_TO_FLOAT32(x) \ + CONVERT_DOUBLE_ARG_CHECKED(t, 1); \ + float x = static_cast(t); + + +#define ARG_TO_FLOAT64(x) \ + CONVERT_DOUBLE_ARG_CHECKED(x, 1); \ + + +#define ARG_TO_INT32(x) \ + 
RUNTIME_ASSERT(args[1]->IsNumber()); \ + int32_t x = NumberToInt32(args[1]); + + +#define ARG_TO_BOOLEAN(x) \ + CONVERT_BOOLEAN_ARG_CHECKED(flag, 1); \ + int32_t x = flag ? -1 : 0; + +#define SIMD128_SET_LANE_FUNCTIONS(V) \ + V(Float32x4, WithX, ARG_TO_FLOAT32, 0) \ + V(Float32x4, WithY, ARG_TO_FLOAT32, 1) \ + V(Float32x4, WithZ, ARG_TO_FLOAT32, 2) \ + V(Float32x4, WithW, ARG_TO_FLOAT32, 3) \ + V(Float64x2, WithX, ARG_TO_FLOAT64, 0) \ + V(Float64x2, WithY, ARG_TO_FLOAT64, 1) \ + V(Int32x4, WithX, ARG_TO_INT32, 0) \ + V(Int32x4, WithY, ARG_TO_INT32, 1) \ + V(Int32x4, WithZ, ARG_TO_INT32, 2) \ + V(Int32x4, WithW, ARG_TO_INT32, 3) \ + V(Int32x4, WithFlagX, ARG_TO_BOOLEAN, 0) \ + V(Int32x4, WithFlagY, ARG_TO_BOOLEAN, 1) \ + V(Int32x4, WithFlagZ, ARG_TO_BOOLEAN, 2) \ + V(Int32x4, WithFlagW, ARG_TO_BOOLEAN, 3) + + +#define DECLARE_SIMD_SET_LANE_FUNCTION( \ + TYPE, NAME, ARG_FUNCTION, LANE) \ +RUNTIME_FUNCTION(Runtime_##TYPE##NAME) { \ + HandleScope scope(isolate); \ + DCHECK(args.length() == 2); \ + \ + CONVERT_ARG_CHECKED(TYPE, a, 0); \ + ARG_FUNCTION(value); \ + \ + TYPE::value_t result; \ + for (int i = 0; i < TYPE::kLanes; i++) { \ + if (i != LANE) \ + result.storage[i] = a->getAt(i); \ + else \ + result.storage[i] = value; \ + } \ + \ + RETURN_##TYPE##_RESULT(result); \ +} + + +SIMD128_SET_LANE_FUNCTIONS(DECLARE_SIMD_SET_LANE_FUNCTION) + + +RUNTIME_FUNCTION(Runtime_Float32x4Clamp) { + HandleScope scope(isolate); + DCHECK(args.length() == 3); + + CONVERT_ARG_CHECKED(Float32x4, self, 0); + CONVERT_ARG_CHECKED(Float32x4, lo, 1); + CONVERT_ARG_CHECKED(Float32x4, hi, 2); + + float32x4_value_t result; + float _x = self->x() > lo->x() ? self->x() : lo->x(); + float _y = self->y() > lo->y() ? self->y() : lo->y(); + float _z = self->z() > lo->z() ? self->z() : lo->z(); + float _w = self->w() > lo->w() ? self->w() : lo->w(); + result.storage[0] = _x > hi->x() ? hi->x() : _x; + result.storage[1] = _y > hi->y() ? hi->y() : _y; + result.storage[2] = _z > hi->z() ? 
hi->z() : _z; + result.storage[3] = _w > hi->w() ? hi->w() : _w; + + RETURN_Float32x4_RESULT(result); +} + + +RUNTIME_FUNCTION(Runtime_Float64x2Clamp) { + HandleScope scope(isolate); + DCHECK(args.length() == 3); + + CONVERT_ARG_CHECKED(Float64x2, self, 0); + CONVERT_ARG_CHECKED(Float64x2, lo, 1); + CONVERT_ARG_CHECKED(Float64x2, hi, 2); + + float64x2_value_t result; + double _x = self->x() > lo->x() ? self->x() : lo->x(); + double _y = self->y() > lo->y() ? self->y() : lo->y(); + result.storage[0] = _x > hi->x() ? hi->x() : _x; + result.storage[1] = _y > hi->y() ? hi->y() : _y; + + RETURN_Float64x2_RESULT(result); +} + + +RUNTIME_FUNCTION(Runtime_Float32x4ShuffleMix) { + HandleScope scope(isolate); + DCHECK(args.length() == 3); + + CONVERT_ARG_CHECKED(Float32x4, first, 0); + CONVERT_ARG_CHECKED(Float32x4, second, 1); + RUNTIME_ASSERT(args[2]->IsNumber()); + + uint32_t m = NumberToUint32(args[2]); + float32x4_value_t result; + float data1[4] = { first->x(), first->y(), first->z(), first->w() }; + float data2[4] = { second->x(), second->y(), second->z(), second->w() }; + result.storage[0] = data1[m & 0x3]; + result.storage[1] = data1[(m >> 2) & 0x3]; + result.storage[2] = data2[(m >> 4) & 0x3]; + result.storage[3] = data2[(m >> 6) & 0x3]; + + RETURN_Float32x4_RESULT(result); +} + + +RUNTIME_FUNCTION(Runtime_Float32x4Select) { + HandleScope scope(isolate); + DCHECK(args.length() == 3); + + CONVERT_ARG_CHECKED(Int32x4, self, 0); + CONVERT_ARG_CHECKED(Float32x4, tv, 1); + CONVERT_ARG_CHECKED(Float32x4, fv, 2); + + uint32_t _maskX = self->x(); + uint32_t _maskY = self->y(); + uint32_t _maskZ = self->z(); + uint32_t _maskW = self->w(); + // Extract floats and interpret them as masks. + float32_uint32 tvx(tv->x()); + float32_uint32 tvy(tv->y()); + float32_uint32 tvz(tv->z()); + float32_uint32 tvw(tv->w()); + float32_uint32 fvx(fv->x()); + float32_uint32 fvy(fv->y()); + float32_uint32 fvz(fv->z()); + float32_uint32 fvw(fv->w()); + // Perform select. 
+ float32_uint32 tempX((_maskX & tvx.u) | (~_maskX & fvx.u)); + float32_uint32 tempY((_maskY & tvy.u) | (~_maskY & fvy.u)); + float32_uint32 tempZ((_maskZ & tvz.u) | (~_maskZ & fvz.u)); + float32_uint32 tempW((_maskW & tvw.u) | (~_maskW & fvw.u)); + + float32x4_value_t result; + result.storage[0] = tempX.f; + result.storage[1] = tempY.f; + result.storage[2] = tempZ.f; + result.storage[3] = tempW.f; + + RETURN_Float32x4_RESULT(result); +} + + +RUNTIME_FUNCTION(Runtime_Int32x4Select) { + HandleScope scope(isolate); + DCHECK(args.length() == 3); + + CONVERT_ARG_CHECKED(Int32x4, self, 0); + CONVERT_ARG_CHECKED(Int32x4, tv, 1); + CONVERT_ARG_CHECKED(Int32x4, fv, 2); + + uint32_t _maskX = self->x(); + uint32_t _maskY = self->y(); + uint32_t _maskZ = self->z(); + uint32_t _maskW = self->w(); + + int32x4_value_t result; + result.storage[0] = (_maskX & tv->x()) | (~_maskX & fv->x()); + result.storage[1] = (_maskY & tv->y()) | (~_maskY & fv->y()); + result.storage[2] = (_maskZ & tv->z()) | (~_maskZ & fv->z()); + result.storage[3] = (_maskW & tv->w()) | (~_maskW & fv->w()); + + RETURN_Int32x4_RESULT(result); +} + + // ---------------------------------------------------------------------------- // Reference implementation for inlined runtime functions. Only used when the // compiler does not support a certain intrinsic. 
Don't optimize these, but diff --git a/src/v8/src/runtime.h b/src/v8/src/runtime.h index 93185cc..2cde15d 100644 --- a/src/v8/src/runtime.h +++ b/src/v8/src/runtime.h @@ -152,6 +152,77 @@ namespace internal { F(MathFround, 1, 1) \ F(RemPiO2, 1, 1) \ \ + /* Float32x4 and Int32x4 */ \ + F(AllocateFloat32x4, 0, 1) \ + F(AllocateFloat64x2, 0, 1) \ + F(AllocateInt32x4, 0, 1) \ + \ + /* SIMD */ \ + F(Float32x4Abs, 1, 1) \ + F(Float32x4BitsToInt32x4, 1, 1) \ + F(Float32x4Neg, 1, 1) \ + F(Float32x4Reciprocal, 1, 1) \ + F(Float32x4ReciprocalSqrt, 1, 1) \ + F(Float32x4Sqrt, 1, 1) \ + F(Float32x4ToInt32x4, 1, 1) \ + F(Float32x4Add, 2, 1) \ + F(Float32x4Div, 2, 1) \ + F(Float32x4Max, 2, 1) \ + F(Float32x4Min, 2, 1) \ + F(Float32x4Mul, 2, 1) \ + F(Float32x4Sub, 2, 1) \ + F(Float32x4Equal, 2, 1) \ + F(Float32x4NotEqual, 2, 1) \ + F(Float32x4GreaterThanOrEqual, 2, 1) \ + F(Float32x4GreaterThan, 2, 1) \ + F(Float32x4LessThan, 2, 1) \ + F(Float32x4LessThanOrEqual, 2, 1) \ + F(Float32x4Shuffle, 2, 1) \ + F(Float32x4Scale, 2, 1) \ + F(Float32x4WithX, 2, 1) \ + F(Float32x4WithY, 2, 1) \ + F(Float32x4WithZ, 2, 1) \ + F(Float32x4WithW, 2, 1) \ + F(Float32x4Clamp, 3, 1) \ + F(Float32x4ShuffleMix, 3, 1) \ + F(Float32x4Select, 3, 1) \ + F(Float64x2Abs, 1, 1) \ + F(Float64x2Neg, 1, 1) \ + F(Float64x2Sqrt, 1, 1) \ + F(Float64x2Add, 2, 1) \ + F(Float64x2Div, 2, 1) \ + F(Float64x2Max, 2, 1) \ + F(Float64x2Min, 2, 1) \ + F(Float64x2Mul, 2, 1) \ + F(Float64x2Sub, 2, 1) \ + F(Float64x2Scale, 2, 1) \ + F(Float64x2WithX, 2, 1) \ + F(Float64x2WithY, 2, 1) \ + F(Float64x2Clamp, 3, 1) \ + F(Int32x4BitsToFloat32x4, 1, 1) \ + F(Int32x4Neg, 1, 1) \ + F(Int32x4Not, 1, 1) \ + F(Int32x4ToFloat32x4, 1, 1) \ + F(Int32x4And, 2, 1) \ + F(Int32x4Or, 2, 1) \ + F(Int32x4Xor, 2, 1) \ + F(Int32x4Add, 2, 1) \ + F(Int32x4Sub, 2, 1) \ + F(Int32x4Mul, 2, 1) \ + F(Int32x4Shuffle, 2, 1) \ + F(Int32x4WithX, 2, 1) \ + F(Int32x4WithY, 2, 1) \ + F(Int32x4WithZ, 2, 1) \ + F(Int32x4WithW, 2, 1) \ + F(Int32x4WithFlagX, 2, 1) \ + 
F(Int32x4WithFlagY, 2, 1) \ + F(Int32x4WithFlagZ, 2, 1) \ + F(Int32x4WithFlagW, 2, 1) \ + F(Int32x4GreaterThan, 2, 1) \ + F(Int32x4Equal, 2, 1) \ + F(Int32x4LessThan, 2, 1) \ + F(Int32x4Select, 3, 1) \ + \ /* Regular expressions */ \ F(RegExpCompile, 3, 1) \ F(RegExpExecMultiple, 4, 1) \ @@ -224,6 +295,28 @@ namespace internal { F(DateSetValue, 3, 1) \ F(DateCacheVersion, 0, 1) \ \ + /* Float32x4, Float64x2 and Int32x4 */ \ + F(CreateFloat32x4, 4, 1) \ + F(Float32x4GetX, 1, 1) \ + F(Float32x4GetY, 1, 1) \ + F(Float32x4GetZ, 1, 1) \ + F(Float32x4GetW, 1, 1) \ + F(Float32x4GetSignMask, 1, 1) \ + F(CreateFloat64x2, 2, 1) \ + F(Float64x2GetX, 1, 1) \ + F(Float64x2GetY, 1, 1) \ + F(Float64x2GetSignMask, 1, 1) \ + F(CreateInt32x4, 4, 1) \ + F(Int32x4GetX, 1, 1) \ + F(Int32x4GetY, 1, 1) \ + F(Int32x4GetZ, 1, 1) \ + F(Int32x4GetW, 1, 1) \ + F(Int32x4GetFlagX, 1, 1) \ + F(Int32x4GetFlagY, 1, 1) \ + F(Int32x4GetFlagZ, 1, 1) \ + F(Int32x4GetFlagW, 1, 1) \ + F(Int32x4GetSignMask, 1, 1) \ + \ /* Globals */ \ F(CompileString, 2, 1) \ \ @@ -394,7 +487,10 @@ namespace internal { F(HasExternalInt32Elements, 1, 1) \ F(HasExternalUint32Elements, 1, 1) \ F(HasExternalFloat32Elements, 1, 1) \ + F(HasExternalFloat32x4Elements, 1, 1) \ + F(HasExternalInt32x4Elements, 1, 1) \ F(HasExternalFloat64Elements, 1, 1) \ + F(HasExternalFloat64x2Elements, 1, 1) \ F(HasFixedUint8ClampedElements, 1, 1) \ F(HasFixedInt8Elements, 1, 1) \ F(HasFixedUint8Elements, 1, 1) \ @@ -878,9 +974,11 @@ class Runtime : public AllStatic { ARRAY_ID_FLOAT32 = 7, ARRAY_ID_FLOAT64 = 8, ARRAY_ID_UINT8_CLAMPED = 9, - + ARRAY_ID_FLOAT32x4 = 10, + ARRAY_ID_FLOAT64x2 = 11, + ARRAY_ID_INT32x4 = 12, ARRAY_ID_FIRST = ARRAY_ID_UINT8, - ARRAY_ID_LAST = ARRAY_ID_UINT8_CLAMPED + ARRAY_ID_LAST = ARRAY_ID_INT32x4 }; static void ArrayIdToTypeAndSize(int array_id, diff --git a/src/v8/src/runtime.js b/src/v8/src/runtime.js index d9e1fe5..8aa62c4 100644 --- a/src/v8/src/runtime.js +++ b/src/v8/src/runtime.js @@ -50,17 +50,57 @@ function 
EQUALS(y) { if (IS_NUMBER(y)) return %NumberEquals(%ToNumber(x), y); if (IS_BOOLEAN(y)) return %NumberEquals(%ToNumber(x), %ToNumber(y)); if (IS_NULL_OR_UNDEFINED(y)) return 1; // not equal + if (IsFloat32x4(y) || IsFloat64x2(y) || IsInt32x4(y)) { + return %StringEquals(x, %ToString(y)); + } y = %ToPrimitive(y, NO_HINT); } } else if (IS_SYMBOL(x)) { if (IS_SYMBOL(y)) return %_ObjectEquals(x, y) ? 0 : 1; return 1; // not equal + } else if (IsFloat32x4(x)) { + while (true) { + if (IsFloat32x4(y) || IsInt32x4(y)) { + return (x.x == y.x && x.y == y.y && x.z == y.z && x.w == y.w) ? 0 : 1; + } + if (IS_STRING(y)) return %StringEquals(%ToString(x), y); + if (IS_NUMBER(y)) return 1; // not equal + if (IS_SYMBOL(y)) return 1; // not equal + if (IS_BOOLEAN(y)) return y ? 0 : 1; + if (IS_NULL_OR_UNDEFINED(y)) return 1; // not equal + y = %ToPrimitive(y, NO_HINT); + } + } else if (IsFloat64x2(x)) { + while (true) { + if (IsFloat64x2(y)) { + return (x.x == y.x && x.y == y.y) ? 0 : 1; + } + if (IS_STRING(y)) return %StringEquals(%ToString(x), y); + if (IS_NUMBER(y)) return 1; // not equal + if (IS_SYMBOL(y)) return 1; // not equal + if (IS_BOOLEAN(y)) return y ? 0 : 1; + if (IS_NULL_OR_UNDEFINED(y)) return 1; // not equal + y = %ToPrimitive(y, NO_HINT); + } + } else if (IsInt32x4(x)) { + while (true) { + if (IsFloat32x4(y) || IsInt32x4(y)) { + return (x.x == y.x && x.y == y.y && x.z == y.z && x.w == y.w) ? 0 : 1; + } + if (IS_STRING(y)) return %StringEquals(%ToString(x), y); + if (IS_NUMBER(y)) return 1; // not equal + if (IS_SYMBOL(y)) return 1; // not equal + if (IS_BOOLEAN(y)) return y ? 0 : 1; + if (IS_NULL_OR_UNDEFINED(y)) return 1; // not equal + y = %ToPrimitive(y, NO_HINT); + } } else if (IS_BOOLEAN(x)) { if (IS_BOOLEAN(y)) return %_ObjectEquals(x, y) ? 
0 : 1; if (IS_NULL_OR_UNDEFINED(y)) return 1; if (IS_NUMBER(y)) return %NumberEquals(%ToNumber(x), y); if (IS_STRING(y)) return %NumberEquals(%ToNumber(x), %ToNumber(y)); if (IS_SYMBOL(y)) return 1; // not equal + if (IsFloat32x4(y) || IsFloat64x2(y) || IsInt32x4(y)) return x ? 0 : 1; // y is object. x = %ToNumber(x); y = %ToPrimitive(y, NO_HINT); @@ -91,6 +131,23 @@ function STRICT_EQUALS(x) { return %NumberEquals(this, x); } + if (IsFloat32x4(this)) { + if (!IsFloat32x4(x)) return 1; // not equal + return (this.x == x.x && this.y == x.y && + this.z == x.z && this.w == x.w) ? 0 : 1; + } + + if (IsFloat64x2(this)) { + if (!IsFloat64x2(x)) return 1; // not equal + return (this.x == x.x && this.y == x.y) ? 0 : 1; + } + + if (IsInt32x4(this)) { + if (!IsInt32x4(x)) return 1; // not equal + return (this.x == x.x && this.y == x.y && + this.z == x.z && this.w == x.w) ? 0 : 1; + } + // If anything else gets here, we just do simple identity check. // Objects (including functions), null, undefined and booleans were // checked in the CompareStub, so there should be nothing left. 
@@ -127,6 +184,30 @@ function COMPARE(x, ncr) { right = %ToPrimitive(x, NUMBER_HINT); if (IS_STRING(left) && IS_STRING(right)) { return %_StringCompare(left, right); + } else if ((IsFloat32x4(left) || IsInt32x4(left)) && + (IsFloat32x4(right) || IsInt32x4(right))) { + if ((left.x == right.x) && (left.y == right.y) && + (left.z == right.z) && (left.w == right.w)) { + return 0; // equal + } + if ((left.x < right.x) && (left.y < right.y) && + (left.z < right.z) && (left.w < right.w)) { + return -1; // less + } + if ((left.x > right.x) && (left.y > right.y) && + (left.z > right.z) && (left.w > right.w)) { + return 1; // greater + } + } else if (IsFloat64x2(left) && IsFloat64x2(right)) { + if ((left.x == right.x) && (left.y == right.y)) { + return 0; // equal + } + if ((left.x < right.x) && (left.y < right.y)) { + return -1; // less + } + if ((left.x > right.x) && (left.y > right.y)) { + return 1; // greater + } } else { var left_number = %ToNumber(left); var right_number = %ToNumber(right); @@ -503,6 +584,9 @@ function ToNumber(x) { if (IS_BOOLEAN(x)) return x ? 1 : 0; if (IS_UNDEFINED(x)) return NAN; if (IS_SYMBOL(x)) throw MakeTypeError('symbol_to_number', []); + if (IsFloat32x4(x)) return NAN; + if (IsFloat64x2(x)) return NAN; + if (IsInt32x4(x)) return NAN; return (IS_NULL(x)) ? 0 : ToNumber(%DefaultNumber(x)); } @@ -514,6 +598,9 @@ function NonNumberToNumber(x) { if (IS_BOOLEAN(x)) return x ? 1 : 0; if (IS_UNDEFINED(x)) return NAN; if (IS_SYMBOL(x)) throw MakeTypeError('symbol_to_number', []); + if (IsFloat32x4(x)) return NAN; + if (IsFloat64x2(x)) return NAN; + if (IsInt32x4(x)) return NAN; return (IS_NULL(x)) ?
0 : ToNumber(%DefaultNumber(x)); } @@ -548,6 +635,9 @@ function ToObject(x) { if (IS_STRING(x)) return new $String(x); if (IS_NUMBER(x)) return new $Number(x); if (IS_BOOLEAN(x)) return new $Boolean(x); + if (IsFloat32x4(x)) return new $Float32x4(x.x, x.y, x.z, x.w); + if (IsFloat64x2(x)) return new $Float64x2(x.x, x.y); + if (IsInt32x4(x)) return new $Int32x4(x.x, x.y, x.z, x.w); if (IS_SYMBOL(x)) return %NewSymbolWrapper(x); if (IS_NULL_OR_UNDEFINED(x) && !IS_UNDETECTABLE(x)) { throw %MakeTypeError('undefined_or_null_to_object', []); diff --git a/src/v8/src/simd128.js b/src/v8/src/simd128.js new file mode 100644 index 0000000..2ac352f --- /dev/null +++ b/src/v8/src/simd128.js @@ -0,0 +1,1063 @@ +// Copyright 2013 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"use strict"; + +// This file relies on the fact that the following declaration has been made +// in runtime.js: +// var $Array = global.Array; + +var $SIMD = global.SIMD; +var $Float32x4 = $SIMD.float32x4; +var $Float64x2 = $SIMD.float64x2; +var $Int32x4 = $SIMD.int32x4; + +macro SIMD128_DATA_TYPES(FUNCTION) +FUNCTION(Float32x4, float32x4) +FUNCTION(Float64x2, float64x2) +FUNCTION(Int32x4, int32x4) +endmacro + +macro DECLARE_DATA_TYPE_COMMON_FUNCTION(NAME, TYPE) +function ThrowNAMETypeError() { + throw MakeTypeError("this is not a TYPE object."); +} + +function CheckNAME(arg) { + if (!(arg instanceof $NAME)) + ThrowNAMETypeError(); +} +endmacro + +SIMD128_DATA_TYPES(DECLARE_DATA_TYPE_COMMON_FUNCTION) + +function StringfyFloat32x4JS() { + CheckFloat32x4(this); + return "float32x4(" + this.x + "," + this.y + "," + this.z + "," + this.w + ")"; +} + +function StringfyFloat64x2JS() { + CheckFloat64x2(this); + return "float64x2(" + this.x + "," + this.y + ")"; +} + +function StringfyInt32x4JS() { + CheckInt32x4(this); + return "int32x4(" + this.x + "," + this.y + "," + this.z + "," + this.w + ")"; +} + +macro SIMD128_DATA_TYPE_FUNCTIONS(FUNCTION) +FUNCTION(Float32x4, GetX) +FUNCTION(Float32x4, GetY) +FUNCTION(Float32x4, GetZ) +FUNCTION(Float32x4, GetW) +FUNCTION(Float32x4, GetSignMask) +FUNCTION(Float64x2, GetX) +FUNCTION(Float64x2, GetY) +FUNCTION(Float64x2, GetSignMask) +FUNCTION(Int32x4, GetX) +FUNCTION(Int32x4, GetY) 
+FUNCTION(Int32x4, GetZ) +FUNCTION(Int32x4, GetW) +FUNCTION(Int32x4, GetFlagX) +FUNCTION(Int32x4, GetFlagY) +FUNCTION(Int32x4, GetFlagZ) +FUNCTION(Int32x4, GetFlagW) +FUNCTION(Int32x4, GetSignMask) +endmacro + +macro DECLARE_DATA_TYPE_FUNCTION(TYPE, FUNCTION) +function TYPEFUNCTIONJS() { + CheckTYPE(this); + return %TYPEFUNCTION(this); +} +endmacro + +SIMD128_DATA_TYPE_FUNCTIONS(DECLARE_DATA_TYPE_FUNCTION) + +function Float32x4Constructor(x, y, z, w) { + if (arguments.length == 1) { + CheckFloat32x4(x); + return %CreateFloat32x4(x.x, x.y, x.z, x.w); + } else { + x = TO_NUMBER_INLINE(x); + y = TO_NUMBER_INLINE(y); + z = TO_NUMBER_INLINE(z); + w = TO_NUMBER_INLINE(w); + return %CreateFloat32x4(x, y, z, w); + } +} + +function Float64x2Constructor(x, y) { + if (arguments.length == 1) { + CheckFloat64x2(x); + return %CreateFloat64x2(x.x, x.y); + } else { + x = TO_NUMBER_INLINE(x); + y = TO_NUMBER_INLINE(y); + return %CreateFloat64x2(x, y); + } +} + +function Int32x4Constructor(x, y, z, w) { + if (arguments.length == 1) { + CheckInt32x4(x); + return %CreateInt32x4(x.x, x.y, x.z, x.w); + } else { + x = TO_INT32(x); + y = TO_INT32(y); + z = TO_INT32(z); + w = TO_INT32(w); + return %CreateInt32x4(x, y, z, w); + } +} + +function SetUpFloat32x4() { + %CheckIsBootstrapping(); + + %SetCode($Float32x4, Float32x4Constructor); + + %FunctionSetPrototype($Float32x4, new $Object()); + %AddNamedProperty($Float32x4.prototype, "constructor", $Float32x4, DONT_ENUM); + + InstallGetter($Float32x4.prototype, "x", Float32x4GetXJS); + InstallGetter($Float32x4.prototype, "y", Float32x4GetYJS); + InstallGetter($Float32x4.prototype, "z", Float32x4GetZJS); + InstallGetter($Float32x4.prototype, "w", Float32x4GetWJS); + InstallGetter($Float32x4.prototype, "signMask", Float32x4GetSignMaskJS); + InstallFunctions($Float32x4.prototype, DONT_ENUM, $Array( + "toString", StringfyFloat32x4JS + )); +} + +function SetUpFloat64x2() { + %CheckIsBootstrapping(); + + %SetCode($Float64x2, Float64x2Constructor); + 
+ %FunctionSetPrototype($Float64x2, new $Object()); + %AddNamedProperty($Float64x2.prototype, "constructor", $Float64x2, DONT_ENUM); + + InstallGetter($Float64x2.prototype, "x", Float64x2GetXJS); + InstallGetter($Float64x2.prototype, "y", Float64x2GetYJS); + InstallGetter($Float64x2.prototype, "signMask", Float64x2GetSignMaskJS); + InstallFunctions($Float64x2.prototype, DONT_ENUM, $Array( + "toString", StringfyFloat64x2JS + )); +} + +function SetUpInt32x4() { + %CheckIsBootstrapping(); + + %SetCode($Int32x4, Int32x4Constructor); + + %FunctionSetPrototype($Int32x4, new $Object()); + %AddNamedProperty($Int32x4.prototype, "constructor", $Int32x4, DONT_ENUM); + + InstallGetter($Int32x4.prototype, "x", Int32x4GetXJS); + InstallGetter($Int32x4.prototype, "y", Int32x4GetYJS); + InstallGetter($Int32x4.prototype, "z", Int32x4GetZJS); + InstallGetter($Int32x4.prototype, "w", Int32x4GetWJS); + InstallGetter($Int32x4.prototype, "flagX", Int32x4GetFlagXJS); + InstallGetter($Int32x4.prototype, "flagY", Int32x4GetFlagYJS); + InstallGetter($Int32x4.prototype, "flagZ", Int32x4GetFlagZJS); + InstallGetter($Int32x4.prototype, "flagW", Int32x4GetFlagWJS); + InstallGetter($Int32x4.prototype, "signMask", Int32x4GetSignMaskJS); + InstallFunctions($Int32x4.prototype, DONT_ENUM, $Array( + "toString", StringfyInt32x4JS + )); +} + +SetUpFloat32x4(); +SetUpFloat64x2(); +SetUpInt32x4(); + +//------------------------------------------------------------------------------ +macro SIMD128_UNARY_FUNCTIONS(FUNCTION) +FUNCTION(Float32x4, Abs) +FUNCTION(Float32x4, BitsToInt32x4) +FUNCTION(Float32x4, Neg) +FUNCTION(Float32x4, Reciprocal) +FUNCTION(Float32x4, ReciprocalSqrt) +FUNCTION(Float32x4, Sqrt) +FUNCTION(Float32x4, ToInt32x4) +FUNCTION(Float64x2, Abs) +FUNCTION(Float64x2, Neg) +FUNCTION(Float64x2, Sqrt) +FUNCTION(Int32x4, BitsToFloat32x4) +FUNCTION(Int32x4, Neg) +FUNCTION(Int32x4, Not) +FUNCTION(Int32x4, ToFloat32x4) +endmacro + +macro SIMD128_BINARY_FUNCTIONS(FUNCTION) +FUNCTION(Float32x4, Add) 
+FUNCTION(Float32x4, Div) +FUNCTION(Float32x4, Max) +FUNCTION(Float32x4, Min) +FUNCTION(Float32x4, Mul) +FUNCTION(Float32x4, Sub) +FUNCTION(Float32x4, Equal) +FUNCTION(Float32x4, NotEqual) +FUNCTION(Float32x4, GreaterThanOrEqual) +FUNCTION(Float32x4, GreaterThan) +FUNCTION(Float32x4, LessThan) +FUNCTION(Float32x4, LessThanOrEqual) +FUNCTION(Float64x2, Add) +FUNCTION(Float64x2, Div) +FUNCTION(Float64x2, Max) +FUNCTION(Float64x2, Min) +FUNCTION(Float64x2, Mul) +FUNCTION(Float64x2, Sub) +FUNCTION(Int32x4, Add) +FUNCTION(Int32x4, And) +FUNCTION(Int32x4, Mul) +FUNCTION(Int32x4, Or) +FUNCTION(Int32x4, Sub) +FUNCTION(Int32x4, Xor) +FUNCTION(Int32x4, Equal) +FUNCTION(Int32x4, GreaterThan) +FUNCTION(Int32x4, LessThan) +endmacro + +macro SIMD128_BINARY_SHUFFLE_FUNCTIONS(FUNCTION) +FUNCTION(Float32x4) +FUNCTION(Int32x4) +endmacro + +macro FLOAT32x4_BINARY_FUNCTIONS_WITH_FLOAT32_PARAMETER(FUNCTION) +FUNCTION(Scale) +FUNCTION(WithX) +FUNCTION(WithY) +FUNCTION(WithZ) +FUNCTION(WithW) +endmacro + +macro FLOAT64x2_BINARY_FUNCTIONS_WITH_FLOAT64_PARAMETER(FUNCTION) +FUNCTION(Scale) +FUNCTION(WithX) +FUNCTION(WithY) +endmacro + +macro INT32x4_BINARY_FUNCTIONS_WITH_INT32_PARAMETER(FUNCTION) +FUNCTION(WithX) +FUNCTION(WithY) +FUNCTION(WithZ) +FUNCTION(WithW) +endmacro + +macro INT32x4_BINARY_FUNCTIONS_WITH_BOOLEAN_PARAMETER(FUNCTION) +FUNCTION(WithFlagX) +FUNCTION(WithFlagY) +FUNCTION(WithFlagZ) +FUNCTION(WithFlagW) +endmacro + +macro DECLARE_SIMD_UNARY_FUNCTION(TYPE, FUNCTION) +function TYPEFUNCTIONJS(x4) { + CheckTYPE(x4); + return %TYPEFUNCTION(x4); +} +endmacro + +macro DECLARE_SIMD_BINARY_FUNCTION(TYPE, FUNCTION) +function TYPEFUNCTIONJS(a4, b4) { + CheckTYPE(a4); + CheckTYPE(b4); + return %TYPEFUNCTION(a4, b4); +} +endmacro + +macro DECLARE_SIMD_BINARY_SHUFFLE_FUNCTION(TYPE) +function TYPEShuffleJS(x4, mask) { + CheckTYPE(x4); + var value = TO_INT32(mask); + if ((value < 0) || (value > 0xFF)) { + throw MakeRangeError("invalid_simd_shuffle_mask"); + } + return %TYPEShuffle(x4, 
mask); +} +endmacro + +macro DECLARE_FLOAT32x4_BINARY_FUNCTION_WITH_FLOAT32_PARAMETER(FUNCTION) +function Float32x4FUNCTIONJS(x4, f) { + CheckFloat32x4(x4); + f = TO_NUMBER_INLINE(f); + return %Float32x4FUNCTION(x4, f); +} +endmacro + +macro DECLARE_FLOAT64x2_BINARY_FUNCTION_WITH_FLOAT64_PARAMETER(FUNCTION) +function Float64x2FUNCTIONJS(x2, f) { + CheckFloat64x2(x2); + f = TO_NUMBER_INLINE(f); + return %Float64x2FUNCTION(x2, f); +} +endmacro + +macro DECLARE_INT32x4_BINARY_FUNCTION_WITH_INT32_PARAMETER(FUNCTION) +function Int32x4FUNCTIONJS(x4, i) { + CheckInt32x4(x4); + i = TO_INT32(i); + return %Int32x4FUNCTION(x4, i); +} +endmacro + +macro DECLARE_INT32x4_BINARY_FUNCTION_WITH_BOOLEAN_PARAMETER(FUNCTION) +function Int32x4FUNCTIONJS(x4, b) { + CheckInt32x4(x4); + b = ToBoolean(b); + return %Int32x4FUNCTION(x4, b); +} +endmacro + +SIMD128_UNARY_FUNCTIONS(DECLARE_SIMD_UNARY_FUNCTION) +SIMD128_BINARY_FUNCTIONS(DECLARE_SIMD_BINARY_FUNCTION) +SIMD128_BINARY_SHUFFLE_FUNCTIONS(DECLARE_SIMD_BINARY_SHUFFLE_FUNCTION) +FLOAT32x4_BINARY_FUNCTIONS_WITH_FLOAT32_PARAMETER(DECLARE_FLOAT32x4_BINARY_FUNCTION_WITH_FLOAT32_PARAMETER) +FLOAT64x2_BINARY_FUNCTIONS_WITH_FLOAT64_PARAMETER(DECLARE_FLOAT64x2_BINARY_FUNCTION_WITH_FLOAT64_PARAMETER) +INT32x4_BINARY_FUNCTIONS_WITH_INT32_PARAMETER(DECLARE_INT32x4_BINARY_FUNCTION_WITH_INT32_PARAMETER) +INT32x4_BINARY_FUNCTIONS_WITH_BOOLEAN_PARAMETER(DECLARE_INT32x4_BINARY_FUNCTION_WITH_BOOLEAN_PARAMETER) + +function Float32x4SplatJS(f) { + f = TO_NUMBER_INLINE(f); + return %CreateFloat32x4(f, f, f, f); +} + +function Float32x4ZeroJS() { + return %CreateFloat32x4(0.0, 0.0, 0.0, 0.0); +} + +function Float32x4AndJS(a4, b4) { + a4 = Float32x4BitsToInt32x4JS(a4); + b4 = Float32x4BitsToInt32x4JS(b4); + return Int32x4BitsToFloat32x4JS(Int32x4AndJS(a4, b4)); +} + +function Float32x4OrJS(a4, b4) { + a4 = Float32x4BitsToInt32x4JS(a4); + b4 = Float32x4BitsToInt32x4JS(b4); + return Int32x4BitsToFloat32x4JS(Int32x4OrJS(a4, b4)); +} + +function 
Float32x4XorJS(a4, b4) { + a4 = Float32x4BitsToInt32x4JS(a4); + b4 = Float32x4BitsToInt32x4JS(b4); + return Int32x4BitsToFloat32x4JS(Int32x4XorJS(a4, b4)); +} + +function Float32x4NotJS(x4) { + x4 = Float32x4BitsToInt32x4JS(x4); + return Int32x4BitsToFloat32x4JS(Int32x4NotJS(x4)); +} + +function Float32x4ClampJS(x4, lowerLimit, upperLimit) { + CheckFloat32x4(x4); + CheckFloat32x4(lowerLimit); + CheckFloat32x4(upperLimit); + return %Float32x4Clamp(x4, lowerLimit, upperLimit); +} + +function Float32x4ShuffleMixJS(a4, b4, mask) { + CheckFloat32x4(a4); + CheckFloat32x4(b4); + var value = TO_INT32(mask); + if ((value < 0) || (value > 0xFF)) { + throw MakeRangeError("invalid_simd_shuffleMix_mask"); + } + return %Float32x4ShuffleMix(a4, b4, mask); +} + +function Float32x4SelectJS(x4, trueValue, falseValue) { + CheckInt32x4(x4); + CheckFloat32x4(trueValue); + CheckFloat32x4(falseValue); + return %Float32x4Select(x4, trueValue, falseValue); +} + +function Float64x2SplatJS(f) { + f = TO_NUMBER_INLINE(f); + return %CreateFloat64x2(f, f); +} + +function Float64x2ZeroJS() { + return %CreateFloat64x2(0.0, 0.0); +} + +function Float64x2ClampJS(x2, lowerLimit, upperLimit) { + CheckFloat64x2(x2); + CheckFloat64x2(lowerLimit); + CheckFloat64x2(upperLimit); + return %Float64x2Clamp(x2, lowerLimit, upperLimit); +} + +function Int32x4ZeroJS() { + return %CreateInt32x4(0, 0, 0, 0); +} + +function Int32x4BoolJS(x, y, z, w) { + x = x ? -1 : 0; + y = y ? -1 : 0; + z = z ? -1 : 0; + w = w ? 
-1 : 0; + return %CreateInt32x4(x, y, z, w); +} + +function Int32x4SplatJS(s) { + s = TO_INT32(s); + return %CreateInt32x4(s, s, s, s); +} + +function Int32x4SelectJS(x4, trueValue, falseValue) { + CheckInt32x4(x4); + CheckInt32x4(trueValue); + CheckInt32x4(falseValue); + return %Int32x4Select(x4, trueValue, falseValue); +} + +function Int32x4ShiftLeftJS(t, s) { + CheckInt32x4(t); + s = TO_NUMBER_INLINE(s); + var x = t.x << s; + var y = t.y << s; + var z = t.z << s; + var w = t.w << s; + return %CreateInt32x4(x, y, z, w); +} + +function Int32x4ShiftRightJS(t, s) { + CheckInt32x4(t); + s = TO_NUMBER_INLINE(s); + var x = t.x >>> s; + var y = t.y >>> s; + var z = t.z >>> s; + var w = t.w >>> s; + return %CreateInt32x4(x, y, z, w); +} + +function Int32x4ShiftRightArithmeticJS(t, s) { + CheckInt32x4(t); + s = TO_NUMBER_INLINE(s); + var x = t.x >> s; + var y = t.y >> s; + var z = t.z >> s; + var w = t.w >> s; + return %CreateInt32x4(x, y, z, w); +} + +function SetUpSIMD() { + %CheckIsBootstrapping(); + + %OptimizeObjectForAddingMultipleProperties($SIMD, 258); + %AddNamedProperty($SIMD, "XXXX", 0x00, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXXY", 0x40, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXXZ", 0x80, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXXW", 0xC0, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXYX", 0x10, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXYY", 0x50, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXYZ", 0x90, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXYW", 0xD0, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXZX", 0x20, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXZY", 0x60, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXZZ", 0xA0, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXZW", 0xE0, DONT_ENUM | 
DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXWX", 0x30, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXWY", 0x70, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXWZ", 0xB0, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XXWW", 0xF0, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYXX", 0x04, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYXY", 0x44, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYXZ", 0x84, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYXW", 0xC4, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYYX", 0x14, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYYY", 0x54, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYYZ", 0x94, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYYW", 0xD4, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYZX", 0x24, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYZY", 0x64, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYZZ", 0xA4, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYZW", 0xE4, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYWX", 0x34, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYWY", 0x74, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYWZ", 0xB4, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XYWW", 0xF4, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZXX", 0x08, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZXY", 0x48, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZXZ", 0x88, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZXW", 0xC8, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZYX", 0x18, DONT_ENUM | DONT_DELETE | 
READ_ONLY); + %AddNamedProperty($SIMD, "XZYY", 0x58, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZYZ", 0x98, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZYW", 0xD8, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZZX", 0x28, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZZY", 0x68, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZZZ", 0xA8, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZZW", 0xE8, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZWX", 0x38, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZWY", 0x78, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZWZ", 0xB8, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XZWW", 0xF8, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWXX", 0x0C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWXY", 0x4C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWXZ", 0x8C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWXW", 0xCC, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWYX", 0x1C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWYY", 0x5C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWYZ", 0x9C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWYW", 0xDC, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWZX", 0x2C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWZY", 0x6C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWZZ", 0xAC, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWZW", 0xEC, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWWX", 0x3C, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWWY", 0x7C, DONT_ENUM | DONT_DELETE | READ_ONLY); + 
%AddNamedProperty($SIMD, "XWWZ", 0xBC, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "XWWW", 0xFC, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXXX", 0x01, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXXY", 0x41, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXXZ", 0x81, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXXW", 0xC1, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXYX", 0x11, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXYY", 0x51, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXYZ", 0x91, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXYW", 0xD1, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXZX", 0x21, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXZY", 0x61, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXZZ", 0xA1, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXZW", 0xE1, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXWX", 0x31, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXWY", 0x71, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXWZ", 0xB1, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YXWW", 0xF1, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYXX", 0x05, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYXY", 0x45, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYXZ", 0x85, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYXW", 0xC5, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYYX", 0x15, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYYY", 0x55, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYYZ", 0x95, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, 
"YYYW", 0xD5, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYZX", 0x25, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYZY", 0x65, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYZZ", 0xA5, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYZW", 0xE5, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYWX", 0x35, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYWY", 0x75, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYWZ", 0xB5, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YYWW", 0xF5, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZXX", 0x09, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZXY", 0x49, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZXZ", 0x89, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZXW", 0xC9, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZYX", 0x19, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZYY", 0x59, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZYZ", 0x99, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZYW", 0xD9, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZZX", 0x29, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZZY", 0x69, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZZZ", 0xA9, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZZW", 0xE9, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZWX", 0x39, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZWY", 0x79, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZWZ", 0xB9, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YZWW", 0xF9, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWXX", 0x0D, DONT_ENUM 
| DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWXY", 0x4D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWXZ", 0x8D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWXW", 0xCD, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWYX", 0x1D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWYY", 0x5D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWYZ", 0x9D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWYW", 0xDD, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWZX", 0x2D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWZY", 0x6D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWZZ", 0xAD, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWZW", 0xED, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWWX", 0x3D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWWY", 0x7D, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWWZ", 0xBD, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "YWWW", 0xFD, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXXX", 0x02, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXXY", 0x42, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXXZ", 0x82, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXXW", 0xC2, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXYX", 0x12, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXYY", 0x52, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXYZ", 0x92, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXYW", 0xD2, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXZX", 0x22, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXZY", 0x62, DONT_ENUM | DONT_DELETE | 
READ_ONLY); + %AddNamedProperty($SIMD, "ZXZZ", 0xA2, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXZW", 0xE2, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXWX", 0x32, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXWY", 0x72, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXWZ", 0xB2, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZXWW", 0xF2, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYXX", 0x06, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYXY", 0x46, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYXZ", 0x86, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYXW", 0xC6, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYYX", 0x16, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYYY", 0x56, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYYZ", 0x96, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYYW", 0xD6, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYZX", 0x26, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYZY", 0x66, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYZZ", 0xA6, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYZW", 0xE6, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYWX", 0x36, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYWY", 0x76, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYWZ", 0xB6, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZYWW", 0xF6, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZXX", 0x0A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZXY", 0x4A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZXZ", 0x8A, DONT_ENUM | DONT_DELETE | READ_ONLY); + 
%AddNamedProperty($SIMD, "ZZXW", 0xCA, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZYX", 0x1A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZYY", 0x5A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZYZ", 0x9A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZYW", 0xDA, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZZX", 0x2A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZZY", 0x6A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZZZ", 0xAA, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZZW", 0xEA, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZWX", 0x3A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZWY", 0x7A, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZWZ", 0xBA, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZZWW", 0xFA, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWXX", 0x0E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWXY", 0x4E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWXZ", 0x8E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWXW", 0xCE, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWYX", 0x1E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWYY", 0x5E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWYZ", 0x9E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWYW", 0xDE, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWZX", 0x2E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWZY", 0x6E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWZZ", 0xAE, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWZW", 0xEE, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, 
"ZWWX", 0x3E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWWY", 0x7E, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWWZ", 0xBE, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "ZWWW", 0xFE, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXXX", 0x03, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXXY", 0x43, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXXZ", 0x83, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXXW", 0xC3, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXYX", 0x13, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXYY", 0x53, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXYZ", 0x93, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXYW", 0xD3, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXZX", 0x23, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXZY", 0x63, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXZZ", 0xA3, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXZW", 0xE3, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXWX", 0x33, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXWY", 0x73, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXWZ", 0xB3, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WXWW", 0xF3, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYXX", 0x07, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYXY", 0x47, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYXZ", 0x87, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYXW", 0xC7, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYYX", 0x17, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYYY", 0x57, DONT_ENUM 
| DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYYZ", 0x97, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYYW", 0xD7, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYZX", 0x27, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYZY", 0x67, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYZZ", 0xA7, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYZW", 0xE7, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYWX", 0x37, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYWY", 0x77, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYWZ", 0xB7, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WYWW", 0xF7, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZXX", 0x0B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZXY", 0x4B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZXZ", 0x8B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZXW", 0xCB, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZYX", 0x1B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZYY", 0x5B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZYZ", 0x9B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZYW", 0xDB, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZZX", 0x2B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZZY", 0x6B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZZZ", 0xAB, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZZW", 0xEB, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZWX", 0x3B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZWY", 0x7B, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WZWZ", 0xBB, DONT_ENUM | DONT_DELETE | 
READ_ONLY); + %AddNamedProperty($SIMD, "WZWW", 0xFB, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWXX", 0x0F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWXY", 0x4F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWXZ", 0x8F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWXW", 0xCF, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWYX", 0x1F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWYY", 0x5F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWYZ", 0x9F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWYW", 0xDF, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWZX", 0x2F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWZY", 0x6F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWZZ", 0xAF, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWZW", 0xEF, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWWX", 0x3F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWWY", 0x7F, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWWZ", 0xBF, DONT_ENUM | DONT_DELETE | READ_ONLY); + %AddNamedProperty($SIMD, "WWWW", 0xFF, DONT_ENUM | DONT_DELETE | READ_ONLY); + + %ToFastProperties($SIMD); + + // Set up non-enumerable properties of the SIMD float32x4 object. 
+ InstallFunctions($SIMD.float32x4, DONT_ENUM, $Array( + // Float32x4 operations + "splat", Float32x4SplatJS, + "zero", Float32x4ZeroJS, + // Unary + "abs", Float32x4AbsJS, + "fromInt32x4", Int32x4ToFloat32x4JS, + "fromInt32x4Bits", Int32x4BitsToFloat32x4JS, + "neg", Float32x4NegJS, + "reciprocal", Float32x4ReciprocalJS, + "reciprocalSqrt", Float32x4ReciprocalSqrtJS, + "sqrt", Float32x4SqrtJS, + // Binary + "add", Float32x4AddJS, + "div", Float32x4DivJS, + "max", Float32x4MaxJS, + "min", Float32x4MinJS, + "mul", Float32x4MulJS, + "sub", Float32x4SubJS, + "lessThan", Float32x4LessThanJS, + "lessThanOrEqual", Float32x4LessThanOrEqualJS, + "equal", Float32x4EqualJS, + "notEqual", Float32x4NotEqualJS, + "greaterThanOrEqual", Float32x4GreaterThanOrEqualJS, + "greaterThan", Float32x4GreaterThanJS, + "and", Float32x4AndJS, + "or", Float32x4OrJS, + "xor", Float32x4XorJS, + "not", Float32x4NotJS, + "scale", Float32x4ScaleJS, + "withX", Float32x4WithXJS, + "withY", Float32x4WithYJS, + "withZ", Float32x4WithZJS, + "withW", Float32x4WithWJS, + "shuffle", Float32x4ShuffleJS, + // Ternary + "clamp", Float32x4ClampJS, + "shuffleMix", Float32x4ShuffleMixJS, + "select", Float32x4SelectJS + )); + + // Set up non-enumerable properties of the SIMD float64x2 object. + InstallFunctions($SIMD.float64x2, DONT_ENUM, $Array( + // Float64x2 operations + "splat", Float64x2SplatJS, + "zero", Float64x2ZeroJS, + // Unary + "abs", Float64x2AbsJS, + "neg", Float64x2NegJS, + "sqrt", Float64x2SqrtJS, + // Binary + "add", Float64x2AddJS, + "div", Float64x2DivJS, + "max", Float64x2MaxJS, + "min", Float64x2MinJS, + "mul", Float64x2MulJS, + "sub", Float64x2SubJS, + "scale", Float64x2ScaleJS, + "withX", Float64x2WithXJS, + "withY", Float64x2WithYJS, + // Ternary + "clamp", Float64x2ClampJS + )); + + // Set up non-enumerable properties of the SIMD int32x4 object. 
+ InstallFunctions($SIMD.int32x4, DONT_ENUM, $Array( + // Int32x4 operations + "zero", Int32x4ZeroJS, + "splat", Int32x4SplatJS, + "bool", Int32x4BoolJS, + // Unary + "fromFloat32x4", Float32x4ToInt32x4JS, + "fromFloat32x4Bits", Float32x4BitsToInt32x4JS, + "neg", Int32x4NegJS, + "not", Int32x4NotJS, + // Binary + "add", Int32x4AddJS, + "and", Int32x4AndJS, + "mul", Int32x4MulJS, + "or", Int32x4OrJS, + "sub", Int32x4SubJS, + "xor", Int32x4XorJS, + "shuffle", Int32x4ShuffleJS, + "withX", Int32x4WithXJS, + "withY", Int32x4WithYJS, + "withZ", Int32x4WithZJS, + "withW", Int32x4WithWJS, + "withFlagX", Int32x4WithFlagXJS, + "withFlagY", Int32x4WithFlagYJS, + "withFlagZ", Int32x4WithFlagZJS, + "withFlagW", Int32x4WithFlagWJS, + "greaterThan", Int32x4GreaterThanJS, + "equal", Int32x4EqualJS, + "lessThan", Int32x4LessThanJS, + "shiftLeft", Int32x4ShiftLeftJS, + "shiftRight", Int32x4ShiftRightJS, + "shiftRightArithmetic", Int32x4ShiftRightArithmeticJS, + // Ternary + "select", Int32x4SelectJS + )); +} + +SetUpSIMD(); + +//------------------------------------------------------------------------------ +macro SIMD128_TYPED_ARRAYS(FUNCTION) +// arrayIds below should be synchronized with Runtime_TypedArrayInitialize. 
+FUNCTION(10, Float32x4Array, 16) +FUNCTION(11, Float64x2Array, 16) +FUNCTION(12, Int32x4Array, 16) +endmacro + +macro TYPED_ARRAY_CONSTRUCTOR(ARRAY_ID, NAME, ELEMENT_SIZE) + function NAMEConstructByArrayBuffer(obj, buffer, byteOffset, length) { + if (!IS_UNDEFINED(byteOffset)) { + byteOffset = + ToPositiveInteger(byteOffset, "invalid_typed_array_length"); + } + if (!IS_UNDEFINED(length)) { + length = ToPositiveInteger(length, "invalid_typed_array_length"); + } + + var bufferByteLength = %_ArrayBufferGetByteLength(buffer); + var offset; + if (IS_UNDEFINED(byteOffset)) { + offset = 0; + } else { + offset = byteOffset; + + if (offset % ELEMENT_SIZE !== 0) { + throw MakeRangeError("invalid_typed_array_alignment", + ["start offset", "NAME", ELEMENT_SIZE]); + } + if (offset > bufferByteLength) { + throw MakeRangeError("invalid_typed_array_offset"); + } + } + + var newByteLength; + var newLength; + if (IS_UNDEFINED(length)) { + if (bufferByteLength % ELEMENT_SIZE !== 0) { + throw MakeRangeError("invalid_typed_array_alignment", + ["byte length", "NAME", ELEMENT_SIZE]); + } + newByteLength = bufferByteLength - offset; + newLength = newByteLength / ELEMENT_SIZE; + } else { + var newLength = length; + newByteLength = newLength * ELEMENT_SIZE; + } + if ((offset + newByteLength > bufferByteLength) + || (newLength > %_MaxSmi())) { + throw MakeRangeError("invalid_typed_array_length"); + } + %_TypedArrayInitialize(obj, ARRAY_ID, buffer, offset, newByteLength); + } + + function NAMEConstructByLength(obj, length) { + var l = IS_UNDEFINED(length) ? 
+ 0 : ToPositiveInteger(length, "invalid_typed_array_length"); + if (l > %_MaxSmi()) { + throw MakeRangeError("invalid_typed_array_length"); + } + var byteLength = l * ELEMENT_SIZE; + if (byteLength > %_TypedArrayMaxSizeInHeap()) { + var buffer = new $ArrayBuffer(byteLength); + %_TypedArrayInitialize(obj, ARRAY_ID, buffer, 0, byteLength); + } else { + %_TypedArrayInitialize(obj, ARRAY_ID, null, 0, byteLength); + } + } + + function NAMEConstructByArrayLike(obj, arrayLike) { + var length = arrayLike.length; + var l = ToPositiveInteger(length, "invalid_typed_array_length"); + + if (l > %_MaxSmi()) { + throw MakeRangeError("invalid_typed_array_length"); + } + if(!%TypedArrayInitializeFromArrayLike(obj, ARRAY_ID, arrayLike, l)) { + for (var i = 0; i < l; i++) { + // It is crucial that we let any execptions from arrayLike[i] + // propagate outside the function. + obj[i] = arrayLike[i]; + } + } + } + + function NAMEConstructor(arg1, arg2, arg3) { + if (%_IsConstructCall()) { + if (IS_ARRAYBUFFER(arg1)) { + NAMEConstructByArrayBuffer(this, arg1, arg2, arg3); + } else if (IS_NUMBER(arg1) || IS_STRING(arg1) || + IS_BOOLEAN(arg1) || IS_UNDEFINED(arg1)) { + NAMEConstructByLength(this, arg1); + } else { + NAMEConstructByArrayLike(this, arg1); + } + } else { + throw MakeTypeError("constructor_not_function", ["NAME"]) + } + } + + function NAME_GetBuffer() { + if (!(%_ClassOf(this) === 'NAME')) { + throw MakeTypeError('incompatible_method_receiver', + ["NAME.buffer", this]); + } + return %TypedArrayGetBuffer(this); + } + + function NAME_GetByteLength() { + if (!(%_ClassOf(this) === 'NAME')) { + throw MakeTypeError('incompatible_method_receiver', + ["NAME.byteLength", this]); + } + return %_ArrayBufferViewGetByteLength(this); + } + + function NAME_GetByteOffset() { + if (!(%_ClassOf(this) === 'NAME')) { + throw MakeTypeError('incompatible_method_receiver', + ["NAME.byteOffset", this]); + } + return %_ArrayBufferViewGetByteOffset(this); + } + + function NAME_GetLength() { + if 
(!(%_ClassOf(this) === 'NAME')) { + throw MakeTypeError('incompatible_method_receiver', + ["NAME.length", this]); + } + return %_TypedArrayGetLength(this); + } + + var $NAME = global.NAME; + + function NAMESubArray(begin, end) { + if (!(%_ClassOf(this) === 'NAME')) { + throw MakeTypeError('incompatible_method_receiver', + ["NAME.subarray", this]); + } + var beginInt = TO_INTEGER(begin); + if (!IS_UNDEFINED(end)) { + end = TO_INTEGER(end); + } + + var srcLength = %_TypedArrayGetLength(this); + if (beginInt < 0) { + beginInt = $MathMax(0, srcLength + beginInt); + } else { + beginInt = $MathMin(srcLength, beginInt); + } + + var endInt = IS_UNDEFINED(end) ? srcLength : end; + if (endInt < 0) { + endInt = $MathMax(0, srcLength + endInt); + } else { + endInt = $MathMin(endInt, srcLength); + } + if (endInt < beginInt) { + endInt = beginInt; + } + var newLength = endInt - beginInt; + var beginByteOffset = + %_ArrayBufferViewGetByteOffset(this) + beginInt * ELEMENT_SIZE; + return new $NAME(%TypedArrayGetBuffer(this), + beginByteOffset, newLength); + } +endmacro + +SIMD128_TYPED_ARRAYS(TYPED_ARRAY_CONSTRUCTOR) + +function SetupSIMD128TypedArrays() { +macro SETUP_TYPED_ARRAY(ARRAY_ID, NAME, ELEMENT_SIZE) + %CheckIsBootstrapping(); + %SetCode(global.NAME, NAMEConstructor); + %FunctionSetPrototype(global.NAME, new $Object()); + + %AddNamedProperty(global.NAME, "BYTES_PER_ELEMENT", ELEMENT_SIZE, + READ_ONLY | DONT_ENUM | DONT_DELETE); + %AddNamedProperty(global.NAME.prototype, + "constructor", global.NAME, DONT_ENUM); + %AddNamedProperty(global.NAME.prototype, + "BYTES_PER_ELEMENT", ELEMENT_SIZE, + READ_ONLY | DONT_ENUM | DONT_DELETE); + InstallGetter(global.NAME.prototype, "buffer", NAME_GetBuffer); + InstallGetter(global.NAME.prototype, "byteOffset", NAME_GetByteOffset); + InstallGetter(global.NAME.prototype, "byteLength", NAME_GetByteLength); + InstallGetter(global.NAME.prototype, "length", NAME_GetLength); + + InstallFunctions(global.NAME.prototype, DONT_ENUM, $Array( + 
"subarray", NAMESubArray, + "set", TypedArraySet + )); +endmacro + +SIMD128_TYPED_ARRAYS(SETUP_TYPED_ARRAY) +} + +SetupSIMD128TypedArrays(); + +macro DECLARE_TYPED_ARRAY_FUNCTION(NAME) +function NAMEArrayGet(i) { + return this[i]; +} + +function NAMEArraySet(i, v) { + CheckNAME(v); + this[i] = v; +} + +function SetUpNAMEArray() { + InstallFunctions(global.NAMEArray.prototype, DONT_ENUM, $Array( + "getAt", NAMEArrayGet, + "setAt", NAMEArraySet + )); +} +endmacro + +DECLARE_TYPED_ARRAY_FUNCTION(Float32x4) +DECLARE_TYPED_ARRAY_FUNCTION(Float64x2) +DECLARE_TYPED_ARRAY_FUNCTION(Int32x4) + +SetUpFloat32x4Array(); +SetUpFloat64x2Array(); +SetUpInt32x4Array(); diff --git a/src/v8/src/types.cc b/src/v8/src/types.cc index db92f30..1f9fab3 100644 --- a/src/v8/src/types.cc +++ b/src/v8/src/types.cc @@ -224,6 +224,9 @@ int TypeImpl::BitsetType::Lub(i::Map* map) { case JS_MAP_ITERATOR_TYPE: case JS_WEAK_MAP_TYPE: case JS_WEAK_SET_TYPE: + case FLOAT32x4_TYPE: + case FLOAT64x2_TYPE: + case INT32x4_TYPE: if (map->is_undetectable()) return kUndetectable; return kOtherObject; case JS_ARRAY_TYPE: diff --git a/src/v8/src/x64/assembler-x64-inl.h b/src/v8/src/x64/assembler-x64-inl.h index b64bbfb..6a6f4a1 100644 --- a/src/v8/src/x64/assembler-x64-inl.h +++ b/src/v8/src/x64/assembler-x64-inl.h @@ -15,6 +15,7 @@ namespace v8 { namespace internal { bool CpuFeatures::SupportsCrankshaft() { return true; } +bool CpuFeatures::SupportsSIMD128InCrankshaft() { return true; } // ----------------------------------------------------------------------------- @@ -178,6 +179,10 @@ void Assembler::emit_optional_rex_32(Register rm_reg) { if (rm_reg.high_bit()) emit(0x41); } +void Assembler::emit_optional_rex_32(XMMRegister reg) { + byte rex_bits = (reg.code() & 0x8) >> 1; + if (rex_bits != 0) emit(0x40 | rex_bits); +} void Assembler::emit_optional_rex_32(const Operand& op) { if (op.rex_ != 0) emit(0x40 | op.rex_); diff --git a/src/v8/src/x64/assembler-x64.cc b/src/v8/src/x64/assembler-x64.cc index 
d13c21f..cb6bd6e 100644 --- a/src/v8/src/x64/assembler-x64.cc +++ b/src/v8/src/x64/assembler-x64.cc @@ -2348,6 +2348,86 @@ void Assembler::divps(XMMRegister dst, const Operand& src) { } +void Assembler::addpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x58); + emit_sse_operand(dst, src); +} + + +void Assembler::addpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x58); + emit_sse_operand(dst, src); +} + + +void Assembler::subpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5C); + emit_sse_operand(dst, src); +} + + +void Assembler::subpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5C); + emit_sse_operand(dst, src); +} + + +void Assembler::mulpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x59); + emit_sse_operand(dst, src); +} + + +void Assembler::mulpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x59); + emit_sse_operand(dst, src); +} + + +void Assembler::divpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5E); + emit_sse_operand(dst, src); +} + + +void Assembler::divpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5E); + emit_sse_operand(dst, src); +} + + // SSE 2 operations. 
void Assembler::movd(XMMRegister dst, Register src) { @@ -2463,6 +2543,34 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) { } +void Assembler::insertps(XMMRegister dst, XMMRegister src, byte imm8) { + DCHECK(CpuFeatures::IsSupported(SSE4_1)); + DCHECK(is_uint8(imm8)); + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x3A); + emit(0x21); + emit_sse_operand(dst, src); + emit(imm8); +} + + +void Assembler::pinsrd(XMMRegister dst, Register src, byte imm8) { + DCHECK(CpuFeatures::IsSupported(SSE4_1)); + DCHECK(is_uint8(imm8)); + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x3A); + emit(0x22); + emit_sse_operand(dst, src); + emit(imm8); +} + + void Assembler::movsd(const Operand& dst, XMMRegister src) { EnsureSpace ensure_space(this); emit(0xF2); // double @@ -2510,10 +2618,40 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { } +void Assembler::movups(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x10); + emit_sse_operand(dst, src); +} + + +void Assembler::movups(const Operand& dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(src, dst); + emit(0x0F); + emit(0x11); + emit_sse_operand(src, dst); +} + + void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { DCHECK(is_uint8(imm8)); EnsureSpace ensure_space(this); - emit_optional_rex_32(src, dst); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xC6); + emit_sse_operand(dst, src); + emit(imm8); +} + + +void Assembler::shufpd(XMMRegister dst, XMMRegister src, byte imm8) { + DCHECK(is_uint8(imm8)); + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); emit(0x0F); emit(0xC6); emit_sse_operand(dst, src); @@ -2780,6 +2918,16 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) { } +void Assembler::andpd(XMMRegister dst, 
const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x54); + emit_sse_operand(dst, src); +} + + void Assembler::orpd(XMMRegister dst, XMMRegister src) { EnsureSpace ensure_space(this); emit(0x66); @@ -2800,6 +2948,16 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) { } +void Assembler::xorpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x57); + emit_sse_operand(dst, src); +} + + void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { EnsureSpace ensure_space(this); emit(0xF2); @@ -2851,6 +3009,129 @@ void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) { } +void Assembler::cmpps(XMMRegister dst, XMMRegister src, int8_t cmp) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xC2); + emit_sse_operand(dst, src); + emit(cmp); +} + + +void Assembler::cmpeqps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x0); +} + + +void Assembler::cmpltps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x1); +} + + +void Assembler::cmpleps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x2); +} + + +void Assembler::cmpneqps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x4); +} + + +void Assembler::cmpnltps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x5); +} + + +void Assembler::cmpnleps(XMMRegister dst, XMMRegister src) { + cmpps(dst, src, 0x6); +} + + +void Assembler::pslld(XMMRegister reg, int8_t shift) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(reg); + emit(0x0F); + emit(0x72); + emit_sse_operand(rsi, reg); // rsi == 6 + emit(shift); +} + + +void Assembler::pslld(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xF2); + emit_sse_operand(dst, src); +} + + +void Assembler::psrld(XMMRegister reg, 
int8_t shift) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(reg); + emit(0x0F); + emit(0x72); + emit_sse_operand(rdx, reg); // rdx == 2 + emit(shift); +} + + +void Assembler::psrld(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xD2); + emit_sse_operand(dst, src); +} + + +void Assembler::psrad(XMMRegister reg, int8_t shift) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(reg); + emit(0x0F); + emit(0x72); + emit_sse_operand(rsp, reg); // rsp == 4 + emit(shift); +} + + +void Assembler::psrad(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xE2); + emit_sse_operand(dst, src); +} + + +void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x76); + emit_sse_operand(dst, src); +} + + +void Assembler::pcmpgtd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x66); + emit_sse_operand(dst, src); +} + + void Assembler::roundsd(XMMRegister dst, XMMRegister src, Assembler::RoundingMode mode) { DCHECK(IsEnabled(SSE4_1)); @@ -2885,6 +3166,318 @@ void Assembler::movmskps(Register dst, XMMRegister src) { } +void Assembler::minps(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5D); + emit_sse_operand(dst, src); +} + + +void Assembler::minps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5D); + emit_sse_operand(dst, src); +} + + +void Assembler::maxps(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5F); + 
emit_sse_operand(dst, src); +} + + +void Assembler::maxps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5F); + emit_sse_operand(dst, src); +} + + +void Assembler::minpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5D); + emit_sse_operand(dst, src); +} + + +void Assembler::minpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5D); + emit_sse_operand(dst, src); +} + + +void Assembler::maxpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5F); + emit_sse_operand(dst, src); +} + + +void Assembler::maxpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5F); + emit_sse_operand(dst, src); +} + + +void Assembler::rcpps(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x53); + emit_sse_operand(dst, src); +} + + +void Assembler::rcpps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x53); + emit_sse_operand(dst, src); +} + + +void Assembler::rsqrtps(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x52); + emit_sse_operand(dst, src); +} + + +void Assembler::rsqrtps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x52); + emit_sse_operand(dst, src); +} + + +void Assembler::sqrtps(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x51); + 
emit_sse_operand(dst, src); +} + + +void Assembler::sqrtps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x51); + emit_sse_operand(dst, src); +} + + +void Assembler::sqrtpd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x51); + emit_sse_operand(dst, src); +} + + +void Assembler::sqrtpd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x51); + emit_sse_operand(dst, src); +} + + +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5B); + emit_sse_operand(dst, src); +} + + +void Assembler::cvtdq2ps(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5B); + emit_sse_operand(dst, src); +} + + +void Assembler::paddd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xFE); + emit_sse_operand(dst, src); +} + + +void Assembler::paddd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xFE); + emit_sse_operand(dst, src); +} + + +void Assembler::psubd(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xFA); + emit_sse_operand(dst, src); +} + + +void Assembler::psubd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xFA); + emit_sse_operand(dst, src); +} + + +void Assembler::pmulld(XMMRegister dst, XMMRegister src) { + DCHECK(IsEnabled(SSE4_1)); + EnsureSpace ensure_space(this); + 
emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x38); + emit(0x40); + emit_sse_operand(dst, src); +} + + +void Assembler::pmulld(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xF4); + emit_sse_operand(dst, src); +} + + +void Assembler::pmuludq(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xF4); + emit_sse_operand(dst, src); +} + + +void Assembler::pmuludq(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0xF4); + emit_sse_operand(dst, src); +} + + +void Assembler::punpackldq(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x62); + emit_sse_operand(dst, src); +} + + +void Assembler::punpackldq(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x62); + emit_sse_operand(dst, src); +} + + +void Assembler::psrldq(XMMRegister dst, uint8_t shift) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst); + emit(0x0F); + emit(0x73); + emit_sse_operand(dst); + emit(shift); +} + + +void Assembler::cvtps2dq(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5B); + emit_sse_operand(dst, src); +} + + +void Assembler::cvtps2dq(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5B); + emit_sse_operand(dst, src); +} + + +void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) { + EnsureSpace ensure_space(this); + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x70); + 
emit_sse_operand(dst, src); + emit(shuffle); +} + + void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) { Register ireg = { reg.code() }; emit_operand(ireg, adr); @@ -2906,6 +3499,11 @@ void Assembler::emit_sse_operand(Register dst, XMMRegister src) { } +void Assembler::emit_sse_operand(XMMRegister dst) { + emit(0xD8 | dst.low_bits()); +} + + void Assembler::db(uint8_t data) { EnsureSpace ensure_space(this); emit(data); diff --git a/src/v8/src/x64/assembler-x64.h b/src/v8/src/x64/assembler-x64.h index 3896f89..0adc135 100644 --- a/src/v8/src/x64/assembler-x64.h +++ b/src/v8/src/x64/assembler-x64.h @@ -279,6 +279,7 @@ const XMMRegister xmm15 = { 15 }; typedef XMMRegister DoubleRegister; +typedef XMMRegister SIMD128Register; enum Condition { @@ -377,6 +378,7 @@ enum ScaleFactor { times_2 = 1, times_4 = 2, times_8 = 3, + maximal_scale_factor = times_8, times_int_size = times_4, times_pointer_size = (kPointerSize == 8) ? times_8 : times_4 }; @@ -993,9 +995,12 @@ class Assembler : public AssemblerBase { // SSE instructions void movaps(XMMRegister dst, XMMRegister src); + void movups(XMMRegister dst, const Operand& src); + void movups(const Operand& dst, XMMRegister src); void movss(XMMRegister dst, const Operand& src); void movss(const Operand& dst, XMMRegister src); void shufps(XMMRegister dst, XMMRegister src, byte imm8); + void shufpd(XMMRegister dst, XMMRegister src, byte imm8); void cvttss2si(Register dst, const Operand& src); void cvttss2si(Register dst, XMMRegister src); @@ -1017,6 +1022,15 @@ class Assembler : public AssemblerBase { void divps(XMMRegister dst, XMMRegister src); void divps(XMMRegister dst, const Operand& src); + void addpd(XMMRegister dst, XMMRegister src); + void addpd(XMMRegister dst, const Operand& src); + void subpd(XMMRegister dst, XMMRegister src); + void subpd(XMMRegister dst, const Operand& src); + void mulpd(XMMRegister dst, XMMRegister src); + void mulpd(XMMRegister dst, const Operand& src); + void divpd(XMMRegister 
dst, XMMRegister src); + void divpd(XMMRegister dst, const Operand& src); + void movmskps(Register dst, XMMRegister src); // SSE2 instructions @@ -1070,8 +1084,10 @@ class Assembler : public AssemblerBase { void divsd(XMMRegister dst, XMMRegister src); void andpd(XMMRegister dst, XMMRegister src); + void andpd(XMMRegister dst, const Operand& src); void orpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src); + void xorpd(XMMRegister dst, const Operand& src); void sqrtsd(XMMRegister dst, XMMRegister src); void sqrtsd(XMMRegister dst, const Operand& src); @@ -1083,6 +1099,41 @@ class Assembler : public AssemblerBase { // SSE 4.1 instruction void extractps(Register dst, XMMRegister src, byte imm8); + void insertps(XMMRegister dst, XMMRegister src, byte imm8); + void pinsrd(XMMRegister dst, Register src, byte imm8); + + void minps(XMMRegister dst, XMMRegister src); + void minps(XMMRegister dst, const Operand& src); + void maxps(XMMRegister dst, XMMRegister src); + void maxps(XMMRegister dst, const Operand& src); + void minpd(XMMRegister dst, XMMRegister src); + void minpd(XMMRegister dst, const Operand& src); + void maxpd(XMMRegister dst, XMMRegister src); + void maxpd(XMMRegister dst, const Operand& src); + void rcpps(XMMRegister dst, XMMRegister src); + void rcpps(XMMRegister dst, const Operand& src); + void rsqrtps(XMMRegister dst, XMMRegister src); + void rsqrtps(XMMRegister dst, const Operand& src); + void sqrtps(XMMRegister dst, XMMRegister src); + void sqrtps(XMMRegister dst, const Operand& src); + void sqrtpd(XMMRegister dst, XMMRegister src); + void sqrtpd(XMMRegister dst, const Operand& src); + void paddd(XMMRegister dst, XMMRegister src); + void paddd(XMMRegister dst, const Operand& src); + void psubd(XMMRegister dst, XMMRegister src); + void psubd(XMMRegister dst, const Operand& src); + void pmulld(XMMRegister dst, XMMRegister src); + void pmulld(XMMRegister dst, const Operand& src); + void pmuludq(XMMRegister dst, XMMRegister 
src); + void pmuludq(XMMRegister dst, const Operand& src); + void punpackldq(XMMRegister dst, XMMRegister src); + void punpackldq(XMMRegister dst, const Operand& src); + void psrldq(XMMRegister dst, uint8_t shift); + void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle); + void cvtps2dq(XMMRegister dst, XMMRegister src); + void cvtps2dq(XMMRegister dst, const Operand& src); + void cvtdq2ps(XMMRegister dst, XMMRegister src); + void cvtdq2ps(XMMRegister dst, const Operand& src); enum RoundingMode { kRoundToNearest = 0x0, @@ -1093,6 +1144,25 @@ class Assembler : public AssemblerBase { void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode); + void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp); + void cmpeqps(XMMRegister dst, XMMRegister src); + void cmpltps(XMMRegister dst, XMMRegister src); + void cmpleps(XMMRegister dst, XMMRegister src); + void cmpneqps(XMMRegister dst, XMMRegister src); + void cmpnltps(XMMRegister dst, XMMRegister src); + void cmpnleps(XMMRegister dst, XMMRegister src); + + void pslld(XMMRegister reg, int8_t shift); + void pslld(XMMRegister dst, XMMRegister src); + void psrld(XMMRegister reg, int8_t shift); + void psrld(XMMRegister dst, XMMRegister src); + void psrad(XMMRegister reg, int8_t shift); + void psrad(XMMRegister dst, XMMRegister src); + + void pcmpgtd(XMMRegister dst, XMMRegister src); + void pcmpeqd(XMMRegister dst, XMMRegister src); + void pcmpltd(XMMRegister dst, XMMRegister src); + // Debugging void Print(); @@ -1253,6 +1323,10 @@ class Assembler : public AssemblerBase { // the high bit set. inline void emit_optional_rex_32(Register rm_reg); + // As for emit_optional_rex_32(Register), except that the register is + // an XMM register. + inline void emit_optional_rex_32(XMMRegister rm_reg); + // Optionally do as emit_rex_32(const Operand&) if the operand register // numbers have a high bit set. 
inline void emit_optional_rex_32(const Operand& op); @@ -1318,6 +1392,7 @@ class Assembler : public AssemblerBase { void emit_sse_operand(XMMRegister reg, const Operand& adr); void emit_sse_operand(XMMRegister dst, Register src); void emit_sse_operand(Register dst, XMMRegister src); + void emit_sse_operand(XMMRegister dst); // Emit machine code for one of the operations ADD, ADC, SUB, SBC, // AND, OR, XOR, or CMP. The encodings of these operations are all diff --git a/src/v8/src/x64/deoptimizer-x64.cc b/src/v8/src/x64/deoptimizer-x64.cc index a2f9faa..30d164b 100644 --- a/src/v8/src/x64/deoptimizer-x64.cc +++ b/src/v8/src/x64/deoptimizer-x64.cc @@ -91,8 +91,9 @@ void Deoptimizer::FillInputFrame(Address tos, JavaScriptFrame* frame) { } input_->SetRegister(rsp.code(), reinterpret_cast(frame->sp())); input_->SetRegister(rbp.code(), reinterpret_cast(frame->fp())); + simd128_value_t zero = {{0.0, 0.0}}; for (int i = 0; i < DoubleRegister::NumAllocatableRegisters(); i++) { - input_->SetDoubleRegister(i, 0.0); + input_->SetSIMD128Register(i, zero); } // Fill the frame content from the actual data on the frame. @@ -112,10 +113,10 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( } -void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { +void Deoptimizer::CopySIMD128Registers(FrameDescription* output_frame) { for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); ++i) { - double double_value = input_->GetDoubleRegister(i); - output_frame->SetDoubleRegister(i, double_value); + simd128_value_t xmm_value = input_->GetSIMD128Register(i); + output_frame->SetSIMD128Register(i, xmm_value); } } @@ -134,14 +135,14 @@ void Deoptimizer::EntryGenerator::Generate() { // Save all general purpose registers before messing with them. 
const int kNumberOfRegisters = Register::kNumRegisters; - const int kDoubleRegsSize = kDoubleSize * + const int kXMMRegsSize = kSIMD128Size * XMMRegister::NumAllocatableRegisters(); - __ subp(rsp, Immediate(kDoubleRegsSize)); + __ subp(rsp, Immediate(kXMMRegsSize)); for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int offset = i * kDoubleSize; - __ movsd(Operand(rsp, offset), xmm_reg); + int offset = i * kSIMD128Size; + __ movups(Operand(rsp, offset), xmm_reg); } // We push all registers onto the stack, even though we do not need @@ -152,7 +153,7 @@ void Deoptimizer::EntryGenerator::Generate() { } const int kSavedRegistersAreaSize = kNumberOfRegisters * kRegisterSize + - kDoubleRegsSize; + kXMMRegsSize; // We use this to keep the value of the fifth argument temporarily. // Unfortunately we can't store it directly in r8 (used for passing @@ -202,11 +203,13 @@ void Deoptimizer::EntryGenerator::Generate() { __ PopQuad(Operand(rbx, offset)); } - // Fill in the double input registers. - int double_regs_offset = FrameDescription::double_registers_offset(); + // Fill in the xmm input registers. + STATIC_ASSERT(kSIMD128Size == 2 * kDoubleSize); + int xmm_regs_offset = FrameDescription::simd128_registers_offset(); for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); i++) { - int dst_offset = i * kDoubleSize + double_regs_offset; + int dst_offset = i * kSIMD128Size + xmm_regs_offset; __ popq(Operand(rbx, dst_offset)); + __ popq(Operand(rbx, dst_offset + kDoubleSize)); } // Remove the bailout id and return address from the stack. 
@@ -270,8 +273,8 @@ void Deoptimizer::EntryGenerator::Generate() { for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); ++i) { XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int src_offset = i * kDoubleSize + double_regs_offset; - __ movsd(xmm_reg, Operand(rbx, src_offset)); + int src_offset = i * kSIMD128Size + xmm_regs_offset; + __ movups(xmm_reg, Operand(rbx, src_offset)); } // Push state, pc, and continuation from the last output frame. @@ -344,6 +347,18 @@ void FrameDescription::SetCallerConstantPool(unsigned offset, intptr_t value) { } +double FrameDescription::GetDoubleRegister(unsigned n) const { + DCHECK(n < ARRAY_SIZE(simd128_registers_)); + return simd128_registers_[n].d[0]; +} + + +void FrameDescription::SetDoubleRegister(unsigned n, double value) { + DCHECK(n < ARRAY_SIZE(simd128_registers_)); + simd128_registers_[n].d[0] = value; +} + + #undef __ diff --git a/src/v8/src/x64/disasm-x64.cc b/src/v8/src/x64/disasm-x64.cc index 2b8fc2d..aaa8754 100644 --- a/src/v8/src/x64/disasm-x64.cc +++ b/src/v8/src/x64/disasm-x64.cc @@ -315,6 +315,17 @@ class DisassemblerX64 { OPERAND_QUADWORD_SIZE = 3 }; + enum { + rax = 0, + rcx = 1, + rdx = 2, + rbx = 3, + rsp = 4, + rbp = 5, + rsi = 6, + rdi = 7 + }; + const NameConverter& converter_; v8::internal::EmbeddedVector tmp_buffer_; unsigned int tmp_buffer_pos_; @@ -1014,6 +1025,22 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { current += PrintRightOperand(current); AppendToBuffer(",%s,%d", NameOfXMMRegister(regop), (*current) & 3); current += 1; + } else if (third_byte == 0x21) { + get_modrm(*current, &mod, ®op, &rm); + // insertps xmm, xmm, imm8 + AppendToBuffer("insertps %s,%s,%d", + NameOfXMMRegister(regop), + NameOfXMMRegister(rm), + (*(current + 1)) & 3); + current += 2; + } else if (third_byte == 0x22) { + get_modrm(*current, &mod, ®op, &rm); + // pinsrd xmm, reg32, imm8 + AppendToBuffer("pinsrd %s,%s,%d", + NameOfXMMRegister(regop), + NameOfCPURegister(rm), + (*(current + 1)) 
& 3); + current += 2; } else if (third_byte == 0x0b) { get_modrm(*current, &mod, ®op, &rm); // roundsd xmm, xmm/m64, imm8 @@ -1024,6 +1051,16 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { } else { UnimplementedInstruction(); } + } else if (opcode == 0x38) { + byte third_byte = *current; + current = data + 3; + if (third_byte == 0x40) { + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("pmulld %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + } else { + UnimplementedInstruction(); + } } else { get_modrm(*current, &mod, ®op, &rm); if (opcode == 0x1f) { @@ -1053,6 +1090,24 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { AppendToBuffer("movdqa %s,", NameOfXMMRegister(regop)); current += PrintRightXMMOperand(current); + } else if (opcode == 0x70) { + AppendToBuffer("pshufd %s,", + NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + AppendToBuffer(",0x%x", (*current) & 0xff); + current += 1; + } else if (opcode == 0x5B) { + AppendToBuffer("cvtps2dq %s,", + NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + } else if (opcode == 0xFE) { + AppendToBuffer("paddd %s,", + NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + } else if (opcode == 0xFA) { + AppendToBuffer("psubd %s,", + NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); } else if (opcode == 0x7E) { AppendToBuffer("mov%c ", rex_w() ? 'q' : 'd'); @@ -1074,18 +1129,59 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { DCHECK(regop == 6); AppendToBuffer("psllq,%s,%d", NameOfXMMRegister(rm), *current & 0x7f); current += 1; + } else if (opcode == 0x62) { + AppendToBuffer("punpackldq %s,", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + } else if (opcode == 0x72) { + AppendToBuffer(regop == rsi ? "pslld " + : regop == rdx ? 
"psrld" : "psrad"); + current += PrintRightXMMOperand(current); + AppendToBuffer(",0x%x", (*current) & 0xff); + current += 1; + } else if (opcode == 0xC6) { + AppendToBuffer("shufpd %s,", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + AppendToBuffer(",0x%x", (*current) & 0xff); + current += 1; + } else if (opcode == 0xF4) { + AppendToBuffer("pmuludq %s,", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); } else { const char* mnemonic = "?"; - if (opcode == 0x54) { + if (opcode == 0x51) { + mnemonic = "sqrtpd"; + } else if (opcode == 0x54) { mnemonic = "andpd"; } else if (opcode == 0x56) { mnemonic = "orpd"; } else if (opcode == 0x57) { mnemonic = "xorpd"; + } else if (opcode == 0x58) { + mnemonic = "addpd"; + } else if (opcode == 0x59) { + mnemonic = "mulpd"; + } else if (opcode == 0x5C) { + mnemonic = "subpd"; + } else if (opcode == 0x5D) { + mnemonic = "minpd"; + } else if (opcode == 0x5E) { + mnemonic = "divpd"; + } else if (opcode == 0x5F) { + mnemonic = "maxpd"; } else if (opcode == 0x2E) { mnemonic = "ucomisd"; } else if (opcode == 0x2F) { mnemonic = "comisd"; + } else if (opcode == 0x66) { + mnemonic = "pcmpgtd"; + } else if (opcode == 0x76) { + mnemonic = "pcmpeqd"; + } else if (opcode == 0xD2) { + mnemonic = "psrld"; + } else if (opcode == 0xE2) { + mnemonic = "psrad"; + } else if (opcode == 0xF2) { + mnemonic = "pslld"; } else { UnimplementedInstruction(); } @@ -1230,6 +1326,21 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { current += PrintRightXMMOperand(current); AppendToBuffer(",%s", NameOfXMMRegister(regop)); + } else if (opcode == 0x10) { + // movups xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("movups %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x11) { + // movups xmm/m128, xmm + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("movups "); + current += 
PrintRightXMMOperand(current); + AppendToBuffer(", %s", NameOfXMMRegister(regop)); + } else if (opcode == 0xA2) { // CPUID AppendToBuffer("%s", mnemonic); @@ -1273,6 +1384,100 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { AppendToBuffer(", %d", (*current) & 3); current += 1; + } else if (opcode == 0xC6) { + // shufps xmm, xmm/m128, imm8 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("shufps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + AppendToBuffer(", %d", (*current) & 3); + current += 1; + + } else if (opcode == 0x54) { + // andps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("andps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x56) { + // orps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("orps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x58) { + // addps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("addps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x59) { + // mulps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("mulps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x5C) { + // subps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("subps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x5E) { + // divps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("divps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x5D) { + // minps xmm, xmm/m128 + int mod, regop, rm; + 
get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("minps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x5F) { + // maxps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("maxps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x5B) { + // cvtdq2ps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("cvtdq2ps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + + } else if (opcode == 0x53) { + // rcpps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("rcpps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x52) { + // rsqrtps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("rsqrtps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + + } else if (opcode == 0x51) { + // sqrtps xmm, xmm/m128 + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + AppendToBuffer("sqrtps %s, ", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + } else if (opcode == 0x50) { // movmskps reg, xmm int mod, regop, rm; @@ -1280,6 +1485,26 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { AppendToBuffer("movmskps %s,", NameOfCPURegister(regop)); current += PrintRightXMMOperand(current); + } else if (opcode == 0xC2) { + // Intel manual 2A, Table 3-11. + int mod, regop, rm; + get_modrm(*current, &mod, ®op, &rm); + const char* const pseudo_op[] = { + "cmpeqps", + "cmpltps", + "cmpleps", + "cmpunordps", + "cmpneqps", + "cmpnltps", + "cmpnleps", + "cmpordps" + }; + AppendToBuffer("%s %s,%s", + pseudo_op[current[1]], + NameOfXMMRegister(regop), + NameOfXMMRegister(rm)); + current += 2; + } else if ((opcode & 0xF0) == 0x80) { // Jcc: Conditional jump (branch). 
current = data + JumpConditional(data); diff --git a/src/v8/src/x64/lithium-codegen-x64.cc b/src/v8/src/x64/lithium-codegen-x64.cc index 4457c20..9de3099 100644 --- a/src/v8/src/x64/lithium-codegen-x64.cc +++ b/src/v8/src/x64/lithium-codegen-x64.cc @@ -407,6 +407,11 @@ XMMRegister LCodeGen::ToDoubleRegister(int index) const { } +XMMRegister LCodeGen::ToSIMD128Register(int index) const { + return XMMRegister::FromAllocationIndex(index); +} + + Register LCodeGen::ToRegister(LOperand* op) const { DCHECK(op->IsRegister()); return ToRegister(op->index()); @@ -419,6 +424,31 @@ XMMRegister LCodeGen::ToDoubleRegister(LOperand* op) const { } +XMMRegister LCodeGen::ToFloat32x4Register(LOperand* op) const { + DCHECK(op->IsFloat32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToFloat64x2Register(LOperand* op) const { + DCHECK(op->IsFloat64x2Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToInt32x4Register(LOperand* op) const { + DCHECK(op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + +XMMRegister LCodeGen::ToSIMD128Register(LOperand* op) const { + DCHECK(op->IsFloat32x4Register() || op->IsFloat64x2Register() || + op->IsInt32x4Register()); + return ToSIMD128Register(op->index()); +} + + bool LCodeGen::IsInteger32Constant(LConstantOperand* op) const { return chunk_->LookupLiteralRepresentation(op).IsSmiOrInteger32(); } @@ -486,7 +516,9 @@ static int ArgumentsOffsetWithoutFrame(int index) { Operand LCodeGen::ToOperand(LOperand* op) const { // Does not handle registers. In X64 assembler, plain registers are not // representable as an Operand. 
- DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot()); + DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot() || + op->IsFloat32x4StackSlot() || op->IsFloat64x2StackSlot() || + op->IsInt32x4StackSlot()); if (NeedsEagerFrame()) { return Operand(rbp, StackSlotOffset(op->index())); } else { @@ -599,6 +631,15 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } } else if (op->IsDoubleStackSlot()) { translation->StoreDoubleStackSlot(op->index()); + } else if (op->IsFloat32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::FLOAT32x4_STACK_SLOT); + } else if (op->IsFloat64x2StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::FLOAT64x2_STACK_SLOT); + } else if (op->IsInt32x4StackSlot()) { + translation->StoreSIMD128StackSlot(op->index(), + Translation::INT32x4_STACK_SLOT); } else if (op->IsRegister()) { Register reg = ToRegister(op); if (is_tagged) { @@ -611,6 +652,15 @@ void LCodeGen::AddToTranslation(LEnvironment* environment, } else if (op->IsDoubleRegister()) { XMMRegister reg = ToDoubleRegister(op); translation->StoreDoubleRegister(reg); + } else if (op->IsFloat32x4Register()) { + XMMRegister reg = ToFloat32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::FLOAT32x4_REGISTER); + } else if (op->IsFloat64x2Register()) { + XMMRegister reg = ToFloat64x2Register(op); + translation->StoreSIMD128Register(reg, Translation::FLOAT64x2_REGISTER); + } else if (op->IsInt32x4Register()) { + XMMRegister reg = ToInt32x4Register(op); + translation->StoreSIMD128Register(reg, Translation::INT32x4_REGISTER); } else if (op->IsConstantOperand()) { HConstant* constant = chunk()->LookupConstant(LConstantOperand::cast(op)); int src_index = DefineDeoptimizationLiteral(constant->handle(isolate())); @@ -2115,6 +2165,9 @@ void LCodeGen::DoBranch(LBranch* instr) { __ xorps(xmm_scratch, xmm_scratch); __ ucomisd(reg, xmm_scratch); EmitBranch(instr, not_equal); + } else if (r.IsSIMD128()) { + 
DCHECK(!info()->IsStub()); + EmitBranch(instr, no_condition); } else { DCHECK(r.IsTagged()); Register reg = ToRegister(instr->value()); @@ -3076,6 +3129,22 @@ void LCodeGen::DoAccessArgumentsAt(LAccessArgumentsAt* instr) { } +bool LCodeGen::HandleExternalArrayOpRequiresPreScale( + LOperand* key, + Representation key_representation, + ElementsKind elements_kind) { + Register key_reg = ToRegister(key); + if (ExternalArrayOpRequiresPreScale(key_representation, elements_kind)) { + int pre_shift_size = ElementsKindToShiftSize(elements_kind) - + static_cast(maximal_scale_factor); + DCHECK(pre_shift_size > 0); + __ shll(key_reg, Immediate(pre_shift_size)); + return true; + } + return false; +} + + void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { ElementsKind elements_kind = instr->elements_kind(); LOperand* key = instr->key(); @@ -3084,13 +3153,22 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { Representation key_representation = instr->hydrogen()->key()->representation(); if (ExternalArrayOpRequiresTemp(key_representation, elements_kind)) { - __ SmiToInteger64(key_reg, key_reg); + if (!HandleExternalArrayOpRequiresPreScale( + key, key_representation, elements_kind)) + __ SmiToInteger64(key_reg, key_reg); } else if (instr->hydrogen()->IsDehoisted()) { // Sign extend key because it could be a 32 bit negative value // and the dehoisted address computation happens in 64 bits __ movsxlq(key_reg, key_reg); } + } else if (kPointerSize == kInt64Size && !key->IsConstantOperand()) { + Representation key_representation = + instr->hydrogen()->key()->representation(); + if (ExternalArrayOpRequiresTemp(key_representation, elements_kind)) + HandleExternalArrayOpRequiresPreScale( + key, key_representation, elements_kind); } + Operand operand(BuildFastArrayOperand( instr->elements(), key, @@ -3106,6 +3184,8 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) 
{ __ movsd(ToDoubleRegister(instr->result()), operand); + } else if (IsSIMD128ElementsKind(elements_kind)) { + __ movups(ToSIMD128Register(instr->result()), operand); } else { Register result(ToRegister(instr->result())); switch (elements_kind) { @@ -3141,8 +3221,14 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { break; case EXTERNAL_FLOAT32_ELEMENTS: case EXTERNAL_FLOAT64_ELEMENTS: + case EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: case FLOAT32_ELEMENTS: case FLOAT64_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: case FAST_ELEMENTS: case FAST_SMI_ELEMENTS: case FAST_DOUBLE_ELEMENTS: @@ -3267,6 +3353,7 @@ Operand LCodeGen::BuildFastArrayOperand( if (constant_value & 0xF0000000) { Abort(kArrayIndexConstantValueTooBig); } + return Operand(elements_pointer_reg, (constant_value << shift_size) + offset); } else { @@ -3275,6 +3362,10 @@ Operand LCodeGen::BuildFastArrayOperand( DCHECK(SmiValuesAre31Bits()); shift_size -= kSmiTagSize; } + if (ExternalArrayOpRequiresPreScale(key_representation, elements_kind)) { + // Make sure the key is pre-scaled against maximal_scale_factor. 
+ shift_size = static_cast(maximal_scale_factor); + } ScaleFactor scale_factor = static_cast(shift_size); return Operand(elements_pointer_reg, ToRegister(key), @@ -3864,6 +3955,1016 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) { } +void LCodeGen::DoNullarySIMDOperation(LNullarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Zero: { + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + __ xorps(result_reg, result_reg); + return; + } + case kFloat64x2Zero: { + XMMRegister result_reg = ToFloat64x2Register(instr->result()); + __ xorpd(result_reg, result_reg); + return; + } + case kInt32x4Zero: { + XMMRegister result_reg = ToInt32x4Register(instr->result()); + __ xorps(result_reg, result_reg); + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoUnarySIMDOperation(LUnarySIMDOperation* instr) { + uint8_t select = 0; + switch (instr->op()) { + case kFloat32x4Coercion: { + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + return; + } + case kFloat64x2Coercion: { + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + XMMRegister result_reg = ToFloat64x2Register(instr->result()); + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + return; + } + case kInt32x4Coercion: { + XMMRegister input_reg = ToInt32x4Register(instr->value()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + return; + } + case kSIMD128Change: { + Comment(";;; deoptimize: can not perform representation change" + "for float32x4 or int32x4"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + case kFloat32x4Abs: + case kFloat32x4Neg: + case kFloat32x4Reciprocal: + case kFloat32x4ReciprocalSqrt: + case kFloat32x4Sqrt: { + 
DCHECK(instr->value()->Equals(instr->result())); + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + switch (instr->op()) { + case kFloat32x4Abs: + __ absps(input_reg); + break; + case kFloat32x4Neg: + __ negateps(input_reg); + break; + case kFloat32x4Reciprocal: + __ rcpps(input_reg, input_reg); + break; + case kFloat32x4ReciprocalSqrt: + __ rsqrtps(input_reg, input_reg); + break; + case kFloat32x4Sqrt: + __ sqrtps(input_reg, input_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat64x2Abs: + case kFloat64x2Neg: + case kFloat64x2Sqrt: { + DCHECK(instr->value()->Equals(instr->result())); + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + switch (instr->op()) { + case kFloat64x2Abs: + __ abspd(input_reg); + break; + case kFloat64x2Neg: + __ negatepd(input_reg); + break; + case kFloat64x2Sqrt: + __ sqrtpd(input_reg, input_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kInt32x4Not: + case kInt32x4Neg: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + XMMRegister input_reg = ToInt32x4Register(instr->value()); + switch (instr->op()) { + case kInt32x4Not: + __ notps(input_reg); + break; + case kInt32x4Neg: + __ pnegd(input_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat32x4BitsToInt32x4: + case kFloat32x4ToInt32x4: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + if (instr->op() == kFloat32x4BitsToInt32x4) { + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + } else { + DCHECK(instr->op() == kFloat32x4ToInt32x4); + __ cvtps2dq(result_reg, input_reg); + } + return; + } + case kInt32x4BitsToFloat32x4: + case 
kInt32x4ToFloat32x4: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + XMMRegister input_reg = ToInt32x4Register(instr->value()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + if (instr->op() == kInt32x4BitsToFloat32x4) { + if (!result_reg.is(input_reg)) { + __ movaps(result_reg, input_reg); + } + } else { + DCHECK(instr->op() == kInt32x4ToFloat32x4); + __ cvtdq2ps(result_reg, input_reg); + } + return; + } + case kFloat32x4Splat: { + DCHECK(instr->hydrogen()->value()->representation().IsDouble()); + XMMRegister input_reg = ToDoubleRegister(instr->value()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + XMMRegister xmm_scratch = xmm0; + __ xorps(xmm_scratch, xmm_scratch); + __ cvtsd2ss(xmm_scratch, input_reg); + __ shufps(xmm_scratch, xmm_scratch, 0x0); + __ movaps(result_reg, xmm_scratch); + return; + } + case kInt32x4Splat: { + DCHECK(instr->hydrogen()->value()->representation().IsInteger32()); + Register input_reg = ToRegister(instr->value()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + __ movd(result_reg, input_reg); + __ shufps(result_reg, result_reg, 0x0); + return; + } + case kInt32x4GetSignMask: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + XMMRegister input_reg = ToInt32x4Register(instr->value()); + Register result = ToRegister(instr->result()); + __ movmskps(result, input_reg); + return; + } + case kFloat32x4GetSignMask: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + Register result = ToRegister(instr->result()); + __ movmskps(result, input_reg); + return; + } + case kFloat32x4GetW: + select++; + case kFloat32x4GetZ: + select++; + case kFloat32x4GetY: + select++; + case kFloat32x4GetX: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat32x4()); + XMMRegister input_reg = ToFloat32x4Register(instr->value()); + XMMRegister result = 
ToDoubleRegister(instr->result()); + XMMRegister xmm_scratch = result.is(input_reg) ? xmm0 : result; + + if (select == 0x0) { + __ xorps(xmm_scratch, xmm_scratch); + __ cvtss2sd(xmm_scratch, input_reg); + if (!xmm_scratch.is(result)) { + __ movaps(result, xmm_scratch); + } + } else { + __ pshufd(xmm_scratch, input_reg, select); + if (!xmm_scratch.is(result)) { + __ xorps(result, result); + } + __ cvtss2sd(result, xmm_scratch); + } + return; + } + case kFloat64x2GetSignMask: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + Register result = ToRegister(instr->result()); + __ movmskpd(result, input_reg); + return; + } + case kFloat64x2GetX: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + XMMRegister result = ToDoubleRegister(instr->result()); + + if (!input_reg.is(result)) { + __ movaps(result, input_reg); + } + return; + } + case kFloat64x2GetY: { + DCHECK(instr->hydrogen()->value()->representation().IsFloat64x2()); + XMMRegister input_reg = ToFloat64x2Register(instr->value()); + XMMRegister result = ToDoubleRegister(instr->result()); + + if (!input_reg.is(result)) { + __ movaps(result, input_reg); + } + __ shufpd(result, input_reg, 0x1); + return; + } + case kInt32x4GetX: + case kInt32x4GetY: + case kInt32x4GetZ: + case kInt32x4GetW: + case kInt32x4GetFlagX: + case kInt32x4GetFlagY: + case kInt32x4GetFlagZ: + case kInt32x4GetFlagW: { + DCHECK(instr->hydrogen()->value()->representation().IsInt32x4()); + bool flag = false; + switch (instr->op()) { + case kInt32x4GetFlagX: + flag = true; + case kInt32x4GetX: + break; + case kInt32x4GetFlagY: + flag = true; + case kInt32x4GetY: + select = 0x1; + break; + case kInt32x4GetFlagZ: + flag = true; + case kInt32x4GetZ: + select = 0x2; + break; + case kInt32x4GetFlagW: + flag = true; + case kInt32x4GetW: + select = 0x3; + break; + default: + 
UNREACHABLE(); + } + + XMMRegister input_reg = ToInt32x4Register(instr->value()); + Register result = ToRegister(instr->result()); + if (select == 0x0) { + __ movd(result, input_reg); + } else { + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ extractps(result, input_reg, select); + } else { + XMMRegister xmm_scratch = xmm0; + __ pshufd(xmm_scratch, input_reg, select); + __ movd(result, xmm_scratch); + } + } + + if (flag) { + Label false_value, done; + __ testl(result, result); + __ j(zero, &false_value, Label::kNear); + __ LoadRoot(result, Heap::kTrueValueRootIndex); + __ jmp(&done, Label::kNear); + __ bind(&false_value); + __ LoadRoot(result, Heap::kFalseValueRootIndex); + __ bind(&done); + } + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoBinarySIMDOperation(LBinarySIMDOperation* instr) { + uint8_t imm8 = 0; // for with operation + switch (instr->op()) { + case kFloat32x4Add: + case kFloat32x4Sub: + case kFloat32x4Mul: + case kFloat32x4Div: + case kFloat32x4Min: + case kFloat32x4Max: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsFloat32x4()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToFloat32x4Register(instr->right()); + switch (instr->op()) { + case kFloat32x4Add: + __ addps(left_reg, right_reg); + break; + case kFloat32x4Sub: + __ subps(left_reg, right_reg); + break; + case kFloat32x4Mul: + __ mulps(left_reg, right_reg); + break; + case kFloat32x4Div: + __ divps(left_reg, right_reg); + break; + case kFloat32x4Min: + __ minps(left_reg, right_reg); + break; + case kFloat32x4Max: + __ maxps(left_reg, right_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat32x4Scale: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + 
DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + XMMRegister scratch_reg = xmm0; + __ xorps(scratch_reg, scratch_reg); + __ cvtsd2ss(scratch_reg, right_reg); + __ shufps(scratch_reg, scratch_reg, 0x0); + __ mulps(left_reg, scratch_reg); + return; + } + case kFloat64x2Add: + case kFloat64x2Sub: + case kFloat64x2Mul: + case kFloat64x2Div: + case kFloat64x2Min: + case kFloat64x2Max: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsFloat64x2()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToFloat64x2Register(instr->right()); + switch (instr->op()) { + case kFloat64x2Add: + __ addpd(left_reg, right_reg); + break; + case kFloat64x2Sub: + __ subpd(left_reg, right_reg); + break; + case kFloat64x2Mul: + __ mulpd(left_reg, right_reg); + break; + case kFloat64x2Div: + __ divpd(left_reg, right_reg); + break; + case kFloat64x2Min: + __ minpd(left_reg, right_reg); + break; + case kFloat64x2Max: + __ maxpd(left_reg, right_reg); + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat64x2Scale: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + __ shufpd(right_reg, right_reg, 0x0); + __ mulpd(left_reg, right_reg); + return; + } + case kFloat32x4Shuffle: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + if (instr->hydrogen()->right()->IsConstant() && + HConstant::cast(instr->hydrogen()->right())->HasInteger32Value()) { + int32_t 
value = ToInteger32(LConstantOperand::cast(instr->right())); + uint8_t select = static_cast(value & 0xFF); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + __ shufps(left_reg, left_reg, select); + return; + } else { + Comment(";;; deoptimize: non-constant selector for shuffle"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + } + case kInt32x4Shuffle: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + if (instr->hydrogen()->right()->IsConstant() && + HConstant::cast(instr->hydrogen()->right())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->right())); + uint8_t select = static_cast(value & 0xFF); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + __ pshufd(left_reg, left_reg, select); + return; + } else { + Comment(";;; deoptimize: non-constant selector for shuffle"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + } + case kInt32x4ShiftLeft: + case kInt32x4ShiftRight: + case kInt32x4ShiftRightArithmetic: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + if (instr->hydrogen()->right()->IsConstant() && + HConstant::cast(instr->hydrogen()->right())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->right())); + uint8_t shift = static_cast(value & 0xFF); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + switch (instr->op()) { + case kInt32x4ShiftLeft: + __ pslld(left_reg, shift); + break; + case kInt32x4ShiftRight: + __ psrld(left_reg, shift); + break; + case kInt32x4ShiftRightArithmetic: + __ psrad(left_reg, shift); + break; + default: + UNREACHABLE(); + } + return; + } else { + XMMRegister left_reg = ToInt32x4Register(instr->left()); + Register shift = ToRegister(instr->right()); + XMMRegister xmm_scratch = double_scratch0(); + __ movd(xmm_scratch, shift); + switch (instr->op()) { + 
case kInt32x4ShiftLeft: + __ pslld(left_reg, xmm_scratch); + break; + case kInt32x4ShiftRight: + __ psrld(left_reg, xmm_scratch); + break; + case kInt32x4ShiftRightArithmetic: + __ psrad(left_reg, xmm_scratch); + break; + default: + UNREACHABLE(); + } + return; + } + } + case kFloat32x4LessThan: + case kFloat32x4LessThanOrEqual: + case kFloat32x4Equal: + case kFloat32x4NotEqual: + case kFloat32x4GreaterThanOrEqual: + case kFloat32x4GreaterThan: { + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsFloat32x4()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToFloat32x4Register(instr->right()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + switch (instr->op()) { + case kFloat32x4LessThan: + if (result_reg.is(left_reg)) { + __ cmpltps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpnltps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpltps(result_reg, right_reg); + } + break; + case kFloat32x4LessThanOrEqual: + if (result_reg.is(left_reg)) { + __ cmpleps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpnleps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpleps(result_reg, right_reg); + } + break; + case kFloat32x4Equal: + if (result_reg.is(left_reg)) { + __ cmpeqps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpeqps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpeqps(result_reg, right_reg); + } + break; + case kFloat32x4NotEqual: + if (result_reg.is(left_reg)) { + __ cmpneqps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpneqps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpneqps(result_reg, right_reg); + } + break; + case kFloat32x4GreaterThanOrEqual: + if (result_reg.is(left_reg)) { + __ cmpnltps(result_reg, right_reg); 
+ } else if (result_reg.is(right_reg)) { + __ cmpltps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpnltps(result_reg, right_reg); + } + break; + case kFloat32x4GreaterThan: + if (result_reg.is(left_reg)) { + __ cmpnleps(result_reg, right_reg); + } else if (result_reg.is(right_reg)) { + __ cmpleps(result_reg, left_reg); + } else { + __ movaps(result_reg, left_reg); + __ cmpnleps(result_reg, right_reg); + } + break; + default: + UNREACHABLE(); + break; + } + return; + } + case kInt32x4And: + case kInt32x4Or: + case kInt32x4Xor: + case kInt32x4Add: + case kInt32x4Sub: + case kInt32x4Mul: + case kInt32x4GreaterThan: + case kInt32x4Equal: + case kInt32x4LessThan: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsInt32x4()); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + XMMRegister right_reg = ToInt32x4Register(instr->right()); + switch (instr->op()) { + case kInt32x4And: + __ andps(left_reg, right_reg); + break; + case kInt32x4Or: + __ orps(left_reg, right_reg); + break; + case kInt32x4Xor: + __ xorps(left_reg, right_reg); + break; + case kInt32x4Add: + __ paddd(left_reg, right_reg); + break; + case kInt32x4Sub: + __ psubd(left_reg, right_reg); + break; + case kInt32x4Mul: + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ pmulld(left_reg, right_reg); + } else { + // The algorithm is from http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers + XMMRegister xmm_scratch = xmm0; + __ movaps(xmm_scratch, left_reg); + __ pmuludq(left_reg, right_reg); + __ psrldq(xmm_scratch, 4); + __ psrldq(right_reg, 4); + __ pmuludq(xmm_scratch, right_reg); + __ pshufd(left_reg, left_reg, 8); + __ pshufd(xmm_scratch, xmm_scratch, 8); + __ punpackldq(left_reg, xmm_scratch); + } + break; + case kInt32x4GreaterThan: + __ pcmpgtd(left_reg, right_reg); + break; 
+ case kInt32x4Equal: + __ pcmpeqd(left_reg, right_reg); + break; + case kInt32x4LessThan: { + XMMRegister xmm_scratch = xmm0; + __ movaps(xmm_scratch, right_reg); + __ pcmpgtd(xmm_scratch, left_reg); + __ movaps(left_reg, xmm_scratch); + break; + } + default: + UNREACHABLE(); + break; + } + return; + } + case kFloat32x4WithW: + imm8++; + case kFloat32x4WithZ: + imm8++; + case kFloat32x4WithY: + imm8++; + case kFloat32x4WithX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat32x4Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + XMMRegister xmm_scratch = xmm0; + __ xorps(xmm_scratch, xmm_scratch); + __ cvtsd2ss(xmm_scratch, right_reg); + if (CpuFeatures::IsSupported(SSE4_1)) { + imm8 = imm8 << 4; + CpuFeatureScope scope(masm(), SSE4_1); + __ insertps(left_reg, xmm_scratch, imm8); + } else { + __ subq(rsp, Immediate(kFloat32x4Size)); + __ movups(Operand(rsp, 0), left_reg); + __ movss(Operand(rsp, imm8 * kFloatSize), xmm_scratch); + __ movups(left_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kFloat32x4Size)); + } + return; + } + case kFloat64x2WithX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + __ subq(rsp, Immediate(kFloat64x2Size)); + __ movups(Operand(rsp, 0), left_reg); + __ movsd(Operand(rsp, 0 * kDoubleSize), right_reg); + __ movups(left_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kFloat64x2Size)); + return; + } + case kFloat64x2WithY: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsFloat64x2()); + 
DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToFloat64x2Register(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + __ subq(rsp, Immediate(kFloat64x2Size)); + __ movups(Operand(rsp, 0), left_reg); + __ movsd(Operand(rsp, 1 * kDoubleSize), right_reg); + __ movups(left_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kFloat64x2Size)); + return; + } + case kFloat64x2Constructor: { + DCHECK(instr->hydrogen()->left()->representation().IsDouble()); + DCHECK(instr->hydrogen()->right()->representation().IsDouble()); + XMMRegister left_reg = ToDoubleRegister(instr->left()); + XMMRegister right_reg = ToDoubleRegister(instr->right()); + XMMRegister result_reg = ToFloat64x2Register(instr->result()); + __ subq(rsp, Immediate(kFloat64x2Size)); + __ movsd(Operand(rsp, 0 * kDoubleSize), left_reg); + __ movsd(Operand(rsp, 1 * kDoubleSize), right_reg); + __ movups(result_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kFloat64x2Size)); + return; + } + case kInt32x4WithW: + imm8++; + case kInt32x4WithZ: + imm8++; + case kInt32x4WithY: + imm8++; + case kInt32x4WithX: { + DCHECK(instr->left()->Equals(instr->result())); + DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsInteger32()); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + Register right_reg = ToRegister(instr->right()); + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ pinsrd(left_reg, right_reg, imm8); + } else { + __ subq(rsp, Immediate(kInt32x4Size)); + __ movdqu(Operand(rsp, 0), left_reg); + __ movl(Operand(rsp, imm8 * kFloatSize), right_reg); + __ movdqu(left_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kInt32x4Size)); + } + return; + } + case kInt32x4WithFlagW: + imm8++; + case kInt32x4WithFlagZ: + imm8++; + case kInt32x4WithFlagY: + imm8++; + case kInt32x4WithFlagX: { + DCHECK(instr->left()->Equals(instr->result())); + 
DCHECK(instr->hydrogen()->left()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->right()->representation().IsTagged()); + HType type = instr->hydrogen()->right()->type(); + XMMRegister left_reg = ToInt32x4Register(instr->left()); + Register right_reg = ToRegister(instr->right()); + Label load_false_value, done; + if (type.IsBoolean()) { + __ subq(rsp, Immediate(kInt32x4Size)); + __ movups(Operand(rsp, 0), left_reg); + __ CompareRoot(right_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_value, Label::kNear); + } else { + Comment(";;; deoptimize: other types for int32x4.withFlagX/Y/Z/W."); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + // load true value. + __ movl(Operand(rsp, imm8 * kFloatSize), Immediate(0xFFFFFFFF)); + __ jmp(&done, Label::kNear); + __ bind(&load_false_value); + __ movl(Operand(rsp, imm8 * kFloatSize), Immediate(0x0)); + __ bind(&done); + __ movups(left_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kInt32x4Size)); + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoTernarySIMDOperation(LTernarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Select: { + DCHECK(instr->hydrogen()->first()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsFloat32x4()); + + XMMRegister mask_reg = ToInt32x4Register(instr->first()); + XMMRegister left_reg = ToFloat32x4Register(instr->second()); + XMMRegister right_reg = ToFloat32x4Register(instr->third()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + XMMRegister temp_reg = xmm0; + + // Copy mask. + __ movaps(temp_reg, mask_reg); + // Invert it. + __ notps(temp_reg); + // temp_reg = temp_reg & falseValue. + __ andps(temp_reg, right_reg); + + if (!result_reg.is(mask_reg)) { + if (result_reg.is(left_reg)) { + // result_reg = result_reg & trueValue. 
+ __ andps(result_reg, mask_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } else { + __ movaps(result_reg, mask_reg); + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } + } else { + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } + return; + } + case kInt32x4Select: { + DCHECK(instr->hydrogen()->first()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsInt32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsInt32x4()); + + XMMRegister mask_reg = ToInt32x4Register(instr->first()); + XMMRegister left_reg = ToInt32x4Register(instr->second()); + XMMRegister right_reg = ToInt32x4Register(instr->third()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + XMMRegister temp_reg = xmm0; + + // Copy mask. + __ movaps(temp_reg, mask_reg); + // Invert it. + __ notps(temp_reg); + // temp_reg = temp_reg & falseValue. + __ andps(temp_reg, right_reg); + + if (!result_reg.is(mask_reg)) { + if (result_reg.is(left_reg)) { + // result_reg = result_reg & trueValue. + __ andps(result_reg, mask_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } else { + __ movaps(result_reg, mask_reg); + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. + __ orps(result_reg, temp_reg); + } + } else { + // result_reg = result_reg & trueValue. + __ andps(result_reg, left_reg); + // out = result_reg | temp_reg. 
+ __ orps(result_reg, temp_reg); + } + return; + } + case kFloat32x4ShuffleMix: { + DCHECK(instr->first()->Equals(instr->result())); + DCHECK(instr->hydrogen()->first()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsInteger32()); + if (instr->hydrogen()->third()->IsConstant() && + HConstant::cast(instr->hydrogen()->third())->HasInteger32Value()) { + int32_t value = ToInteger32(LConstantOperand::cast(instr->third())); + uint8_t select = static_cast(value & 0xFF); + XMMRegister first_reg = ToFloat32x4Register(instr->first()); + XMMRegister second_reg = ToFloat32x4Register(instr->second()); + __ shufps(first_reg, second_reg, select); + return; + } else { + Comment(";;; deoptimize: non-constant selector for shuffle"); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + } + case kFloat32x4Clamp: { + DCHECK(instr->first()->Equals(instr->result())); + DCHECK(instr->hydrogen()->first()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat32x4()); + DCHECK(instr->hydrogen()->third()->representation().IsFloat32x4()); + + XMMRegister value_reg = ToFloat32x4Register(instr->first()); + XMMRegister lower_reg = ToFloat32x4Register(instr->second()); + XMMRegister upper_reg = ToFloat32x4Register(instr->third()); + __ minps(value_reg, upper_reg); + __ maxps(value_reg, lower_reg); + return; + } + case kFloat64x2Clamp: { + DCHECK(instr->first()->Equals(instr->result())); + DCHECK(instr->hydrogen()->first()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->second()->representation().IsFloat64x2()); + DCHECK(instr->hydrogen()->third()->representation().IsFloat64x2()); + + XMMRegister value_reg = ToFloat64x2Register(instr->first()); + XMMRegister lower_reg = ToFloat64x2Register(instr->second()); + XMMRegister upper_reg = ToFloat64x2Register(instr->third()); + __ minpd(value_reg, upper_reg); + __ 
maxpd(value_reg, lower_reg); + return; + } + default: + UNREACHABLE(); + return; + } +} + + +void LCodeGen::DoQuarternarySIMDOperation(LQuarternarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Constructor: { + DCHECK(instr->hydrogen()->x()->representation().IsDouble()); + DCHECK(instr->hydrogen()->y()->representation().IsDouble()); + DCHECK(instr->hydrogen()->z()->representation().IsDouble()); + DCHECK(instr->hydrogen()->w()->representation().IsDouble()); + XMMRegister x_reg = ToDoubleRegister(instr->x()); + XMMRegister y_reg = ToDoubleRegister(instr->y()); + XMMRegister z_reg = ToDoubleRegister(instr->z()); + XMMRegister w_reg = ToDoubleRegister(instr->w()); + XMMRegister result_reg = ToFloat32x4Register(instr->result()); + __ subq(rsp, Immediate(kFloat32x4Size)); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, x_reg); + __ movss(Operand(rsp, 0 * kFloatSize), xmm0); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, y_reg); + __ movss(Operand(rsp, 1 * kFloatSize), xmm0); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, z_reg); + __ movss(Operand(rsp, 2 * kFloatSize), xmm0); + __ xorps(xmm0, xmm0); + __ cvtsd2ss(xmm0, w_reg); + __ movss(Operand(rsp, 3 * kFloatSize), xmm0); + __ movups(result_reg, Operand(rsp, 0 * kFloatSize)); + __ addq(rsp, Immediate(kFloat32x4Size)); + return; + } + case kInt32x4Constructor: { + DCHECK(instr->hydrogen()->x()->representation().IsInteger32()); + DCHECK(instr->hydrogen()->y()->representation().IsInteger32()); + DCHECK(instr->hydrogen()->z()->representation().IsInteger32()); + DCHECK(instr->hydrogen()->w()->representation().IsInteger32()); + Register x_reg = ToRegister(instr->x()); + Register y_reg = ToRegister(instr->y()); + Register z_reg = ToRegister(instr->z()); + Register w_reg = ToRegister(instr->w()); + XMMRegister result_reg = ToInt32x4Register(instr->result()); + __ subq(rsp, Immediate(kInt32x4Size)); + __ movl(Operand(rsp, 0 * kInt32Size), x_reg); + __ movl(Operand(rsp, 1 * kInt32Size), y_reg); + __ movl(Operand(rsp, 2 
* kInt32Size), z_reg); + __ movl(Operand(rsp, 3 * kInt32Size), w_reg); + __ movups(result_reg, Operand(rsp, 0 * kInt32Size)); + __ addq(rsp, Immediate(kInt32x4Size)); + return; + } + case kInt32x4Bool: { + DCHECK(instr->hydrogen()->x()->representation().IsTagged()); + DCHECK(instr->hydrogen()->y()->representation().IsTagged()); + DCHECK(instr->hydrogen()->z()->representation().IsTagged()); + DCHECK(instr->hydrogen()->w()->representation().IsTagged()); + HType x_type = instr->hydrogen()->x()->type(); + HType y_type = instr->hydrogen()->y()->type(); + HType z_type = instr->hydrogen()->z()->type(); + HType w_type = instr->hydrogen()->w()->type(); + if (!x_type.IsBoolean() || !y_type.IsBoolean() || + !z_type.IsBoolean() || !w_type.IsBoolean()) { + Comment(";;; deoptimize: other types for int32x4.bool."); + DeoptimizeIf(no_condition, instr->environment()); + return; + } + XMMRegister result_reg = ToInt32x4Register(instr->result()); + Register x_reg = ToRegister(instr->x()); + Register y_reg = ToRegister(instr->y()); + Register z_reg = ToRegister(instr->z()); + Register w_reg = ToRegister(instr->w()); + Label load_false_x, done_x, load_false_y, done_y, + load_false_z, done_z, load_false_w, done_w; + __ subq(rsp, Immediate(kInt32x4Size)); + + __ CompareRoot(x_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_x, Label::kNear); + __ movl(Operand(rsp, 0 * kInt32Size), Immediate(-1)); + __ jmp(&done_x, Label::kNear); + __ bind(&load_false_x); + __ movl(Operand(rsp, 0 * kInt32Size), Immediate(0x0)); + __ bind(&done_x); + + __ CompareRoot(y_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_y, Label::kNear); + __ movl(Operand(rsp, 1 * kInt32Size), Immediate(-1)); + __ jmp(&done_y, Label::kNear); + __ bind(&load_false_y); + __ movl(Operand(rsp, 1 * kInt32Size), Immediate(0x0)); + __ bind(&done_y); + + __ CompareRoot(z_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_z, Label::kNear); + __ movl(Operand(rsp, 2 * kInt32Size), Immediate(-1)); + 
__ jmp(&done_z, Label::kNear); + __ bind(&load_false_z); + __ movl(Operand(rsp, 2 * kInt32Size), Immediate(0x0)); + __ bind(&done_z); + + __ CompareRoot(w_reg, Heap::kTrueValueRootIndex); + __ j(not_equal, &load_false_w, Label::kNear); + __ movl(Operand(rsp, 3 * kInt32Size), Immediate(-1)); + __ jmp(&done_w, Label::kNear); + __ bind(&load_false_w); + __ movl(Operand(rsp, 3 * kInt32Size), Immediate(0x0)); + __ bind(&done_w); + + __ movups(result_reg, Operand(rsp, 0)); + __ addq(rsp, Immediate(kInt32x4Size)); + return; + } + default: + UNREACHABLE(); + return; + } +} + + void LCodeGen::DoPower(LPower* instr) { Representation exponent_type = instr->hydrogen()->right()->representation(); // Having marked this as a call, we can use any registers. @@ -4269,13 +5370,22 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { Representation key_representation = instr->hydrogen()->key()->representation(); if (ExternalArrayOpRequiresTemp(key_representation, elements_kind)) { - __ SmiToInteger64(key_reg, key_reg); + if (!HandleExternalArrayOpRequiresPreScale( + key, key_representation, elements_kind)) + __ SmiToInteger64(key_reg, key_reg); } else if (instr->hydrogen()->IsDehoisted()) { // Sign extend key because it could be a 32 bit negative value // and the dehoisted address computation happens in 64 bits __ movsxlq(key_reg, key_reg); } + } else if (kPointerSize == kInt64Size && !key->IsConstantOperand()) { + Representation key_representation = + instr->hydrogen()->key()->representation(); + if (ExternalArrayOpRequiresTemp(key_representation, elements_kind)) + HandleExternalArrayOpRequiresPreScale( + key, key_representation, elements_kind); } + Operand operand(BuildFastArrayOperand( instr->elements(), key, @@ -4289,8 +5399,10 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { __ cvtsd2ss(value, value); __ movss(operand, value); } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || - elements_kind == FLOAT64_ELEMENTS) { + elements_kind == 
FLOAT64_ELEMENTS) { __ movsd(operand, ToDoubleRegister(instr->value())); + } else if (IsSIMD128ElementsKind(elements_kind)) { + __ movups(operand, ToSIMD128Register(instr->value())); } else { Register value(ToRegister(instr->value())); switch (elements_kind) { @@ -4315,9 +5427,15 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { __ movl(operand, value); break; case EXTERNAL_FLOAT32_ELEMENTS: + case EXTERNAL_FLOAT32x4_ELEMENTS: + case EXTERNAL_FLOAT64x2_ELEMENTS: + case EXTERNAL_INT32x4_ELEMENTS: case EXTERNAL_FLOAT64_ELEMENTS: case FLOAT32_ELEMENTS: case FLOAT64_ELEMENTS: + case FLOAT32x4_ELEMENTS: + case FLOAT64x2_ELEMENTS: + case INT32x4_ELEMENTS: case FAST_ELEMENTS: case FAST_SMI_ELEMENTS: case FAST_DOUBLE_ELEMENTS: @@ -4816,6 +5934,84 @@ void LCodeGen::DoDeferredNumberTagD(LNumberTagD* instr) { } +void LCodeGen::DoDeferredSIMD128ToTagged(LSIMD128ToTagged* instr, + Runtime::FunctionId id) { + // TODO(3095996): Get rid of this. For now, we need to make the + // result register contain a valid pointer because it is already + // contained in the register pointer map. 
+ Register reg = ToRegister(instr->result()); + __ Move(reg, Smi::FromInt(0)); + + { + PushSafepointRegistersScope scope(this); + __ movp(rsi, Operand(rbp, StandardFrameConstants::kContextOffset)); + __ CallRuntimeSaveDoubles(id); + RecordSafepointWithRegisters( + instr->pointer_map(), 0, Safepoint::kNoLazyDeopt); + __ movp(kScratchRegister, rax); + } + __ movp(reg, kScratchRegister); +} + + +template +void LCodeGen::HandleSIMD128ToTagged(LSIMD128ToTagged* instr) { + class DeferredSIMD128ToTagged V8_FINAL : public LDeferredCode { + public: + DeferredSIMD128ToTagged(LCodeGen* codegen, + LSIMD128ToTagged* instr, + Runtime::FunctionId id) + : LDeferredCode(codegen), instr_(instr), id_(id) { } + virtual void Generate() V8_OVERRIDE { + codegen()->DoDeferredSIMD128ToTagged(instr_, id_); + } + virtual LInstruction* instr() V8_OVERRIDE { return instr_; } + private: + LSIMD128ToTagged* instr_; + Runtime::FunctionId id_; + }; + + XMMRegister input_reg = ToSIMD128Register(instr->value()); + Register reg = ToRegister(instr->result()); + Register tmp = ToRegister(instr->temp()); + Register tmp2 = ToRegister(instr->temp2()); + Register tmp3 = ToRegister(instr->temp3()); + + DeferredSIMD128ToTagged* deferred = + new(zone()) DeferredSIMD128ToTagged(this, instr, + static_cast(T::kRuntimeAllocatorId())); + if (FLAG_inline_new) { + if (T::kInstanceType == FLOAT32x4_TYPE) { + __ AllocateFloat32x4(reg, tmp, tmp2, tmp3, deferred->entry()); + } else if (T::kInstanceType == INT32x4_TYPE) { + __ AllocateInt32x4(reg, tmp, tmp2, tmp3, deferred->entry()); + } else if (T::kInstanceType == FLOAT64x2_TYPE) { + __ AllocateFloat64x2(reg, tmp, tmp2, tmp3, deferred->entry()); + } + } else { + __ jmp(deferred->entry()); + } + __ bind(deferred->exit()); + + // Load the inner FixedTypedArray object. 
+ __ movp(tmp, FieldOperand(reg, T::kValueOffset)); + + __ movups(FieldOperand(tmp, FixedTypedArrayBase::kDataOffset), input_reg); +} + + +void LCodeGen::DoSIMD128ToTagged(LSIMD128ToTagged* instr) { + if (instr->value()->IsFloat32x4Register()) { + HandleSIMD128ToTagged(instr); + } else if (instr->value()->IsFloat64x2Register()) { + HandleSIMD128ToTagged(instr); + } else { + DCHECK(instr->value()->IsInt32x4Register()); + HandleSIMD128ToTagged(instr); + } +} + + void LCodeGen::DoSmiTag(LSmiTag* instr) { HChange* hchange = instr->hydrogen(); Register input = ToRegister(instr->value()); @@ -5002,6 +6198,44 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { } +template +void LCodeGen::HandleTaggedToSIMD128(LTaggedToSIMD128* instr) { + LOperand* input = instr->value(); + DCHECK(input->IsRegister()); + LOperand* result = instr->result(); + DCHECK(result->IsSIMD128Register()); + LOperand* temp = instr->temp(); + DCHECK(temp->IsRegister()); + + Register input_reg = ToRegister(input); + XMMRegister result_reg = ToSIMD128Register(result); + Register temp_reg = ToRegister(temp); + + __ testp(input_reg, Immediate(kSmiTagMask)); + DeoptimizeIf(zero, instr->environment()); + __ CmpObjectType(input_reg, T::kInstanceType, kScratchRegister); + DeoptimizeIf(not_equal, instr->environment()); + + // Load the inner FixedTypedArray object. 
+ __ movp(temp_reg, FieldOperand(input_reg, T::kValueOffset)); + + __ movups( + result_reg, FieldOperand(temp_reg, FixedTypedArrayBase::kDataOffset)); +} + + +void LCodeGen::DoTaggedToSIMD128(LTaggedToSIMD128* instr) { + if (instr->representation().IsFloat32x4()) { + HandleTaggedToSIMD128(instr); + } else if (instr->representation().IsFloat64x2()) { + HandleTaggedToSIMD128(instr); + } else { + DCHECK(instr->representation().IsInt32x4()); + HandleTaggedToSIMD128(instr); + } +} + + void LCodeGen::DoDoubleToI(LDoubleToI* instr) { LOperand* input = instr->value(); DCHECK(input->IsDoubleRegister()); diff --git a/src/v8/src/x64/lithium-codegen-x64.h b/src/v8/src/x64/lithium-codegen-x64.h index b3070c0..8d9155f 100644 --- a/src/v8/src/x64/lithium-codegen-x64.h +++ b/src/v8/src/x64/lithium-codegen-x64.h @@ -62,6 +62,10 @@ class LCodeGen: public LCodeGenBase { // Support for converting LOperands to assembler types. Register ToRegister(LOperand* op) const; XMMRegister ToDoubleRegister(LOperand* op) const; + XMMRegister ToFloat32x4Register(LOperand* op) const; + XMMRegister ToFloat64x2Register(LOperand* op) const; + XMMRegister ToInt32x4Register(LOperand* op) const; + XMMRegister ToSIMD128Register(LOperand* op) const; bool IsInteger32Constant(LConstantOperand* op) const; bool IsDehoistedKeyConstant(LConstantOperand* op) const; bool IsSmiConstant(LConstantOperand* op) const; @@ -104,6 +108,13 @@ class LCodeGen: public LCodeGenBase { void DoDeferredLoadMutableDouble(LLoadFieldByIndex* instr, Register object, Register index); + void DoDeferredSIMD128ToTagged(LSIMD128ToTagged* instr, + Runtime::FunctionId id); + + template + void HandleTaggedToSIMD128(LTaggedToSIMD128* instr); + template + void HandleSIMD128ToTagged(LSIMD128ToTagged* instr); // Parallel move support. 
void DoParallelMove(LParallelMove* move); @@ -229,6 +240,7 @@ class LCodeGen: public LCodeGenBase { Register ToRegister(int index) const; XMMRegister ToDoubleRegister(int index) const; + XMMRegister ToSIMD128Register(int index) const; Operand BuildFastArrayOperand( LOperand* elements_pointer, LOperand* key, @@ -309,6 +321,9 @@ class LCodeGen: public LCodeGenBase { void EnsureSpaceForLazyDeopt(int space_needed) V8_OVERRIDE; void DoLoadKeyedExternalArray(LLoadKeyed* instr); + bool HandleExternalArrayOpRequiresPreScale(LOperand* key, + Representation key_representation, + ElementsKind elements_kind); void DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr); void DoLoadKeyedFixedArray(LLoadKeyed* instr); void DoStoreKeyedExternalArray(LStoreKeyed* instr); diff --git a/src/v8/src/x64/lithium-gap-resolver-x64.cc b/src/v8/src/x64/lithium-gap-resolver-x64.cc index bfc2ec0..66e581a 100644 --- a/src/v8/src/x64/lithium-gap-resolver-x64.cc +++ b/src/v8/src/x64/lithium-gap-resolver-x64.cc @@ -227,6 +227,23 @@ void LGapResolver::EmitMove(int index) { __ movsd(xmm0, src); __ movsd(cgen_->ToOperand(destination), xmm0); } + } else if (source->IsSIMD128Register()) { + XMMRegister src = cgen_->ToSIMD128Register(source); + if (destination->IsSIMD128Register()) { + __ movaps(cgen_->ToSIMD128Register(destination), src); + } else { + DCHECK(destination->IsSIMD128StackSlot()); + __ movups(cgen_->ToOperand(destination), src); + } + } else if (source->IsSIMD128StackSlot()) { + Operand src = cgen_->ToOperand(source); + if (destination->IsSIMD128Register()) { + __ movups(cgen_->ToSIMD128Register(destination), src); + } else { + DCHECK(destination->IsSIMD128StackSlot()); + __ movups(xmm0, src); + __ movups(cgen_->ToOperand(destination), xmm0); + } } else { UNREACHABLE(); } @@ -268,6 +285,19 @@ void LGapResolver::EmitSwap(int index) { __ movsd(dst, xmm0); __ movp(src, kScratchRegister); + } else if ((source->IsSIMD128StackSlot() && + destination->IsSIMD128StackSlot())) { + // Swap two XMM stack 
slots. + STATIC_ASSERT(kSIMD128Size == 2 * kDoubleSize); + Operand src = cgen_->ToOperand(source); + Operand dst = cgen_->ToOperand(destination); + __ movups(xmm0, src); + __ movq(kScratchRegister, dst); + __ movq(src, kScratchRegister); + __ movq(kScratchRegister, Operand(dst, kDoubleSize)); + __ movq(Operand(src, kDoubleSize), kScratchRegister); + __ movups(dst, xmm0); + } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) { // Swap two double registers. XMMRegister source_reg = cgen_->ToDoubleRegister(source); @@ -276,6 +306,14 @@ void LGapResolver::EmitSwap(int index) { __ movaps(source_reg, destination_reg); __ movaps(destination_reg, xmm0); + } else if (source->IsSIMD128Register() && destination->IsSIMD128Register()) { + // Swap two XMM registers. + XMMRegister source_reg = cgen_->ToSIMD128Register(source); + XMMRegister destination_reg = cgen_->ToSIMD128Register(destination); + __ movaps(xmm0, source_reg); + __ movaps(source_reg, destination_reg); + __ movaps(destination_reg, xmm0); + } else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) { // Swap a double register and a double stack slot. DCHECK((source->IsDoubleRegister() && destination->IsDoubleStackSlot()) || @@ -290,6 +328,21 @@ void LGapResolver::EmitSwap(int index) { __ movsd(other_operand, reg); __ movaps(reg, xmm0); + } else if (source->IsSIMD128Register() || destination->IsSIMD128Register()) { + // Swap a xmm register and a xmm stack slot. + DCHECK((source->IsSIMD128Register() && + destination->IsSIMD128StackSlot()) || + (source->IsSIMD128StackSlot() && + destination->IsSIMD128Register())); + XMMRegister reg = cgen_->ToSIMD128Register(source->IsSIMD128Register() + ? source + : destination); + LOperand* other = source->IsSIMD128Register() ? 
destination : source; + DCHECK(other->IsSIMD128StackSlot()); + Operand other_operand = cgen_->ToOperand(other); + __ movups(xmm0, other_operand); + __ movups(other_operand, reg); + __ movaps(reg, xmm0); } else { // No other combinations are possible. UNREACHABLE(); diff --git a/src/v8/src/x64/lithium-x64.cc b/src/v8/src/x64/lithium-x64.cc index 0575166..cc0664e 100644 --- a/src/v8/src/x64/lithium-x64.cc +++ b/src/v8/src/x64/lithium-x64.cc @@ -340,6 +340,21 @@ int LPlatformChunk::GetNextSpillIndex(RegisterKind kind) { // TODO(haitao): make sure rbp is aligned at 8-byte boundary for x32 port. spill_slot_count_ |= 1; } + + switch (kind) { + case GENERAL_REGISTERS: return spill_slot_count_++; + case DOUBLE_REGISTERS: return spill_slot_count_++; + case FLOAT32x4_REGISTERS: + case FLOAT64x2_REGISTERS: + case INT32x4_REGISTERS: { + spill_slot_count_++; + return spill_slot_count_++; + } + default: + UNREACHABLE(); + return -1; + } + return spill_slot_count_++; } @@ -349,11 +364,15 @@ LOperand* LPlatformChunk::GetNextSpillSlot(RegisterKind kind) { // Alternatively, at some point, start using half-size // stack slots for int32 values. 
int index = GetNextSpillIndex(kind); - if (kind == DOUBLE_REGISTERS) { - return LDoubleStackSlot::Create(index, zone()); - } else { - DCHECK(kind == GENERAL_REGISTERS); - return LStackSlot::Create(index, zone()); + switch (kind) { + case GENERAL_REGISTERS: return LStackSlot::Create(index, zone()); + case DOUBLE_REGISTERS: return LDoubleStackSlot::Create(index, zone()); + case FLOAT32x4_REGISTERS: return LFloat32x4StackSlot::Create(index, zone()); + case FLOAT64x2_REGISTERS: return LFloat64x2StackSlot::Create(index, zone()); + case INT32x4_REGISTERS: return LInt32x4StackSlot::Create(index, zone()); + default: + UNREACHABLE(); + return NULL; } } @@ -1228,6 +1247,283 @@ LInstruction* LChunkBuilder::DoMathPowHalf(HUnaryMathOperation* instr) { } +const char* LNullarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_NULLARY_OPERATION_CASE_ITEM(module, function, name, p4) \ + case k##name: \ + return #module "-" #function; +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoNullarySIMDOperation( + HNullarySIMDOperation* instr) { + LNullarySIMDOperation* result = + new(zone()) LNullarySIMDOperation(instr->op()); + switch (instr->op()) { +#define SIMD_NULLARY_OPERATION_CASE_ITEM(module, function, name, p4) \ + case k##name: +SIMD_NULLARY_OPERATIONS(SIMD_NULLARY_OPERATION_CASE_ITEM) +#undef SIMD_NULLARY_OPERATION_CASE_ITEM + return DefineAsRegister(result); + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LUnarySIMDOperation::Mnemonic() const { + switch (op()) { + case kSIMD128Change: return "SIMD128-change"; +#define SIMD_UNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5) \ + case k##name: \ + return #module "-" #function; +SIMD_UNARY_OPERATIONS(SIMD_UNARY_OPERATION_CASE_ITEM) +SIMD_UNARY_OPERATIONS_FOR_PROPERTY_ACCESS(SIMD_UNARY_OPERATION_CASE_ITEM) +#undef SIMD_UNARY_OPERATION_CASE_ITEM + default: + 
UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoUnarySIMDOperation(HUnarySIMDOperation* instr) { + LOperand* input = UseRegisterAtStart(instr->value()); + LUnarySIMDOperation* result = + new(zone()) LUnarySIMDOperation(input, instr->op()); + switch (instr->op()) { + case kSIMD128Change: + return AssignEnvironment(DefineAsRegister(result)); + case kFloat32x4Abs: + case kFloat32x4Neg: + case kFloat32x4Reciprocal: + case kFloat32x4ReciprocalSqrt: + case kFloat32x4Sqrt: + case kFloat64x2Abs: + case kFloat64x2Neg: + case kFloat64x2Sqrt: + case kInt32x4Neg: + case kInt32x4Not: + return DefineSameAsFirst(result); + case kFloat32x4Coercion: + case kFloat64x2Coercion: + case kInt32x4Coercion: + case kFloat32x4BitsToInt32x4: + case kFloat32x4ToInt32x4: + case kInt32x4BitsToFloat32x4: + case kInt32x4ToFloat32x4: + case kFloat32x4Splat: + case kInt32x4Splat: + case kFloat32x4GetSignMask: + case kFloat32x4GetX: + case kFloat32x4GetY: + case kFloat32x4GetZ: + case kFloat32x4GetW: + case kFloat64x2GetSignMask: + case kFloat64x2GetX: + case kFloat64x2GetY: + case kInt32x4GetSignMask: + case kInt32x4GetX: + case kInt32x4GetY: + case kInt32x4GetZ: + case kInt32x4GetW: + case kInt32x4GetFlagX: + case kInt32x4GetFlagY: + case kInt32x4GetFlagZ: + case kInt32x4GetFlagW: + return DefineAsRegister(result); + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LBinarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_BINARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, p6) \ + case k##name: \ + return #module "-" #function; +SIMD_BINARY_OPERATIONS(SIMD_BINARY_OPERATION_CASE_ITEM) +#undef SIMD_BINARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoBinarySIMDOperation( + HBinarySIMDOperation* instr) { + switch (instr->op()) { + case kFloat32x4Add: + case kFloat32x4Div: + case kFloat32x4Max: + case kFloat32x4Min: + case kFloat32x4Mul: + case kFloat32x4Sub: + case 
kFloat32x4Scale: + case kFloat32x4WithX: + case kFloat32x4WithY: + case kFloat32x4WithZ: + case kFloat32x4WithW: + case kFloat64x2Add: + case kFloat64x2Div: + case kFloat64x2Max: + case kFloat64x2Min: + case kFloat64x2Mul: + case kFloat64x2Sub: + case kFloat64x2Scale: + case kFloat64x2WithX: + case kFloat64x2WithY: + case kInt32x4Add: + case kInt32x4And: + case kInt32x4Mul: + case kInt32x4Or: + case kInt32x4Sub: + case kInt32x4Xor: + case kInt32x4WithX: + case kInt32x4WithY: + case kInt32x4WithZ: + case kInt32x4WithW: + case kInt32x4WithFlagX: + case kInt32x4WithFlagY: + case kInt32x4WithFlagZ: + case kInt32x4WithFlagW: + case kInt32x4GreaterThan: + case kInt32x4Equal: + case kInt32x4LessThan: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseRegisterAtStart(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + if (instr->op() == kInt32x4WithFlagX || + instr->op() == kInt32x4WithFlagY || + instr->op() == kInt32x4WithFlagZ || + instr->op() == kInt32x4WithFlagW) { + return AssignEnvironment(DefineSameAsFirst(result)); + } else { + return DefineSameAsFirst(result); + } + } + case kFloat64x2Constructor: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseRegisterAtStart(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + return DefineAsRegister(result); + } + case kFloat32x4Shuffle: + case kInt32x4Shuffle: + case kInt32x4ShiftLeft: + case kInt32x4ShiftRight: + case kInt32x4ShiftRightArithmetic: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseOrConstant(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + return AssignEnvironment(DefineSameAsFirst(result)); + } + case kFloat32x4LessThan: + case kFloat32x4LessThanOrEqual: + case kFloat32x4Equal: + case kFloat32x4NotEqual: + case 
kFloat32x4GreaterThanOrEqual: + case kFloat32x4GreaterThan: { + LOperand* left = UseRegisterAtStart(instr->left()); + LOperand* right = UseRegisterAtStart(instr->right()); + LBinarySIMDOperation* result = + new(zone()) LBinarySIMDOperation(left, right, instr->op()); + return DefineAsRegister(result); + } + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LTernarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_TERNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, p6, \ + p7) \ + case k##name: \ + return #module "-" #function; +SIMD_TERNARY_OPERATIONS(SIMD_TERNARY_OPERATION_CASE_ITEM) +#undef SIMD_TERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoTernarySIMDOperation( + HTernarySIMDOperation* instr) { + LOperand* first = UseRegisterAtStart(instr->first()); + LOperand* second = UseRegisterAtStart(instr->second()); + LOperand* third = instr->op() == kFloat32x4ShuffleMix + ? UseOrConstant(instr->third()) + : UseRegisterAtStart(instr->third()); + LTernarySIMDOperation* result = + new(zone()) LTernarySIMDOperation(first, second, third, instr->op()); + switch (instr->op()) { + case kFloat32x4Clamp: + case kFloat64x2Clamp: { + return DefineSameAsFirst(result); + } + case kFloat32x4ShuffleMix: { + return AssignEnvironment(DefineSameAsFirst(result)); + } + case kFloat32x4Select: + case kInt32x4Select: { + return DefineAsRegister(result); + } + default: + UNREACHABLE(); + return NULL; + } +} + + +const char* LQuarternarySIMDOperation::Mnemonic() const { + switch (op()) { +#define SIMD_QUARTERNARY_OPERATION_CASE_ITEM(module, function, name, p4, p5, \ + p6, p7, p8) \ + case k##name: \ + return #module "-" #function; +SIMD_QUARTERNARY_OPERATIONS(SIMD_QUARTERNARY_OPERATION_CASE_ITEM) +#undef SIMD_QUARTERNARY_OPERATION_CASE_ITEM + default: + UNREACHABLE(); + return NULL; + } +} + + +LInstruction* LChunkBuilder::DoQuarternarySIMDOperation( + HQuarternarySIMDOperation* instr) { + 
LOperand* x = UseRegisterAtStart(instr->x()); + LOperand* y = UseRegisterAtStart(instr->y()); + LOperand* z = UseRegisterAtStart(instr->z()); + LOperand* w = UseRegisterAtStart(instr->w()); + LQuarternarySIMDOperation* result = + new(zone()) LQuarternarySIMDOperation(x, y, z, w, instr->op()); + if (instr->op() == kInt32x4Bool) { + return AssignEnvironment(DefineAsRegister(result)); + } else { + return DefineAsRegister(result); + } +} + + LInstruction* LChunkBuilder::DoCallNew(HCallNew* instr) { LOperand* context = UseFixed(instr->context(), rsi); LOperand* constructor = UseFixed(instr->constructor(), rdi); @@ -1865,6 +2161,11 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { LInstruction* result = DefineAsRegister(new(zone()) LNumberUntagD(value)); if (!val->representation().IsSmi()) result = AssignEnvironment(result); return result; + } else if (to.IsSIMD128()) { + LOperand* value = UseRegister(instr->value()); + LOperand* temp = TempRegister(); + LTaggedToSIMD128* res = new(zone()) LTaggedToSIMD128(value, temp, to); + return AssignEnvironment(DefineAsRegister(res)); } else if (to.IsSmi()) { LOperand* value = UseRegister(val); if (val->type().IsSmi()) { @@ -1940,6 +2241,19 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { return DefineAsRegister(new(zone()) LInteger32ToDouble(value)); } } + } else if (from.IsSIMD128()) { + DCHECK(to.IsTagged()); + info()->MarkAsDeferredCalling(); + LOperand* value = UseRegister(instr->value()); + LOperand* temp = TempRegister(); + LOperand* temp2 = TempRegister(); + LOperand* temp3 = TempRegister(); + + // Make sure that temp and result_temp are different registers. 
+ LUnallocated* result_temp = TempRegister(); + LSIMD128ToTagged* result = + new(zone()) LSIMD128ToTagged(value, temp, temp2, temp3); + return AssignPointerMap(Define(result, result_temp)); } UNREACHABLE(); return NULL; @@ -2180,7 +2494,11 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { LInstruction* result = NULL; if (kPointerSize == kInt64Size) { - key = UseRegisterOrConstantAtStart(instr->key()); + bool clobbers_key = ExternalArrayOpRequiresPreScale( + instr->key()->representation(), elements_kind); + key = clobbers_key + ? UseTempRegisterOrConstant(instr->key()) + : UseRegisterOrConstantAtStart(instr->key()); } else { bool clobbers_key = ExternalArrayOpRequiresTemp( instr->key()->representation(), elements_kind); @@ -2193,6 +2511,7 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { FindDehoistedKeyDefinitions(instr->key()); } + if (!instr->is_typed_elements()) { LOperand* obj = UseRegisterAtStart(instr->elements()); result = DefineAsRegister(new(zone()) LLoadKeyed(obj, key)); @@ -2201,7 +2520,13 @@ LInstruction* LChunkBuilder::DoLoadKeyed(HLoadKeyed* instr) { (instr->representation().IsInteger32() && !(IsDoubleOrFloatElementsKind(elements_kind))) || (instr->representation().IsDouble() && - (IsDoubleOrFloatElementsKind(elements_kind)))); + (IsDoubleOrFloatElementsKind(elements_kind))) || + (instr->representation().IsFloat32x4() && + IsFloat32x4ElementsKind(elements_kind)) || + (instr->representation().IsFloat64x2() && + IsFloat64x2ElementsKind(elements_kind)) || + (instr->representation().IsInt32x4() && + IsInt32x4ElementsKind(elements_kind))); LOperand* backing_store = UseRegister(instr->elements()); result = DefineAsRegister(new(zone()) LLoadKeyed(backing_store, key)); } @@ -2275,7 +2600,13 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { (instr->value()->representation().IsInteger32() && !IsDoubleOrFloatElementsKind(elements_kind)) || (instr->value()->representation().IsDouble() && - 
IsDoubleOrFloatElementsKind(elements_kind))); + IsDoubleOrFloatElementsKind(elements_kind)) || + (instr->value()->representation().IsFloat32x4() && + IsFloat32x4ElementsKind(elements_kind)) || + (instr->value()->representation().IsFloat64x2() && + IsFloat64x2ElementsKind(elements_kind)) || + (instr->value()->representation().IsInt32x4() && + IsInt32x4ElementsKind(elements_kind))); DCHECK((instr->is_fixed_typed_array() && instr->elements()->representation().IsTagged()) || (instr->is_external() && @@ -2288,7 +2619,11 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { : UseRegister(instr->value()); LOperand* key = NULL; if (kPointerSize == kInt64Size) { - key = UseRegisterOrConstantAtStart(instr->key()); + bool clobbers_key = ExternalArrayOpRequiresPreScale( + instr->key()->representation(), elements_kind); + key = clobbers_key + ? UseTempRegisterOrConstant(instr->key()) + : UseRegisterOrConstantAtStart(instr->key()); } else { bool clobbers_key = ExternalArrayOpRequiresTemp( instr->key()->representation(), elements_kind); diff --git a/src/v8/src/x64/lithium-x64.h b/src/v8/src/x64/lithium-x64.h index a1c563f..ba94b0a 100644 --- a/src/v8/src/x64/lithium-x64.h +++ b/src/v8/src/x64/lithium-x64.h @@ -119,12 +119,19 @@ class LCodeGen; V(MathSqrt) \ V(ModByConstI) \ V(ModByPowerOf2I) \ + V(NullarySIMDOperation) \ + V(UnarySIMDOperation) \ + V(BinarySIMDOperation) \ + V(TernarySIMDOperation) \ + V(QuarternarySIMDOperation) \ V(ModI) \ V(MulI) \ V(NumberTagD) \ + V(SIMD128ToTagged) \ V(NumberTagI) \ V(NumberTagU) \ V(NumberUntagD) \ + V(TaggedToSIMD128) \ V(OsrEntry) \ V(Parameter) \ V(Power) \ @@ -161,7 +168,6 @@ class LCodeGen; V(UnknownOSRValue) \ V(WrapReceiver) - #define DECLARE_CONCRETE_INSTRUCTION(type, mnemonic) \ virtual Opcode opcode() const V8_FINAL V8_OVERRIDE { \ return LInstruction::k##type; \ @@ -955,6 +961,154 @@ class LMathPowHalf V8_FINAL : public LTemplateInstruction<1, 1, 0> { }; +class LNullarySIMDOperation V8_FINAL : public 
LTemplateInstruction<1, 0, 0> { + public: + explicit LNullarySIMDOperation(BuiltinFunctionId op) + : op_(op) { + } + + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kNullarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LNullarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsNullarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(NullarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LUnarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 1, 0> { + public: + LUnarySIMDOperation(LOperand* value, BuiltinFunctionId op) + : op_(op) { + inputs_[0] = value; + } + + LOperand* value() { return inputs_[0]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kUnarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LUnarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsUnarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(UnarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LBinarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 2, 0> { + public: + LBinarySIMDOperation(LOperand* left, LOperand* right, BuiltinFunctionId op) + : op_(op) { + inputs_[0] = left; + inputs_[1] = right; + } + + LOperand* left() { return inputs_[0]; } + LOperand* right() { return inputs_[1]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kBinarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LBinarySIMDOperation* cast(LInstruction* instr) { + 
DCHECK(instr->IsBinarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(BinarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LTernarySIMDOperation V8_FINAL : public LTemplateInstruction<1, 3, 0> { + public: + LTernarySIMDOperation(LOperand* first, LOperand* second, LOperand* third, + BuiltinFunctionId op) + : op_(op) { + inputs_[0] = first; + inputs_[1] = second; + inputs_[2] = third; + } + + LOperand* first() { return inputs_[0]; } + LOperand* second() { return inputs_[1]; } + LOperand* third() { return inputs_[2]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kTernarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LTernarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsTernarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(TernarySIMDOperation) + + private: + BuiltinFunctionId op_; +}; + + +class LQuarternarySIMDOperation V8_FINAL + : public LTemplateInstruction<1, 4, 0> { + public: + LQuarternarySIMDOperation(LOperand* x, LOperand* y, LOperand* z, + LOperand* w, BuiltinFunctionId op) + : op_(op) { + inputs_[0] = x; + inputs_[1] = y; + inputs_[2] = z; + inputs_[3] = w; + } + + LOperand* x() { return inputs_[0]; } + LOperand* y() { return inputs_[1]; } + LOperand* z() { return inputs_[2]; } + LOperand* w() { return inputs_[3]; } + BuiltinFunctionId op() const { return op_; } + + virtual Opcode opcode() const V8_OVERRIDE { + return LInstruction::kQuarternarySIMDOperation; + } + virtual void CompileToNative(LCodeGen* generator) V8_OVERRIDE; + virtual const char* Mnemonic() const V8_OVERRIDE; + static LQuarternarySIMDOperation* cast(LInstruction* instr) { + DCHECK(instr->IsQuarternarySIMDOperation()); + return reinterpret_cast(instr); + } + + DECLARE_HYDROGEN_ACCESSOR(QuarternarySIMDOperation) 
+ + private: + BuiltinFunctionId op_; +}; + + class LCmpObjectEqAndBranch V8_FINAL : public LControlInstruction<2, 0> { public: LCmpObjectEqAndBranch(LOperand* left, LOperand* right) { @@ -1627,19 +1781,30 @@ class LLoadRoot V8_FINAL : public LTemplateInstruction<1, 0, 0> { }; +inline static bool ExternalArrayOpRequiresPreScale( + Representation key_representation, + ElementsKind kind) { + int shift_size = ElementsKindToShiftSize(kind); + return SmiValuesAre31Bits() && key_representation.IsSmi() + ? shift_size > static_cast(maximal_scale_factor) + kSmiTagSize + : shift_size > static_cast(maximal_scale_factor); +} + + inline static bool ExternalArrayOpRequiresTemp( Representation key_representation, ElementsKind elements_kind) { // Operations that require the key to be divided by two to be converted into // an index cannot fold the scale operation into a load and need an extra // temp register to do the work. - return SmiValuesAre31Bits() && key_representation.IsSmi() && + return ExternalArrayOpRequiresPreScale(key_representation, elements_kind) || + (SmiValuesAre31Bits() && key_representation.IsSmi() && (elements_kind == EXTERNAL_INT8_ELEMENTS || elements_kind == EXTERNAL_UINT8_ELEMENTS || elements_kind == EXTERNAL_UINT8_CLAMPED_ELEMENTS || elements_kind == UINT8_ELEMENTS || elements_kind == INT8_ELEMENTS || - elements_kind == UINT8_CLAMPED_ELEMENTS); + elements_kind == UINT8_CLAMPED_ELEMENTS)); } @@ -2071,6 +2236,26 @@ class LNumberTagD V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; +class LSIMD128ToTagged V8_FINAL : public LTemplateInstruction<1, 1, 3> { + public: + explicit LSIMD128ToTagged(LOperand* value, LOperand* temp, + LOperand* temp2, LOperand* temp3) { + inputs_[0] = value; + temps_[0] = temp; + temps_[1] = temp2; + temps_[2] = temp3; + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp() { return temps_[0]; } + LOperand* temp2() { return temps_[1]; } + LOperand* temp3() { return temps_[2]; } + + 
DECLARE_CONCRETE_INSTRUCTION(SIMD128ToTagged, "simd128-tag") + DECLARE_HYDROGEN_ACCESSOR(Change) +}; + + // Sometimes truncating conversion from a tagged value to an int32. class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 0> { public: @@ -2144,6 +2329,26 @@ class LNumberUntagD V8_FINAL : public LTemplateInstruction<1, 1, 0> { }; +class LTaggedToSIMD128 V8_FINAL : public LTemplateInstruction<1, 1, 1> { + public: + explicit LTaggedToSIMD128(LOperand* value, LOperand* temp, + Representation representation) + : representation_(representation) { + inputs_[0] = value; + temps_[0] = temp; + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp() { return temps_[0]; } + Representation representation() const { return representation_; } + + DECLARE_CONCRETE_INSTRUCTION(TaggedToSIMD128, "simd128-untag") + DECLARE_HYDROGEN_ACCESSOR(Change); + private: + Representation representation_; +}; + + class LSmiUntag V8_FINAL : public LTemplateInstruction<1, 1, 0> { public: LSmiUntag(LOperand* value, bool needs_check) diff --git a/src/v8/src/x64/macro-assembler-x64.cc b/src/v8/src/x64/macro-assembler-x64.cc index 7a37fb3..838ba25 100644 --- a/src/v8/src/x64/macro-assembler-x64.cc +++ b/src/v8/src/x64/macro-assembler-x64.cc @@ -929,10 +929,10 @@ void MacroAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, } // R12 to r15 are callee save on all platforms. 
if (fp_mode == kSaveFPRegs) { - subp(rsp, Immediate(kDoubleSize * XMMRegister::kMaxNumRegisters)); + subp(rsp, Immediate(kSIMD128Size * XMMRegister::kMaxNumRegisters)); for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(Operand(rsp, i * kDoubleSize), reg); + movups(Operand(rsp, i * kSIMD128Size), reg); } } } @@ -945,9 +945,9 @@ void MacroAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, if (fp_mode == kSaveFPRegs) { for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(reg, Operand(rsp, i * kDoubleSize)); + movups(reg, Operand(rsp, i * kSIMD128Size)); } - addp(rsp, Immediate(kDoubleSize * XMMRegister::kMaxNumRegisters)); + addp(rsp, Immediate(kSIMD128Size * XMMRegister::kMaxNumRegisters)); } for (int i = kNumberOfSavedRegs - 1; i >= 0; i--) { Register reg = saved_regs[i]; @@ -2599,6 +2599,81 @@ void MacroAssembler::LookupNumberStringCache(Register object, } +void MacroAssembler::absps(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } float_absolute_constant = + { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + Set(kScratchRegister, reinterpret_cast(&float_absolute_constant)); + andps(dst, Operand(kScratchRegister, 0)); +} + + +void MacroAssembler::abspd(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint64_t a; + uint64_t b; + } double_absolute_constant = + { V8_UINT64_C(0x7FFFFFFFFFFFFFFF), V8_UINT64_C(0x7FFFFFFFFFFFFFFF) }; + Set(kScratchRegister, reinterpret_cast(&double_absolute_constant)); + andpd(dst, Operand(kScratchRegister, 0)); +} + + +void MacroAssembler::negateps(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } float_negate_constant = + { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + Set(kScratchRegister, reinterpret_cast(&float_negate_constant)); + xorps(dst, 
Operand(kScratchRegister, 0)); +} + + +void MacroAssembler::negatepd(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint64_t a; + uint64_t b; + } double_absolute_constant = + { V8_UINT64_C(0x8000000000000000), V8_UINT64_C(0x8000000000000000) }; + Set(kScratchRegister, reinterpret_cast(&double_absolute_constant)); + xorpd(dst, Operand(kScratchRegister, 0)); +} + + +void MacroAssembler::notps(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } float_not_constant = + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + Set(kScratchRegister, reinterpret_cast(&float_not_constant)); + xorps(dst, Operand(kScratchRegister, 0)); +} + + +void MacroAssembler::pnegd(XMMRegister dst) { + static const struct V8_ALIGNED(16) { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + } int32_one_constant = { 0x1, 0x1, 0x1, 0x1 }; + notps(dst); + Set(kScratchRegister, reinterpret_cast(&int32_one_constant)); + paddd(dst, Operand(kScratchRegister, 0)); +} + + + void MacroAssembler::JumpIfNotString(Register object, Register object_map, Label* not_string, @@ -4087,13 +4162,13 @@ void MacroAssembler::EnterExitFrameEpilogue(int arg_stack_space, #endif // Optionally save all XMM registers. 
if (save_doubles) { - int space = XMMRegister::kMaxNumAllocatableRegisters * kDoubleSize + + int space = XMMRegister::kMaxNumAllocatableRegisters * kSIMD128Size + arg_stack_space * kRegisterSize; subp(rsp, Immediate(space)); int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); i++) { XMMRegister reg = XMMRegister::FromAllocationIndex(i); - movsd(Operand(rbp, offset - ((i + 1) * kDoubleSize)), reg); + movups(Operand(rbp, offset - ((i + 1) * kSIMD128Size)), reg); } } else if (arg_stack_space > 0) { subp(rsp, Immediate(arg_stack_space * kRegisterSize)); @@ -4137,7 +4212,7 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles) { int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::NumAllocatableRegisters(); i++) { XMMRegister reg = XMMRegister::FromAllocationIndex(i); - movsd(reg, Operand(rbp, offset - ((i + 1) * kDoubleSize))); + movups(reg, Operand(rbp, offset - ((i + 1) * kSIMD128Size))); } } // Get the return address from the stack and restore the frame pointer. @@ -4611,6 +4686,57 @@ void MacroAssembler::AllocateHeapNumber(Register result, } +#define SIMD128_HEAP_ALLOCATE_FUNCTIONS(V) \ + V(Float32x4, float32x4, FLOAT32x4) \ + V(Float64x2, float64x2, FLOAT64x2) \ + V(Int32x4, int32x4, INT32x4) + +#define DECLARE_SIMD_HEAP_ALLOCATE_FUNCTION(Type, type, TYPE) \ +void MacroAssembler::Allocate##Type(Register result, \ + Register scratch1, \ + Register scratch2, \ + Register scratch3, \ + Label* gc_required) { \ + /* Allocate SIMD128 object. */ \ + Allocate(Type::kSize, result, scratch1, no_reg, gc_required, TAG_OBJECT);\ + /* Load the initial map and assign to new allocated object. 
*/ \ + movp(scratch1, Operand(rbp, StandardFrameConstants::kContextOffset)); \ + movp(scratch1, \ + Operand(scratch1, \ + Context::SlotOffset(Context::GLOBAL_OBJECT_INDEX))); \ + movp(scratch1, \ + FieldOperand(scratch1, GlobalObject::kNativeContextOffset)); \ + movp(scratch1, \ + Operand(scratch1, \ + Context::SlotOffset(Context::TYPE##_FUNCTION_INDEX))); \ + LoadGlobalFunctionInitialMap(scratch1, scratch1); \ + movp(FieldOperand(result, JSObject::kMapOffset), \ + scratch1); \ + /* Initialize the properties and elements. */ \ + MoveHeapObject(kScratchRegister, \ + isolate()->factory()->empty_fixed_array()); \ + movp(FieldOperand(result, JSObject::kPropertiesOffset), \ + kScratchRegister); \ + movp(FieldOperand(result, JSObject::kElementsOffset), \ + kScratchRegister); \ + /* Allocate FixedTypedArray object. */ \ + Allocate(FixedTypedArrayBase::kDataOffset + k##Type##Size, \ + scratch1, scratch2, no_reg, gc_required, TAG_OBJECT); \ + MoveHeapObject(kScratchRegister, \ + isolate()->factory()->fixed_##type##_array_map()); \ + movp(FieldOperand(scratch1, FixedTypedArrayBase::kMapOffset), \ + kScratchRegister); \ + movp(scratch3, Immediate(1)); \ + Integer32ToSmi(scratch2, scratch3); \ + movp(FieldOperand(scratch1, FixedTypedArrayBase::kLengthOffset), \ + scratch2); \ + /* Assign FixedTypedArray object to SIMD128 object. */ \ + movp(FieldOperand(result, Type::kValueOffset), scratch1); \ +} + +SIMD128_HEAP_ALLOCATE_FUNCTIONS(DECLARE_SIMD_HEAP_ALLOCATE_FUNCTION) + + void MacroAssembler::AllocateTwoByteString(Register result, Register length, Register scratch1, diff --git a/src/v8/src/x64/macro-assembler-x64.h b/src/v8/src/x64/macro-assembler-x64.h index 2ab05cf..962cdbd 100644 --- a/src/v8/src/x64/macro-assembler-x64.h +++ b/src/v8/src/x64/macro-assembler-x64.h @@ -753,6 +753,16 @@ class MacroAssembler: public Assembler { // --------------------------------------------------------------------------- + // SIMD macros. 
+ void absps(XMMRegister dst); + void abspd(XMMRegister dst); + void negateps(XMMRegister dst); + void negatepd(XMMRegister dst); + void notps(XMMRegister dst); + void pnegd(XMMRegister dst); + + + // --------------------------------------------------------------------------- // String macros. // Generate code to do a lookup in the number string cache. If the number in @@ -1198,6 +1208,29 @@ class MacroAssembler: public Assembler { Label* gc_required, MutableMode mode = IMMUTABLE); + + // Allocate a float32x4, float64x2 and int32x4 object in new space with + // undefined value. + // Returns tagged pointer in result register, or jumps to gc_required if new + // space is full. + void AllocateFloat32x4(Register result, + Register scratch1, + Register scratch2, + Register scratch3, + Label* gc_required); + + void AllocateFloat64x2(Register result, + Register scratch1, + Register scratch2, + Register scratch3, + Label* gc_required); + + void AllocateInt32x4(Register result, + Register scratch1, + Register scratch2, + Register scratch3, + Label* gc_required); + // Allocate a sequential string. All the header fields of the string object // are initialized. void AllocateTwoByteString(Register result, diff --git a/src/v8/test/cctest/test-disasm-ia32.cc b/src/v8/test/cctest/test-disasm-ia32.cc index 8436df7..46565c3 100644 --- a/src/v8/test/cctest/test-disasm-ia32.cc +++ b/src/v8/test/cctest/test-disasm-ia32.cc @@ -427,6 +427,83 @@ TEST(DisasmIa320) { __ psrlq(xmm0, xmm1); __ por(xmm0, xmm1); } + { + __ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000)); + __ cvtsi2sd(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ movsd(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ movsd(Operand(ebx, ecx, times_4, 10000), xmm1); + // 128 bit move instructions. 
+ __ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000)); + __ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0); + __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000)); + __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0); + + __ addsd(xmm1, xmm0); + __ mulsd(xmm1, xmm0); + __ subsd(xmm1, xmm0); + __ divsd(xmm1, xmm0); + __ ucomisd(xmm0, xmm1); + __ cmpltsd(xmm0, xmm1); + + __ andpd(xmm0, xmm1); + __ psllq(xmm0, 17); + __ psllq(xmm0, xmm1); + __ psrlq(xmm0, 17); + __ psrlq(xmm0, xmm1); + __ por(xmm0, xmm1); + + // new instruction introduced by SIMD + __ cvtdq2ps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ cvtdq2ps(xmm1, xmm0); + __ cvtps2dq(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ cvtps2dq(xmm1, xmm0); + __ paddd(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ paddd(xmm1, xmm0); + __ psubd(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ psubd(xmm1, xmm0); + __ pmuludq(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ pmuludq(xmm1, xmm0); + __ punpackldq(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ punpackldq(xmm1, xmm0); + { + __ shufps(xmm1, xmm1, 0x0); + __ movups(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ movups(Operand(ebx, ecx, times_4, 10000), xmm1); + + __ andps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ andps(xmm1, xmm0); + __ xorps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ xorps(xmm1, xmm0); + __ orps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ orps(xmm1, xmm0); + + __ addps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ addps(xmm1, xmm0); + __ subps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ subps(xmm1, xmm0); + __ mulps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ mulps(xmm1, xmm0); + __ divps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ divps(xmm1, xmm0); + __ minps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ minps(xmm1, xmm0); + __ maxps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ maxps(xmm1, xmm0); + __ rcpps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ rcpps(xmm1, xmm0); + __ rsqrtps(xmm1, Operand(ebx, ecx, times_4, 
10000)); + __ rsqrtps(xmm1, xmm0); + __ sqrtps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ sqrtps(xmm1, xmm0); + + __ cmpeqps(xmm1, xmm0); + __ cmpltps(xmm1, xmm0); + __ cmpleps(xmm1, xmm0); + __ cmpneqps(xmm1, xmm0); + __ cmpnltps(xmm1, xmm0); + __ cmpnleps(xmm1, xmm0); + } + } // cmov. { @@ -454,6 +531,9 @@ TEST(DisasmIa320) { __ pextrd(eax, xmm0, 1); __ pinsrd(xmm1, eax, 0); __ extractps(eax, xmm1, 0); + __ insertps(xmm1, xmm0, 0); + __ pmulld(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ pmulld(xmm1, xmm0); } } diff --git a/src/v8/test/cctest/test-heap.cc b/src/v8/test/cctest/test-heap.cc index 601e9eb..2ef1d20 100644 --- a/src/v8/test/cctest/test-heap.cc +++ b/src/v8/test/cctest/test-heap.cc @@ -1685,7 +1685,8 @@ static void FillUpNewSpace(NewSpace* new_space) { HandleScope scope(isolate); AlwaysAllocateScope always_allocate(isolate); intptr_t available = new_space->EffectiveCapacity() - new_space->Size(); - intptr_t number_of_fillers = (available / FixedArray::SizeFor(32)) - 1; + intptr_t number_of_fillers = (RoundDown(available, Page::kPageSize) / + FixedArray::SizeFor(32)) - 1; for (intptr_t i = 0; i < number_of_fillers; i++) { CHECK(heap->InNewSpace(*factory->NewFixedArray(32, NOT_TENURED))); } @@ -1713,6 +1714,11 @@ TEST(GrowAndShrinkNewSpace) { CHECK(2 * old_capacity == new_capacity); old_capacity = new_space->Capacity(); + new_space->Grow(); + new_capacity = new_space->Capacity(); + CHECK(2 * old_capacity == new_capacity); + + old_capacity = new_space->Capacity(); FillUpNewSpace(new_space); new_capacity = new_space->Capacity(); CHECK(old_capacity == new_capacity); @@ -1731,7 +1737,7 @@ TEST(GrowAndShrinkNewSpace) { old_capacity = new_space->Capacity(); new_space->Shrink(); new_capacity = new_space->Capacity(); - CHECK(old_capacity == 2 * new_capacity); + CHECK(old_capacity >= 2 * new_capacity); // Consecutive shrinking should not affect space capacity. 
old_capacity = new_space->Capacity(); diff --git a/src/v8/test/cctest/test-mark-compact.cc b/src/v8/test/cctest/test-mark-compact.cc index 1d4b0d8..0444f97 100644 --- a/src/v8/test/cctest/test-mark-compact.cc +++ b/src/v8/test/cctest/test-mark-compact.cc @@ -107,6 +107,8 @@ TEST(NoPromotion) { v8::HandleScope sc(CcTest::isolate()); + heap->new_space()->Grow(); + // Allocate a big fixed array in the new space. int array_length = (Page::kMaxRegularHeapObjectSize - FixedArray::kHeaderSize) / diff --git a/src/v8/test/fuzz-natives/testcfg.py b/src/v8/test/fuzz-natives/testcfg.py index 5e00b40..973908b 100644 --- a/src/v8/test/fuzz-natives/testcfg.py +++ b/src/v8/test/fuzz-natives/testcfg.py @@ -20,7 +20,7 @@ class FuzzNativesTestSuite(testsuite.TestSuite): shell += ".exe" output = commands.Execute( context.command_prefix + - [shell, "--allow-natives-syntax", "-e", + [shell, "--allow-natives-syntax", "--simd-object", "-e", "try { var natives = %ListNatives();" " for (var n in natives) { print(natives[n]); }" "} catch(e) {}"] + @@ -33,7 +33,7 @@ class FuzzNativesTestSuite(testsuite.TestSuite): for line in output.stdout.strip().split(): try: (name, argc) = line.split(",") - flags = ["--allow-natives-syntax", + flags = ["--allow-natives-syntax", "--simd-object", "-e", "var NAME = '%s', ARGC = %s;" % (name, argc)] test = testcase.TestCase(self, name, flags) tests.append(test) diff --git a/src/v8/test/mjsunit/runtime-gen/allocatefloat32x4.js b/src/v8/test/mjsunit/runtime-gen/allocatefloat32x4.js new file mode 100644 index 0000000..4821b8d --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/allocatefloat32x4.js @@ -0,0 +1,4 @@ +// Copyright 2014 the V8 project authors. All rights reserved. 
+// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +%AllocateFloat32x4(); diff --git a/src/v8/test/mjsunit/runtime-gen/allocatefloat64x2.js b/src/v8/test/mjsunit/runtime-gen/allocatefloat64x2.js new file mode 100644 index 0000000..4b12f6b --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/allocatefloat64x2.js @@ -0,0 +1,4 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +%AllocateFloat64x2(); diff --git a/src/v8/test/mjsunit/runtime-gen/allocateint32x4.js b/src/v8/test/mjsunit/runtime-gen/allocateint32x4.js new file mode 100644 index 0000000..14f2f24 --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/allocateint32x4.js @@ -0,0 +1,4 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +%AllocateInt32x4(); diff --git a/src/v8/test/mjsunit/runtime-gen/float32x4clamp.js b/src/v8/test/mjsunit/runtime-gen/float32x4clamp.js new file mode 100644 index 0000000..1a3ac5f --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/float32x4clamp.js @@ -0,0 +1,7 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); +var _lo = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); +var _hi = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); +%Float32x4Clamp(_self, _lo, _hi); diff --git a/src/v8/test/mjsunit/runtime-gen/float32x4getsignmask.js b/src/v8/test/mjsunit/runtime-gen/float32x4getsignmask.js new file mode 100644 index 0000000..b0e7cf8 --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/float32x4getsignmask.js @@ -0,0 +1,5 @@ +// Copyright 2014 the V8 project authors. 
All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); +%Float32x4GetSignMask(_self); diff --git a/src/v8/test/mjsunit/runtime-gen/float32x4select.js b/src/v8/test/mjsunit/runtime-gen/float32x4select.js new file mode 100644 index 0000000..51d7745 --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/float32x4select.js @@ -0,0 +1,7 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.int32x4(0, 0, 0, 0); +var _tv = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); +var _fv = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); +%Float32x4Select(_self, _tv, _fv); diff --git a/src/v8/test/mjsunit/runtime-gen/float64x2clamp.js b/src/v8/test/mjsunit/runtime-gen/float64x2clamp.js new file mode 100644 index 0000000..f4a56b5 --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/float64x2clamp.js @@ -0,0 +1,7 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.float64x2(0.0, 0.0); +var _lo = SIMD.float64x2(0.0, 0.0); +var _hi = SIMD.float64x2(0.0, 0.0); +%Float64x2Clamp(_self, _lo, _hi); diff --git a/src/v8/test/mjsunit/runtime-gen/float64x2getsignmask.js b/src/v8/test/mjsunit/runtime-gen/float64x2getsignmask.js new file mode 100644 index 0000000..65651581 --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/float64x2getsignmask.js @@ -0,0 +1,5 @@ +// Copyright 2014 the V8 project authors. All rights reserved. 
+// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.float64x2(0.0, 0.0); +%Float64x2GetSignMask(_self); diff --git a/src/v8/test/mjsunit/runtime-gen/int32x4getsignmask.js b/src/v8/test/mjsunit/runtime-gen/int32x4getsignmask.js new file mode 100644 index 0000000..8417d0e --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/int32x4getsignmask.js @@ -0,0 +1,5 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.int32x4(0, 0, 0, 0); +%Int32x4GetSignMask(_self); diff --git a/src/v8/test/mjsunit/runtime-gen/int32x4select.js b/src/v8/test/mjsunit/runtime-gen/int32x4select.js new file mode 100644 index 0000000..c933a0e --- /dev/null +++ b/src/v8/test/mjsunit/runtime-gen/int32x4select.js @@ -0,0 +1,7 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY +// Flags: --allow-natives-syntax --harmony --simd-object +var _self = SIMD.int32x4(0, 0, 0, 0); +var _tv = SIMD.int32x4(0, 0, 0, 0); +var _fv = SIMD.int32x4(0, 0, 0, 0); +%Int32x4Select(_self, _tv, _fv); diff --git a/src/v8/test/mjsunit/simd/argument_object.js b/src/v8/test/mjsunit/simd/argument_object.js new file mode 100644 index 0000000..fc2e6ba --- /dev/null +++ b/src/v8/test/mjsunit/simd/argument_object.js @@ -0,0 +1,124 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Flags: --simd_object --allow-natives-syntax --use-escape-analysis + +function testArgumentsObjectwithFloat32x4Field() { + "use strict"; + var forceDeopt = { deopt:false }; + function inner(a,b,c,d,e,f,g,h,i,j,k) { + var args = arguments; + forceDeopt.deopt; + assertSame(11, args.length); + assertSame(a, args[0]); + assertSame(b, args[1]); + assertSame(c, args[2]); + assertSame(d, args[3]); + assertSame(e, args[4]); + assertSame(f, args[5]); + assertSame(g, args[6]); + assertSame(h, args[7]); + assertSame(i, args[8]); + assertSame(j, args[9]); + assertEquals(1, args[10].x); + assertEquals(2, args[10].y); + assertEquals(3, args[10].z); + assertEquals(4, args[10].w); + } + + var a = 0.5; + var b = 1.7; + var c = 123; + function outer() { + inner( + a - 0.3, // double in double register + b + 2.3, // integer in double register + c + 321, // integer in general register + c - 456, // integer in stack slot + a + 0.1, a + 0.2, a + 0.3, a + 0.4, a + 0.5, + a + 0.6, // double in stack slot + SIMD.float32x4(1, 2, 3, 4) + ); + } + + outer(); + outer(); + %OptimizeFunctionOnNextCall(outer); + outer(); + delete forceDeopt.deopt; + outer(); +} + +testArgumentsObjectwithFloat32x4Field(); + +function testArgumentsObjectwithInt32x4Field() { + "use strict"; + var forceDeopt = { deopt:false }; + function inner(a,b,c,d,e,f,g,h,i,j,k) { + var args = arguments; + forceDeopt.deopt; + assertSame(11, args.length); + assertSame(a, args[0]); + assertSame(b, args[1]); + assertSame(c, args[2]); + assertSame(d, args[3]); + assertSame(e, args[4]); + assertSame(f, args[5]); + assertSame(g, args[6]); + assertSame(h, args[7]); + assertSame(i, args[8]); + assertSame(j, args[9]); + assertEquals(1, args[10].x); + assertEquals(2, args[10].y); + assertEquals(3, args[10].z); + assertEquals(4, args[10].w); + } + + var a = 0.5; + var b = 1.7; + var c = 123; + function outer() { + inner( + a - 0.3, // double in double register + b + 2.3, // integer in double register + c + 321, // integer in general 
register + c - 456, // integer in stack slot + a + 0.1, a + 0.2, a + 0.3, a + 0.4, a + 0.5, + a + 0.6, // double in stack slot + SIMD.int32x4(1, 2, 3, 4) + ); + } + + outer(); + outer(); + %OptimizeFunctionOnNextCall(outer); + outer(); + delete forceDeopt.deopt; + outer(); +} + +testArgumentsObjectwithInt32x4Field(); diff --git a/src/v8/test/mjsunit/simd/builtin_operator.js b/src/v8/test/mjsunit/simd/builtin_operator.js new file mode 100644 index 0000000..d4cb1ab --- /dev/null +++ b/src/v8/test/mjsunit/simd/builtin_operator.js @@ -0,0 +1,183 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --simd_object --allow-natives-syntax + +function testArithmeticOperators() { + var a = SIMD.float32x4.zero(); + var b = SIMD.float32x4.zero(); + var c; + + c = a + b; + assertEquals('float32x4(0,0,0,0)float32x4(0,0,0,0)', c); + c = a++; + assertEquals(NaN, c); + c = a - b; + assertEquals(NaN, c); + c = a--; + assertEquals(NaN, c); + c = a * b; + assertEquals(NaN, c); + c = a / b; + assertEquals(NaN, c); + c = a % b; + assertEquals(NaN, c); +} + +testArithmeticOperators(); +testArithmeticOperators(); +%OptimizeFunctionOnNextCall(testArithmeticOperators); +testArithmeticOperators(); + + +function testBitwiseOperators() { + var a = SIMD.float32x4.zero(); + var b = SIMD.float32x4.zero(); + var c; + c = a | b; + assertEquals(0, c); + c = a & b; + assertEquals(0, c); + c = a ^ b; + assertEquals(0, c); + c = ~a; + assertEquals(-1, c); + c = a << 0; + assertEquals(0, c); + c = a >> 0; + assertEquals(0, c); + c = a >>> 0; + assertEquals(0, c); +} + +testBitwiseOperators(); +testBitwiseOperators(); +%OptimizeFunctionOnNextCall(testBitwiseOperators); +testBitwiseOperators(); + + +function testAssignmentOperators() { + var a = SIMD.float32x4.zero(); + var b = SIMD.float32x4.zero(); + var c = a; + c += b; + assertEquals('float32x4(0,0,0,0)float32x4(0,0,0,0)', c); + c -= b; + assertEquals(NaN, c); + c *= b; + assertEquals(NaN, c); + c /= b; + assertEquals(NaN, c); + c %= b; + assertEquals(NaN, c); + + c &= b; + assertEquals(0, c); + c 
|= b; + assertEquals(0, c); + c ^= b; + assertEquals(0, c); + c <<= b; + assertEquals(0, c); + c >>= b; + assertEquals(0, c); + c >>>= b; + assertEquals(0, c); +} + +testAssignmentOperators(); +testAssignmentOperators(); +%OptimizeFunctionOnNextCall(testAssignmentOperators); +testAssignmentOperators(); + + +function testStringOperators() { + var a = SIMD.float32x4.zero(); + var b = "0"; + var c = a; + c += b; + assertEquals("float32x4(0,0,0,0)0", c); + c = b + a; + assertEquals("0float32x4(0,0,0,0)", c); +} + +testStringOperators(); +testStringOperators(); +%OptimizeFunctionOnNextCall(testStringOperators); +testStringOperators(); + + +function testComparisionOperators() { + var a = SIMD.float32x4.zero(); + var b = SIMD.float32x4.zero(); + assertEquals(false, a == b); + assertEquals(true, a != b); + assertEquals(false, a === b); + assertEquals(true, a !== b); + assertEquals(false, a > b); + assertEquals(true, a >= b); + assertEquals(false, a < b); + assertEquals(true, a <= b); +} + +testComparisionOperators(); +testComparisionOperators(); +// TODO(ningxin): optimized code will get opposite result. +//%OptimizeFunctionOnNextCall(testComparisionOperators); +testComparisionOperators(); + + +function testLogicalOperators() { + var a = SIMD.float32x4.zero(); + var b = SIMD.float32x4.splat(1); + assertEquals(1, (a && b).x); + assertEquals(1, (a && b).y); + assertEquals(1, (a && b).z); + assertEquals(1, (a && b).w); + assertEquals(0, (a || b).x); + assertEquals(0, (a || b).y); + assertEquals(0, (a || b).z); + assertEquals(0, (a || b).w); + assertEquals(false, !a); +} + +testLogicalOperators(); +testLogicalOperators(); +%OptimizeFunctionOnNextCall(testLogicalOperators); +testLogicalOperators(); + + +function testConditionalOperators() { + var a = SIMD.int32x4.zero(); + var c = a ? 
1 : 0; + assertEquals(1, c); +} + +testConditionalOperators(); +testConditionalOperators(); +%OptimizeFunctionOnNextCall(testConditionalOperators); +testConditionalOperators(); diff --git a/src/v8/test/mjsunit/simd/builtin_operator_float64x2.js b/src/v8/test/mjsunit/simd/builtin_operator_float64x2.js new file mode 100644 index 0000000..c9b18fe --- /dev/null +++ b/src/v8/test/mjsunit/simd/builtin_operator_float64x2.js @@ -0,0 +1,167 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --simd_object --allow-natives-syntax + +function testArithmeticOperators() { + var a = SIMD.float64x2.zero(); + var b = SIMD.float64x2.zero(); + var c; + + c = a + b; + assertEquals('float64x2(0,0)float64x2(0,0)', c); + c = a++; + assertEquals(NaN, c); + c = a - b; + assertEquals(NaN, c); + c = a--; + assertEquals(NaN, c); + c = a * b; + assertEquals(NaN, c); + c = a / b; + assertEquals(NaN, c); + c = a % b; + assertEquals(NaN, c); +} + +testArithmeticOperators(); +testArithmeticOperators(); +%OptimizeFunctionOnNextCall(testArithmeticOperators); +testArithmeticOperators(); + + +function testBitwiseOperators() { + var a = SIMD.float64x2.zero(); + var b = SIMD.float64x2.zero(); + var c; + c = a | b; + assertEquals(0, c); + c = a & b; + assertEquals(0, c); + c = a ^ b; + assertEquals(0, c); + c = ~a; + assertEquals(-1, c); + c = a << 0; + assertEquals(0, c); + c = a >> 0; + assertEquals(0, c); + c = a >>> 0; + assertEquals(0, c); +} + +testBitwiseOperators(); +testBitwiseOperators(); +%OptimizeFunctionOnNextCall(testBitwiseOperators); +testBitwiseOperators(); + + +function testAssignmentOperators() { + var a = SIMD.float64x2.zero(); + var b = SIMD.float64x2.zero(); + var c = a; + c += b; + assertEquals('float64x2(0,0)float64x2(0,0)', c); + c -= b; + assertEquals(NaN, c); + c *= b; + assertEquals(NaN, c); + c /= b; + assertEquals(NaN, c); + c %= b; + assertEquals(NaN, c); + + c &= b; + assertEquals(0, c); + c |= b; + 
assertEquals(0, c); + c ^= b; + assertEquals(0, c); + c <<= b; + assertEquals(0, c); + c >>= b; + assertEquals(0, c); + c >>>= b; + assertEquals(0, c); +} + +testAssignmentOperators(); +testAssignmentOperators(); +%OptimizeFunctionOnNextCall(testAssignmentOperators); +testAssignmentOperators(); + + +function testStringOperators() { + var a = SIMD.float64x2.zero(); + var b = "0"; + var c = a; + c += b; + assertEquals("float64x2(0,0)0", c); + c = b + a; + assertEquals("0float64x2(0,0)", c); +} + +testStringOperators(); +testStringOperators(); +%OptimizeFunctionOnNextCall(testStringOperators); +testStringOperators(); + + +function testComparisionOperators() { + var a = SIMD.float64x2.zero(); + var b = SIMD.float64x2.zero(); + assertEquals(false, a == b); + assertEquals(true, a != b); + assertEquals(false, a === b); + assertEquals(true, a !== b); + assertEquals(false, a > b); + assertEquals(true, a >= b); + assertEquals(false, a < b); + assertEquals(true, a <= b); +} + +testComparisionOperators(); +testComparisionOperators(); +// TODO(ningxin): optimized code will get opposite result. +//%OptimizeFunctionOnNextCall(testComparisionOperators); +testComparisionOperators(); + + +function testLogicalOperators() { + var a = SIMD.float64x2.zero(); + var b = SIMD.float64x2.splat(1); + assertEquals(1, (a && b).x); + assertEquals(1, (a && b).y); + assertEquals(0, (a || b).x); + assertEquals(0, (a || b).y); + assertEquals(false, !a); +} + +testLogicalOperators(); +testLogicalOperators(); +%OptimizeFunctionOnNextCall(testLogicalOperators); +testLogicalOperators(); diff --git a/src/v8/test/mjsunit/simd/captured_object.js b/src/v8/test/mjsunit/simd/captured_object.js new file mode 100644 index 0000000..1b1e3a6 --- /dev/null +++ b/src/v8/test/mjsunit/simd/captured_object.js @@ -0,0 +1,80 @@ +// Copyright 2011 the V8 project authors. All rights reserved. 
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Flags: --simd_object --allow-natives-syntax --use-escape-analysis + +function testCapturedObjectwithFloat32x4Field() { + var deopt = { deopt:false }; + function constructor() { + this.x = 1.1; + this.y = SIMD.float32x4(1,2,3,4); + } + function field(x) { + var o = new constructor(); + o.x = x; + deopt.deopt; + assertEquals(x, o.x); + assertEquals(o.y.x, 1); + assertEquals(o.y.y, 2); + assertEquals(o.y.z, 3); + assertEquals(o.y.w, 4); + } + field(1); field(2); + // TODO(ningxin): fails in x64 test. + //%OptimizeFunctionOnNextCall(field); + field(3); field(4); + delete deopt.deopt; + field(5); field(6); +} + +testCapturedObjectwithFloat32x4Field(); + +function testCapturedObjectwithInt32x4Field() { + var deopt = { deopt:false }; + function constructor() { + this.x = 1.1; + this.y = SIMD.int32x4(1,2,3,4); + } + function field(x) { + var o = new constructor(); + o.x = x; + deopt.deopt; + assertEquals(x, o.x); + assertEquals(o.y.x, 1); + assertEquals(o.y.y, 2); + assertEquals(o.y.z, 3); + assertEquals(o.y.w, 4); + } + field(1); field(2); + // TODO(ningxin): fix the failures. + //%OptimizeFunctionOnNextCall(field); + field(3); field(4); + delete deopt.deopt; + field(5); field(6); +} + +testCapturedObjectwithInt32x4Field(); diff --git a/src/v8/test/mjsunit/simd/conversions.js b/src/v8/test/mjsunit/simd/conversions.js new file mode 100644 index 0000000..39cf87d --- /dev/null +++ b/src/v8/test/mjsunit/simd/conversions.js @@ -0,0 +1,81 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Flags: --simd_object --allow-natives-syntax + +function testObject() { + var a = SIMD.float32x4.zero(); + var b = Object(a); + assertEquals(0, b.x); + assertEquals(0, b.y); + assertEquals(0, b.z); + assertEquals(0, b.w); + assertEquals(typeof(b), "object"); + assertEquals(typeof(b.valueOf()), "object"); + assertEquals(Object.prototype.toString.call(b), "[object Object]"); +} + +testObject(); +testObject(); +%OptimizeFunctionOnNextCall(testObject); +testObject(); + + +function testNumber() { + var a = SIMD.float32x4.zero(); + var b = Number(a); + assertEquals(NaN, b); +} + +testNumber(); +testNumber(); +%OptimizeFunctionOnNextCall(testNumber); +testNumber(); + + +function testString() { + var a = SIMD.float32x4.zero(); + var b = String(a); + assertEquals("float32x4(0,0,0,0)", b); +} + +testString(); +testString(); +%OptimizeFunctionOnNextCall(testString); +testString(); + + +function testBoolean() { + var a = SIMD.float32x4.zero(); + var b = Boolean(a); + assertEquals(true, b); +} + +testBoolean(); +testBoolean(); +%OptimizeFunctionOnNextCall(testBoolean); +testBoolean(); diff --git a/src/v8/test/mjsunit/simd/deopt.js b/src/v8/test/mjsunit/simd/deopt.js new file mode 100644 index 0000000..b7128ea --- /dev/null +++ b/src/v8/test/mjsunit/simd/deopt.js @@ -0,0 +1,78 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --simd_object --allow-natives-syntax + +function testdeopt(a, b) { + var a4 = SIMD.float32x4(1.0, -2.0, 3.0, -4.0); + var b4 = SIMD.float32x4.abs(a4); + + if (a > 0) { + a = 0; + } else { + a += b; //deopt + } + + assertEquals(1.0, b4.x); + assertEquals(2.0, b4.y); + assertEquals(3.0, b4.z); + assertEquals(4.0, b4.w); +} + +testdeopt(1, 1); +testdeopt(1, 1); +%OptimizeFunctionOnNextCall(testdeopt); +testdeopt(0, 1); + +function testdeopt2() { + var a4 = SIMD.float32x4(1.0, -1.0, 1.0, -1.0); + var b4 = SIMD.float32x4.abs(a4); + + assertEquals(1.0, b4.x); + assertEquals(1.0, b4.y); + assertEquals(1.0, b4.z); + assertEquals(1.0, b4.w); + + var new_a4 = new SIMD.float32x4(1.0, -1.0, 1.0, -1.0); + var new_b4 = SIMD.float32x4.abs(new_a4); + + assertEquals(1.0, new_b4.x); + assertEquals(1.0, new_b4.y); + assertEquals(1.0, new_b4.z); + assertEquals(1.0, new_b4.w); + + // Verifying deoptimization + assertEquals(1.0, b4.x); + assertEquals(1.0, b4.y); + assertEquals(1.0, b4.z); + assertEquals(1.0, b4.w); +} + +testdeopt2(); 
+testdeopt2(); +%OptimizeFunctionOnNextCall(testdeopt2); +testdeopt2(); diff --git a/src/v8/test/mjsunit/simd/float32x4.js b/src/v8/test/mjsunit/simd/float32x4.js new file mode 100644 index 0000000..58d79a0 --- /dev/null +++ b/src/v8/test/mjsunit/simd/float32x4.js @@ -0,0 +1,938 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Flags: --simd_object --allow-natives-syntax + +function testConstructor() { + var f4 = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + assertEquals(1.0, f4.x); + assertEquals(2.0, f4.y); + assertEquals(3.0, f4.z); + assertEquals(4.0, f4.w); + + f4 = SIMD.float32x4(1.1, 2.2, 3.3, 4.4); + assertEquals(1.100000023841858, f4.x); + assertEquals(2.200000047683716, f4.y); + assertEquals(3.299999952316284, f4.z); + assertEquals(4.400000095367432, f4.w); +} + +testConstructor(); +testConstructor(); +%OptimizeFunctionOnNextCall(testConstructor); +testConstructor(); + +function test1ArgumentConstructor() { + var f4 = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var f4_new = SIMD.float32x4(f4); + assertEquals(f4_new.x, f4.x); + assertEquals(f4_new.y, f4.y); + assertEquals(f4_new.z, f4.z); + assertEquals(f4_new.w, f4.w); + + f4 = SIMD.float32x4(1.1, 2.2, 3.3, 4.4); + f4_new = SIMD.float32x4(f4); + assertEquals(f4_new.x, f4.x); + assertEquals(f4_new.y, f4.y); + assertEquals(f4_new.z, f4.z); + assertEquals(f4_new.w, f4.w); +} + +test1ArgumentConstructor(); +test1ArgumentConstructor(); +%OptimizeFunctionOnNextCall(test1ArgumentConstructor); +test1ArgumentConstructor(); + +function testZeroConstructor() { + var z4 = SIMD.float32x4.zero(); + assertEquals(0.0, z4.x); + assertEquals(0.0, z4.y); + assertEquals(0.0, z4.z); + assertEquals(0.0, z4.w); +} + +testZeroConstructor(); +testZeroConstructor(); +%OptimizeFunctionOnNextCall(testZeroConstructor); +testZeroConstructor(); + +function testSplatConstructor() { + var z4 = SIMD.float32x4.splat(5.0); + assertEquals(5.0, z4.x); + assertEquals(5.0, z4.y); + assertEquals(5.0, z4.z); + assertEquals(5.0, z4.w); +} + +testSplatConstructor(); +testSplatConstructor(); +%OptimizeFunctionOnNextCall(testSplatConstructor); +testSplatConstructor(); + +function testTypeof() { + var z4 = SIMD.float32x4.zero(); + assertEquals(typeof(z4), "object"); + + var new_z4 = new SIMD.float32x4(0, 0, 0, 0); + assertEquals(typeof(new_z4), "object"); + 
assertEquals(typeof(new_z4.valueOf()), "object"); + assertEquals(Object.prototype.toString.call(new_z4), "[object Object]"); +} + +testTypeof(); + +function testSignMaskGetter() { + var a = SIMD.float32x4(-1.0, -2.0, -3.0, -4.0); + assertEquals(0xf, a.signMask); + var b = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + assertEquals(0x0, b.signMask); + var c = SIMD.float32x4(1.0, -2.0, -3.0, 4.0); + assertEquals(0x6, c.signMask); +} + +testSignMaskGetter(); +testSignMaskGetter(); +%OptimizeFunctionOnNextCall(testSignMaskGetter); +testSignMaskGetter(); + +function testSIMDAbs() { + var a4 = SIMD.float32x4(1.0, -1.0, 1.0, -1.0); + var b4 = SIMD.float32x4.abs(a4); + + assertEquals(1.0, b4.x); + assertEquals(1.0, b4.y); + assertEquals(1.0, b4.z); + assertEquals(1.0, b4.w); +} + +testSIMDAbs(); +testSIMDAbs(); +%OptimizeFunctionOnNextCall(testSIMDAbs); +testSIMDAbs(); + +function testSIMDNeg() { + var a4 = SIMD.float32x4(1.0, -1.0, 1.0, -1.0); + var b4 = SIMD.float32x4.neg(a4); + + assertEquals(-1.0, b4.x); + assertEquals(1.0, b4.y); + assertEquals(-1.0, b4.z); + assertEquals(1.0, b4.w); +} + +testSIMDNeg(); +testSIMDNeg(); +%OptimizeFunctionOnNextCall(testSIMDNeg); +testSIMDNeg(); + +function testSIMDAdd() { + var a4 = SIMD.float32x4(1.0, 1.0, 1.0, 1.0); + var b4 = SIMD.float32x4(2.0, 2.0, 2.0, 2.0); + var c4 = SIMD.float32x4.add(a4, b4); + + assertEquals(3.0, c4.x); + assertEquals(3.0, c4.y); + assertEquals(3.0, c4.z); + assertEquals(3.0, c4.w); +} + +testSIMDAdd(); +testSIMDAdd(); +%OptimizeFunctionOnNextCall(testSIMDAdd); +testSIMDAdd(); + +function testSIMDSub() { + var a4 = SIMD.float32x4(1.0, 1.0, 1.0, 1.0); + var b4 = SIMD.float32x4(2.0, 2.0, 2.0, 2.0); + var c4 = SIMD.float32x4.sub(a4, b4); + + assertEquals(-1.0, c4.x); + assertEquals(-1.0, c4.y); + assertEquals(-1.0, c4.z); + assertEquals(-1.0, c4.w); +} + +testSIMDSub(); +testSIMDSub(); +%OptimizeFunctionOnNextCall(testSIMDSub); +testSIMDSub(); + +function testSIMDMul() { + var a4 = SIMD.float32x4(1.0, 1.0, 1.0, 1.0); + 
var b4 = SIMD.float32x4(2.0, 2.0, 2.0, 2.0); + var c4 = SIMD.float32x4.mul(a4, b4); + + assertEquals(2.0, c4.x); + assertEquals(2.0, c4.y); + assertEquals(2.0, c4.z); + assertEquals(2.0, c4.w); +} + +testSIMDMul(); +testSIMDMul(); +%OptimizeFunctionOnNextCall(testSIMDMul); +testSIMDMul(); + +function testSIMDDiv() { + var a4 = SIMD.float32x4(1.0, 1.0, 1.0, 1.0); + var b4 = SIMD.float32x4(2.0, 2.0, 2.0, 2.0); + var c4 = SIMD.float32x4.div(a4, b4); + + assertEquals(0.5, c4.x); + assertEquals(0.5, c4.y); + assertEquals(0.5, c4.z); + assertEquals(0.5, c4.w); +} + +testSIMDDiv(); +testSIMDDiv(); +%OptimizeFunctionOnNextCall(testSIMDDiv); +testSIMDDiv(); + +function testSIMDClamp() { + var m = SIMD.float32x4(1.0, -2.0, 3.0, -4.0); + var lo = SIMD.float32x4(0.0, 0.0, 0.0, 0.0); + var hi = SIMD.float32x4(2.0, 2.0, 2.0, 2.0); + m = SIMD.float32x4.clamp(m, lo, hi); + assertEquals(1.0, m.x); + assertEquals(0.0, m.y); + assertEquals(2.0, m.z); + assertEquals(0.0, m.w); +} + +testSIMDClamp(); +testSIMDClamp(); +%OptimizeFunctionOnNextCall(testSIMDClamp); +testSIMDClamp(); + +function testSIMDMin() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var n = SIMD.float32x4(1.0, 0.0, 2.5, 5.0); + m = SIMD.float32x4.min(m, n); + assertEquals(1.0, m.x); + assertEquals(0.0, m.y); + assertEquals(2.5, m.z); + assertEquals(4.0, m.w); +} + +testSIMDMin(); +testSIMDMin(); +%OptimizeFunctionOnNextCall(testSIMDMin); +testSIMDMin(); + +function testSIMDMax() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var n = SIMD.float32x4(1.0, 0.0, 2.5, 5.0); + m = SIMD.float32x4.max(m, n); + assertEquals(1.0, m.x); + assertEquals(2.0, m.y); + assertEquals(3.0, m.z); + assertEquals(5.0, m.w); +} + +testSIMDMax(); +testSIMDMax(); +%OptimizeFunctionOnNextCall(testSIMDMax); +testSIMDMax(); + +function testSIMDReciprocal() { + var m = SIMD.float32x4(1.0, 4.0, 9.0, 16.0); + m = SIMD.float32x4.reciprocal(m); + assertTrue(Math.abs(1.0 - m.x) <= 0.001); + assertTrue(Math.abs(0.25 - m.y) <= 0.001); + 
assertTrue(Math.abs(0.1111111 - m.z) <= 0.001); + assertTrue(Math.abs(0.0625 - m.w) <= 0.001); +} + +testSIMDReciprocal(); +testSIMDReciprocal(); +%OptimizeFunctionOnNextCall(testSIMDReciprocal); +testSIMDReciprocal(); + +function testSIMDReciprocalSqrt() { + var m = SIMD.float32x4(1.0, 0.25, 0.111111, 0.0625); + m = SIMD.float32x4.reciprocalSqrt(m); + assertTrue(Math.abs(1.0 - m.x) <= 0.001); + assertTrue(Math.abs(2.0 - m.y) <= 0.001); + assertTrue(Math.abs(3.0 - m.z) <= 0.001); + assertTrue(Math.abs(4.0 - m.w) <= 0.001); +} + +testSIMDReciprocalSqrt(); +testSIMDReciprocalSqrt(); +%OptimizeFunctionOnNextCall(testSIMDReciprocalSqrt); +testSIMDReciprocalSqrt(); + +function testSIMDScale() { + var m = SIMD.float32x4(1.0, -2.0, 3.0, -4.0); + m = SIMD.float32x4.scale(m, 20.0); + assertEquals(20.0, m.x); + assertEquals(-40.0, m.y); + assertEquals(60.0, m.z); + assertEquals(-80.0, m.w); +} + +testSIMDScale(); +testSIMDScale(); +%OptimizeFunctionOnNextCall(testSIMDScale); +testSIMDScale(); + +function testSIMDSqrt() { + var m = SIMD.float32x4(1.0, 4.0, 9.0, 16.0); + m = SIMD.float32x4.sqrt(m); + assertEquals(1.0, m.x); + assertEquals(2.0, m.y); + assertEquals(3.0, m.z); + assertEquals(4.0, m.w); +} + +testSIMDSqrt(); +testSIMDSqrt(); +%OptimizeFunctionOnNextCall(testSIMDSqrt); +testSIMDSqrt(); + +function testSIMDShuffle() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var xxxx = SIMD.float32x4.shuffle(m, SIMD.XXXX); + assertEquals(1.0, xxxx.x); + assertEquals(1.0, xxxx.y); + assertEquals(1.0, xxxx.z); + assertEquals(1.0, xxxx.w); + var yyyy = SIMD.float32x4.shuffle(m, SIMD.YYYY); + assertEquals(2.0, yyyy.x); + assertEquals(2.0, yyyy.y); + assertEquals(2.0, yyyy.z); + assertEquals(2.0, yyyy.w); + var zzzz = SIMD.float32x4.shuffle(m, SIMD.ZZZZ); + assertEquals(3.0, zzzz.x); + assertEquals(3.0, zzzz.y); + assertEquals(3.0, zzzz.z); + assertEquals(3.0, zzzz.w); + var wwww = SIMD.float32x4.shuffle(m, SIMD.WWWW); + assertEquals(4.0, wwww.x); + assertEquals(4.0, wwww.y); + 
assertEquals(4.0, wwww.z); + assertEquals(4.0, wwww.w); + var wzyx = SIMD.float32x4.shuffle(m, SIMD.WZYX); + assertEquals(4.0, wzyx.x); + assertEquals(3.0, wzyx.y); + assertEquals(2.0, wzyx.z); + assertEquals(1.0, wzyx.w); + var wwzz = SIMD.float32x4.shuffle(m, SIMD.WWZZ); + assertEquals(4.0, wwzz.x); + assertEquals(4.0, wwzz.y); + assertEquals(3.0, wwzz.z); + assertEquals(3.0, wwzz.w); + var xxyy = SIMD.float32x4.shuffle(m, SIMD.XXYY); + assertEquals(1.0, xxyy.x); + assertEquals(1.0, xxyy.y); + assertEquals(2.0, xxyy.z); + assertEquals(2.0, xxyy.w); + var yyww = SIMD.float32x4.shuffle(m, SIMD.YYWW); + assertEquals(2.0, yyww.x); + assertEquals(2.0, yyww.y); + assertEquals(4.0, yyww.z); + assertEquals(4.0, yyww.w); +} + +testSIMDShuffle(); +testSIMDShuffle(); +%OptimizeFunctionOnNextCall(testSIMDShuffle); +testSIMDShuffle(); + +function testSIMDShuffleMix() { + var a = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var b = SIMD.float32x4(5.0, 6.0, 7.0, 8.0); + var xxxx = SIMD.float32x4.shuffleMix(a, b, SIMD.XXXX); + assertEquals(1.0, xxxx.x); + assertEquals(1.0, xxxx.y); + assertEquals(5.0, xxxx.z); + assertEquals(5.0, xxxx.w); + var yyyy = SIMD.float32x4.shuffleMix(a, b, SIMD.YYYY); + assertEquals(2.0, yyyy.x); + assertEquals(2.0, yyyy.y); + assertEquals(6.0, yyyy.z); + assertEquals(6.0, yyyy.w); + var zzzz = SIMD.float32x4.shuffleMix(a, b, SIMD.ZZZZ); + assertEquals(3.0, zzzz.x); + assertEquals(3.0, zzzz.y); + assertEquals(7.0, zzzz.z); + assertEquals(7.0, zzzz.w); + var wwww = SIMD.float32x4.shuffleMix(a, b, SIMD.WWWW); + assertEquals(4.0, wwww.x); + assertEquals(4.0, wwww.y); + assertEquals(8.0, wwww.z); + assertEquals(8.0, wwww.w); + var wzyx = SIMD.float32x4.shuffleMix(a, b, SIMD.WZYX); + assertEquals(4.0, wzyx.x); + assertEquals(3.0, wzyx.y); + assertEquals(6.0, wzyx.z); + assertEquals(5.0, wzyx.w); + var wwzz = SIMD.float32x4.shuffleMix(a, b, SIMD.WWZZ); + assertEquals(4.0, wwzz.x); + assertEquals(4.0, wwzz.y); + assertEquals(7.0, wwzz.z); + assertEquals(7.0, 
wwzz.w); + var xxyy = SIMD.float32x4.shuffleMix(a, b, SIMD.XXYY); + assertEquals(1.0, xxyy.x); + assertEquals(1.0, xxyy.y); + assertEquals(6.0, xxyy.z); + assertEquals(6.0, xxyy.w); + var yyww = SIMD.float32x4.shuffleMix(a, b, SIMD.YYWW); + assertEquals(2.0, yyww.x); + assertEquals(2.0, yyww.y); + assertEquals(8.0, yyww.z); + assertEquals(8.0, yyww.w); +} + +testSIMDShuffleMix(); +testSIMDShuffleMix(); +%OptimizeFunctionOnNextCall(testSIMDShuffleMix); +testSIMDShuffleMix(); + +function testSIMDSetters() { + var f = SIMD.float32x4.zero(); + assertEquals(0.0, f.x); + assertEquals(0.0, f.y); + assertEquals(0.0, f.z); + assertEquals(0.0, f.w); + f = SIMD.float32x4.withX(f, 4.0); + assertEquals(4.0, f.x); + f = SIMD.float32x4.withY(f, 3.0); + assertEquals(3.0, f.y); + f = SIMD.float32x4.withZ(f, 2.0); + assertEquals(2.0, f.z); + f = SIMD.float32x4.withW(f, 1.0); + assertEquals(1.0, f.w); + f = SIMD.float32x4.zero(); +} + +testSIMDSetters(); +testSIMDSetters(); +%OptimizeFunctionOnNextCall(testSIMDSetters); +testSIMDSetters(); + +function testSIMDConversion() { + var m = SIMD.int32x4(0x3F800000, 0x40000000, 0x40400000, 0x40800000); + var n = SIMD.float32x4.fromInt32x4Bits(m); + assertEquals(1.0, n.x); + assertEquals(2.0, n.y); + assertEquals(3.0, n.z); + assertEquals(4.0, n.w); + n = SIMD.float32x4(5.0, 6.0, 7.0, 8.0); + m = SIMD.int32x4.fromFloat32x4Bits(n); + assertEquals(0x40A00000, m.x); + assertEquals(0x40C00000, m.y); + assertEquals(0x40E00000, m.z); + assertEquals(0x41000000, m.w); + // Flip sign using bit-wise operators. + n = SIMD.float32x4(9.0, 10.0, 11.0, 12.0); + m = SIMD.int32x4(0x80000000, 0x80000000, 0x80000000, 0x80000000); + var nMask = SIMD.int32x4.fromFloat32x4Bits(n); + nMask = SIMD.int32x4.xor(nMask, m); // flip sign. 
+ n = SIMD.float32x4.fromInt32x4Bits(nMask); + assertEquals(-9.0, n.x); + assertEquals(-10.0, n.y); + assertEquals(-11.0, n.z); + assertEquals(-12.0, n.w); + nMask = SIMD.int32x4.fromFloat32x4Bits(n); + nMask = SIMD.int32x4.xor(nMask, m); // flip sign. + n = SIMD.float32x4.fromInt32x4Bits(nMask); + assertEquals(9.0, n.x); + assertEquals(10.0, n.y); + assertEquals(11.0, n.z); + assertEquals(12.0, n.w); +} + +testSIMDConversion(); +testSIMDConversion(); +%OptimizeFunctionOnNextCall(testSIMDConversion); +testSIMDConversion(); + +function testSIMDConversion2() { + var m = SIMD.int32x4(1, 2, 3, 4); + var n = SIMD.float32x4.fromInt32x4(m); + assertEquals(1.0, n.x); + assertEquals(2.0, n.y); + assertEquals(3.0, n.z); + assertEquals(4.0, n.w); + n = SIMD.float32x4(5.0, 6.0, 7.0, 8.0); + m = SIMD.int32x4.fromFloat32x4(n); + assertEquals(5, m.x); + assertEquals(6, m.y); + assertEquals(7, m.z); + assertEquals(8, m.w); +} + +testSIMDConversion2(); +testSIMDConversion2(); +%OptimizeFunctionOnNextCall(testSIMDConversion2); +testSIMDConversion2(); + + +function testSIMDComparisons() { + var m = SIMD.float32x4(1.0, 2.0, 0.1, 0.001); + var n = SIMD.float32x4(2.0, 2.0, 0.001, 0.1); + var cmp; + cmp = SIMD.float32x4.lessThan(m, n); + assertEquals(-1, cmp.x); + assertEquals(0x0, cmp.y); + assertEquals(0x0, cmp.z); + assertEquals(-1, cmp.w); + + cmp = SIMD.float32x4.lessThanOrEqual(m, n); + assertEquals(-1, cmp.x); + assertEquals(-1, cmp.y); + assertEquals(0x0, cmp.z); + assertEquals(-1, cmp.w); + + cmp = SIMD.float32x4.equal(m, n); + assertEquals(0x0, cmp.x); + assertEquals(-1, cmp.y); + assertEquals(0x0, cmp.z); + assertEquals(0x0, cmp.w); + + cmp = SIMD.float32x4.notEqual(m, n); + assertEquals(-1, cmp.x); + assertEquals(0x0, cmp.y); + assertEquals(-1, cmp.z); + assertEquals(-1, cmp.w); + + cmp = SIMD.float32x4.greaterThanOrEqual(m, n); + assertEquals(0x0, cmp.x); + assertEquals(-1, cmp.y); + assertEquals(-1, cmp.z); + assertEquals(0x0, cmp.w); + + cmp = SIMD.float32x4.greaterThan(m, 
n); + assertEquals(0x0, cmp.x); + assertEquals(0x0, cmp.y); + assertEquals(-1, cmp.z); + assertEquals(0x0, cmp.w); +} + +testSIMDComparisons(); +testSIMDComparisons(); +%OptimizeFunctionOnNextCall(testSIMDComparisons); +testSIMDComparisons(); + +function testSIMDAnd() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var n = SIMD.float32x4(~1.0, 2.0, 3.0, 4.0); + o = SIMD.float32x4.and(m,n); // and + assertEquals(0, o.x); + assertEquals(2, o.y); + assertEquals(3, o.z); + assertEquals(4, o.w); +} + +testSIMDAnd(); +testSIMDAnd(); +%OptimizeFunctionOnNextCall(testSIMDAnd); +testSIMDAnd(); + +function testSIMDOr() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var n = SIMD.float32x4(~1.0, 2.0, 3.0, 4.0); + var o = SIMD.float32x4.or(m,n); // or + assertEquals(-Infinity, o.x); + assertEquals(2.0, o.y); + assertEquals(3.0, o.z); + assertEquals(4.0, o.w); +} + +testSIMDOr(); +testSIMDOr(); +%OptimizeFunctionOnNextCall(testSIMDOr); +testSIMDOr(); + +function testSIMDXor() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var n = SIMD.float32x4(~1.0, 2.0, 3.0, 4.0); + var o = SIMD.float32x4.xor(m,n); // xor + assertEquals(-Infinity, o.x); + assertEquals(0x0, o.y); + assertEquals(0x0, o.z); + assertEquals(0x0, o.w); +} + +testSIMDXor(); +testSIMDXor(); +%OptimizeFunctionOnNextCall(testSIMDXor); +testSIMDXor(); + +function testSIMDNot() { + var m = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + m = SIMD.float32x4.not(m); + m = SIMD.float32x4.not(m); + assertEquals(1.0, m.x); + assertEquals(2.0, m.y); + assertEquals(3.0, m.z); + assertEquals(4.0, m.w); +} + +testSIMDNot(); +testSIMDNot(); +%OptimizeFunctionOnNextCall(testSIMDNot); +testSIMDNot(); + +function testSIMDSelect() { + var m = SIMD.int32x4.bool(true, true, false, false); + var t = SIMD.float32x4(1.0, 2.0, 3.0, 4.0); + var f = SIMD.float32x4(5.0, 6.0, 7.0, 8.0); + var s = SIMD.float32x4.select(m, t, f); + assertEquals(1.0, s.x); + assertEquals(2.0, s.y); + assertEquals(7.0, s.z); + assertEquals(8.0, s.w); +} + 
+testSIMDSelect(); +testSIMDSelect(); +%OptimizeFunctionOnNextCall(testSIMDSelect); +testSIMDSelect(); + + +function testFloat32x4ArrayBasic() { + var a = new Float32x4Array(1); + assertEquals(1, a.length); + assertEquals(16, a.byteLength); + assertEquals(16, a.BYTES_PER_ELEMENT); + assertEquals(16, Float32x4Array.BYTES_PER_ELEMENT); + assertEquals(0, a.byteOffset); + assertTrue(undefined != a.buffer); + var b = new Float32x4Array(4); + assertEquals(4, b.length); + assertEquals(64, b.byteLength); + assertEquals(16, b.BYTES_PER_ELEMENT); + assertEquals(16, Float32x4Array.BYTES_PER_ELEMENT); + assertEquals(0, b.byteOffset); + assertTrue(undefined != b.buffer); +} + +testFloat32x4ArrayBasic(); + +function testFloat32x4ArrayGetAndSet() { + var a = new Float32x4Array(4); + a[0] = SIMD.float32x4(1, 2, 3, 4); + a[1] = SIMD.float32x4(5, 6, 7, 8); + a[2] = SIMD.float32x4(9, 10, 11, 12); + a[3] = SIMD.float32x4(13, 14, 15, 16); + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + assertEquals(a[0].z, 3); + assertEquals(a[0].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 7); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + assertEquals(a[3].z, 15); + assertEquals(a[3].w, 16); + + var b = new Float32x4Array(4); + b.setAt(0,SIMD.float32x4(1, 2, 3, 4)); + b.setAt(1,SIMD.float32x4(5, 6, 7, 8)); + b.setAt(2,SIMD.float32x4(9, 10, 11, 12)); + b.setAt(3,SIMD.float32x4(13, 14, 15, 16)); + + assertEquals(b.getAt(0).x, 1); + assertEquals(b.getAt(0).y, 2); + assertEquals(b.getAt(0).z, 3); + assertEquals(b.getAt(0).w, 4); + + assertEquals(b.getAt(1).x, 5); + assertEquals(b.getAt(1).y, 6); + assertEquals(b.getAt(1).z, 7); + assertEquals(b.getAt(1).w, 8); + + assertEquals(b.getAt(2).x, 9); + assertEquals(b.getAt(2).y, 10); + assertEquals(b.getAt(2).z, 11); + assertEquals(b.getAt(2).w, 12); + + 
assertEquals(b.getAt(3).x, 13); + assertEquals(b.getAt(3).y, 14); + assertEquals(b.getAt(3).z, 15); + assertEquals(b.getAt(3).w, 16); +} + +testFloat32x4ArrayGetAndSet(); +testFloat32x4ArrayGetAndSet(); +%OptimizeFunctionOnNextCall(testFloat32x4ArrayGetAndSet); +testFloat32x4ArrayGetAndSet(); + +function testFloat32x4ArraySwap() { + var a = new Float32x4Array(4); + a[0] = SIMD.float32x4(1, 2, 3, 4); + a[1] = SIMD.float32x4(5, 6, 7, 8); + a[2] = SIMD.float32x4(9, 10, 11, 12); + a[3] = SIMD.float32x4(13, 14, 15, 16); + + // Swap element 0 and element 3 + var t = a[0]; + a[0] = a[3]; + a[3] = t; + + assertEquals(a[3].x, 1); + assertEquals(a[3].y, 2); + assertEquals(a[3].z, 3); + assertEquals(a[3].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 7); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[0].x, 13); + assertEquals(a[0].y, 14); + assertEquals(a[0].z, 15); + assertEquals(a[0].w, 16); +} + +testFloat32x4ArraySwap(); + +function testFloat32x4ArrayCopy() { + var a = new Float32x4Array(4); + a[0] = SIMD.float32x4(1, 2, 3, 4); + a[1] = SIMD.float32x4(5, 6, 7, 8); + a[2] = SIMD.float32x4(9, 10, 11, 12); + a[3] = SIMD.float32x4(13, 14, 15, 16); + var b = new Float32x4Array(a); + assertEquals(a[0].x, b[0].x); + assertEquals(a[0].y, b[0].y); + assertEquals(a[0].z, b[0].z); + assertEquals(a[0].w, b[0].w); + + assertEquals(a[1].x, b[1].x); + assertEquals(a[1].y, b[1].y); + assertEquals(a[1].z, b[1].z); + assertEquals(a[1].w, b[1].w); + + assertEquals(a[2].x, b[2].x); + assertEquals(a[2].y, b[2].y); + assertEquals(a[2].z, b[2].z); + assertEquals(a[2].w, b[2].w); + + assertEquals(a[3].x, b[3].x); + assertEquals(a[3].y, b[3].y); + assertEquals(a[3].z, b[3].z); + assertEquals(a[3].w, b[3].w); + + a[2] = SIMD.float32x4(17, 18, 19, 20); + + assertEquals(a[2].x, 17); + assertEquals(a[2].y, 18); + assertEquals(a[2].z, 19); + 
assertEquals(a[2].w, 20); + + assertTrue(a[2].x != b[2].x); + assertTrue(a[2].y != b[2].y); + assertTrue(a[2].z != b[2].z); + assertTrue(a[2].w != b[2].w); +} + +testFloat32x4ArrayCopy(); + +function testFloat32x4ArrayViewBasic() { + var a = new Float32Array(8); + // view with no offset. + var b = new Float32x4Array(a.buffer, 0); + // view with offset. + var c = new Float32x4Array(a.buffer, 16); + // view with no offset but shorter than original list. + var d = new Float32x4Array(a.buffer, 0, 1); + assertEquals(a.length, 8); + assertEquals(b.length, 2); + assertEquals(c.length, 1); + assertEquals(d.length, 1); + assertEquals(a.byteLength, 32); + assertEquals(b.byteLength, 32); + assertEquals(c.byteLength, 16); + assertEquals(d.byteLength, 16) + assertEquals(a.byteOffset, 0); + assertEquals(b.byteOffset, 0); + assertEquals(c.byteOffset, 16); + assertEquals(d.byteOffset, 0); +} + +testFloat32x4ArrayViewBasic(); + +function testFloat32x4ArrayViewValues() { + var a = new Float32Array(8); + var b = new Float32x4Array(a.buffer, 0); + var c = new Float32x4Array(a.buffer, 16); + var d = new Float32x4Array(a.buffer, 0, 1); + var start = 100; + for (var i = 0; i < b.length; i++) { + assertEquals(0.0, b[i].x); + assertEquals(0.0, b[i].y); + assertEquals(0.0, b[i].z); + assertEquals(0.0, b[i].w); + } + for (var i = 0; i < c.length; i++) { + assertEquals(0.0, c[i].x); + assertEquals(0.0, c[i].y); + assertEquals(0.0, c[i].z); + assertEquals(0.0, c[i].w); + } + for (var i = 0; i < d.length; i++) { + assertEquals(0.0, d[i].x); + assertEquals(0.0, d[i].y); + assertEquals(0.0, d[i].z); + assertEquals(0.0, d[i].w); + } + for (var i = 0; i < a.length; i++) { + a[i] = i+start; + } + for (var i = 0; i < b.length; i++) { + assertTrue(0.0 != b[i].x); + assertTrue(0.0 != b[i].y); + assertTrue(0.0 != b[i].z); + assertTrue(0.0 != b[i].w); + } + for (var i = 0; i < c.length; i++) { + assertTrue(0.0 != c[i].x); + assertTrue(0.0 != c[i].y); + assertTrue(0.0 != c[i].z); + assertTrue(0.0 != 
c[i].w); + } + for (var i = 0; i < d.length; i++) { + assertTrue(0.0 != d[i].x); + assertTrue(0.0 != d[i].y); + assertTrue(0.0 != d[i].z); + assertTrue(0.0 != d[i].w); + } + assertEquals(start+0, b[0].x); + assertEquals(start+1, b[0].y); + assertEquals(start+2, b[0].z); + assertEquals(start+3, b[0].w); + assertEquals(start+4, b[1].x); + assertEquals(start+5, b[1].y); + assertEquals(start+6, b[1].z); + assertEquals(start+7, b[1].w); + + assertEquals(start+4, c[0].x); + assertEquals(start+5, c[0].y); + assertEquals(start+6, c[0].z); + assertEquals(start+7, c[0].w); + + assertEquals(start+0, d[0].x); + assertEquals(start+1, d[0].y); + assertEquals(start+2, d[0].z); + assertEquals(start+3, d[0].w); +} + +testFloat32x4ArrayViewValues(); + +function testViewOnFloat32x4Array() { + var a = new Float32x4Array(4); + a[0] = SIMD.float32x4(1, 2, 3, 4); + a[1] = SIMD.float32x4(5, 6, 7, 8); + a[2] = SIMD.float32x4(9, 10, 11, 12); + a[3] = SIMD.float32x4(13, 14, 15, 16); + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + assertEquals(a[0].z, 3); + assertEquals(a[0].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 7); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + assertEquals(a[3].z, 15); + assertEquals(a[3].w, 16); + + // Create view on a. 
+ var b = new Float32Array(a.buffer); + assertEquals(b.length, 16); + assertEquals(b.byteLength, 64); + b[2] = 99.0; + b[6] = 1.0; + + // Observe changes in "a" + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + assertEquals(a[0].z, 99); + assertEquals(a[0].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 1); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + assertEquals(a[3].z, 15); + assertEquals(a[3].w, 16); +} + +testViewOnFloat32x4Array(); + +function testArrayOfFloat32x4() { + var a = []; + var a4 = new Float32x4Array(2); + for (var i = 0; i < a4.length; i++) { + a[i] = SIMD.float32x4(i, i + 1, i + 2, i + 3); + a4[i] = SIMD.float32x4(i, i + 1, i + 2, i + 3); + } + + for (var i = 0; i < a4.length; i++) { + assertEquals(a[i].x, a4[i].x); + assertEquals(a[i].y, a4[i].y); + assertEquals(a[i].z, a4[i].z); + assertEquals(a[i].w, a4[i].w); + } +} + +testArrayOfFloat32x4(); diff --git a/src/v8/test/mjsunit/simd/float64x2.js b/src/v8/test/mjsunit/simd/float64x2.js new file mode 100644 index 0000000..ec2d7c8 --- /dev/null +++ b/src/v8/test/mjsunit/simd/float64x2.js @@ -0,0 +1,520 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --simd_object --allow-natives-syntax + +function testConstructor() { + var f4 = SIMD.float64x2(1.0, 2.0); + assertEquals(1.0, f4.x); + assertEquals(2.0, f4.y); + + f4 = SIMD.float64x2(1.1, 2.2); + assertEquals(1.1, f4.x); + assertEquals(2.2, f4.y); +} + +testConstructor(); +testConstructor(); +%OptimizeFunctionOnNextCall(testConstructor); +testConstructor(); + +function test1ArgumentConstructor() { + var f2 = SIMD.float64x2(1.0, 2.0); + var f2_new = SIMD.float64x2(f2); + assertEquals(f2_new.x, f2.x); + assertEquals(f2_new.y, f2.y); + + f2 = SIMD.float64x2(1.1, 2.2); + f2_new = SIMD.float64x2(f2); + assertEquals(f2_new.x, f2.x); + assertEquals(f2_new.y, f2.y); +} + +test1ArgumentConstructor(); +test1ArgumentConstructor(); +%OptimizeFunctionOnNextCall(test1ArgumentConstructor); +test1ArgumentConstructor(); + +function testZeroConstructor() { + var z4 = SIMD.float64x2.zero(); + assertEquals(0.0, z4.x); + assertEquals(0.0, z4.y); +} + +testZeroConstructor(); +testZeroConstructor(); 
+%OptimizeFunctionOnNextCall(testZeroConstructor); +testZeroConstructor(); + +function testSplatConstructor() { + var z4 = SIMD.float64x2.splat(5.0); + assertEquals(5.0, z4.x); + assertEquals(5.0, z4.y); +} + +testSplatConstructor(); +testSplatConstructor(); +%OptimizeFunctionOnNextCall(testSplatConstructor); +testSplatConstructor(); + +function testTypeof() { + var z4 = SIMD.float64x2.zero(); + assertEquals(typeof(z4), "object"); + + var new_z4 = new SIMD.float64x2(0, 0); + assertEquals(typeof(new_z4), "object"); + assertEquals(typeof(new_z4.valueOf()), "object"); + assertEquals(Object.prototype.toString.call(new_z4), "[object Object]"); +} + +testTypeof(); + +function testSignMaskGetter() { + var a = SIMD.float64x2(-1.0, -2.0); + assertEquals(0x3, a.signMask); + var b = SIMD.float64x2(1.0, 2.0); + assertEquals(0x0, b.signMask); + var c = SIMD.float64x2(1.0, -2.0); + assertEquals(0x2, c.signMask); +} + +testSignMaskGetter(); +testSignMaskGetter(); +%OptimizeFunctionOnNextCall(testSignMaskGetter); +testSignMaskGetter(); + +function testSIMDAbs() { + var a4 = SIMD.float64x2(1.0, -1.0); + var b4 = SIMD.float64x2.abs(a4); + + assertEquals(1.0, b4.x); + assertEquals(1.0, b4.y); +} + +testSIMDAbs(); +testSIMDAbs(); +%OptimizeFunctionOnNextCall(testSIMDAbs); +testSIMDAbs(); + +function testSIMDNeg() { + var a4 = SIMD.float64x2(1.0, -1.0); + var b4 = SIMD.float64x2.neg(a4); + + assertEquals(-1.0, b4.x); + assertEquals(1.0, b4.y); +} + +testSIMDNeg(); +testSIMDNeg(); +%OptimizeFunctionOnNextCall(testSIMDNeg); +testSIMDNeg(); + +function testSIMDAdd() { + var a4 = SIMD.float64x2(1.0, 1.0); + var b4 = SIMD.float64x2(2.0, 2.0); + var c4 = SIMD.float64x2.add(a4, b4); + + assertEquals(3.0, c4.x); + assertEquals(3.0, c4.y); +} + +testSIMDAdd(); +testSIMDAdd(); +%OptimizeFunctionOnNextCall(testSIMDAdd); +testSIMDAdd(); + +function testSIMDSub() { + var a4 = SIMD.float64x2(1.0, 1.0); + var b4 = SIMD.float64x2(2.0, 2.0); + var c4 = SIMD.float64x2.sub(a4, b4); + + assertEquals(-1.0, 
c4.x); + assertEquals(-1.0, c4.y); +} + +testSIMDSub(); +testSIMDSub(); +%OptimizeFunctionOnNextCall(testSIMDSub); +testSIMDSub(); + +function testSIMDMul() { + var a4 = SIMD.float64x2(1.0, 1.0); + var b4 = SIMD.float64x2(2.0, 2.0); + var c4 = SIMD.float64x2.mul(a4, b4); + + assertEquals(2.0, c4.x); + assertEquals(2.0, c4.y); +} + +testSIMDMul(); +testSIMDMul(); +%OptimizeFunctionOnNextCall(testSIMDMul); +testSIMDMul(); + +function testSIMDDiv() { + var a4 = SIMD.float64x2(1.0, 1.0); + var b4 = SIMD.float64x2(2.0, 2.0); + var c4 = SIMD.float64x2.div(a4, b4); + + assertEquals(0.5, c4.x); + assertEquals(0.5, c4.y); +} + +testSIMDDiv(); +testSIMDDiv(); +%OptimizeFunctionOnNextCall(testSIMDDiv); +testSIMDDiv(); + +function testSIMDClamp() { + var m = SIMD.float64x2(1.0, -2.0); + var lo = SIMD.float64x2(0.0, 0.0); + var hi = SIMD.float64x2(2.0, 2.0); + m = SIMD.float64x2.clamp(m, lo, hi); + assertEquals(1.0, m.x); + assertEquals(0.0, m.y); +} + +testSIMDClamp(); +testSIMDClamp(); +%OptimizeFunctionOnNextCall(testSIMDClamp); +testSIMDClamp(); + +function testSIMDMin() { + var m = SIMD.float64x2(1.0, 2.0); + var n = SIMD.float64x2(1.0, 0.0); + m = SIMD.float64x2.min(m, n); + assertEquals(1.0, m.x); + assertEquals(0.0, m.y); +} + +testSIMDMin(); +testSIMDMin(); +%OptimizeFunctionOnNextCall(testSIMDMin); +testSIMDMin(); + +function testSIMDMax() { + var m = SIMD.float64x2(1.0, 2.0); + var n = SIMD.float64x2(1.0, 0.0); + m = SIMD.float64x2.max(m, n); + assertEquals(1.0, m.x); + assertEquals(2.0, m.y); +} + +testSIMDMax(); +testSIMDMax(); +%OptimizeFunctionOnNextCall(testSIMDMax); +testSIMDMax(); + +function testSIMDScale() { + var m = SIMD.float64x2(1.0, -2.0); + m = SIMD.float64x2.scale(m, 20.0); + assertEquals(20.0, m.x); + assertEquals(-40.0, m.y); +} + +testSIMDScale(); +testSIMDScale(); +%OptimizeFunctionOnNextCall(testSIMDScale); +testSIMDScale(); + +function testSIMDSqrt() { + var m = SIMD.float64x2(1.0, 4.0); + m = SIMD.float64x2.sqrt(m); + assertEquals(1.0, m.x); + 
assertEquals(2.0, m.y); +} + +testSIMDSqrt(); +testSIMDSqrt(); +%OptimizeFunctionOnNextCall(testSIMDSqrt); +testSIMDSqrt(); + +function testSIMDSetters() { + var f = SIMD.float64x2.zero(); + assertEquals(0.0, f.x); + assertEquals(0.0, f.y); + f = SIMD.float64x2.withX(f, 4.0); + assertEquals(4.0, f.x); + f = SIMD.float64x2.withY(f, 3.0); + assertEquals(3.0, f.y); +} + +testSIMDSetters(); +testSIMDSetters(); +%OptimizeFunctionOnNextCall(testSIMDSetters); +testSIMDSetters(); + +function testFloat64x2ArrayBasic() { + var a = new Float64x2Array(1); + assertEquals(1, a.length); + assertEquals(16, a.byteLength); + assertEquals(16, a.BYTES_PER_ELEMENT); + assertEquals(16, Float64x2Array.BYTES_PER_ELEMENT); + assertEquals(0, a.byteOffset); + assertTrue(undefined != a.buffer); + var b = new Float64x2Array(4); + assertEquals(4, b.length); + assertEquals(64, b.byteLength); + assertEquals(16, b.BYTES_PER_ELEMENT); + assertEquals(16, Float64x2Array.BYTES_PER_ELEMENT); + assertEquals(0, b.byteOffset); + assertTrue(undefined != b.buffer); +} + +testFloat64x2ArrayBasic(); + +function testFloat64x2ArrayGetAndSet() { + var a = new Float64x2Array(4); + a[0] = SIMD.float64x2(1, 2); + a[1] = SIMD.float64x2(5, 6); + a[2] = SIMD.float64x2(9, 10); + a[3] = SIMD.float64x2(13, 14); + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + + var b = new Float64x2Array(4); + b.setAt(0,SIMD.float64x2(1, 2)); + b.setAt(1,SIMD.float64x2(5, 6)); + b.setAt(2,SIMD.float64x2(9, 10)); + b.setAt(3,SIMD.float64x2(13, 14)); + + assertEquals(b.getAt(0).x, 1); + assertEquals(b.getAt(0).y, 2); + + assertEquals(b.getAt(1).x, 5); + assertEquals(b.getAt(1).y, 6); + + assertEquals(b.getAt(2).x, 9); + assertEquals(b.getAt(2).y, 10); + + assertEquals(b.getAt(3).x, 13); + assertEquals(b.getAt(3).y, 14); +} + +testFloat64x2ArrayGetAndSet(); + 
+function testFloat64x2ArraySwap() { + var a = new Float64x2Array(4); + a[0] = SIMD.float64x2(1, 2); + a[1] = SIMD.float64x2(5, 6); + a[2] = SIMD.float64x2(9, 10); + a[3] = SIMD.float64x2(13, 14); + + // Swap element 0 and element 3 + var t = a[0]; + a[0] = a[3]; + a[3] = t; + + assertEquals(a[3].x, 1); + assertEquals(a[3].y, 2); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + + assertEquals(a[0].x, 13); + assertEquals(a[0].y, 14); +} + +testFloat64x2ArraySwap(); + +function testFloat64x2ArrayCopy() { + var a = new Float64x2Array(4); + a[0] = SIMD.float64x2(1, 2); + a[1] = SIMD.float64x2(5, 6); + a[2] = SIMD.float64x2(9, 10); + a[3] = SIMD.float64x2(13, 14); + var b = new Float64x2Array(a); + assertEquals(a[0].x, b[0].x); + assertEquals(a[0].y, b[0].y); + + assertEquals(a[1].x, b[1].x); + assertEquals(a[1].y, b[1].y); + + assertEquals(a[2].x, b[2].x); + assertEquals(a[2].y, b[2].y); + + assertEquals(a[3].x, b[3].x); + assertEquals(a[3].y, b[3].y); + + a[2] = SIMD.float64x2(17, 18); + + assertEquals(a[2].x, 17); + assertEquals(a[2].y, 18); + + assertTrue(a[2].x != b[2].x); + assertTrue(a[2].y != b[2].y); +} + +testFloat64x2ArrayCopy(); + +function testFloat64x2ArrayViewBasic() { + var a = new Float64Array(8); + // view with no offset. + var b = new Float64x2Array(a.buffer, 0); + // view with offset. + var c = new Float64x2Array(a.buffer, 16); + // view with no offset but shorter than original list. 
+ var d = new Float64x2Array(a.buffer, 0, 1); + assertEquals(a.length, 8); + assertEquals(b.length, 4); + assertEquals(c.length, 3); + assertEquals(d.length, 1); + assertEquals(a.byteLength, 64); + assertEquals(b.byteLength, 64); + assertEquals(c.byteLength, 48); + assertEquals(d.byteLength, 16) + assertEquals(a.byteOffset, 0); + assertEquals(b.byteOffset, 0); + assertEquals(c.byteOffset, 16); + assertEquals(d.byteOffset, 0); +} + +testFloat64x2ArrayViewBasic(); + +function testFloat64x2ArrayViewValues() { + var a = new Float64Array(8); + var b = new Float64x2Array(a.buffer, 0); + var c = new Float64x2Array(a.buffer, 16); + var d = new Float64x2Array(a.buffer, 0, 1); + var start = 100; + for (var i = 0; i < b.length; i++) { + assertEquals(0.0, b[i].x); + assertEquals(0.0, b[i].y); + } + for (var i = 0; i < c.length; i++) { + assertEquals(0.0, c[i].x); + assertEquals(0.0, c[i].y); + } + for (var i = 0; i < d.length; i++) { + assertEquals(0.0, d[i].x); + assertEquals(0.0, d[i].y); + } + for (var i = 0; i < a.length; i++) { + a[i] = i+start; + } + for (var i = 0; i < b.length; i++) { + assertTrue(0.0 != b[i].x); + assertTrue(0.0 != b[i].y); + } + for (var i = 0; i < c.length; i++) { + assertTrue(0.0 != c[i].x); + assertTrue(0.0 != c[i].y); + } + for (var i = 0; i < d.length; i++) { + assertTrue(0.0 != d[i].x); + assertTrue(0.0 != d[i].y); + } + assertEquals(start+0, b[0].x); + assertEquals(start+1, b[0].y); + assertEquals(start+2, b[1].x); + assertEquals(start+3, b[1].y); + assertEquals(start+4, b[2].x); + assertEquals(start+5, b[2].y); + assertEquals(start+6, b[3].x); + assertEquals(start+7, b[3].y); + + assertEquals(start+2, c[0].x); + assertEquals(start+3, c[0].y); + assertEquals(start+4, c[1].x); + assertEquals(start+5, c[1].y); + assertEquals(start+6, c[2].x); + assertEquals(start+7, c[2].y); + + assertEquals(start+0, d[0].x); + assertEquals(start+1, d[0].y); +} + +testFloat64x2ArrayViewValues(); + +function testViewOnFloat64x2Array() { + var a = new 
Float64x2Array(4); + a[0] = SIMD.float64x2(1, 2); + a[1] = SIMD.float64x2(5, 6); + a[2] = SIMD.float64x2(9, 10); + a[3] = SIMD.float64x2(13, 14); + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + + // Create view on a. + var b = new Float64Array(a.buffer); + assertEquals(b.length, 8); + assertEquals(b.byteLength, 64); + b[2] = 99.0; + b[6] = 1.0; + + // Observe changes in "a" + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + + assertEquals(a[1].x, 99.0); + assertEquals(a[1].y, 6); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + + assertEquals(a[3].x, 1.0); + assertEquals(a[3].y, 14); +} + +testViewOnFloat64x2Array(); + +function testArrayOfFloat64x2() { + var a = []; + var a4 = new Float64x2Array(2); + for (var i = 0; i < a4.length; i++) { + a[i] = SIMD.float64x2(i, i + 1); + a4[i] = SIMD.float64x2(i, i + 1); + } + + for (var i = 0; i < a4.length; i++) { + assertEquals(a[i].x, a4[i].x); + assertEquals(a[i].y, a4[i].y); + } +} + +testArrayOfFloat64x2(); diff --git a/src/v8/test/mjsunit/simd/int32x4.js b/src/v8/test/mjsunit/simd/int32x4.js new file mode 100644 index 0000000..4f0ac5e --- /dev/null +++ b/src/v8/test/mjsunit/simd/int32x4.js @@ -0,0 +1,946 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --simd_object --allow-natives-syntax + +function testConstructor() { + var u4 = SIMD.int32x4(1, 2, 3, 4); + assertEquals(1, u4.x); + assertEquals(2, u4.y); + assertEquals(3, u4.z); + assertEquals(4, u4.w); +} + +testConstructor(); + +function test1ArgumentConstructor() { + var u4 = SIMD.int32x4(1, 2, 3, 4); + var u4_new = SIMD.int32x4(u4); + assertEquals(u4_new.x, u4.x); + assertEquals(u4_new.y, u4.y); + assertEquals(u4_new.z, u4.z); + assertEquals(u4_new.w, u4.w); +} + +test1ArgumentConstructor(); +test1ArgumentConstructor(); +%OptimizeFunctionOnNextCall(test1ArgumentConstructor); +test1ArgumentConstructor(); + +function testZeroConstructor() { + var u4 = SIMD.int32x4.zero(); + assertEquals(0, u4.x); + assertEquals(0, u4.y); + assertEquals(0, u4.z); + assertEquals(0, u4.w); +} + +testZeroConstructor(); +testZeroConstructor(); +%OptimizeFunctionOnNextCall(testZeroConstructor); +testZeroConstructor(); + +function testBoolConstructor() { + var u4 = SIMD.int32x4.bool(true, false, true, false); + assertEquals(-1, 
u4.x); + assertEquals(0, u4.y); + assertEquals(-1, u4.z); + assertEquals(0, u4.w); +} + +testBoolConstructor(); +testBoolConstructor(); +%OptimizeFunctionOnNextCall(testBoolConstructor); +testBoolConstructor(); + +function testSplatConstructor() { + var u4 = SIMD.int32x4.splat(4); + assertEquals(4, u4.x); + assertEquals(4, u4.y); + assertEquals(4, u4.z); + assertEquals(4, u4.w); +} + +testSplatConstructor(); +testSplatConstructor(); +%OptimizeFunctionOnNextCall(testSplatConstructor); +testSplatConstructor(); + +function testTypeof() { + var u4 = SIMD.int32x4(1, 2, 3, 4); + assertEquals(typeof(u4), "object"); + + var new_u4 = new SIMD.int32x4(1, 2, 3, 4); + assertEquals(typeof(new_u4), "object"); + assertEquals(typeof(new_u4.valueOf()), "object"); + assertEquals(Object.prototype.toString.call(new_u4), "[object Object]"); +} + +testTypeof(); + +function testSignMaskGetter() { + var a = SIMD.int32x4(0x80000000 - 0xFFFFFFFF - 1, 0x7000000, -1, 0x0); + assertEquals(0x5, a.signMask); + var b = SIMD.int32x4(0x0, 0x0, 0x0, 0x0); + assertEquals(0x0, b.signMask); + var c = SIMD.int32x4(-1, -1, -1, -1); + assertEquals(0xf, c.signMask); +} + +testSignMaskGetter(); +testSignMaskGetter(); +%OptimizeFunctionOnNextCall(testSignMaskGetter); +testSignMaskGetter(); + + +function testSIMDAnd() { + var m = SIMD.int32x4(0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1, + 0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1); + var n = SIMD.int32x4(0x55555555, 0x55555555, 0x55555555, 0x55555555); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, m.x); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, m.y); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, m.z); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, m.w); + assertEquals(0x55555555, n.x); + assertEquals(0x55555555, n.y); + assertEquals(0x55555555, n.z); + assertEquals(0x55555555, n.w); + assertEquals(true, n.flagX); + assertEquals(true, n.flagY); + assertEquals(true, n.flagZ); + assertEquals(true, n.flagW); + o = SIMD.int32x4.and(m,n); // 
and + assertEquals(0x0, o.x); + assertEquals(0x0, o.y); + assertEquals(0x0, o.z); + assertEquals(0x0, o.w); + assertEquals(false, o.flagX); + assertEquals(false, o.flagY); + assertEquals(false, o.flagZ); + assertEquals(false, o.flagW); +} + +testSIMDAnd(); +testSIMDAnd(); +%OptimizeFunctionOnNextCall(testSIMDAnd); +testSIMDAnd(); + +function testSIMDOr() { + var m = SIMD.int32x4(0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1, + 0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1); + var n = SIMD.int32x4(0x55555555, 0x55555555, 0x55555555, 0x55555555); + var o = SIMD.int32x4.or(m,n); // or + assertEquals(-1, o.x); + assertEquals(-1, o.y); + assertEquals(-1, o.z); + assertEquals(-1, o.w); + assertEquals(true, o.flagX); + assertEquals(true, o.flagY); + assertEquals(true, o.flagZ); + assertEquals(true, o.flagW); +} + +testSIMDOr(); +testSIMDOr(); +%OptimizeFunctionOnNextCall(testSIMDOr); +testSIMDOr(); + +function testSIMDInt32x4Or() { + var m = SIMD.int32x4(0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1, + 0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1); + var n = SIMD.int32x4(0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1, + 0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1); + var o = SIMD.int32x4.xor(m,n); // xor + assertEquals(0x0, o.x); + assertEquals(0x0, o.y); + assertEquals(0x0, o.z); + assertEquals(0x0, o.w); + assertEquals(false, o.flagX); + assertEquals(false, o.flagY); + assertEquals(false, o.flagZ); + assertEquals(false, o.flagW); +} + +testSIMDInt32x4Or(); +testSIMDInt32x4Or(); +%OptimizeFunctionOnNextCall(testSIMDInt32x4Or); +testSIMDInt32x4Or(); + +function testSIMDNot() { + var m = SIMD.int32x4(0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1, + 0xAAAAAAAA - 0xFFFFFFFF - 1, 0xAAAAAAAA - 0xFFFFFFFF - 1); + var n = SIMD.int32x4(0x55555555, 0x55555555, 0x55555555, 0x55555555); + m = SIMD.int32x4.not(m); + n = SIMD.int32x4.not(n); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, n.x); + 
assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, n.y); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, n.z); + assertEquals(0xAAAAAAAA - 0xFFFFFFFF - 1, n.w); + assertEquals(0x55555555, m.x); + assertEquals(0x55555555, m.y); + assertEquals(0x55555555, m.z); + assertEquals(0x55555555, m.w); +} + +testSIMDNot(); +testSIMDNot(); +%OptimizeFunctionOnNextCall(testSIMDNot); +testSIMDNot(); + +function testSIMDNegu32() { + var m = SIMD.int32x4(-1, 1, -1, 1); + m = SIMD.int32x4.neg(m); + assertEquals(1, m.x); + assertEquals(-1, m.y); + assertEquals(1, m.z); + assertEquals(-1, m.w); +} + +testSIMDNegu32(); +testSIMDNegu32(); +%OptimizeFunctionOnNextCall(testSIMDNegu32); +testSIMDNegu32(); + +function testSIMDSelect() { + var m = SIMD.int32x4.bool(true, true, false, false); + var t = SIMD.int32x4(1, 2, 3, 4); + var f = SIMD.int32x4(5, 6, 7, 8); + var s = SIMD.int32x4.select(m, t, f); + assertEquals(1, s.x); + assertEquals(2, s.y); + assertEquals(7, s.z); + assertEquals(8, s.w); +} + +testSIMDSelect(); +testSIMDSelect(); +%OptimizeFunctionOnNextCall(testSIMDSelect); +testSIMDSelect(); + + +function testSIMDWithXu32() { + var a = SIMD.int32x4(1, 2, 3, 4); + var c = SIMD.int32x4.withX(a, 20); + assertEquals(20, c.x); + assertEquals(2, c.y); + assertEquals(3, c.z); + assertEquals(4, c.w); +} + +testSIMDWithXu32(); +testSIMDWithXu32(); +%OptimizeFunctionOnNextCall(testSIMDWithXu32); +testSIMDWithXu32(); + +function testSIMDWithYu32() { + var a = SIMD.int32x4(1, 2, 3, 4); + var c = SIMD.int32x4.withY(a, 20); + assertEquals(1, c.x); + assertEquals(20, c.y); + assertEquals(3, c.z); + assertEquals(4, c.w); +} + +testSIMDWithYu32(); +testSIMDWithYu32(); +%OptimizeFunctionOnNextCall(testSIMDWithYu32); +testSIMDWithYu32(); + +function testSIMDWithZu32() { + var a = SIMD.int32x4(1, 2, 3, 4); + var c = SIMD.int32x4.withZ(a, 20); + assertEquals(1, c.x); + assertEquals(2, c.y); + assertEquals(20, c.z); + assertEquals(4, c.w); +} + +testSIMDWithZu32(); +testSIMDWithZu32(); 
+%OptimizeFunctionOnNextCall(testSIMDWithZu32); +testSIMDWithZu32(); + +function testSIMDWithWu32() { + var a = SIMD.int32x4(1, 2, 3, 4); + var c = SIMD.int32x4.withW(a, 20); + assertEquals(1, c.x); + assertEquals(2, c.y); + assertEquals(3, c.z); + assertEquals(20, c.w); +} + +testSIMDWithWu32(); +testSIMDWithWu32(); +%OptimizeFunctionOnNextCall(testSIMDWithWu32); +testSIMDWithWu32(); + +function testSIMDWithFlagX() { + var a = SIMD.int32x4.bool(true, false, true, false); + + // boolean + var c = SIMD.int32x4.withFlagX(a, true); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + c = SIMD.int32x4.withFlagX(a, false); + assertEquals(false, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(0x0, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + + // smi + c = SIMD.int32x4.withFlagX(a, 2); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + c = SIMD.int32x4.withFlagX(a, 0); + assertEquals(false, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(0x0, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + + // string + c = SIMD.int32x4.withFlagX(a, 'true'); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + c = SIMD.int32x4.withFlagX(a, ''); + assertEquals(false, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(0x0, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + 
assertEquals(0x0, c.w); + + // heap number + c = SIMD.int32x4.withFlagX(a, 3.14); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + c = SIMD.int32x4.withFlagX(a, 0.0); + assertEquals(false, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(0x0, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + + // JS Array + var array = [1]; + c = SIMD.int32x4.withFlagX(a, array); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); + + c = SIMD.int32x4.withFlagX(a, undefined); + assertEquals(false, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(0x0, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); +} + +testSIMDWithFlagX(); +testSIMDWithFlagX(); +%OptimizeFunctionOnNextCall(testSIMDWithFlagX); +testSIMDWithFlagX(); + +function testSIMDWithFlagY() { + var a = SIMD.int32x4.bool(true, false, true, false); + var c = SIMD.int32x4.withFlagY(a, true); + assertEquals(true, c.flagX); + assertEquals(true, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + c = SIMD.int32x4.withFlagY(a, false); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); +} + +testSIMDWithFlagY(); +testSIMDWithFlagY(); +%OptimizeFunctionOnNextCall(testSIMDWithFlagY); +testSIMDWithFlagY(); + +function testSIMDWithFlagZ() { + var a = SIMD.int32x4.bool(true, false, 
true, false); + var c = SIMD.int32x4.withFlagZ(a, true); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + c = SIMD.int32x4.withFlagZ(a, false); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(false, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(0x0, c.z); + assertEquals(0x0, c.w); +} + +testSIMDWithFlagZ(); +testSIMDWithFlagZ(); +%OptimizeFunctionOnNextCall(testSIMDWithFlagZ); +testSIMDWithFlagZ(); + +function testSIMDWithFlagW() { + var a = SIMD.int32x4.bool(true, false, true, false); + var c = SIMD.int32x4.withFlagW(a, true); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(true, c.flagW); + c = SIMD.int32x4.withFlagW(a, false); + assertEquals(true, c.flagX); + assertEquals(false, c.flagY); + assertEquals(true, c.flagZ); + assertEquals(false, c.flagW); + assertEquals(-1, c.x); + assertEquals(0x0, c.y); + assertEquals(-1, c.z); + assertEquals(0x0, c.w); +} + +testSIMDWithFlagW(); +testSIMDWithFlagW(); +%OptimizeFunctionOnNextCall(testSIMDWithFlagW); +testSIMDWithFlagW(); + +function testSIMDAddu32() { + var a = SIMD.int32x4(-1, -1, 0x7fffffff, 0x0); + var b = SIMD.int32x4(0x1, -1, 0x1, -1); + var c = SIMD.int32x4.add(a, b); + assertEquals(0x0, c.x); + assertEquals(-2, c.y); + assertEquals(0x80000000 - 0xFFFFFFFF - 1, c.z); + assertEquals(-1, c.w); +} + +testSIMDAddu32(); +testSIMDAddu32(); +%OptimizeFunctionOnNextCall(testSIMDAddu32); +testSIMDAddu32(); + +function testSIMDSubu32() { + var a = SIMD.int32x4(-1, -1, 0x80000000 - 0xFFFFFFFF - 1, 0x0); + var b = SIMD.int32x4(0x1, -1, 0x1, -1); + var c = SIMD.int32x4.sub(a, b); + assertEquals(-2, c.x); + assertEquals(0x0, c.y); + assertEquals(0x7FFFFFFF, c.z); + assertEquals(0x1, c.w); +} + +testSIMDSubu32(); +testSIMDSubu32(); +%OptimizeFunctionOnNextCall(testSIMDSubu32); +testSIMDSubu32(); 
+ +function testSIMDMulu32() { + var a = SIMD.int32x4(-1, -1, 0x80000000 - 0xFFFFFFFF - 1, 0x0); + var b = SIMD.int32x4(0x1, -1, 0x80000000 - 0xFFFFFFFF - 1, -1); + var c = SIMD.int32x4.mul(a, b); + assertEquals(-1, c.x); + assertEquals(0x1, c.y); + assertEquals(0x0, c.z); + assertEquals(0x0, c.w); +} + +testSIMDMulu32(); +testSIMDMulu32(); +%OptimizeFunctionOnNextCall(testSIMDMulu32); +testSIMDMulu32(); + +function testSIMDShuffleu32() { + var m = SIMD.int32x4(1, 2, 3, 4); + var xxxx = SIMD.int32x4.shuffle(m, SIMD.XXXX); + assertEquals(1, xxxx.x); + assertEquals(1, xxxx.y); + assertEquals(1, xxxx.z); + assertEquals(1, xxxx.w); + var yyyy = SIMD.int32x4.shuffle(m, SIMD.YYYY); + assertEquals(2, yyyy.x); + assertEquals(2, yyyy.y); + assertEquals(2, yyyy.z); + assertEquals(2, yyyy.w); + var zzzz = SIMD.int32x4.shuffle(m, SIMD.ZZZZ); + assertEquals(3, zzzz.x); + assertEquals(3, zzzz.y); + assertEquals(3, zzzz.z); + assertEquals(3, zzzz.w); + var wwww = SIMD.int32x4.shuffle(m, SIMD.WWWW); + assertEquals(4, wwww.x); + assertEquals(4, wwww.y); + assertEquals(4, wwww.z); + assertEquals(4, wwww.w); + var wzyx = SIMD.int32x4.shuffle(m, SIMD.WZYX); + assertEquals(4, wzyx.x); + assertEquals(3, wzyx.y); + assertEquals(2, wzyx.z); + assertEquals(1, wzyx.w); + var wwzz = SIMD.int32x4.shuffle(m, SIMD.WWZZ); + assertEquals(4, wwzz.x); + assertEquals(4, wwzz.y); + assertEquals(3, wwzz.z); + assertEquals(3, wwzz.w); + var xxyy = SIMD.int32x4.shuffle(m, SIMD.XXYY); + assertEquals(1, xxyy.x); + assertEquals(1, xxyy.y); + assertEquals(2, xxyy.z); + assertEquals(2, xxyy.w); + var yyww = SIMD.int32x4.shuffle(m, SIMD.YYWW); + assertEquals(2, yyww.x); + assertEquals(2, yyww.y); + assertEquals(4, yyww.z); + assertEquals(4, yyww.w); +} + +testSIMDShuffleu32(); +testSIMDShuffleu32(); +%OptimizeFunctionOnNextCall(testSIMDShuffleu32); +testSIMDShuffleu32(); + +function testSIMDComparisons() { + var m = SIMD.int32x4(1, 2, 100, 1); + var n = SIMD.int32x4(2, 2, 1, 100); + var cmp; + cmp = 
SIMD.int32x4.lessThan(m, n); + assertEquals(-1, cmp.x); + assertEquals(0x0, cmp.y); + assertEquals(0x0, cmp.z); + assertEquals(-1, cmp.w); + + cmp = SIMD.int32x4.equal(m, n); + assertEquals(0x0, cmp.x); + assertEquals(-1, cmp.y); + assertEquals(0x0, cmp.z); + assertEquals(0x0, cmp.w); + + cmp = SIMD.int32x4.greaterThan(m, n); + assertEquals(0x0, cmp.x); + assertEquals(0x0, cmp.y); + assertEquals(-1, cmp.z); + assertEquals(0x0, cmp.w); +} + +testSIMDComparisons(); +testSIMDComparisons(); +%OptimizeFunctionOnNextCall(testSIMDComparisons); +testSIMDComparisons(); + +function testSIMDShift() { + var m = SIMD.int32x4(1, 2, 100, 0); + + var a = SIMD.int32x4.shiftLeft(m, 2); + assertEquals(4, a.x); + assertEquals(8, a.y); + assertEquals(400, a.z); + assertEquals(0, a.w); + + var b = SIMD.int32x4.shiftRight(a, 2); + assertEquals(1, b.x); + assertEquals(2, b.y); + assertEquals(100, b.z); + assertEquals(0, b.w); + + var n = SIMD.int32x4(-8, 2, 1, 100); + + var c = SIMD.int32x4.shiftRightArithmetic(n, 2); + assertEquals(-2, c.x); + assertEquals(0, c.y); + assertEquals(0, c.z); + assertEquals(25, c.w); +} + +testSIMDShift(); +testSIMDShift(); +%OptimizeFunctionOnNextCall(testSIMDShift); +testSIMDShift(); + +function testInt32x4ArrayBasic() { + var a = new Int32x4Array(1); + assertEquals(1, a.length); + assertEquals(16, a.byteLength); + assertEquals(16, a.BYTES_PER_ELEMENT); + assertEquals(16, Int32x4Array.BYTES_PER_ELEMENT); + assertEquals(0, a.byteOffset); + assertTrue(undefined != a.buffer); + var b = new Int32x4Array(4); + assertEquals(4, b.length); + assertEquals(64, b.byteLength); + assertEquals(16, b.BYTES_PER_ELEMENT); + assertEquals(16, Int32x4Array.BYTES_PER_ELEMENT); + assertEquals(0, b.byteOffset); + assertTrue(undefined != b.buffer); +} + +testInt32x4ArrayBasic(); + +function testInt32x4ArrayGetAndSet() { + var a = new Int32x4Array(4); + a[0] = SIMD.int32x4(1, 2, 3, 4); + a[1] = SIMD.int32x4(5, 6, 7, 8); + a[2] = SIMD.int32x4(9, 10, 11, 12); + a[3] = 
SIMD.int32x4(13, 14, 15, 16); + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + assertEquals(a[0].z, 3); + assertEquals(a[0].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 7); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + assertEquals(a[3].z, 15); + assertEquals(a[3].w, 16); + + var b = new Int32x4Array(4); + b.setAt(0,SIMD.int32x4(1, 2, 3, 4)); + b.setAt(1,SIMD.int32x4(5, 6, 7, 8)); + b.setAt(2,SIMD.int32x4(9, 10, 11, 12)); + b.setAt(3,SIMD.int32x4(13, 14, 15, 16)); + + assertEquals(b.getAt(0).x, 1); + assertEquals(b.getAt(0).y, 2); + assertEquals(b.getAt(0).z, 3); + assertEquals(b.getAt(0).w, 4); + + assertEquals(b.getAt(1).x, 5); + assertEquals(b.getAt(1).y, 6); + assertEquals(b.getAt(1).z, 7); + assertEquals(b.getAt(1).w, 8); + + assertEquals(b.getAt(2).x, 9); + assertEquals(b.getAt(2).y, 10); + assertEquals(b.getAt(2).z, 11); + assertEquals(b.getAt(2).w, 12); + + assertEquals(b.getAt(3).x, 13); + assertEquals(b.getAt(3).y, 14); + assertEquals(b.getAt(3).z, 15); + assertEquals(b.getAt(3).w, 16); +} + +testInt32x4ArrayGetAndSet(); + +function testInt32x4ArraySwap() { + var a = new Int32x4Array(4); + a[0] = SIMD.int32x4(1, 2, 3, 4); + a[1] = SIMD.int32x4(5, 6, 7, 8); + a[2] = SIMD.int32x4(9, 10, 11, 12); + a[3] = SIMD.int32x4(13, 14, 15, 16); + + // Swap element 0 and element 3 + var t = a[0]; + a[0] = a[3]; + a[3] = t; + + assertEquals(a[3].x, 1); + assertEquals(a[3].y, 2); + assertEquals(a[3].z, 3); + assertEquals(a[3].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 7); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[0].x, 13); + assertEquals(a[0].y, 14); + assertEquals(a[0].z, 15); + assertEquals(a[0].w, 16); +} + 
+testInt32x4ArraySwap(); +testInt32x4ArraySwap(); +%OptimizeFunctionOnNextCall(testInt32x4ArraySwap); +testInt32x4ArraySwap(); + +function testInt32x4ArrayCopy() { + var a = new Int32x4Array(4); + a[0] = SIMD.int32x4(1, 2, 3, 4); + a[1] = SIMD.int32x4(5, 6, 7, 8); + a[2] = SIMD.int32x4(9, 10, 11, 12); + a[3] = SIMD.int32x4(13, 14, 15, 16); + var b = new Int32x4Array(a); + assertEquals(a[0].x, b[0].x); + assertEquals(a[0].y, b[0].y); + assertEquals(a[0].z, b[0].z); + assertEquals(a[0].w, b[0].w); + + assertEquals(a[1].x, b[1].x); + assertEquals(a[1].y, b[1].y); + assertEquals(a[1].z, b[1].z); + assertEquals(a[1].w, b[1].w); + + assertEquals(a[2].x, b[2].x); + assertEquals(a[2].y, b[2].y); + assertEquals(a[2].z, b[2].z); + assertEquals(a[2].w, b[2].w); + + assertEquals(a[3].x, b[3].x); + assertEquals(a[3].y, b[3].y); + assertEquals(a[3].z, b[3].z); + assertEquals(a[3].w, b[3].w); + + a[2] = SIMD.int32x4(17, 18, 19, 20); + + assertEquals(a[2].x, 17); + assertEquals(a[2].y, 18); + assertEquals(a[2].z, 19); + assertEquals(a[2].w, 20); + + assertTrue(a[2].x != b[2].x); + assertTrue(a[2].y != b[2].y); + assertTrue(a[2].z != b[2].z); + assertTrue(a[2].w != b[2].w); +} + +testInt32x4ArrayCopy(); + +function testInt32x4ArrayViewBasic() { + var a = new Uint32Array(8); + // view with no offset. + var b = new Int32x4Array(a.buffer, 0); + // view with offset. + var c = new Int32x4Array(a.buffer, 16); + // view with no offset but shorter than original list. 
+ var d = new Int32x4Array(a.buffer, 0, 1); + assertEquals(a.length, 8); + assertEquals(b.length, 2); + assertEquals(c.length, 1); + assertEquals(d.length, 1); + assertEquals(a.byteLength, 32); + assertEquals(b.byteLength, 32); + assertEquals(c.byteLength, 16); + assertEquals(d.byteLength, 16) + assertEquals(a.byteOffset, 0); + assertEquals(b.byteOffset, 0); + assertEquals(c.byteOffset, 16); + assertEquals(d.byteOffset, 0); +} + +testInt32x4ArrayViewBasic(); + +function testInt32x4ArrayViewValues() { + var a = new Uint32Array(8); + var b = new Int32x4Array(a.buffer, 0); + var c = new Int32x4Array(a.buffer, 16); + var d = new Int32x4Array(a.buffer, 0, 1); + var start = 100; + for (var i = 0; i < b.length; i++) { + assertEquals(0, b[i].x); + assertEquals(0, b[i].y); + assertEquals(0, b[i].z); + assertEquals(0, b[i].w); + } + for (var i = 0; i < c.length; i++) { + assertEquals(0, c[i].x); + assertEquals(0, c[i].y); + assertEquals(0, c[i].z); + assertEquals(0, c[i].w); + } + for (var i = 0; i < d.length; i++) { + assertEquals(0, d[i].x); + assertEquals(0, d[i].y); + assertEquals(0, d[i].z); + assertEquals(0, d[i].w); + } + for (var i = 0; i < a.length; i++) { + a[i] = i+start; + } + for (var i = 0; i < b.length; i++) { + assertTrue(0 != b[i].x); + assertTrue(0 != b[i].y); + assertTrue(0 != b[i].z); + assertTrue(0 != b[i].w); + } + for (var i = 0; i < c.length; i++) { + assertTrue(0 != c[i].x); + assertTrue(0 != c[i].y); + assertTrue(0 != c[i].z); + assertTrue(0 != c[i].w); + } + for (var i = 0; i < d.length; i++) { + assertTrue(0 != d[i].x); + assertTrue(0 != d[i].y); + assertTrue(0 != d[i].z); + assertTrue(0 != d[i].w); + } + assertEquals(start+0, b[0].x); + assertEquals(start+1, b[0].y); + assertEquals(start+2, b[0].z); + assertEquals(start+3, b[0].w); + assertEquals(start+4, b[1].x); + assertEquals(start+5, b[1].y); + assertEquals(start+6, b[1].z); + assertEquals(start+7, b[1].w); + + assertEquals(start+4, c[0].x); + assertEquals(start+5, c[0].y); + 
assertEquals(start+6, c[0].z); + assertEquals(start+7, c[0].w); + + assertEquals(start+0, d[0].x); + assertEquals(start+1, d[0].y); + assertEquals(start+2, d[0].z); + assertEquals(start+3, d[0].w); +} + +testInt32x4ArrayViewValues(); + +function testViewOnInt32x4Array() { + var a = new Int32x4Array(4); + a[0] = SIMD.int32x4(1, 2, 3, 4); + a[1] = SIMD.int32x4(5, 6, 7, 8); + a[2] = SIMD.int32x4(9, 10, 11, 12); + a[3] = SIMD.int32x4(13, 14, 15, 16); + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + assertEquals(a[0].z, 3); + assertEquals(a[0].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 7); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + assertEquals(a[3].z, 15); + assertEquals(a[3].w, 16); + + // Create view on a. + var b = new Uint32Array(a.buffer); + assertEquals(b.length, 16); + assertEquals(b.byteLength, 64); + b[2] = 99.0; + b[6] = 1.0; + + // Observe changes in "a" + assertEquals(a[0].x, 1); + assertEquals(a[0].y, 2); + assertEquals(a[0].z, 99); + assertEquals(a[0].w, 4); + + assertEquals(a[1].x, 5); + assertEquals(a[1].y, 6); + assertEquals(a[1].z, 1); + assertEquals(a[1].w, 8); + + assertEquals(a[2].x, 9); + assertEquals(a[2].y, 10); + assertEquals(a[2].z, 11); + assertEquals(a[2].w, 12); + + assertEquals(a[3].x, 13); + assertEquals(a[3].y, 14); + assertEquals(a[3].z, 15); + assertEquals(a[3].w, 16); +} + +testViewOnInt32x4Array(); + +function testArrayOfInt32x4() { + var a = []; + var a4 = new Int32x4Array(2); + for (var i = 0; i < a4.length; i++) { + a[i] = SIMD.int32x4(i, i + 1, i + 2, i + 3); + a4[i] = SIMD.int32x4(i, i + 1, i + 2, i + 3); + } + + for (var i = 0; i < a4.length; i++) { + assertEquals(a[i].x, a4[i].x); + assertEquals(a[i].y, a4[i].y); + assertEquals(a[i].z, a4[i].z); + assertEquals(a[i].w, a4[i].w); + } +} + +testArrayOfInt32x4(); diff --git 
a/src/v8/test/mjsunit/simd/osr.js b/src/v8/test/mjsunit/simd/osr.js new file mode 100644 index 0000000..138e260 --- /dev/null +++ b/src/v8/test/mjsunit/simd/osr.js @@ -0,0 +1,43 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Flags: --simd_object + +function testSIMDAbs() { + var a4 = SIMD.float32x4(1.0, -2.0, 3.0, -4.0); + var b4; + for (var i = 0; i < 100000; i++) { + b4 = SIMD.float32x4.abs(a4); + } + + assertEquals(1.0, b4.x); + assertEquals(2.0, b4.y); + assertEquals(3.0, b4.z); + assertEquals(4.0, b4.w); +} + +testSIMDAbs(); diff --git a/src/v8/test/mjsunit/simd/prototype.js b/src/v8/test/mjsunit/simd/prototype.js new file mode 100644 index 0000000..0a81e7b --- /dev/null +++ b/src/v8/test/mjsunit/simd/prototype.js @@ -0,0 +1,60 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --simd_object --allow-natives-syntax + +function testFloat32x4Prototype() { + var a4 = SIMD.float32x4(1.0, -2.0, 3.0, -4.0); + SIMD.float32x4.prototype = {}; + try { + var x = a4.x; + } catch (o) { + assertEquals(o instanceof TypeError, true); + assertEquals(o.message, ""); + } +} + +testFloat32x4Prototype(); +testFloat32x4Prototype(); +%OptimizeFunctionOnNextCall(testFloat32x4Prototype); +testFloat32x4Prototype(); + +function testInt32x4Prototype() { + var a4 = SIMD.int32x4(1.0, -2.0, 3.0, -4.0); + SIMD.int32x4.prototype = {}; + try { + var x = a4.x; + } catch (o) { + assertEquals(o instanceof TypeError, true); + assertEquals(o.message, ""); + } +} + +testInt32x4Prototype(); +testInt32x4Prototype(); +%OptimizeFunctionOnNextCall(testInt32x4Prototype); +testInt32x4Prototype(); diff --git a/src/v8/test/mjsunit/simd/representation_change.js b/src/v8/test/mjsunit/simd/representation_change.js new file mode 100644 index 0000000..7374dcf --- /dev/null +++ b/src/v8/test/mjsunit/simd/representation_change.js @@ -0,0 +1,53 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Flags: --simd_object --allow-natives-syntax + +function testSIMDAbs(i) { + var a; + if (i < 3) { + a = SIMD.float32x4(1, 1, 1, 1); + } else { + a = SIMD.int32x4(2, 2, 2, 2); + } + return SIMD.float32x4.abs(a); +} + +function tryTestSIMDAbs(i) { + var r = 0; + try { + r = testSIMDAbs(i); + } catch (o) { + assertEquals(o instanceof TypeError, true); + assertEquals(o.message, ""); + } +} + +tryTestSIMDAbs(1); +tryTestSIMDAbs(2); +%OptimizeFunctionOnNextCall(testSIMDAbs); +tryTestSIMDAbs(3); diff --git a/src/v8/tools/generate-runtime-tests.py b/src/v8/tools/generate-runtime-tests.py index a41df67..d6fcc05 100755 --- a/src/v8/tools/generate-runtime-tests.py +++ b/src/v8/tools/generate-runtime-tests.py @@ -47,11 +47,11 @@ EXPAND_MACROS = [ # that the parser doesn't bit-rot. Change the values as needed when you add, # remove or change runtime functions, but make sure we don't lose our ability # to parse them! -EXPECTED_FUNCTION_COUNT = 428 -EXPECTED_FUZZABLE_COUNT = 331 +EXPECTED_FUNCTION_COUNT = 444 +EXPECTED_FUZZABLE_COUNT = 341 EXPECTED_CCTEST_COUNT = 7 -EXPECTED_UNKNOWN_COUNT = 16 -EXPECTED_BUILTINS_COUNT = 809 +EXPECTED_UNKNOWN_COUNT = 22 +EXPECTED_BUILTINS_COUNT = 930 # Don't call these at all. 
@@ -544,6 +544,25 @@ class Generator(object): fallback="new DataView(new ArrayBuffer(8))") return result + def _Float32x4(self, name, recursion_budget): + x = random.random() + y = random.random() + z = random.random() + w = random.random() + return self._Variable(name, "SIMD.float32x4(%s, %s, %s, %s)" %(x, y, z, w)) + + def _Float64x2(self, name, recursion_budget): + x = random.random() + y = random.random() + return self._Variable(name, "SIMD.float64x2(%s, %s)" %(x, y)) + + def _Int32x4(self, name, recursion_budget): + x = random.randint(-0x40000000, 0x3fffffff) + y = random.randint(-0x40000000, 0x3fffffff) + z = random.randint(-0x40000000, 0x3fffffff) + w = random.randint(-0x40000000, 0x3fffffff) + return self._Variable(name, "SIMD.int32x4(%s, %s, %s, %s)" %(x, y, z, w)) + def _JSDate(self, name, recursion_budget): die = random.random() if die < 0.25: @@ -776,6 +795,9 @@ class Generator(object): "String": ["\"foo\"", _String], "Symbol": ["Symbol(\"symbol\")", _Symbol], "Uint32": ["32", _Uint32], + "Float32x4": ["SIMD.float32x4(0.0, 0.0, 0.0, 0.0)", _Float32x4], + "Float64x2": ["SIMD.float64x2(0.0, 0.0)", _Float64x2], + "Int32x4": ["SIMD.int32x4(0, 0, 0, 0)", _Int32x4], } @@ -1134,6 +1156,10 @@ def _GenerateTestcase(function, definitions, argslist, throws): "// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY", "// Flags: --allow-natives-syntax --harmony --harmony-proxies" ] + definitions + if function.name.find("Float32x4") != -1 or function.name.find("Float64x2") != -1 or function.name.find("Int32x4") != -1: + s = ["// Copyright 2014 the V8 project authors. 
All rights reserved.", + "// AUTO-GENERATED BY tools/generate-runtime-tests.py, DO NOT MODIFY", + "// Flags: --allow-natives-syntax --harmony --simd-object"] + definitions call = "%%%s%s(%s);" % (function.inline, function.name, ", ".join(argslist)) if throws: s.append("try {") diff --git a/src/v8/tools/gyp/v8.gyp b/src/v8/tools/gyp/v8.gyp index c703155..b993766 100644 --- a/src/v8/tools/gyp/v8.gyp +++ b/src/v8/tools/gyp/v8.gyp @@ -1419,6 +1419,7 @@ '../../src/generator.js', '../../src/harmony-string.js', '../../src/harmony-array.js', + '../../src/simd128.js', ], 'libraries_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries.bin', 'libraries_experimental_bin_file': '<(SHARED_INTERMEDIATE_DIR)/libraries-experimental.bin', diff --git a/src/v8/tools/js2c.py b/src/v8/tools/js2c.py index 77485f6..86b7e3a 100755 --- a/src/v8/tools/js2c.py +++ b/src/v8/tools/js2c.py @@ -349,11 +349,11 @@ def BuildFilterChain(macro_filename): if macro_filename: (consts, macros) = ReadMacros(ReadFile(macro_filename)) filter_chain.append(lambda l: ExpandConstants(l, consts)) + filter_chain.append(lambda l: ExpandInlineMacros(l)) filter_chain.append(lambda l: ExpandMacros(l, macros)) filter_chain.extend([ RemoveCommentsAndTrailingWhitespace, - ExpandInlineMacros, ExpandInlineConstants, Validate, jsmin.JavaScriptMinifier().JSMinify diff --git a/src/xwalk/DEPS.xwalk b/src/xwalk/DEPS.xwalk index 37a0fdb..64b0d15 100644 --- a/src/xwalk/DEPS.xwalk +++ b/src/xwalk/DEPS.xwalk @@ -19,7 +19,7 @@ chromium_crosswalk_rev = '04ba13a65546e6e6309e560b9a2491b904ed57a8' blink_crosswalk_rev = '92e5d6adee53362b3f5aaec11bcb0526d5f0715d' -v8_crosswalk_rev = '6264ffa1bef0681640afbafb5194c55a172ef6df' +v8_crosswalk_rev = '9b7376c845d7ba58715f4ffd9a80fd670b021360' ozone_wayland_rev = 'd301e5c546a7dea0de8fde5b07a2a57afd02103b' crosswalk_git = 'https://github.com/crosswalk-project' @@ -50,6 +50,11 @@ solutions = [ 'https://chromium.googlesource.com/external/webrtc/trunk/webrtc.git@' 
'cdc312345fcdfc586a7c8cd720407449cc0bdcd2', + # Include OpenCL header files for WebCL support, target version 1.2. + 'src/third_party/khronos/CL': + 'https://cvs.khronos.org/svn/repos/registry/trunk/public/cl/api/1.2@' + '28150', + # These directories are not relevant to Crosswalk and can be safely ignored # in a checkout. It avoids creating additional directories outside src/ that # are not used and also saves some bandwidth. diff --git a/src/xwalk/VERSION b/src/xwalk/VERSION index a53b87a..0b05496 100644 --- a/src/xwalk/VERSION +++ b/src/xwalk/VERSION @@ -1,4 +1,4 @@ MAJOR=9 MINOR=38 -BUILD=205 +BUILD=207 PATCH=0 diff --git a/src/xwalk/app/android/runtime_activity/src/org/xwalk/app/XWalkRuntimeActivityBase.java b/src/xwalk/app/android/runtime_activity/src/org/xwalk/app/XWalkRuntimeActivityBase.java index 80823b2..117427e 100644 --- a/src/xwalk/app/android/runtime_activity/src/org/xwalk/app/XWalkRuntimeActivityBase.java +++ b/src/xwalk/app/android/runtime_activity/src/org/xwalk/app/XWalkRuntimeActivityBase.java @@ -16,9 +16,9 @@ import android.os.Bundle; import android.view.View; import org.xwalk.app.runtime.extension.XWalkRuntimeExtensionManager; -import org.xwalk.app.runtime.XWalkRuntimeLibraryException; import org.xwalk.app.runtime.XWalkRuntimeView; -import org.xwalk.core.ReflectionHelper; +import org.xwalk.core.SharedXWalkExceptionHandler; +import org.xwalk.core.SharedXWalkView; import org.xwalk.core.XWalkPreferences; public abstract class XWalkRuntimeActivityBase extends Activity { @@ -127,6 +127,14 @@ public abstract class XWalkRuntimeActivityBase extends Activity { private void tryLoadRuntimeView() { try { + SharedXWalkView.initialize(this, new SharedXWalkExceptionHandler() { + @Override + public void onSharedLibraryNotFound() { + String title = getString("dialog_title_install_runtime_lib"); + String message = getString("dialog_message_install_runtime_lib"); + showRuntimeLibraryExceptionDialog(title, message); + } + }); if (mUseAnimatableView) { 
XWalkPreferences.setValue(XWalkPreferences.ANIMATABLE_XWALK_VIEW, true); } else { @@ -158,45 +166,18 @@ public abstract class XWalkRuntimeActivityBase extends Activity { } public void handleException(Throwable e) { - if (e instanceof RuntimeException) { + if (e == null) return; + if (e instanceof RuntimeException && e.getCause() != null) { handleException(e.getCause()); return; } - - if (e instanceof XWalkRuntimeLibraryException) { - String title = ""; - String message = ""; - XWalkRuntimeLibraryException runtimeException = (XWalkRuntimeLibraryException) e; - switch (runtimeException.getType()) { - case XWalkRuntimeLibraryException.XWALK_RUNTIME_LIBRARY_NOT_UP_TO_DATE_CRITICAL: - title = getString("dialog_title_update_runtime_lib"); - message = getString("dialog_message_update_runtime_lib"); - break; - case XWalkRuntimeLibraryException.XWALK_RUNTIME_LIBRARY_NOT_UP_TO_DATE_WARNING: - title = getString("dialog_title_update_runtime_lib_warning"); - message = getString("dialog_message_update_runtime_lib_warning"); - break; - case XWalkRuntimeLibraryException.XWALK_RUNTIME_LIBRARY_NOT_INSTALLED: - title = getString("dialog_title_install_runtime_lib"); - message = getString("dialog_message_install_runtime_lib"); - break; - case XWalkRuntimeLibraryException.XWALK_RUNTIME_LIBRARY_INVOKE_FAILED: - default: - Exception originException = runtimeException.getOriginException(); - if (originException != null) handleException(originException); - return; - } - showRuntimeLibraryExceptionDialog(title, message); - } else { - e.printStackTrace(); - throw new RuntimeException(e); - } + throw new RuntimeException(e); } private void showRuntimeLibraryExceptionDialog(String title, String message) { if (!mShownNotFoundDialog) { AlertDialog.Builder builder = new AlertDialog.Builder(this); - if (!ReflectionHelper.shouldUseLibrary()) { + if (!SharedXWalkView.usesLibraryOutOfPackage()) { builder.setPositiveButton(android.R.string.ok, new DialogInterface.OnClickListener() { public void 
onClick(DialogInterface dialog, int id) { diff --git a/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkCoreProviderImpl.java b/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkCoreProviderImpl.java index 94778c4..b7be1aa 100644 --- a/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkCoreProviderImpl.java +++ b/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkCoreProviderImpl.java @@ -10,7 +10,6 @@ import android.content.Intent; import android.view.View; import android.widget.FrameLayout; -import org.xwalk.core.SharedXWalkExceptionHandler; import org.xwalk.core.SharedXWalkView; import org.xwalk.core.XWalkView; import org.xwalk.core.XWalkPreferences; @@ -33,13 +32,7 @@ class XWalkCoreProviderImpl implements XWalkRuntimeViewProvider { private void init(Context context, Activity activity) { // TODO(yongsheng): do customizations for XWalkView. There will // be many callback classes which are needed to be implemented. - mXWalkView = new SharedXWalkView(context, activity, new SharedXWalkExceptionHandler() { - @Override - public void onSharedLibraryNotFound() { - throw new RuntimeException(new XWalkRuntimeLibraryException( - XWalkRuntimeLibraryException.XWALK_RUNTIME_LIBRARY_NOT_INSTALLED)); - } - }); + mXWalkView = new SharedXWalkView(context, activity); } @Override diff --git a/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkRuntimeLibraryException.java b/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkRuntimeLibraryException.java deleted file mode 100644 index 8fc5d63..0000000 --- a/src/xwalk/app/android/runtime_client/src/org/xwalk/app/runtime/XWalkRuntimeLibraryException.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2013 Intel Corporation. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -package org.xwalk.app.runtime; - -/** - * This class is to consolidate the exceptions happen when - * the runtime client is trying to invoke runtime library - * through reflection. - * - * The exception will be set different label to identify which - * stage the exception happened in. - * - * The exception handler for the runtime client will take - * different action based on the type of the exception. - */ -public class XWalkRuntimeLibraryException extends Exception { - public final static int XWALK_RUNTIME_LIBRARY_NOT_INSTALLED = 1; - public final static int XWALK_RUNTIME_LIBRARY_NOT_UP_TO_DATE_CRITICAL = 2; - public final static int XWALK_RUNTIME_LIBRARY_NOT_UP_TO_DATE_WARNING = 3; - public final static int XWALK_RUNTIME_LIBRARY_INVOKE_FAILED = 4; - - private int mType; - private Exception mOriginException; - - XWalkRuntimeLibraryException(int type, Exception originException) { - mType = type; - mOriginException = originException; - } - - XWalkRuntimeLibraryException(int type) { - mType = type; - mOriginException = null; - } - - XWalkRuntimeLibraryException() { - mType = XWALK_RUNTIME_LIBRARY_NOT_INSTALLED; - mOriginException = null; - } - - public int getType() { - return mType; - } - - public Exception getOriginException() { - return mOriginException; - } -} diff --git a/src/xwalk/app/tools/android/extension_manager.py b/src/xwalk/app/tools/android/extension_manager.py index efa54e1..5a7ab6d 100755 --- a/src/xwalk/app/tools/android/extension_manager.py +++ b/src/xwalk/app/tools/android/extension_manager.py @@ -96,13 +96,13 @@ def HandleAdd(git_url, extensions_path, name=None): name = git_url.split('/')[-1].split('.')[0] if not os.path.isdir(extensions_path): if os.path.isfile(extensions_path): - print "WARNING: Please remove file %s" % (extensions_path) + print("WARNING: Please remove file %s" % (extensions_path)) sys.exit(1) else: os.mkdir(extensions_path) local_extension_path = os.path.join(extensions_path, name) if os.path.exists(local_extension_path): - 
print "ERROR: You already have a repo named \"%s\"." % name + print("ERROR: You already have a repo named \"%s\"." % name) return os.mkdir(local_extension_path) #Only support git. @@ -117,30 +117,30 @@ def HandleRemove(remove_name, extensions_path): if os.path.exists(extension_path): CleanDir(extension_path) else: - print "ERROR: Don't have extension \"%s\"" % (remove_name) + print("ERROR: Don't have extension \"%s\"" % (remove_name)) def PrintExtensionInfo(extension_name, extensions_path): - print "{0} {1}".format( + print("{0} {1}".format( "+" if GetExtensionStatus(extension_name, extensions_path) else "-", - extension_name) + extension_name)) def HandleList(extensions_path): extension_list = GetExtensionList(extensions_path) - print + print("") for extension_name in extension_list: PrintExtensionInfo(extension_name, extensions_path) - print + print("") def HandleSearch(key, extensions_path): extension_list = GetExtensionList(extensions_path) filtered_extensions = fnmatch.filter(extension_list, key) - print + print("") for extension_name in filtered_extensions: PrintExtensionInfo(extension_name, extensions_path) - print + print("") def HandleEnable(extension_name, extension_path): diff --git a/src/xwalk/app/tools/android/make_apk.py b/src/xwalk/app/tools/android/make_apk.py index 75147d1..65a6409 100755 --- a/src/xwalk/app/tools/android/make_apk.py +++ b/src/xwalk/app/tools/android/make_apk.py @@ -42,9 +42,9 @@ def AddExeExtensions(name): exts_str = os.environ.get('PATHEXT', '').lower() exts = [_f for _f in exts_str.split(os.pathsep) if _f] result = [] - result.append(name) for e in exts: result.append(name + e) + result.append(name) return result @@ -60,11 +60,11 @@ def Which(name): return None -def GetAndroidApiLevel(): +def GetAndroidApiLevel(android_path): """Get Highest Android target level installed. return -1 if no targets have been found. 
""" - target_output = RunCommand(['android', 'list', 'target', '-c']) + target_output = RunCommand([android_path, 'list', 'target', '-c']) target_regex = re.compile(r'android-(\d+)') targets = [int(i) for i in target_regex.findall(target_output)] targets.extend([-1]) @@ -181,12 +181,12 @@ def GetExtensionBinaryPathList(): item, data["binary_path"]) else: - print "The extension \"%s\" doesn't exists." % item + print("The extension \"%s\" doesn't exists." % item) sys.exit(1) if os.path.isdir(extension_binary_path): local_extension_list.append(extension_binary_path) else: - print "The extension \"%s\" doesn't exists." % item + print("The extension \"%s\" doesn't exists." % item) sys.exit(1) return local_extension_list @@ -241,7 +241,7 @@ def Execution(options, name): 'installation and your PATH environment variable.') sys.exit(1) - api_level = GetAndroidApiLevel() + api_level = GetAndroidApiLevel(android_path) if api_level < 14: print('Please install Android API level (>=14) first.') sys.exit(3) @@ -271,20 +271,18 @@ def Execution(options, name): key_alias_code = 'xwalkdebug' # Check whether ant is installed. - try: - cmd = ['ant', '-version'] - RunCommand(cmd, shell=True) - except EnvironmentError: - print('Please install ant first.') + ant_path = Which('ant') + if ant_path is None: + print('Ant could not be found. Please make sure it is installed.') sys.exit(4) # Update android project for app and xwalk_core_library. - update_project_cmd = ['android', 'update', 'project', + update_project_cmd = [android_path, 'update', 'project', '--path', os.path.join (xwalk_dir, name), '--target', target_string, '--name', name] if options.mode == 'embedded': - RunCommand(['android', 'update', 'lib-project', + RunCommand([android_path, 'update', 'lib-project', '--path', os.path.join(xwalk_dir, name, 'xwalk_core_library'), '--target', target_string]) update_project_cmd.extend(['-l', 'xwalk_core_library']) @@ -326,15 +324,16 @@ def Execution(options, name): 'embedded APK.' 
% arch) sys.exit(10) - ant_cmd = ['ant', 'release', '-f', os.path.join(xwalk_dir, name, 'build.xml')] + ant_cmd = [ant_path, 'release', '-f', + os.path.join(xwalk_dir, name, 'build.xml')] if not options.verbose: ant_cmd.extend(['-quiet']) - ant_cmd.extend(['-Dkey.store="%s"' % os.path.abspath(key_store)]) - ant_cmd.extend(['-Dkey.alias="%s"' % key_alias]) + ant_cmd.extend(['-Dkey.store=%s' % os.path.abspath(key_store)]) + ant_cmd.extend(['-Dkey.alias=%s' % key_alias]) if key_code: - ant_cmd.extend(['-Dkey.store.password="%s"' % key_code]) + ant_cmd.extend(['-Dkey.store.password=%s' % key_code]) if key_alias_code: - ant_cmd.extend(['-Dkey.alias.password="%s"' % key_alias_code]) + ant_cmd.extend(['-Dkey.alias.password=%s' % key_alias_code]) ant_result = subprocess.call(ant_cmd) if ant_result != 0: print('Command "%s" exited with non-zero exit code %d' diff --git a/src/xwalk/app/tools/android/make_apk_test.py b/src/xwalk/app/tools/android/make_apk_test.py index 80ae231..2cc9b22 100755 --- a/src/xwalk/app/tools/android/make_apk_test.py +++ b/src/xwalk/app/tools/android/make_apk_test.py @@ -165,22 +165,25 @@ class TestMakeApk(unittest.TestCase): arch_list.append('arm') return arch_list - def checkApks(self, apk_name, app_version): - # Check whether some files are contained in the given APK. 
+ def checkApks(self, apk_name, app_version, keystore_path=None): + apks = [] if self._mode.find('shared') != -1: apk_path = '%s_%s.apk' % (apk_name, app_version) - self.checkApk(apk_path, '') + apks.append((apk_path, '')) elif self._mode.find('embedded') != -1: x86_apk_path = '%s_%s_x86.apk' % (apk_name, app_version) if os.path.exists(x86_apk_path): - self.checkApk(x86_apk_path, 'x86') + apks.append((x86_apk_path, 'x86')) arm_apk_path = '%s_%s_arm.apk' % (apk_name, app_version) if os.path.exists(arm_apk_path): - self.checkApk(arm_apk_path, 'arm') + apks.append((arm_apk_path, 'arm')) - def checkApk(self, apk_path, arch): - # Check whether some files are contained in the given apk - # for specified arch. + for apk, apk_arch in apks: + self.checkApk(apk, apk_arch, keystore_path) + + def checkApk(self, apk_path, arch, keystore_path=None): + """Checks whether some files are contained in the given APK, + and optionally verifies its signature.""" cmd = ['jar', 'tvf', apk_path] out = RunCommand(cmd) common_files = ['AndroidManifest.xml', 'classes.dex'] @@ -199,6 +202,12 @@ class TestMakeApk(unittest.TestCase): elif arch == 'arm': self.assertTrue(out.find('armeabi-v7a/libxwalkcore.so') != -1) + if keystore_path: + cmd = ['jarsigner', '-verify', '-keystore', keystore_path, + '-verbose', apk_path] + out = RunCommand(cmd) + self.assertIn('smk', out) + def testName(self): cmd = ['python', 'make_apk.py', '--app-version=1.0.0', '--app-url=http://www.intel.com', @@ -526,16 +535,25 @@ class TestMakeApk(unittest.TestCase): '--keystore-passcode=xwalk-test', '--keystore-alias-passcode=xwalk-test', self._mode] RunCommand(cmd) - self.addCleanup(Clean, 'Example', '1.0.0') - self.assertTrue(os.path.exists('Example')) - apk_list = ['Example.apk', 'Example_x86.apk', 'Example_arm.apk'] - for apk in apk_list: - if os.path.isfile(apk): - cmd = ['jarsigner', '-verify', '-keystore', - keystore_path, '-verbose', apk] - out = RunCommand(cmd) - self.assertTrue(out.find('smk') != -1) - 
self.checkApks('Example', '1.0.0') + self.assertTrue(os.path.isdir('Example')) + self.checkApks('Example', '1.0.0', keystore_path) + Clean('Example', '1.0.0') + + keystore_path_with_space = os.path.join( + 'test_data', 'keystore', 'test keystore') + shutil.copy2(keystore_path, keystore_path_with_space) + keystore_path = os.path.join('test_data', 'keystore', + 'xwalk-test.keystore') + cmd = ['python', 'make_apk.py', '--name=Example', '--app-version=1.0.0', + '--package=org.xwalk.example', '--app-url=http://www.intel.com', + '--keystore-path=%s' % keystore_path_with_space, + '--keystore-alias=xwalk test', + '--keystore-passcode=xwalk-test', + '--keystore-alias-passcode=xwalk test', self._mode] + RunCommand(cmd) + self.assertTrue(os.path.isdir('Example')) + self.checkApks('Example', '1.0.0', keystore_path_with_space) + Clean('Example', '1.0.0') def testManifest(self): manifest_path = os.path.join('test_data', 'manifest', 'manifest.json') diff --git a/src/xwalk/app/tools/android/test_data/keystore/xwalk-test.keystore b/src/xwalk/app/tools/android/test_data/keystore/xwalk-test.keystore index 14c8e08a0246e2fcecceb836e98397eca9785cdd..4dd45e31156c9006af51fa0872f61543b48808ed 100644 GIT binary patch delta 2247 zcmcK4Yd8}M0|)S3F^tG9%C%A)Y?xy>iE@kH#JrA;73PxbVvG~U3Uiw%GD$5$sHp#X{8Oc>)G51*yNdt-0Q56JfM^ui zGbm7%rmhcVja94 z-a;!alj%z<@?D5=Z>?Pv&hVZmQo=V(h-0OM8-*5jwI)=L#B-p(ISJ{q8|DkKMpuAkec<$K$ajm>Kd)IQ=_HSV2Rckctf;N2ru zXUH~7+@8zlRcCYJa~eCzG6#}JwCL7P$M!~fsjW%`+bcVSu~WwsJ7#v^G(A`0^0@T8 z$vr})>=53rws2nBxg|N;iRL{oHaaLzM)&TFv#l5Ux|?rF@hJ{~FDl*M4q>Y-URt`Y zflnkqTQ0t30Wa+No6(ucO}%|J29?JpIlb%@LTFtccd$}bIYWCb&W)KNLrFo|5Yk|F zre(Kwb^;^sHQ>79-r0ODUafkWWEtVuj(MAP%#)i=a%L%e_591vJbd%wU!@f5f=8q5 zPQR_U7qyzXkc3;s!fVR5O>-g)MaM+*VgJ+P2bK65lA7$v476SFsS*#d7Ta%pkz_5U z-(KHcBB7`BMF7#(j`hf@w?S&UhfTBPE@$rphZSw@tP?BgNdMGi69`)r$sJ(~LH>mQ#rBL*#cj|i)ogC{W;q#%x4%Gy%(+UuBkQjJ+h658SmAJ&*J4M(@B|b zDa-z`hGiLCyXb+M{r+-84=Q+N&QsH=&#C=m^gb~<2{m$(Qmq6u}vd5v}K~ 
zTGs#V8`9O=mX{dwm>(UlXRbbcizZ1TW+W(x@ULkXE(DQ?#*8(!yW2Wo9wz*f!h4Bb z>Y}*$J~H06=g`KfLM5MYkn>8~I_sgjbE0IZOtEX;vFlzC`HHnj7+U|h^G1$LRF%ZB z%;}n#_oPkA^SEtmM`_I`rozEX!%B?%t)%Y(JKQEW8fuXE+UapxjO^&7(VOTpm+h;S z7Xys0Y5H99khjUKY27~+I6+PJNu@TD!84Eh8Jq;&UP7kyG0^0*1o^qRIgrLtdl6KJ z>59h^i6XlD>54gk8TejhT>$alSd8Z)X5P6D9q*xJ#+VyeTdZLz(3Rgqx5?e)vBww%b%t@@ z6qT~s{R&XIX`djclhyDcsaqQ46^|2?=l5TBINQb4C0BlGdhsbHG&Wg|r&3|+@duq~ zAo1fWfdJ^Ax+nx10074#sNiS>736sb3<82ckf>nB5&|v+(@L^UT89IHVo(4g2)XCS zcY<9YLVJZ>LIOiV$ss?50ulKqL4aM>880`SJn~JS<+?1-yvI0Cq3X5x_1e1Zo3TdU zrYQ=|?o-^g8g0YOjD6P+KR@p~?>~5YqF*$juBeTmjPYMyk&Yy-w?R9t=M8RfHLfsB z%{J^y#7#a#1Y}{Hop{aJviRIpNdD}l2xxxs^Su~i+q ze+sq}1)V2VN{`AGzDTOfR*ws!lr?f40<}JV*iA2X0(CNbJ#(*H)dA%ji^a-D*Wmm= zHnZ1gP+b0|T{36PEAp8oyl*AG#{SqvwFPG$uYj4enwE!#o0VCww3JoPJ~WjMA) delta 43 ycmdn2)FsIC@9n?03=9lRK+L##D@PWGh=}K#GvRsNT|PGj9#`6moGE^LrxE~R9TA=Y diff --git a/src/xwalk/application/browser/application.cc b/src/xwalk/application/browser/application.cc index 81fa29e..737aa60 100644 --- a/src/xwalk/application/browser/application.cc +++ b/src/xwalk/application/browser/application.cc @@ -103,7 +103,7 @@ Application::~Application() { } template<> -GURL Application::GetStartURL() { +GURL Application::GetStartURL() { GURL url = GetAbsoluteURLFromKey(widget_keys::kLaunchLocalPathKey); if (!url.is_valid()) { LOG(WARNING) << "Failed to find start URL from the 'config.xml'" @@ -127,7 +127,7 @@ GURL Application::GetStartURL() { } template<> -GURL Application::GetStartURL() { +GURL Application::GetStartURL() { GURL url = GetAbsoluteURLFromKey(keys::kStartURLKey); if (url.is_valid()) return url; @@ -150,7 +150,7 @@ GURL Application::GetStartURL() { template<> -ui::WindowShowState Application::GetWindowShowState( +ui::WindowShowState Application::GetWindowShowState( const LaunchParams& params) { if (params.force_fullscreen) return ui::SHOW_STATE_FULLSCREEN; @@ -171,7 +171,7 @@ ui::WindowShowState Application::GetWindowShowState( } template<> -ui::WindowShowState 
Application::GetWindowShowState( +ui::WindowShowState Application::GetWindowShowState( const LaunchParams& params) { if (params.force_fullscreen) return ui::SHOW_STATE_FULLSCREEN; @@ -196,10 +196,10 @@ bool Application::Launch(const LaunchParams& launch_params) { } CHECK(!render_process_host_); - bool is_wgt = data_->GetPackageType() == Package::WGT; + bool is_wgt = data_->manifest_type() == Manifest::TYPE_WIDGET; - GURL url = is_wgt ? GetStartURL(): - GetStartURL(); + GURL url = is_wgt ? GetStartURL() : + GetStartURL(); if (!url.is_valid()) return false; @@ -216,8 +216,9 @@ bool Application::Launch(const LaunchParams& launch_params) { NativeAppWindow::CreateParams params; params.net_wm_pid = launch_params.launcher_pid; - params.state = is_wgt ? GetWindowShowState(launch_params): - GetWindowShowState(launch_params); + params.state = is_wgt ? + GetWindowShowState(launch_params): + GetWindowShowState(launch_params); params.splash_screen_path = GetSplashScreenPath(); @@ -385,7 +386,7 @@ void Application::InitSecurityPolicy() { // CSP policy takes precedence over WARP. if (data_->HasCSPDefined()) security_policy_.reset(new SecurityPolicyCSP(this)); - else if (data_->GetPackageType() == Package::WGT) + else if (data_->manifest_type() == Manifest::TYPE_WIDGET) security_policy_.reset(new SecurityPolicyWARP(this)); if (security_policy_) diff --git a/src/xwalk/application/browser/application.h b/src/xwalk/application/browser/application.h index 4a6840c..854ea94 100644 --- a/src/xwalk/application/browser/application.h +++ b/src/xwalk/application/browser/application.h @@ -150,9 +150,9 @@ class Application : public Runtime::Observer, // Try to extract the URL from different possible keys for entry points in the // manifest, returns it and the entry point used. 
- template GURL GetStartURL(); + template GURL GetStartURL(); - template + template ui::WindowShowState GetWindowShowState(const LaunchParams& params); GURL GetAbsoluteURLFromKey(const std::string& key); diff --git a/src/xwalk/application/browser/application_protocols.cc b/src/xwalk/application/browser/application_protocols.cc index 67d668d..dcce8d4 100644 --- a/src/xwalk/application/browser/application_protocols.cc +++ b/src/xwalk/application/browser/application_protocols.cc @@ -247,7 +247,7 @@ ApplicationProtocolHandler::MaybeCreateJob( if (application) { directory_path = application->Path(); - const char* csp_key = GetCSPKey(application->GetPackageType()); + const char* csp_key = GetCSPKey(application->manifest_type()); const CSPInfo* csp_info = static_cast( application->GetManifestData(csp_key)); if (csp_info) { @@ -262,10 +262,8 @@ ApplicationProtocolHandler::MaybeCreateJob( } } - const std::string& path = request->url().path(); - std::list locales; - if (application && application->GetPackageType() == Package::WGT) { + if (application && application->manifest_type() == Manifest::TYPE_WIDGET) { GetUserAgentLocales(GetSystemLocale(), locales); GetUserAgentLocales(application->GetManifest()->default_locale(), locales); } diff --git a/src/xwalk/application/browser/application_service.cc b/src/xwalk/application/browser/application_service.cc index 6a68e4e..6037e49 100644 --- a/src/xwalk/application/browser/application_service.cc +++ b/src/xwalk/application/browser/application_service.cc @@ -75,18 +75,22 @@ Application* ApplicationService::Launch( return application; } -Application* ApplicationService::LaunchFromUnpackedPath( - const base::FilePath& path, const Application::LaunchParams& params) { +Application* ApplicationService::LaunchFromManifestPath( + const base::FilePath& path, Manifest::Type manifest_type, + const Application::LaunchParams& params) { std::string error; - scoped_refptr application_data; - if (!base::DirectoryExists(path)) { - LOG(ERROR) << 
"Invalid input parameter: " << path.AsUTF8Unsafe(); + scoped_ptr manifest = LoadManifest(path, manifest_type, &error); + if (!manifest) { + LOG(ERROR) << "Failed to load manifest."; return NULL; } - application_data = - LoadApplication(path, ApplicationData::LOCAL_DIRECTORY, &error); + base::FilePath app_path = path.DirName(); + LOG(ERROR) << "Loading app from " << app_path.MaybeAsASCII(); + scoped_refptr application_data = ApplicationData::Create( + app_path, std::string(), ApplicationData::LOCAL_DIRECTORY, + manifest.Pass(), &error); if (!application_data) { LOG(ERROR) << "Error occurred while trying to load application: " << error; @@ -111,17 +115,17 @@ Application* ApplicationService::LaunchFromPackagePath( return NULL; } - std::string error; - scoped_refptr application_data; - base::CreateTemporaryDirInDir(tmp_dir, package->name(), &target_dir); - if (package->ExtractTo(target_dir)) { - std::string id = tmp_dir.BaseName().AsUTF8Unsafe(); - application_data = - LoadApplication( - target_dir, id, ApplicationData::TEMP_DIRECTORY, &error); + if (!package->ExtractTo(target_dir)) { + LOG(ERROR) << "Failed to unpack to a temporary directory: " + << target_dir.MaybeAsASCII(); + return NULL; } + std::string error; + scoped_refptr application_data = LoadApplication( + target_dir, std::string(), ApplicationData::TEMP_DIRECTORY, + package->manifest_type(), &error); if (!application_data) { LOG(ERROR) << "Error occurred while trying to load application: " << error; @@ -144,17 +148,20 @@ Application* ApplicationService::LaunchHostedURL( const std::string& app_id = GenerateId(url_spec); - base::DictionaryValue manifest; + scoped_ptr settings(new base::DictionaryValue()); // FIXME: define permissions! - manifest.SetString(application_manifest_keys::kStartURLKey, url_spec); + settings->SetString(application_manifest_keys::kStartURLKey, url_spec); // FIXME: Why use URL as name? 
- manifest.SetString(application_manifest_keys::kNameKey, url_spec); - manifest.SetString(application_manifest_keys::kXWalkVersionKey, "0"); + settings->SetString(application_manifest_keys::kNameKey, url_spec); + settings->SetString(application_manifest_keys::kXWalkVersionKey, "0"); + + scoped_ptr manifest( + new Manifest(settings.Pass(), Manifest::TYPE_MANIFEST)); std::string error; scoped_refptr app_data = - ApplicationData::Create(base::FilePath(), - ApplicationData::EXTERNAL_URL, manifest, app_id, &error); + ApplicationData::Create(base::FilePath(), app_id, + ApplicationData::EXTERNAL_URL, manifest.Pass(), &error); DCHECK(app_data); return Launch(app_data, params); diff --git a/src/xwalk/application/browser/application_service.h b/src/xwalk/application/browser/application_service.h index 92dc801..c73712e 100644 --- a/src/xwalk/application/browser/application_service.h +++ b/src/xwalk/application/browser/application_service.h @@ -39,10 +39,10 @@ class ApplicationService : public Application::Observer { static scoped_ptr Create( RuntimeContext* runtime_context); - // Launch an application using path to a local directory which - // contains manifest file of an unpacked application. - Application* LaunchFromUnpackedPath( - const base::FilePath& path, + // Launch an unpacked application using path to the manifest file + // of an unpacked application. + Application* LaunchFromManifestPath( + const base::FilePath& path, Manifest::Type manifest_type, const Application::LaunchParams& params = Application::LaunchParams()); // Launch an application using path to its package file. 
diff --git a/src/xwalk/application/browser/application_system.cc b/src/xwalk/application/browser/application_system.cc index ce1544c..740fc3a 100644 --- a/src/xwalk/application/browser/application_system.cc +++ b/src/xwalk/application/browser/application_system.cc @@ -92,11 +92,8 @@ bool ApplicationSystem::LaunchFromCommandLine( return true; } - if (base::DirectoryExists(path)) { // Handles unpacked application. - run_default_message_loop = application_service_->LaunchFromUnpackedPath( - path, launch_params(cmd_line)); - return true; - } + if (!base::PathExists(path)) + return false; if (path.MatchesExtension(FILE_PATH_LITERAL(".xpk")) || path.MatchesExtension(FILE_PATH_LITERAL(".wgt"))) { @@ -105,6 +102,18 @@ bool ApplicationSystem::LaunchFromCommandLine( return true; } + if (path.MatchesExtension(FILE_PATH_LITERAL(".json"))) { + run_default_message_loop = application_service_->LaunchFromManifestPath( + path, Manifest::TYPE_MANIFEST, launch_params(cmd_line)); + return true; + } + + if (path.MatchesExtension(FILE_PATH_LITERAL(".xml"))) { + run_default_message_loop = application_service_->LaunchFromManifestPath( + path, Manifest::TYPE_WIDGET, launch_params(cmd_line)); + return true; + } + return false; } diff --git a/src/xwalk/application/common/application_data.cc b/src/xwalk/application/common/application_data.cc index 8048c4c..a3ad4a9 100644 --- a/src/xwalk/application/common/application_data.cc +++ b/src/xwalk/application/common/application_data.cc @@ -43,16 +43,11 @@ namespace application { // static scoped_refptr ApplicationData::Create( - const base::FilePath& path, - SourceType source_type, - const base::DictionaryValue& manifest_data, - const std::string& explicit_id, + const base::FilePath& path, const std::string& explicit_id, + SourceType source_type, scoped_ptr manifest, std::string* error_message) { DCHECK(error_message); base::string16 error; - scoped_ptr manifest(new Manifest( - scoped_ptr(manifest_data.DeepCopy()))); - if 
(!manifest->ValidateManifest(error_message)) return NULL; @@ -63,14 +58,20 @@ scoped_refptr ApplicationData::Create( return NULL; } + ManifestHandlerRegistry* registry = + ManifestHandlerRegistry::GetInstance(app_data->manifest_type()); + + if (!registry->ValidateAppManifest(app_data, error_message)) + return NULL; + return app_data; } // static GURL ApplicationData::GetBaseURLFromApplicationId( const std::string& application_id) { - return GURL(std::string(xwalk::application::kApplicationScheme) + - url::kStandardSchemeSeparator + application_id + "/"); + return GURL(std::string(kApplicationScheme) + + url::kStandardSchemeSeparator + application_id + "/"); } ApplicationData::ManifestData* ApplicationData::GetManifestData( @@ -88,13 +89,9 @@ void ApplicationData::SetManifestData(const std::string& key, manifest_data_[key] = linked_ptr(data); } -const std::string& ApplicationData::ID() const { - return manifest_->GetApplicationID(); -} - #if defined(OS_TIZEN) std::string ApplicationData::GetPackageID() const { - return AppIdToPkgId(manifest_->GetApplicationID()); + return AppIdToPkgId(application_id_); } #endif @@ -106,21 +103,26 @@ const std::string ApplicationData::VersionString() const { } bool ApplicationData::IsHostedApp() const { - return GetManifest()->IsHosted(); + bool hosted = source_type_ == EXTERNAL_URL; +#if defined(OS_TIZEN) + if (manifest_->HasPath(widget_keys::kContentNamespace)) { + std::string ns; + if (manifest_->GetString(widget_keys::kContentNamespace, &ns) && + ns == kTizenNamespacePrefix) + hosted = true; + } +#endif + return hosted; } ApplicationData::ApplicationData(const base::FilePath& path, - SourceType source_type, scoped_ptr manifest) + SourceType source_type, scoped_ptr manifest) : manifest_version_(0), + path_(path), manifest_(manifest.release()), finished_parsing_manifest_(false), source_type_(source_type) { - DCHECK(path.empty() || path.IsAbsolute()); - path_ = path; - if (manifest_->HasPath(widget_keys::kWidgetKey)) - package_type_ 
= Package::WGT; - else - package_type_ = Package::XPK; + DCHECK(path_.empty() || path_.IsAbsolute()); } ApplicationData::~ApplicationData() { @@ -146,15 +148,11 @@ GURL ApplicationData::GetResourceURL(const GURL& application_url, return ret_val; } -Manifest::Type ApplicationData::GetType() const { - return manifest_->GetType(); -} - bool ApplicationData::Init(const std::string& explicit_id, base::string16* error) { DCHECK(error); ManifestHandlerRegistry* registry = - ManifestHandlerRegistry::GetInstance(GetPackageType()); + ManifestHandlerRegistry::GetInstance(manifest_type()); if (!registry->ParseAppManifest(this, error)) return false; @@ -177,7 +175,7 @@ bool ApplicationData::LoadID(const std::string& explicit_id, base::string16* error) { std::string application_id; #if defined(OS_TIZEN) - if (GetPackageType() == Package::WGT) { + if (manifest_type() == Manifest::TYPE_WIDGET) { const TizenApplicationInfo* tizen_app_info = static_cast(GetManifestData( widget_keys::kTizenApplicationKey)); @@ -191,13 +189,13 @@ bool ApplicationData::LoadID(const std::string& explicit_id, } if (!application_id.empty()) { - manifest_->SetApplicationID(application_id); + application_id_ = application_id; return true; } #endif if (!explicit_id.empty()) { - manifest_->SetApplicationID(explicit_id); + application_id_ = explicit_id; return true; } @@ -206,17 +204,17 @@ bool ApplicationData::LoadID(const std::string& explicit_id, NOTREACHED() << "Could not create ID from path."; return false; } - manifest_->SetApplicationID(application_id); + application_id_ = application_id; return true; } bool ApplicationData::LoadName(base::string16* error) { DCHECK(error); base::string16 localized_name; - std::string name_key(GetNameKey(GetPackageType())); + std::string name_key(GetNameKey(manifest_type())); if (!manifest_->GetString(name_key, &localized_name) && - package_type_ == Package::XPK) { + manifest_type() == Manifest::TYPE_MANIFEST) { *error = base::ASCIIToUTF16(errors::kInvalidName); return 
false; } @@ -232,7 +230,7 @@ bool ApplicationData::LoadVersion(base::string16* error) { version_.reset(new base::Version()); - if (package_type_ == Package::WGT) { + if (manifest_type() == Manifest::TYPE_WIDGET) { bool ok = manifest_->GetString(widget_keys::kVersionKey, &version_str); if (!ok) { *error = base::ASCIIToUTF16(errors::kInvalidVersion); @@ -284,7 +282,7 @@ bool ApplicationData::LoadVersion(base::string16* error) { bool ApplicationData::LoadDescription(base::string16* error) { DCHECK(error); // FIXME: Better to assert on use from Widget. - if (package_type_ != Package::XPK) + if (manifest_type() != Manifest::TYPE_MANIFEST) return true; // No error. bool hasDeprecatedKey = manifest_->HasKey(keys::kDeprecatedDescriptionKey); @@ -351,11 +349,11 @@ PermissionSet ApplicationData::GetManifestPermissions() const { bool ApplicationData::HasCSPDefined() const { #if defined(OS_TIZEN) - return manifest_->HasPath(GetCSPKey(package_type_)) || + return manifest_->HasPath(GetCSPKey(manifest_type())) || manifest_->HasPath(widget_keys::kCSPReportOnlyKey) || manifest_->HasPath(widget_keys::kAllowNavigationKey); #else - return manifest_->HasPath(GetCSPKey(package_type_)); + return manifest_->HasPath(GetCSPKey(manifest_type())); #endif } diff --git a/src/xwalk/application/common/application_data.h b/src/xwalk/application/common/application_data.h index deb61ec..f201ab2 100644 --- a/src/xwalk/application/common/application_data.h +++ b/src/xwalk/application/common/application_data.h @@ -20,7 +20,6 @@ #include "base/strings/string_util.h" #include "base/synchronization/lock.h" #include "base/threading/thread_checker.h" -#include "base/time/time.h" #include "url/gurl.h" #include "xwalk/application/common/manifest.h" #include "xwalk/application/common/permission_types.h" @@ -67,13 +66,9 @@ class ApplicationData : public base::RefCountedThreadSafe { virtual ~ManifestData() {} }; - static scoped_refptr Create(const base::FilePath& path, - SourceType source_type, - const 
base::DictionaryValue& manifest_data, - const std::string& explicit_id, - std::string* error_message); - - Manifest::Type GetType() const; + static scoped_refptr Create(const base::FilePath& app_path, + const std::string& explicit_id, SourceType source_type, + scoped_ptr manifest, std::string* error_message); // Returns an absolute url to a resource inside of an application. The // |application_url| argument should be the url() from an Application object. @@ -104,7 +99,8 @@ class ApplicationData : public base::RefCountedThreadSafe { void SetPath(const base::FilePath& path) { path_ = path; } const GURL& URL() const { return application_url_; } SourceType source_type() const { return source_type_; } - const std::string& ID() const; + Manifest::Type manifest_type() const { return manifest_->type(); } + const std::string& ID() const { return application_id_; } #if defined(OS_TIZEN) std::string GetPackageID() const; #endif @@ -118,8 +114,6 @@ class ApplicationData : public base::RefCountedThreadSafe { return manifest_.get(); } - const base::Time& install_time() const { return install_time_; } - // App-related. bool IsHostedApp() const; @@ -131,8 +125,6 @@ class ApplicationData : public base::RefCountedThreadSafe { void ClearPermissions(); PermissionSet GetManifestPermissions() const; - Package::Type GetPackageType() const { return package_type_; } - bool HasCSPDefined() const; bool SetApplicationLocale(const std::string& locale, base::string16* error); @@ -141,8 +133,8 @@ class ApplicationData : public base::RefCountedThreadSafe { friend class base::RefCountedThreadSafe; friend class ApplicationStorageImpl; - ApplicationData(const base::FilePath& path, SourceType source_type, - scoped_ptr manifest); + ApplicationData(const base::FilePath& path, + SourceType source_type, scoped_ptr manifest); virtual ~ApplicationData(); // Initialize the application from a parsed manifest. 
@@ -178,12 +170,10 @@ class ApplicationData : public base::RefCountedThreadSafe { // The absolute path to the directory the application is stored in. base::FilePath path_; - // System events - std::set events_; - - // If it's true, means the data have been changed, - // and need to save in database. - bool is_dirty_; + // A persistent, globally unique ID. An application's ID is used in things + // like directory structures and URLs, and is expected to not change across + // versions. + std::string application_id_; // The base application url for the application. GURL application_url_; @@ -203,8 +193,6 @@ class ApplicationData : public base::RefCountedThreadSafe { // Set to true at the end of InitValue when initialization is finished. bool finished_parsing_manifest_; - base::Time install_time_; - // Ensures that any call to GetManifestData() prior to finishing // initialization happens from the same thread (this can happen when certain // parts of the initialization process need information from previous parts). @@ -213,9 +201,6 @@ class ApplicationData : public base::RefCountedThreadSafe { // Application's persistent permissions. StoredPermissionMap permission_map_; - // The package type, wgt or xpk. - Package::Type package_type_; - // The source the application was loaded from. 
SourceType source_type_; diff --git a/src/xwalk/application/common/application_file_util.cc b/src/xwalk/application/common/application_file_util.cc index fb9a302..419975a 100644 --- a/src/xwalk/application/common/application_file_util.cc +++ b/src/xwalk/application/common/application_file_util.cc @@ -163,47 +163,6 @@ inline bool IsElementSupportSpanAndDir(xmlNode* root) { return false; } -// FIXME: This function is wrong and has to be re-implemented -// further (see XWALK-2230) -bool GetPackageType(const base::FilePath& path, - xwalk::application::Package::Type* package_type, - std::string* error) { - base::FilePath manifest_path; - - manifest_path = path.Append(xwalk::application::kManifestXpkFilename); - if (base::PathExists(manifest_path)) { - *package_type = xwalk::application::Package::XPK; - return true; - } - - manifest_path = path.Append(xwalk::application::kManifestWgtFilename); - if (base::PathExists(manifest_path)) { - *package_type = xwalk::application::Package::WGT; - return true; - } - - *error = base::StringPrintf("%s", errors::kManifestUnreadable); - return false; -} - -#if defined(OS_TIZEN) -bool GetPackageType(const std::string& application_id, - xwalk::application::Package::Type* package_type, - std::string* error) { - if (xwalk::application::IsValidWGTID(application_id)) { - *package_type = xwalk::application::Package::WGT; - return true; - } else if (xwalk::application::IsValidXPKID(application_id)) { - *package_type = xwalk::application::Package::XPK; - return true; - } - - *error = base::StringPrintf("Invalid application id: %s", - application_id.c_str()); - return false; -} -#endif - bool IsSingletonElement(const std::string& name) { for (int i = 0; i < arraysize(kSingletonElements); ++i) if (kSingletonElements[i] == name) @@ -231,6 +190,8 @@ FileDeleter::~FileDeleter() { base::DeleteFile(path_, recursive_); } +namespace { + // Load XML node into Dictionary structure. 
// The keys for the XML node to Dictionary mapping are described below: // XML Dictionary @@ -359,72 +320,18 @@ base::DictionaryValue* LoadXMLNode( return value.release(); } -scoped_refptr LoadApplication( - const base::FilePath& application_path, - ApplicationData::SourceType source_type, - std::string* error) { - Package::Type package_type; - if (!GetPackageType(application_path, &package_type, error)) - return NULL; - - return LoadApplication(application_path, std::string(), - source_type, package_type, error); -} - -scoped_refptr LoadApplication( - const base::FilePath& application_path, - const std::string& application_id, - ApplicationData::SourceType source_type, - std::string* error) { - Package::Type package_type; -#if defined(OS_TIZEN) - if (!GetPackageType(application_id, &package_type, error)) -#else - if (!GetPackageType(application_path, &package_type, error)) -#endif - return NULL; - - return LoadApplication(application_path, application_id, - source_type, package_type, error); -} - -scoped_refptr LoadApplication( - const base::FilePath& application_path, - const std::string& application_id, - ApplicationData::SourceType source_type, - Package::Type package_type, - std::string* error) { - scoped_ptr manifest( - LoadManifest(application_path, package_type, error)); - if (!manifest.get()) - return NULL; - - scoped_refptr application = ApplicationData::Create( - application_path, - source_type, - *manifest, - application_id, - error); - if (!application) - return NULL; - - ManifestHandlerRegistry* registry = - manifest->HasKey(widget_keys::kWidgetKey) - ? 
ManifestHandlerRegistry::GetInstance(Package::WGT) - : ManifestHandlerRegistry::GetInstance(Package::XPK); - - if (!registry->ValidateAppManifest(application, error)) - return NULL; +} // namespace - return application; -} +template +scoped_ptr LoadManifest( + const base::FilePath& manifest_path, std::string* error); -static base::DictionaryValue* LoadManifestXpk( - const base::FilePath& manifest_path, - std::string* error) { +template <> +scoped_ptr LoadManifest( + const base::FilePath& manifest_path, std::string* error) { JSONFileValueSerializer serializer(manifest_path); scoped_ptr root(serializer.Deserialize(NULL, error)); - if (!root.get()) { + if (!root) { if (error->empty()) { // If |error| is empty, than the file could not be read. // It would be cleaner to have the JSON reader give a specific error @@ -433,28 +340,28 @@ static base::DictionaryValue* LoadManifestXpk( *error = base::StringPrintf("%s", errors::kManifestUnreadable); } else { *error = base::StringPrintf("%s %s", - errors::kManifestParseError, - error->c_str()); + errors::kManifestParseError, error->c_str()); } - return NULL; + return scoped_ptr(); } if (!root->IsType(base::Value::TYPE_DICTIONARY)) { *error = base::StringPrintf("%s", errors::kManifestUnreadable); - return NULL; + return scoped_ptr(); } - base::DictionaryValue* dv = - static_cast(root.release()); + scoped_ptr dv = make_scoped_ptr( + static_cast(root.release())); #if defined(OS_TIZEN) // Ignore any Tizen application ID, as this is automatically generated. 
dv->Remove(keys::kTizenAppIdKey, NULL); #endif - return dv; + return make_scoped_ptr(new Manifest(dv.Pass(), Manifest::TYPE_MANIFEST)); } -static base::DictionaryValue* LoadManifestWgt( +template <> +scoped_ptr LoadManifest( const base::FilePath& manifest_path, std::string* error) { xmlDoc * doc = NULL; @@ -462,7 +369,7 @@ static base::DictionaryValue* LoadManifestWgt( doc = xmlReadFile(manifest_path.MaybeAsASCII().c_str(), NULL, 0); if (doc == NULL) { *error = base::StringPrintf("%s", errors::kManifestUnreadable); - return NULL; + return scoped_ptr(); } root_node = xmlDocGetRootElement(doc); base::DictionaryValue* dv = LoadXMLNode(root_node); @@ -470,24 +377,51 @@ static base::DictionaryValue* LoadManifestWgt( if (dv) result->Set(ToConstCharPointer(root_node->name), dv); - return result.release(); + return make_scoped_ptr(new Manifest(result.Pass(), Manifest::TYPE_WIDGET)); +} + +scoped_ptr LoadManifest(const base::FilePath& manifest_path, + Manifest::Type type, std::string* error) { + if (type == Manifest::TYPE_MANIFEST) + return LoadManifest(manifest_path, error); + + if (type == Manifest::TYPE_WIDGET) + return LoadManifest(manifest_path, error); + + *error = base::StringPrintf("%s", errors::kManifestUnreadable); + return scoped_ptr(); } -base::DictionaryValue* LoadManifest(const base::FilePath& application_path, - Package::Type package_type, - std::string* error) { +base::FilePath GetManifestPath( + const base::FilePath& app_directory, Manifest::Type type) { base::FilePath manifest_path; + switch (type) { + case Manifest::TYPE_WIDGET: + manifest_path = app_directory.Append(kManifestWgtFilename); + break; + case Manifest::TYPE_MANIFEST: + manifest_path = app_directory.Append(kManifestXpkFilename); + break; + default: + NOTREACHED(); + } + + return manifest_path; +} - manifest_path = application_path.Append(kManifestXpkFilename); - if (package_type == Package::XPK) - return LoadManifestXpk(manifest_path, error); +scoped_refptr LoadApplication( + const 
base::FilePath& app_root, const std::string& app_id, + ApplicationData::SourceType source_type, Manifest::Type manifest_type, + std::string* error) { + base::FilePath manifest_path = GetManifestPath(app_root, manifest_type); - manifest_path = application_path.Append(kManifestWgtFilename); - if (package_type == Package::WGT) - return LoadManifestWgt(manifest_path, error); + scoped_ptr manifest = LoadManifest( + manifest_path, manifest_type, error); + if (!manifest) + return NULL; - *error = base::StringPrintf("%s", errors::kManifestUnreadable); - return NULL; + return ApplicationData::Create( + app_root, app_id, source_type, manifest.Pass(), error); } base::FilePath ApplicationURLToRelativeFilePath(const GURL& url) { diff --git a/src/xwalk/application/common/application_file_util.h b/src/xwalk/application/common/application_file_util.h index 30e07f4..82aa89c 100644 --- a/src/xwalk/application/common/application_file_util.h +++ b/src/xwalk/application/common/application_file_util.h @@ -22,8 +22,6 @@ class FilePath; namespace xwalk { namespace application { -class ApplicationData; - class FileDeleter { public: FileDeleter(const base::FilePath& path, bool recursive); @@ -37,33 +35,21 @@ class FileDeleter { bool recursive_; }; -// Loads and validates an application from the specified directory. Returns NULL +// Loads an application manifest from the specified directory. Returns NULL // on failure, with a description of the error in |error|. -scoped_refptr LoadApplication( - const base::FilePath& application_root, - ApplicationData::SourceType source_type, - std::string* error); +scoped_ptr LoadManifest( + const base::FilePath& file_path, Manifest::Type type, std::string* error); -// The same as LoadApplication except use the provided |application_id|. 
-scoped_refptr LoadApplication( - const base::FilePath& application_root, - const std::string& application_id, - ApplicationData::SourceType source_type, - std::string* error); +base::FilePath GetManifestPath( + const base::FilePath& app_directory, Manifest::Type type); +// Loads and validates an application from the specified directory. Returns NULL +// on failure, with a description of the error in |error|. scoped_refptr LoadApplication( - const base::FilePath& application_root, - const std::string& application_id, - ApplicationData::SourceType source_type, - Package::Type package_type, + const base::FilePath& app_root, const std::string& app_id, + ApplicationData::SourceType source_type, Manifest::Type manifest_type, std::string* error); -// Loads an application manifest from the specified directory. Returns NULL -// on failure, with a description of the error in |error|. -base::DictionaryValue* LoadManifest(const base::FilePath& application_root, - Package::Type package_type, - std::string* error); - // Get a relative file path from an app:// URL. 
base::FilePath ApplicationURLToRelativeFilePath(const GURL& url); diff --git a/src/xwalk/application/common/application_file_util_unittest.cc b/src/xwalk/application/common/application_file_util_unittest.cc index 163a844..4c9c894 100644 --- a/src/xwalk/application/common/application_file_util_unittest.cc +++ b/src/xwalk/application/common/application_file_util_unittest.cc @@ -41,7 +41,8 @@ TEST_F(ApplicationFileUtilTest, LoadApplicationWithValidPath) { std::string error; scoped_refptr application(LoadApplication( - install_dir, ApplicationData::LOCAL_DIRECTORY, &error)); + install_dir, std::string(), ApplicationData::LOCAL_DIRECTORY, + Manifest::TYPE_MANIFEST, &error)); ASSERT_TRUE(application != NULL); EXPECT_EQ("The first application that I made.", application->Description()); } @@ -60,7 +61,8 @@ TEST_F(ApplicationFileUtilTest, std::string error; scoped_refptr application(LoadApplication( - install_dir, ApplicationData::LOCAL_DIRECTORY, &error)); + install_dir, std::string(), ApplicationData::LOCAL_DIRECTORY, + Manifest::TYPE_WIDGET, &error)); ASSERT_TRUE(application == NULL); ASSERT_FALSE(error.empty()); ASSERT_STREQ("Manifest file is missing or unreadable.", error.c_str()); @@ -80,7 +82,8 @@ TEST_F(ApplicationFileUtilTest, std::string error; scoped_refptr application(LoadApplication( - install_dir, ApplicationData::LOCAL_DIRECTORY, &error)); + install_dir, std::string(), ApplicationData::LOCAL_DIRECTORY, + Manifest::TYPE_MANIFEST, &error)); ASSERT_TRUE(application == NULL); ASSERT_FALSE(error.empty()); ASSERT_STREQ("Manifest is not valid JSON." 
@@ -89,13 +92,15 @@ TEST_F(ApplicationFileUtilTest, } static scoped_refptr LoadApplicationManifest( - base::DictionaryValue* manifest, + base::DictionaryValue* values, const base::FilePath& manifest_dir, ApplicationData::SourceType location, int extra_flags, std::string* error) { + scoped_ptr manifest = make_scoped_ptr( + new Manifest(make_scoped_ptr(values->DeepCopy()))); scoped_refptr application = ApplicationData::Create( - manifest_dir, location, *manifest, std::string(), error); + manifest_dir, std::string(), location, manifest.Pass(), error); return application; } diff --git a/src/xwalk/application/common/application_manifest_constants.cc b/src/xwalk/application/common/application_manifest_constants.cc index 4ed0b96..76c992f 100644 --- a/src/xwalk/application/common/application_manifest_constants.cc +++ b/src/xwalk/application/common/application_manifest_constants.cc @@ -147,37 +147,37 @@ const char kManifestUnreadable[] = namespace application { -const char* GetNameKey(Package::Type package_type) { - if (package_type == Package::WGT) +const char* GetNameKey(Manifest::Type manifest_type) { + if (manifest_type == Manifest::TYPE_WIDGET) return application_widget_keys::kNameKey; return application_manifest_keys::kNameKey; } -const char* GetVersionKey(Package::Type package_type) { - if (package_type == Package::WGT) +const char* GetVersionKey(Manifest::Type manifest_type) { + if (manifest_type == Manifest::TYPE_WIDGET) return application_widget_keys::kVersionKey; return application_manifest_keys::kXWalkVersionKey; } -const char* GetCSPKey(Package::Type package_type) { - if (package_type == Package::WGT) +const char* GetCSPKey(Manifest::Type manifest_type) { + if (manifest_type == Manifest::TYPE_WIDGET) return application_widget_keys::kCSPKey; return application_manifest_keys::kCSPKey; } #if defined(OS_TIZEN) -const char* GetTizenAppIdKey(Package::Type package_type) { - if (package_type == Package::WGT) +const char* GetTizenAppIdKey(Manifest::Type manifest_type) { 
+ if (manifest_type == Manifest::TYPE_WIDGET) return application_widget_keys::kTizenAppIdKey; return application_manifest_keys::kTizenAppIdKey; } -const char* GetIcon128Key(Package::Type package_type) { - if (package_type == Package::WGT) +const char* GetIcon128Key(Manifest::Type manifest_type) { + if (manifest_type == Manifest::TYPE_WIDGET) return application_widget_keys::kIcon128Key; return application_manifest_keys::kIcon128Key; diff --git a/src/xwalk/application/common/application_manifest_constants.h b/src/xwalk/application/common/application_manifest_constants.h index 85ebc2d..1d92d23 100644 --- a/src/xwalk/application/common/application_manifest_constants.h +++ b/src/xwalk/application/common/application_manifest_constants.h @@ -6,7 +6,7 @@ #define XWALK_APPLICATION_COMMON_APPLICATION_MANIFEST_CONSTANTS_H_ #include "xwalk/application/common/manifest.h" -#include "xwalk/application/common/package/package.h" + // Keys used in JSON representation of applications. namespace xwalk { namespace application_manifest_keys { @@ -119,11 +119,11 @@ namespace application_manifest_errors { } // namespace application_manifest_errors namespace application { -const char* GetNameKey(Package::Type type); -const char* GetCSPKey(Package::Type type); +const char* GetNameKey(Manifest::Type type); +const char* GetCSPKey(Manifest::Type type); #if defined(OS_TIZEN) -const char* GetTizenAppIdKey(Package::Type type); -const char* GetIcon128Key(Package::Type type); +const char* GetTizenAppIdKey(Manifest::Type type); +const char* GetIcon128Key(Manifest::Type type); #endif } // namespace application } // namespace xwalk diff --git a/src/xwalk/application/common/id_util.cc b/src/xwalk/application/common/id_util.cc index 86de640..348b6d7 100644 --- a/src/xwalk/application/common/id_util.cc +++ b/src/xwalk/application/common/id_util.cc @@ -12,7 +12,7 @@ #include "xwalk/application/common/manifest_handlers/tizen_application_handler.h" #if defined(OS_TIZEN) -#include 
"xwalk/application/common/tizen/package_path.h" +#include "xwalk/application/common/tizen/package_query.h" #include "third_party/re2/re2/re2.h" #endif @@ -101,10 +101,7 @@ std::string PkgIdToAppId(const std::string& id) { bool IsValidApplicationID(const std::string& id) { #if defined(OS_TIZEN) - if (IsValidWGTID(id) || - IsValidXPKID(id)) - return true; - return false; + return (IsValidWGTID(id) || IsValidXPKID(id)); #endif std::string temp = base::StringToLowerASCII(id); diff --git a/src/xwalk/application/common/manifest.cc b/src/xwalk/application/common/manifest.cc index 7efe82e..45e11d2 100644 --- a/src/xwalk/application/common/manifest.cc +++ b/src/xwalk/application/common/manifest.cc @@ -63,29 +63,10 @@ scoped_ptr ExpandUserAgentLocalesList(const scoped_ptr& list) { } // namespace -Manifest::Manifest(scoped_ptr value) +Manifest::Manifest(scoped_ptr value, Type type) : data_(value.Pass()), i18n_data_(new base::DictionaryValue), - type_(TYPE_UNKNOWN) { - // FIXME: Hosted apps can contain start_url. Below is wrong. 
- if (data_->Get(keys::kStartURLKey, NULL)) { - type_ = TYPE_PACKAGED_APP; - } else if (data_->HasKey(keys::kAppKey)) { - if (data_->Get(keys::kLaunchWebURLKey, NULL)) { - type_ = TYPE_HOSTED_APP; - } else if (data_->Get(keys::kLaunchLocalPathKey, NULL)) { - type_ = TYPE_PACKAGED_APP; - } -#if defined(OS_TIZEN) - } else if (HasPath(widget_keys::kContentNamespace)) { - std::string ns; - if (data_->GetString(widget_keys::kContentNamespace, &ns) && - ns == kTizenNamespacePrefix) - type_ = TYPE_HOSTED_APP; - else - type_ = TYPE_PACKAGED_APP; -#endif - } + type_(type) { if (data_->HasKey(widget_keys::kWidgetKey) && data_->Get(widget_keys::kWidgetKey, NULL)) @@ -181,8 +162,8 @@ bool Manifest::GetList( Manifest* Manifest::DeepCopy() const { Manifest* manifest = new Manifest( - scoped_ptr(data_->DeepCopy())); - manifest->SetApplicationID(application_id_); + scoped_ptr(data_->DeepCopy()), + type()); return manifest; } diff --git a/src/xwalk/application/common/manifest.h b/src/xwalk/application/common/manifest.h index 3485ef0..7fa6417 100644 --- a/src/xwalk/application/common/manifest.h +++ b/src/xwalk/application/common/manifest.h @@ -23,27 +23,21 @@ namespace application { class Manifest { public: enum Type { - TYPE_UNKNOWN = 0, - TYPE_HOSTED_APP, - TYPE_PACKAGED_APP + TYPE_MANIFEST, // Corresponds to w3c.github.io/manifest + TYPE_WIDGET // Corresponds to http://www.w3.org/TR/widgets }; - explicit Manifest(scoped_ptr value); + explicit Manifest( + scoped_ptr value, Type type = TYPE_MANIFEST); ~Manifest(); - const std::string& GetApplicationID() const { return application_id_; } - void SetApplicationID(const std::string& id) { application_id_ = id; } - // Returns false and |error| will be non-empty if the manifest is malformed. // |warnings| will be populated if there are keys in the manifest that cannot // be specified by the application type. bool ValidateManifest(std::string* error) const; // Returns the manifest type. 
- Type GetType() const { return type_; } - - bool IsPackaged() const { return type_ == TYPE_PACKAGED_APP; } - bool IsHosted() const { return type_ == TYPE_HOSTED_APP; } + Type type() const { return type_; } // These access the wrapped manifest value, returning false when the property // does not exist or if the manifest type can't access it. @@ -98,11 +92,6 @@ class Manifest { bool CanAccessPath(const std::string& path) const; bool CanAccessKey(const std::string& key) const; - // A persistent, globally unique ID. An application's ID is used in things - // like directory structures and URLs, and is expected to not change across - // versions. - std::string application_id_; - #if defined(OS_TIZEN) // Unique package id for tizen platform std::string package_id_; diff --git a/src/xwalk/application/common/manifest_handler.cc b/src/xwalk/application/common/manifest_handler.cc index 708c4bf..a0c20e8 100644 --- a/src/xwalk/application/common/manifest_handler.cc +++ b/src/xwalk/application/common/manifest_handler.cc @@ -59,8 +59,8 @@ ManifestHandlerRegistry::~ManifestHandlerRegistry() { } ManifestHandlerRegistry* -ManifestHandlerRegistry::GetInstance(Package::Type package_type) { - if (package_type == Package::WGT) +ManifestHandlerRegistry::GetInstance(Manifest::Type type) { + if (type == Manifest::TYPE_WIDGET) return GetInstanceForWGT(); return GetInstanceForXPK(); } @@ -75,7 +75,7 @@ ManifestHandlerRegistry::GetInstanceForWGT() { handlers.push_back(new WidgetHandler); handlers.push_back(new WARPHandler); #if defined(OS_TIZEN) - handlers.push_back(new CSPHandler(Package::WGT)); + handlers.push_back(new CSPHandler(Manifest::TYPE_WIDGET)); handlers.push_back(new NavigationHandler); handlers.push_back(new TizenApplicationHandler); handlers.push_back(new TizenSettingHandler); @@ -94,7 +94,7 @@ ManifestHandlerRegistry::GetInstanceForXPK() { std::vector handlers; // FIXME: Add manifest handlers here like this: // handlers.push_back(new xxxHandler); - handlers.push_back(new 
CSPHandler(Package::XPK)); + handlers.push_back(new CSPHandler(Manifest::TYPE_MANIFEST)); handlers.push_back(new PermissionsHandler); xpk_registry_ = new ManifestHandlerRegistry(handlers); return xpk_registry_; @@ -114,7 +114,7 @@ bool ManifestHandlerRegistry::ParseAppManifest( iter != handlers_.end(); ++iter) { ManifestHandler* handler = iter->second; if (application->GetManifest()->HasPath(iter->first) || - handler->AlwaysParseForType(application->GetType())) { + handler->AlwaysParseForType(application->manifest_type())) { handlers_by_order[order_map_[handler]] = handler; } } @@ -134,7 +134,7 @@ bool ManifestHandlerRegistry::ValidateAppManifest( iter != handlers_.end(); ++iter) { ManifestHandler* handler = iter->second; if ((application->GetManifest()->HasPath(iter->first) || - handler->AlwaysValidateForType(application->GetType())) && + handler->AlwaysValidateForType(application->manifest_type())) && !handler->Validate(application, error)) return false; } @@ -143,8 +143,8 @@ bool ManifestHandlerRegistry::ValidateAppManifest( // static void ManifestHandlerRegistry::SetInstanceForTesting( - ManifestHandlerRegistry* registry, Package::Type package_type) { - if (package_type == Package::WGT) { + ManifestHandlerRegistry* registry, Manifest::Type type) { + if (type == Manifest::TYPE_WIDGET) { widget_registry_ = registry; return; } diff --git a/src/xwalk/application/common/manifest_handler.h b/src/xwalk/application/common/manifest_handler.h index ec44c0a..97e3666 100644 --- a/src/xwalk/application/common/manifest_handler.h +++ b/src/xwalk/application/common/manifest_handler.h @@ -54,8 +54,7 @@ class ManifestHandlerRegistry { public: ~ManifestHandlerRegistry(); - static ManifestHandlerRegistry* GetInstance( - Package::Type package_type); + static ManifestHandlerRegistry* GetInstance(Manifest::Type type); bool ParseAppManifest( scoped_refptr application, base::string16* error); @@ -75,7 +74,7 @@ class ManifestHandlerRegistry { // Sets a new global registry, for testing 
purposes. static void SetInstanceForTesting(ManifestHandlerRegistry* registry, - Package::Type package_type); + Manifest::Type type); static ManifestHandlerRegistry* GetInstanceForWGT(); static ManifestHandlerRegistry* GetInstanceForXPK(); diff --git a/src/xwalk/application/common/manifest_handler_unittest.cc b/src/xwalk/application/common/manifest_handler_unittest.cc index f232b84..f0210c8 100644 --- a/src/xwalk/application/common/manifest_handler_unittest.cc +++ b/src/xwalk/application/common/manifest_handler_unittest.cc @@ -31,14 +31,14 @@ class ScopedTestingManifestHandlerRegistry { : registry_( new ManifestHandlerRegistry(handlers)), prev_registry_( - ManifestHandlerRegistry::GetInstance(Package::XPK)) { + ManifestHandlerRegistry::GetInstance(Manifest::TYPE_MANIFEST)) { ManifestHandlerRegistry::SetInstanceForTesting( - registry_.get(), Package::XPK); + registry_.get(), Manifest::TYPE_MANIFEST); } ~ScopedTestingManifestHandlerRegistry() { ManifestHandlerRegistry::SetInstanceForTesting( - prev_registry_, Package::XPK); + prev_registry_, Manifest::TYPE_MANIFEST); } scoped_ptr registry_; @@ -219,10 +219,9 @@ TEST_F(ManifestHandlerTest, DependentHandlers) { manifest.SetInteger("g", 6); std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()))), &error); EXPECT_TRUE(application.get()); // A, B, C.EZ, C.D, K @@ -247,10 +246,9 @@ TEST_F(ManifestHandlerTest, FailingHandlers) { // Succeeds when "a" is not recognized. 
std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest_a, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest_a.DeepCopy()))), &error); EXPECT_TRUE(application.get()); @@ -264,10 +262,9 @@ TEST_F(ManifestHandlerTest, FailingHandlers) { registry.reset(new ScopedTestingManifestHandlerRegistry(handlers)); application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest_a, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest_a.DeepCopy()))), &error); EXPECT_FALSE(application.get()); EXPECT_EQ("A", error); @@ -285,10 +282,9 @@ TEST_F(ManifestHandlerTest, Validate) { manifest.SetInteger("b", 2); std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()))), &error); EXPECT_TRUE(application.get()); diff --git a/src/xwalk/application/common/manifest_handlers/csp_handler.cc b/src/xwalk/application/common/manifest_handlers/csp_handler.cc index 48863e0..6d29625 100644 --- a/src/xwalk/application/common/manifest_handlers/csp_handler.cc +++ b/src/xwalk/application/common/manifest_handlers/csp_handler.cc @@ -21,8 +21,8 @@ CSPInfo::CSPInfo() { CSPInfo::~CSPInfo() { } -CSPHandler::CSPHandler(Package::Type type) - : package_type_(type) { +CSPHandler::CSPHandler(Manifest::Type type) + : type_(type) { } CSPHandler::~CSPHandler() { @@ -30,11 +30,11 @@ CSPHandler::~CSPHandler() { bool CSPHandler::Parse(scoped_refptr application, base::string16* error) { - if (package_type_ != application->GetPackageType()) + if (type_ != application->manifest_type()) return false; scoped_ptr csp_info(new CSPInfo); std::string policies_str; - const char* csp_key = GetCSPKey(package_type_); + const char* csp_key = 
GetCSPKey(type_); if (application->GetManifest()->HasPath(csp_key) && !application->GetManifest()->GetString(csp_key, &policies_str)) { *error = base::ASCIIToUTF16( @@ -62,11 +62,11 @@ bool CSPHandler::Parse(scoped_refptr application, } bool CSPHandler::AlwaysParseForType(Manifest::Type type) const { - return package_type_ == Package::XPK; + return type_ == Manifest::TYPE_MANIFEST; } std::vector CSPHandler::Keys() const { - return std::vector(1, GetCSPKey(package_type_)); + return std::vector(1, GetCSPKey(type_)); } } // namespace application diff --git a/src/xwalk/application/common/manifest_handlers/csp_handler.h b/src/xwalk/application/common/manifest_handlers/csp_handler.h index 071520d..d58b8a6 100644 --- a/src/xwalk/application/common/manifest_handlers/csp_handler.h +++ b/src/xwalk/application/common/manifest_handlers/csp_handler.h @@ -32,7 +32,7 @@ class CSPInfo : public ApplicationData::ManifestData { class CSPHandler : public ManifestHandler { public: - explicit CSPHandler(Package::Type type); + explicit CSPHandler(Manifest::Type type); virtual ~CSPHandler(); virtual bool Parse(scoped_refptr application, @@ -41,7 +41,7 @@ class CSPHandler : public ManifestHandler { virtual std::vector Keys() const OVERRIDE; private: - Package::Type package_type_; + Manifest::Type type_; DISALLOW_COPY_AND_ASSIGN(CSPHandler); }; diff --git a/src/xwalk/application/common/manifest_handlers/csp_handler_unittest.cc b/src/xwalk/application/common/manifest_handlers/csp_handler_unittest.cc index e6e21e1..26c9dc5 100644 --- a/src/xwalk/application/common/manifest_handlers/csp_handler_unittest.cc +++ b/src/xwalk/application/common/manifest_handlers/csp_handler_unittest.cc @@ -18,16 +18,17 @@ class CSPHandlerTest: public testing::Test { public: scoped_refptr CreateApplication() { std::string error; - scoped_refptr application = ApplicationData::Create( - base::FilePath(), ApplicationData::LOCAL_DIRECTORY, - manifest, "", &error); - return application; + scoped_refptr app_data = 
ApplicationData::Create( + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()))), + &error); + return app_data; } const CSPInfo* GetCSPInfo( scoped_refptr application) { const CSPInfo* info = static_cast( - application->GetManifestData(GetCSPKey(application->GetPackageType()))); + application->GetManifestData(GetCSPKey(application->manifest_type()))); return info; } diff --git a/src/xwalk/application/common/manifest_handlers/permissions_handler_unittest.cc b/src/xwalk/application/common/manifest_handlers/permissions_handler_unittest.cc index 8773131..14ba5bb 100644 --- a/src/xwalk/application/common/manifest_handlers/permissions_handler_unittest.cc +++ b/src/xwalk/application/common/manifest_handlers/permissions_handler_unittest.cc @@ -34,10 +34,9 @@ TEST_F(PermissionsHandlerTest, NonePermission) { manifest.SetString(keys::kXWalkVersionKey, "0"); std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()))), &error); EXPECT_TRUE(application.get()); EXPECT_EQ(GetAPIPermissionsInfo(application).size(), 0); @@ -51,10 +50,9 @@ TEST_F(PermissionsHandlerTest, EmptyPermission) { manifest.Set(keys::kPermissionsKey, permissions); std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()))), &error); EXPECT_TRUE(application.get()); EXPECT_EQ(GetAPIPermissionsInfo(application).size(), 0); @@ -69,10 +67,9 @@ TEST_F(PermissionsHandlerTest, DeviceAPIPermission) { manifest.Set(keys::kPermissionsKey, permissions); std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), + base::FilePath(), 
std::string(), ApplicationData::LOCAL_DIRECTORY, - manifest, - "", + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()))), &error); EXPECT_TRUE(application.get()); const PermissionSet& permission_list = diff --git a/src/xwalk/application/common/manifest_handlers/warp_handler_unittest.cc b/src/xwalk/application/common/manifest_handlers/warp_handler_unittest.cc index 83ae8bd..aeda711 100644 --- a/src/xwalk/application/common/manifest_handlers/warp_handler_unittest.cc +++ b/src/xwalk/application/common/manifest_handlers/warp_handler_unittest.cc @@ -23,8 +23,10 @@ class WARPHandlerTest: public testing::Test { scoped_refptr CreateApplication() { std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), ApplicationData::LOCAL_DIRECTORY, - manifest, "", &error); + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()), + Manifest::TYPE_WIDGET)), + &error); return application; } @@ -53,7 +55,7 @@ TEST_F(WARPHandlerTest, OneWARP) { manifest.Set(keys::kAccessKey, warp); scoped_refptr application = CreateApplication(); EXPECT_TRUE(application.get()); - EXPECT_EQ(application->GetPackageType(), Package::WGT); + EXPECT_EQ(application->manifest_type(), Manifest::TYPE_WIDGET); const WARPInfo* info = GetWARPInfo(application); EXPECT_TRUE(info); scoped_ptr list(info->GetWARP()->DeepCopy()); @@ -77,7 +79,7 @@ TEST_F(WARPHandlerTest, WARPs) { scoped_refptr application = CreateApplication(); EXPECT_TRUE(application.get()); - EXPECT_EQ(application->GetPackageType(), Package::WGT); + EXPECT_EQ(application->manifest_type(), Manifest::TYPE_WIDGET); const WARPInfo* info = GetWARPInfo(application); EXPECT_TRUE(info); diff --git a/src/xwalk/application/common/manifest_handlers/widget_handler_unittest.cc b/src/xwalk/application/common/manifest_handlers/widget_handler_unittest.cc index 0025074..299160b 100644 --- 
a/src/xwalk/application/common/manifest_handlers/widget_handler_unittest.cc +++ b/src/xwalk/application/common/manifest_handlers/widget_handler_unittest.cc @@ -55,8 +55,10 @@ class WidgetHandlerTest: public testing::Test { const base::DictionaryValue& manifest) { std::string error; scoped_refptr application = ApplicationData::Create( - base::FilePath(), ApplicationData::LOCAL_DIRECTORY, - manifest, "", &error); + base::FilePath(), std::string(), ApplicationData::LOCAL_DIRECTORY, + make_scoped_ptr(new Manifest(make_scoped_ptr(manifest.DeepCopy()), + Manifest::TYPE_WIDGET)), + &error); return application; } @@ -162,7 +164,7 @@ TEST_F(WidgetHandlerTest, scoped_refptr application; application = CreateApplication(*(manifest.get())); EXPECT_TRUE(application); - EXPECT_EQ(application->GetPackageType(), Package::WGT); + EXPECT_EQ(application->manifest_type(), Manifest::TYPE_WIDGET); // Get widget info from this application. WidgetInfo* info = GetWidgetInfo(application); EXPECT_TRUE(info); @@ -193,8 +195,8 @@ TEST_F(WidgetHandlerTest, // Create an application use this manifest, scoped_refptr application; application = CreateApplication(*(manifest.get())); - EXPECT_TRUE(application); - EXPECT_EQ(application->GetPackageType(), Package::WGT); + EXPECT_TRUE(application.get()); + EXPECT_EQ(application->manifest_type(), Manifest::TYPE_WIDGET); // Get widget info from this application. 
WidgetInfo* info = GetWidgetInfo(application); EXPECT_TRUE(info); diff --git a/src/xwalk/application/common/manifest_unittest.cc b/src/xwalk/application/common/manifest_unittest.cc index 5973f78..727aa01 100644 --- a/src/xwalk/application/common/manifest_unittest.cc +++ b/src/xwalk/application/common/manifest_unittest.cc @@ -26,13 +26,6 @@ class ManifestTest : public testing::Test { ManifestTest() : default_value_("test") {} protected: - void AssertType(Manifest* manifest, Manifest::Type type) { - EXPECT_EQ(type, manifest->GetType()); - EXPECT_EQ(type == Manifest::TYPE_PACKAGED_APP, - manifest->IsPackaged()); - EXPECT_EQ(type == Manifest::TYPE_HOSTED_APP, manifest->IsHosted()); - } - // Helper function that replaces the Manifest held by |manifest| with a copy // with its |key| changed to |value|. If |value| is NULL, then |key| will // instead be deleted. @@ -63,8 +56,6 @@ TEST_F(ManifestTest, ApplicationData) { std::string error; EXPECT_TRUE(manifest->ValidateManifest(&error)); EXPECT_TRUE(error.empty()); - // TODO(xiang): warnings will not be empty after enable manifest features - // AssertType(manifest.get(), Manifest::TYPE_HOSTED_AP); // The unknown key 'unknown_key' should be accesible. std::string value; @@ -91,21 +82,6 @@ TEST_F(ManifestTest, ApplicationTypes) { std::string error; EXPECT_TRUE(manifest->ValidateManifest(&error)); EXPECT_TRUE(error.empty()); - - // Platform app. - MutateManifest( - &manifest, keys::kStartURLKey, - new base::StringValue("main.html")); - AssertType(manifest.get(), Manifest::TYPE_PACKAGED_APP); - MutateManifest( - &manifest, keys::kStartURLKey, NULL); - - // Hosted app. 
- MutateManifest( - &manifest, keys::kLaunchWebURLKey, new base::StringValue("foo")); - AssertType(manifest.get(), Manifest::TYPE_HOSTED_APP); - MutateManifest( - &manifest, keys::kLaunchWebURLKey, NULL); } } // namespace application diff --git a/src/xwalk/application/common/package/package.h b/src/xwalk/application/common/package/package.h index 457e2a5..9cd936f 100644 --- a/src/xwalk/application/common/package/package.h +++ b/src/xwalk/application/common/package/package.h @@ -12,6 +12,7 @@ #include "base/files/scoped_file.h" #include "base/files/scoped_temp_dir.h" #include "base/memory/scoped_ptr.h" +#include "xwalk/application/common/manifest.h" namespace xwalk { namespace application { @@ -22,16 +23,12 @@ namespace application { // XPKPackage::Validate() class Package { public: - enum Type { - WGT, - XPK - }; - virtual ~Package(); bool IsValid() const { return is_valid_; } const std::string& Id() const { return id_; } const std::string& name() const { return name_; } - Type type() const { return type_; } + // Returns the type of the manifest which the package contains. + Manifest::Type manifest_type() const { return manifest_type_; } // Factory method for creating a package static scoped_ptr Create(const base::FilePath& path); // The function will unzip the XPK/WGT file and return the target path where @@ -54,7 +51,7 @@ class Package { base::ScopedTempDir temp_dir_; // Represent if the package has been extracted. 
bool is_extracted_; - Type type_; + Manifest::Type manifest_type_; }; } // namespace application diff --git a/src/xwalk/application/common/package/wgt_package.cc b/src/xwalk/application/common/package/wgt_package.cc index 2825fbb..2320775 100644 --- a/src/xwalk/application/common/package/wgt_package.cc +++ b/src/xwalk/application/common/package/wgt_package.cc @@ -30,7 +30,7 @@ WGTPackage::WGTPackage(const base::FilePath& path) : Package(path) { if (!base::PathExists(path)) return; - type_ = WGT; + manifest_type_ = Manifest::TYPE_WIDGET; base::FilePath extracted_path; // FIXME : we should not call 'extract' here! if (!ExtractToTemporaryDir(&extracted_path)) diff --git a/src/xwalk/application/common/package/xpk_package.cc b/src/xwalk/application/common/package/xpk_package.cc index fb2b62d..a967c84 100644 --- a/src/xwalk/application/common/package/xpk_package.cc +++ b/src/xwalk/application/common/package/xpk_package.cc @@ -26,7 +26,7 @@ XPKPackage::XPKPackage(const base::FilePath& path) : Package(path) { if (!base::PathExists(path)) return; - type_ = XPK; + manifest_type_ = Manifest::TYPE_MANIFEST; scoped_ptr file( new base::ScopedFILE(base::OpenFile(path, "rb"))); file_ = file.Pass(); diff --git a/src/xwalk/application/common/security_policy.cc b/src/xwalk/application/common/security_policy.cc index 74511f6..94a9803 100644 --- a/src/xwalk/application/common/security_policy.cc +++ b/src/xwalk/application/common/security_policy.cc @@ -168,11 +168,11 @@ SecurityPolicyCSP::~SecurityPolicyCSP() { } void SecurityPolicyCSP::Enforce() { - Package::Type package_type = app_->data()->GetPackageType(); - const char* scp_key = GetCSPKey(package_type); + Manifest::Type manifest_type = app_->data()->manifest_type(); + const char* scp_key = GetCSPKey(manifest_type); CSPInfo* csp_info = static_cast(app_->data()->GetManifestData(scp_key)); - if (package_type == Package::WGT) { + if (manifest_type == Manifest::TYPE_WIDGET) { #if defined(OS_TIZEN) if (!csp_info || 
csp_info->GetDirectives().empty()) app_->data()->SetManifestData(scp_key, GetDefaultCSPInfo()); diff --git a/src/xwalk/application/common/tizen/application_storage_impl.cc b/src/xwalk/application/common/tizen/application_storage_impl.cc index 8610c1d..3736a56 100644 --- a/src/xwalk/application/common/tizen/application_storage_impl.cc +++ b/src/xwalk/application/common/tizen/application_storage_impl.cc @@ -15,7 +15,7 @@ #include "xwalk/application/common/application_file_util.h" #include "xwalk/application/common/id_util.h" #include "xwalk/application/common/tizen/application_storage.h" -#include "xwalk/application/common/tizen/package_path.h" +#include "xwalk/application/common/tizen/package_query.h" namespace { @@ -33,6 +33,21 @@ ail_cb_ret_e appinfo_get_app_id_cb( const char kXWalkPackageType[] = "wgt"; +bool GetPackageType(const std::string& application_id, + xwalk::application::Manifest::Type* package_type) { + if (xwalk::application::IsValidWGTID(application_id)) { + *package_type = xwalk::application::Manifest::TYPE_WIDGET; + return true; + } + + if (xwalk::application::IsValidXPKID(application_id)) { + *package_type = xwalk::application::Manifest::TYPE_MANIFEST; + return true; + } + + return false; +} + } // namespace namespace xwalk { @@ -48,13 +63,43 @@ bool ApplicationStorageImpl::Init() { return true; } +namespace { + +bool GetManifestType(const std::string& app_id, Manifest::Type* manifest_type) { + if (IsValidWGTID(app_id)) { + *manifest_type = Manifest::TYPE_WIDGET; + return true; + } + + if (IsValidXPKID(app_id)) { + *manifest_type = Manifest::TYPE_MANIFEST; + return true; + } + + return false; +} + +} // namespace + scoped_refptr ApplicationStorageImpl::GetApplicationData( const std::string& app_id) { base::FilePath app_path = GetApplicationPath(app_id); - std::string error_str; - return LoadApplication( - app_path, app_id, ApplicationData::INTERNAL, &error_str); + Manifest::Type manifest_type; + if (!GetManifestType(app_id, &manifest_type)) { + 
LOG(ERROR) << "Failed to detect the manifest type from app id " + << app_id; + return NULL; + } + + std::string error; + scoped_refptr app_data = + LoadApplication( + app_path, app_id, ApplicationData::INTERNAL, manifest_type, &error); + if (!app_data) + LOG(ERROR) << "Error occurred while trying to load application: " << error; + + return app_data; } bool ApplicationStorageImpl::GetInstalledApplicationIDs( diff --git a/src/xwalk/application/common/tizen/package_path.cc b/src/xwalk/application/common/tizen/package_query.cc similarity index 57% rename from src/xwalk/application/common/tizen/package_path.cc rename to src/xwalk/application/common/tizen/package_query.cc index 8c5afba..1165554 100644 --- a/src/xwalk/application/common/tizen/package_path.cc +++ b/src/xwalk/application/common/tizen/package_query.cc @@ -9,11 +9,14 @@ #include #include "base/logging.h" -#include "xwalk/application/common/tizen/package_path.h" +#include "xwalk/application/common/tizen/package_query.h" namespace { -ail_cb_ret_e appinfo_get_exec_cb(const ail_appinfo_h appinfo, void *user_data) { +typedef ail_cb_ret_e (*PropertyCallback)(const ail_appinfo_h, void*); + +ail_cb_ret_e callback_x_slp_exe_path(const ail_appinfo_h appinfo, + void* user_data) { char* package_exec; ail_appinfo_get_str(appinfo, AIL_PROP_X_SLP_EXE_PATH, &package_exec); if (!package_exec) @@ -24,19 +27,30 @@ ail_cb_ret_e appinfo_get_exec_cb(const ail_appinfo_h appinfo, void *user_data) { return AIL_CB_RET_CANCEL; } -base::FilePath GetPath(const std::string& id, const char* type) { +ail_cb_ret_e callback_installed_time(const ail_appinfo_h appinfo, + void* user_data) { + int* installed_time = static_cast(user_data); + ail_appinfo_get_int(appinfo, AIL_PROP_X_SLP_INSTALLEDTIME_INT, + installed_time); + return AIL_CB_RET_CANCEL; +} + +void GetProperty(const std::string& id, + const char* type, + PropertyCallback callback, + void* user_data) { ail_filter_h filter; ail_error_e ret = ail_filter_new(&filter); if (ret != 
AIL_ERROR_OK) { LOG(ERROR) << "Failed to create AIL filter."; - return base::FilePath(); + return; } ret = ail_filter_add_str(filter, type, id.c_str()); if (ret != AIL_ERROR_OK) { LOG(ERROR) << "Failed to init AIL filter."; ail_filter_destroy(filter); - return base::FilePath(); + return; } int count; @@ -48,24 +62,36 @@ base::FilePath GetPath(const std::string& id, const char* type) { if (ret != AIL_ERROR_OK) { LOG(ERROR) << "Failed to count AIL app info."; ail_filter_destroy(filter); - return base::FilePath(); + return; } if (count != 1) { - LOG(ERROR) << "Invalid count (" << count - << ") of the AIL DB records for the app id " << id; + LOG(ERROR) << "Invalid count (" << count + << ") of the AIL DB records for the app id " << id; ail_filter_destroy(filter); - return base::FilePath(); + return; } - std::string x_slp_exe_path; + if (uid != GLOBAL_USER) - ail_filter_list_usr_appinfo_foreach(filter, appinfo_get_exec_cb, - &x_slp_exe_path, uid); + ail_filter_list_usr_appinfo_foreach(filter, callback, + user_data, uid); else ail_filter_list_appinfo_foreach(filter, - appinfo_get_exec_cb, &x_slp_exe_path); + callback, user_data); ail_filter_destroy(filter); +} + +base::FilePath GetPath(const std::string& app_id, const char* type) { + std::string x_slp_exe_path; + GetProperty(app_id, + type, + callback_x_slp_exe_path, + static_cast(&x_slp_exe_path)); + + if (x_slp_exe_path.empty()) { + return base::FilePath(); + } // x_slp_exe_path is /bin/, we need to // return just . 
@@ -85,5 +111,14 @@ base::FilePath GetPackagePath(const std::string& pkg_id) { return GetPath(pkg_id, AIL_PROP_X_SLP_PKGID_STR); } +base::Time GetApplicationInstallationTime(const std::string& app_id) { + int installed_time = 0; // seconds since epoch + GetProperty(app_id, + AIL_PROP_X_SLP_APPID_STR, + callback_installed_time, + static_cast(&installed_time)); + return base::Time::FromTimeT(installed_time); +} + } // namespace application } // namespace xwalk diff --git a/src/xwalk/application/common/tizen/package_path.h b/src/xwalk/application/common/tizen/package_query.h similarity index 64% rename from src/xwalk/application/common/tizen/package_path.h rename to src/xwalk/application/common/tizen/package_query.h index 0b5fa65..e754e09 100644 --- a/src/xwalk/application/common/tizen/package_path.h +++ b/src/xwalk/application/common/tizen/package_query.h @@ -3,12 +3,13 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef XWALK_APPLICATION_COMMON_TIZEN_PACKAGE_PATH_H_ -#define XWALK_APPLICATION_COMMON_TIZEN_PACKAGE_PATH_H_ +#ifndef XWALK_APPLICATION_COMMON_TIZEN_PACKAGE_QUERY_H_ +#define XWALK_APPLICATION_COMMON_TIZEN_PACKAGE_QUERY_H_ #include #include "base/file_util.h" +#include "base/time/time.h" namespace xwalk { namespace application { @@ -16,7 +17,9 @@ namespace application { base::FilePath GetApplicationPath(const std::string& app_id); base::FilePath GetPackagePath(const std::string& pkg_id); +base::Time GetApplicationInstallationTime(const std::string& app_id); + } // application } // xwalk -#endif // XWALK_APPLICATION_COMMON_TIZEN_PACKAGE_PATH_H_ +#endif // XWALK_APPLICATION_COMMON_TIZEN_PACKAGE_QUERY_H_ diff --git a/src/xwalk/application/common/xwalk_application_common.gypi b/src/xwalk/application/common/xwalk_application_common.gypi index 1d2848a..ca9e89d 100644 --- a/src/xwalk/application/common/xwalk_application_common.gypi +++ b/src/xwalk/application/common/xwalk_application_common.gypi @@ 
-73,15 +73,14 @@ 'tizen/application_storage.h', 'tizen/application_storage_impl.cc', 'tizen/application_storage_impl.h', - 'tizen/package_path.cc', - 'tizen/package_path.h', + 'tizen/package_query.cc', + 'tizen/package_query.h', 'tizen/signature_data.h', 'tizen/signature_data.cc', 'tizen/signature_parser.h', 'tizen/signature_parser.cc', 'tizen/signature_validator.cc', 'tizen/signature_validator.h', - ], }], ], diff --git a/src/xwalk/application/test/application_browsertest.cc b/src/xwalk/application/test/application_browsertest.cc index b6dbff0..998aeed 100644 --- a/src/xwalk/application/test/application_browsertest.cc +++ b/src/xwalk/application/test/application_browsertest.cc @@ -9,6 +9,7 @@ #include "xwalk/application/browser/application.h" #include "xwalk/application/browser/application_system.h" #include "xwalk/application/browser/application_service.h" +#include "xwalk/application/common/application_file_util.h" #include "xwalk/application/test/application_browsertest.h" #include "xwalk/application/test/application_testapi.h" #include "xwalk/extensions/browser/xwalk_extension_service.h" @@ -16,6 +17,8 @@ using xwalk::application::Application; using xwalk::application::ApplicationService; +using xwalk::application::Manifest; +using xwalk::application::GetManifestPath; using namespace xwalk::extensions; // NOLINT ApplicationBrowserTest::ApplicationBrowserTest() @@ -59,8 +62,11 @@ void ApplicationBrowserTest::CreateExtensions( } IN_PROC_BROWSER_TEST_F(ApplicationBrowserTest, ApiTest) { - Application* app = application_sevice()->LaunchFromUnpackedPath( - test_data_dir_.Append(FILE_PATH_LITERAL("api"))); + base::FilePath manifest_path = + GetManifestPath(test_data_dir_.Append(FILE_PATH_LITERAL("api")), + Manifest::TYPE_MANIFEST); + Application* app = application_sevice()->LaunchFromManifestPath( + manifest_path, Manifest::TYPE_MANIFEST); ASSERT_TRUE(app); test_runner_->WaitForTestNotification(); EXPECT_EQ(test_runner_->GetTestsResult(), ApiTestRunner::PASS); diff 
--git a/src/xwalk/application/test/application_multi_app_test.cc b/src/xwalk/application/test/application_multi_app_test.cc index b7aebb0..799b778 100644 --- a/src/xwalk/application/test/application_multi_app_test.cc +++ b/src/xwalk/application/test/application_multi_app_test.cc @@ -8,12 +8,15 @@ #include "xwalk/application/browser/application.h" #include "xwalk/application/browser/application_service.h" #include "xwalk/application/browser/application_system.h" +#include "xwalk/application/common/application_file_util.h" #include "xwalk/application/test/application_browsertest.h" #include "xwalk/application/test/application_testapi.h" #include "xwalk/runtime/browser/xwalk_runner.h" using xwalk::application::Application; using xwalk::application::ApplicationService; +using xwalk::application::Manifest; +using xwalk::application::GetManifestPath; class ApplicationMultiAppTest : public ApplicationBrowserTest { }; @@ -22,8 +25,11 @@ IN_PROC_BROWSER_TEST_F(ApplicationMultiAppTest, TestMultiApp) { ApplicationService* service = application_sevice(); const size_t currently_running_count = service->active_applications().size(); // Launch the first app. - Application* app1 = service->LaunchFromUnpackedPath( - test_data_dir_.Append(FILE_PATH_LITERAL("dummy_app1"))); + base::FilePath manifest_path = + GetManifestPath(test_data_dir_.Append(FILE_PATH_LITERAL("dummy_app1")), + Manifest::TYPE_MANIFEST); + Application* app1 = application_sevice()->LaunchFromManifestPath( + manifest_path, Manifest::TYPE_MANIFEST); ASSERT_TRUE(app1); // Wait for app is fully loaded. test_runner_->WaitForTestNotification(); @@ -36,13 +42,16 @@ IN_PROC_BROWSER_TEST_F(ApplicationMultiAppTest, TestMultiApp) { // Verify that no new App instance was created, if one exists // with the same ID. 
- Application* failed_app1 = service->LaunchFromUnpackedPath( - test_data_dir_.Append(FILE_PATH_LITERAL("dummy_app1"))); + Application* failed_app1 = application_sevice()->LaunchFromManifestPath( + manifest_path, Manifest::TYPE_MANIFEST); ASSERT_FALSE(failed_app1); // Launch the second app. - Application* app2 = service->LaunchFromUnpackedPath( - test_data_dir_.Append(FILE_PATH_LITERAL("dummy_app2"))); + manifest_path = + GetManifestPath(test_data_dir_.Append(FILE_PATH_LITERAL("dummy_app2")), + Manifest::TYPE_MANIFEST); + Application* app2 = application_sevice()->LaunchFromManifestPath( + manifest_path, Manifest::TYPE_MANIFEST); ASSERT_TRUE(app2); // Wait for app is fully loaded. test_runner_->PostResultToNotificationCallback(); diff --git a/src/xwalk/application/test/application_testapi_test.cc b/src/xwalk/application/test/application_testapi_test.cc index dbb5db8..e1d9fe9 100644 --- a/src/xwalk/application/test/application_testapi_test.cc +++ b/src/xwalk/application/test/application_testapi_test.cc @@ -4,17 +4,23 @@ #include "content/public/test/browser_test_utils.h" #include "net/base/net_util.h" +#include "xwalk/application/common/application_file_util.h" #include "xwalk/application/test/application_browsertest.h" #include "xwalk/application/test/application_testapi.h" using xwalk::application::Application; +using xwalk::application::Manifest; +using xwalk::application::GetManifestPath; class ApplicationTestApiTest : public ApplicationBrowserTest { }; IN_PROC_BROWSER_TEST_F(ApplicationTestApiTest, TestApiTest) { - Application* app = application_sevice()->LaunchFromUnpackedPath( - test_data_dir_.Append(FILE_PATH_LITERAL("testapi"))); + base::FilePath manifest_path = + GetManifestPath(test_data_dir_.Append(FILE_PATH_LITERAL("testapi")), + Manifest::TYPE_MANIFEST); + Application* app = application_sevice()->LaunchFromManifestPath( + manifest_path, Manifest::TYPE_MANIFEST); ASSERT_TRUE(app); test_runner_->WaitForTestNotification(); 
EXPECT_EQ(test_runner_->GetTestsResult(), ApiTestRunner::FAILURE); diff --git a/src/xwalk/application/tools/tizen/xwalk_backend_plugin.cc b/src/xwalk/application/tools/tizen/xwalk_backend_plugin.cc index 376f2af..4dff936 100644 --- a/src/xwalk/application/tools/tizen/xwalk_backend_plugin.cc +++ b/src/xwalk/application/tools/tizen/xwalk_backend_plugin.cc @@ -11,13 +11,21 @@ #include "base/file_util.h" #include "base/files/file_path.h" #include "base/files/scoped_temp_dir.h" +#include "base/logging.h" #include "base/path_service.h" +#include "base/time/time.h" #include "base/version.h" #include "xwalk/application/common/application_data.h" #include "xwalk/application/common/application_file_util.h" +#include "xwalk/application/common/application_manifest_constants.h" #include "xwalk/application/common/id_util.h" +#include "xwalk/application/common/manifest_handlers/tizen_application_handler.h" +#include "xwalk/application/common/package/package.h" +#include "xwalk/application/common/tizen/package_query.h" #include "xwalk/runtime/common/xwalk_paths.h" +using xwalk::application::Manifest; + namespace { enum PkgmgrPluginBool { @@ -25,6 +33,26 @@ enum PkgmgrPluginBool { kPkgmgrPluginFalse = -1 }; +// Whole app directory size in KB +int64 CountAppTotalSize( + scoped_refptr app_data) { + return base::ComputeDirectorySize(app_data->Path()) / 1024; +} + +// Data directory size in KB +int64 CountAppDataSize( + scoped_refptr app_data) { + int64 size = 0; + + base::FilePath private_path = app_data->Path().Append("private"); + size += base::ComputeDirectorySize(private_path); + + base::FilePath tmp_path = app_data->Path().Append("tmp"); + size += base::ComputeDirectorySize(tmp_path); + + return size / 1024; +} + } // namespace PkgmgrBackendPlugin* PkgmgrBackendPlugin::GetInstance() { @@ -46,7 +74,7 @@ int PkgmgrBackendPlugin::DetailedInfo( if (!app_data) return kPkgmgrPluginFalse; - SaveDetailInfo(app_data.get(), pkg_detail_info); + SaveDetailInfo(app_data, pkg_detail_info); 
return kPkgmgrPluginTrue; } @@ -57,13 +85,16 @@ int PkgmgrBackendPlugin::DetailedInfoPkg( return kPkgmgrPluginFalse; } + + base::ScopedTempDir dir; + dir.CreateUniqueTempDir(); scoped_refptr app_data = - GetApplicationDataFromPkg(pkg_path); + GetApplicationDataFromPkg(pkg_path, &dir); if (app_data.get() == NULL) { return kPkgmgrPluginFalse; } - SaveDetailInfo(app_data.get(), pkg_detail_info); + SaveDetailInfo(app_data, pkg_detail_info); return kPkgmgrPluginTrue; } @@ -90,7 +121,7 @@ int PkgmgrBackendPlugin::AppsList(package_manager_pkg_info_t** list, static_cast( malloc(sizeof(package_manager_pkg_info_t))); memset(result, 0x00, sizeof(package_manager_pkg_info_t)); - SaveInfo(app_data.get(), result); + SaveInfo(app_data, result); if (*list) { result->next = *list; } @@ -109,11 +140,13 @@ PkgmgrBackendPlugin::PkgmgrBackendPlugin() { } void PkgmgrBackendPlugin::SaveInfo( - xwalk::application::ApplicationData* app_data, + scoped_refptr app_data, package_manager_pkg_info_t* pkg_detail_info) { strncpy(pkg_detail_info->pkg_type, "xpk", PKG_TYPE_STRING_LEN_MAX - 1); strncpy(pkg_detail_info->pkg_name, app_data->GetPackageID().c_str(), PKG_NAME_STRING_LEN_MAX - 1); + strncpy(pkg_detail_info->pkgid, app_data->GetPackageID().c_str(), + PKG_NAME_STRING_LEN_MAX - 1); if (app_data->Version() != NULL) { strncpy(pkg_detail_info->version, app_data->Version()->GetString().c_str(), PKG_VERSION_STRING_LEN_MAX - 1); @@ -121,11 +154,13 @@ void PkgmgrBackendPlugin::SaveInfo( } void PkgmgrBackendPlugin::SaveDetailInfo( - xwalk::application::ApplicationData* app_data, + scoped_refptr app_data, package_manager_pkg_detail_info_t* pkg_detail_info) { strncpy(pkg_detail_info->pkg_type, "xpk", PKG_TYPE_STRING_LEN_MAX - 1); strncpy(pkg_detail_info->pkg_name, app_data->GetPackageID().c_str(), PKG_NAME_STRING_LEN_MAX - 1); + strncpy(pkg_detail_info->pkgid, app_data->GetPackageID().c_str(), + PKG_NAME_STRING_LEN_MAX - 1); if (app_data->Version() != NULL) { strncpy(pkg_detail_info->version, 
app_data->Version()->GetString().c_str(), PKG_VERSION_STRING_LEN_MAX - 1); @@ -133,14 +168,28 @@ void PkgmgrBackendPlugin::SaveDetailInfo( strncpy(pkg_detail_info->pkg_description, app_data->Description().c_str(), PKG_VALUE_STRING_LEN_MAX - 1); - // TODO(t.iwanek) support this data in ApplicationStorage - // strncpy(pkg_detail_info.min_platform_version, - // app_data->todo, PKG_VERSION_STRING_LEN_MAX -1); - // PKG_VERSION_STRING_LEN_MAX - 1); - // pkg_detail_info->installed_time = 0; - // pkg_detail_info->installed_size = -1; - // pkg_detail_info->app_size = -1; - // pkg_detail_info->data_size = -1; + // xpk do not have this key in manifest + if (app_data->manifest_type() == Manifest::TYPE_WIDGET) { + const xwalk::application::TizenApplicationInfo* tizen_app_info = + static_cast( + app_data->GetManifestData( + xwalk::application_widget_keys::kTizenApplicationKey)); + DCHECK(tizen_app_info); + + strncpy(pkg_detail_info->min_platform_version, + tizen_app_info->required_version().c_str(), + PKG_VERSION_STRING_LEN_MAX -1); + } + + pkg_detail_info->installed_time = + xwalk::application::GetApplicationInstallationTime(app_data->ID()) + .ToTimeT(); // to seconds + + int install_size = CountAppTotalSize(app_data); + int data_size = CountAppDataSize(app_data); + pkg_detail_info->installed_size = install_size; + pkg_detail_info->app_size = install_size - data_size; + pkg_detail_info->data_size = data_size; strncpy(pkg_detail_info->optional_id, app_data->GetPackageID().c_str(), PKG_NAME_STRING_LEN_MAX - 1); @@ -148,10 +197,9 @@ void PkgmgrBackendPlugin::SaveDetailInfo( } scoped_refptr -PkgmgrBackendPlugin::GetApplicationDataFromPkg(const std::string& pkg_path) { - base::ScopedTempDir dir; - dir.CreateUniqueTempDir(); - base::FilePath unpacked_dir = dir.path(); +PkgmgrBackendPlugin::GetApplicationDataFromPkg(const std::string& pkg_path, + base::ScopedTempDir* dir) { + base::FilePath unpacked_dir = dir->path(); scoped_ptr package = 
xwalk::application::Package::Create(base::FilePath(pkg_path)); @@ -161,6 +209,6 @@ PkgmgrBackendPlugin::GetApplicationDataFromPkg(const std::string& pkg_path) { std::string error; scoped_refptr app_data = LoadApplication( unpacked_dir, app_id, xwalk::application::ApplicationData::TEMP_DIRECTORY, - package->type(), &error); + package->manifest_type(), &error); return app_data; } diff --git a/src/xwalk/application/tools/tizen/xwalk_backend_plugin.h b/src/xwalk/application/tools/tizen/xwalk_backend_plugin.h index c857a2e..efad580 100644 --- a/src/xwalk/application/tools/tizen/xwalk_backend_plugin.h +++ b/src/xwalk/application/tools/tizen/xwalk_backend_plugin.h @@ -36,12 +36,13 @@ class PkgmgrBackendPlugin { private: PkgmgrBackendPlugin(); - void SaveInfo(xwalk::application::ApplicationData* app_data, + void SaveInfo(scoped_refptr app_data, package_manager_pkg_info_t* pkg_detail_info); - void SaveDetailInfo(xwalk::application::ApplicationData* app_data, - package_manager_pkg_detail_info_t* pkg_detail_info); + void SaveDetailInfo( + scoped_refptr app_data, + package_manager_pkg_detail_info_t* pkg_detail_info); scoped_refptr GetApplicationDataFromPkg( - const std::string& pkg_path); + const std::string& pkg_path, base::ScopedTempDir* dir); friend struct DefaultSingletonTraits; diff --git a/src/xwalk/application/tools/tizen/xwalk_package_installer.cc b/src/xwalk/application/tools/tizen/xwalk_package_installer.cc index 0d1e1b0..11ba840 100644 --- a/src/xwalk/application/tools/tizen/xwalk_package_installer.cc +++ b/src/xwalk/application/tools/tizen/xwalk_package_installer.cc @@ -37,6 +37,7 @@ namespace info = application_packageinfo_constants; using xwalk::application::ApplicationData; using xwalk::application::ApplicationStorage; using xwalk::application::FileDeleter; +using xwalk::application::Manifest; using xwalk::application::Package; namespace { @@ -221,7 +222,7 @@ bool PackageInstaller::PlatformInstall(ApplicationData* app_data) { std::string icon_name; if 
(!app_data->GetManifest()->GetString( - GetIcon128Key(app_data->GetPackageType()), &icon_name)) + GetIcon128Key(app_data->manifest_type()), &icon_name)) LOG(WARNING) << "'icon' not included in manifest"; // This will clean everything inside '/'. @@ -327,7 +328,7 @@ bool PackageInstaller::PlatformUpdate(ApplicationData* app_data) { std::string icon_name; if (!app_data->GetManifest()->GetString( - GetIcon128Key(app_data->GetPackageType()), &icon_name)) + GetIcon128Key(app_data->manifest_type()), &icon_name)) LOG(WARNING) << "'icon' not included in manifest"; // This will clean everything inside '/' and the new XML. @@ -445,7 +446,7 @@ bool PackageInstaller::Install(const base::FilePath& path, std::string* id) { std::string error; scoped_refptr app_data = LoadApplication( unpacked_dir, app_id, ApplicationData::LOCAL_DIRECTORY, - package->type(), &error); + package->manifest_type(), &error); if (!app_data) { LOG(ERROR) << "Error during application installation: " << error; return false; @@ -551,11 +552,8 @@ bool PackageInstaller::Update(const std::string& app_id, std::string error; scoped_refptr new_app_data = - LoadApplication(unpacked_dir, - app_id, - ApplicationData::TEMP_DIRECTORY, - package->type(), - &error); + LoadApplication(unpacked_dir, app_id, ApplicationData::TEMP_DIRECTORY, + package->manifest_type(), &error); if (!new_app_data) { LOG(ERROR) << "An error occurred during application updating: " << error; return false; @@ -568,11 +566,10 @@ bool PackageInstaller::Update(const std::string& app_id, return false; } - if ( - // For Tizen WGT package, downgrade to a lower version or reinstall - // is permitted when using Tizen WRT, Crosswalk runtime need to follow - // this behavior on Tizen platform. - package->type() != Package::WGT && + // For Tizen WGT package, downgrade to a lower version or reinstall + // is permitted when using Tizen WRT, Crosswalk runtime need to follow + // this behavior on Tizen platform. 
+ if (package->manifest_type() != Manifest::TYPE_WIDGET && old_app_data->Version()->CompareTo( *(new_app_data->Version())) >= 0) { LOG(INFO) << "The version number of new XPK/WGT package " @@ -589,11 +586,9 @@ bool PackageInstaller::Update(const std::string& app_id, !base::Move(unpacked_dir, app_dir)) return false; - new_app_data = LoadApplication(app_dir, - app_id, - ApplicationData::LOCAL_DIRECTORY, - package->type(), - &error); + new_app_data = LoadApplication( + app_dir, app_id, ApplicationData::LOCAL_DIRECTORY, + package->manifest_type(), &error); if (!new_app_data) { LOG(ERROR) << "Error during loading new package: " << error; base::DeleteFile(app_dir, true); diff --git a/src/xwalk/packaging/crosswalk.spec b/src/xwalk/packaging/crosswalk.spec index 9b332ec..7444d33 100644 --- a/src/xwalk/packaging/crosswalk.spec +++ b/src/xwalk/packaging/crosswalk.spec @@ -16,7 +16,7 @@ %endif Name: crosswalk -Version: 9.38.205.0 +Version: 9.38.207.0 Release: 0 Summary: Chromium-based app runtime License: (BSD-3-Clause and LGPL-2.1+) diff --git a/src/xwalk/runtime/android/core/src/org/xwalk/core/SharedXWalkView.java b/src/xwalk/runtime/android/core/src/org/xwalk/core/SharedXWalkView.java index 7d8f349..2b83866 100644 --- a/src/xwalk/runtime/android/core/src/org/xwalk/core/SharedXWalkView.java +++ b/src/xwalk/runtime/android/core/src/org/xwalk/core/SharedXWalkView.java @@ -12,21 +12,34 @@ import android.util.AttributeSet; * The XWalkView that allows to use Crosswalk's shared library. 
*/ public class SharedXWalkView extends XWalkView { + + private static boolean initialized = false; + public SharedXWalkView(Context context, AttributeSet attrs, SharedXWalkExceptionHandler handler) { - super(verifyActivity(context, handler), attrs); + super(verifyActivity(context), attrs); } - public SharedXWalkView(Context context, Activity activity, - SharedXWalkExceptionHandler handler) { - super(context, verifyActivity(activity, handler)); + public SharedXWalkView(Context context, Activity activity) { + super(context, verifyActivity(activity)); } - private static Activity verifyActivity(Context context, SharedXWalkExceptionHandler handler) { + private static Activity verifyActivity(Context context) { assert context instanceof Activity; + if (!initialized) initialize(context, null); + return (Activity) context; + } + + public static void initialize(Context context, SharedXWalkExceptionHandler handler) { + if (initialized) return; + assert context.getApplicationContext() instanceof XWalkApplication; ReflectionHelper.allowCrossPackage(); - ReflectionHelper.setExceptionHandler(handler); - return (Activity) context; + if (handler != null) ReflectionHelper.setExceptionHandler(handler); + initialized = true; + } + + public static boolean usesLibraryOutOfPackage() { + return ReflectionHelper.shouldUseLibrary(); } } diff --git a/src/xwalk/runtime/browser/xwalk_browser_main_parts.cc b/src/xwalk/runtime/browser/xwalk_browser_main_parts.cc index 37e4c83..ff61f2e 100644 --- a/src/xwalk/runtime/browser/xwalk_browser_main_parts.cc +++ b/src/xwalk/runtime/browser/xwalk_browser_main_parts.cc @@ -110,7 +110,6 @@ void XWalkBrowserMainParts::PreMainMessageLoopStart() { command_line->AppendSwitch(switches::kAllowFileAccessFromFiles); // Enable SIMD.JS API by default. 
- /* std::string js_flags("--simd_object"); if (command_line->HasSwitch(switches::kJavaScriptFlags)) { js_flags += " "; @@ -118,7 +117,6 @@ void XWalkBrowserMainParts::PreMainMessageLoopStart() { command_line->GetSwitchValueASCII(switches::kJavaScriptFlags); } command_line->AppendSwitchASCII(switches::kJavaScriptFlags, js_flags); - */ startup_url_ = GetURLFromCommandLine(*command_line); } -- 2.7.4