SET(CMAKE_VERBOSE_MAKEFILE "false")
SET(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type")
-SET(USE_INTEL_COMPILER CACHE BOOL "false")
-SET(EMULATE_IVB CACHE BOOL "false")
-SET(EMULATE_SNB CACHE BOOL "false")
+SET(EMULATE_IVB false CACHE BOOL "To emulate IVB")
+SET(EMULATE_SNB false CACHE BOOL "To emulate SNB")
+SET(EMULATE_HSW false CACHE BOOL "To emulate HSW")
+SET(USE_OLD_COMPILER false CACHE BOOL "To use the old compiler (required for SNB)")
ADD_DEFINITIONS(-D__$(USER)__)
+IF (EMULATE_HSW)
+  ADD_DEFINITIONS(-DEMULATE_GEN=75)
+ENDIF (EMULATE_HSW)
+IF (USE_OLD_COMPILER)
+ ADD_DEFINITIONS (-DUSE_OLD_COMPILER=1)
+ELSE (USE_OLD_COMPILER)
+ ADD_DEFINITIONS (-DUSE_OLD_COMPILER=0)
+ENDIF (USE_OLD_COMPILER)
+
IF (USE_FULSIM)
ADD_DEFINITIONS(-DUSE_FULSIM=1)
ELSE (USE_FULSIM)
system("wine AubLoad.exe dump.aub -device ivbB0");
else
system("wine AubLoad.exe dump.aub -device ivbB0 -debug");
+#elif EMULATE_GEN == 75 /* HSW */
+ if (debug_mode == NULL || strcmp(debug_mode, "1"))
+ system("wine AubLoad.exe dump.aub -device hsw.h.a0");
+ else
+ system("wine AubLoad.exe dump.aub -device hsw.h.a0 -debug");
+#else
+#error "Unknown device"
#endif
}
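
An equivalent way to express the dispatch above is to resolve the AubLoad device string once at preprocessing time; this is only a sketch using the device names already present in this patch, not a proposed change:

/* Sketch: EMULATE_GEN picks the fulsim target at build time
 * (7 = IVB, 75 = HSW, matching the convention used above). */
#if EMULATE_GEN == 7
#define FULSIM_DEVICE "ivbB0"
#elif EMULATE_GEN == 75
#define FULSIM_DEVICE "hsw.h.a0"
#else
#error "Unknown device"
#endif
/* ...a single pair of system() calls can then append FULSIM_DEVICE. */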
#include "cl_gen7_device.h"
};
+/* XXX we clone IVB for HSW now */
+static struct _cl_device_id intel_hsw_device = {
+ .max_compute_unit = 64,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+ .wg_sz = 512,
+ .compile_wg_sz = {0},
+
+#include "cl_gen75_device.h"
+};
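
Note the pattern above: C99 designated initializers set the gen-specific fields, and the trailing #include supplies the fields shared across generations. A self-contained sketch of the same technique; the type and values here are illustrative, not taken from the patch:

#include <stdint.h>

struct example_device {
  uint32_t max_compute_unit;
  uint32_t max_clock_frequency;
  uint32_t address_bits;        /* stands in for what cl_gen75_device.h supplies */
};

static struct example_device example_hsw = {
  .max_compute_unit = 64,       /* gen-specific, set at the definition site */
  .max_clock_frequency = 1000,
  .address_bits = 32,           /* in the real code the #include provides this */
};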
+
LOCAL cl_device_id
cl_get_gt_device(void)
{
cl_device_id ret = NULL;
const int device_id = cl_intel_get_device_id();
- if (device_id == PCI_CHIP_IVYBRIDGE_GT1 ||
- device_id == PCI_CHIP_IVYBRIDGE_M_GT1 ||
- device_id == PCI_CHIP_IVYBRIDGE_S_GT1) {
+  /* XXX HSW currently reuses the IVB settings (intel_hsw_device above is a clone) */
+ if (device_id == PCI_CHIP_HASWELL_M ||
+ device_id == PCI_CHIP_HASWELL_L ||
+ device_id == PCI_CHIP_HASWELL_M0 ||
+ device_id == PCI_CHIP_HASWELL_D0) {
+ intel_hsw_device.vendor_id = device_id;
+ intel_hsw_device.platform = intel_platform;
+ ret = &intel_hsw_device;
+ }
+ else if (device_id == PCI_CHIP_IVYBRIDGE_GT1 ||
+ device_id == PCI_CHIP_IVYBRIDGE_M_GT1 ||
+ device_id == PCI_CHIP_IVYBRIDGE_S_GT1) {
intel_ivb_gt1_device.vendor_id = device_id;
intel_ivb_gt1_device.platform = intel_platform;
ret = &intel_ivb_gt1_device;
if (UNLIKELY(device != &intel_snb_gt1_device &&
device != &intel_snb_gt2_device &&
device != &intel_ivb_gt1_device &&
- device != &intel_ivb_gt2_device))
+ device != &intel_ivb_gt2_device &&
+ device != &intel_hsw_device))
return CL_INVALID_DEVICE;
if (UNLIKELY(param_value == NULL))
return CL_INVALID_VALUE;
if (UNLIKELY(device != &intel_snb_gt1_device &&
device != &intel_snb_gt2_device &&
device != &intel_ivb_gt1_device &&
- device != &intel_ivb_gt2_device))
+ device != &intel_ivb_gt2_device &&
+ device != &intel_hsw_device))
return CL_INVALID_DEVICE;
if (ver == NULL)
return CL_SUCCESS;
if (device == &intel_snb_gt1_device || device == &intel_snb_gt2_device)
*ver = 6;
- else
+ else if (device == &intel_ivb_gt1_device || device == &intel_ivb_gt2_device)
*ver = 7;
+ else
+ *ver = 75;
return CL_SUCCESS;
}
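
Note the encoding returned above: *ver holds the hardware generation, with 75 standing for Gen7.5 (Haswell). A small decoder, purely illustrative and not part of the patch:

#include <stdint.h>

/* Illustrative: map the integer generation encoding to a printable name. */
static const char *gen_ver_name(uint32_t ver)
{
  switch (ver) {
  case 6:  return "Gen6 (SNB)";
  case 7:  return "Gen7 (IVB)";
  case 75: return "Gen7.5 (HSW)";
  default: return "unknown";
  }
}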
#undef DECL_FIELD
case PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT:
case PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT:
{
- cl_global_memory_object_arg_t *from = (cl_global_memory_object_arg_t *) patch;
TRY_ALLOC (arg_info, CALLOC(cl_arg_info_t));
- arg_info->arg_index = from->index;
- arg_info->offset = from->offset;
- if (item->token == PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT)
+ if (item->token == PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT) {
+ cl_global_memory_object_arg_t *from = (cl_global_memory_object_arg_t *) patch;
+ arg_info->arg_index = from->index;
+ arg_info->offset = from->offset;
arg_info->type = OCLRT_ARG_TYPE_BUFFER;
- else if (item->token == PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT)
+ }
+ else if (item->token == PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT) {
+ cl_global_memory_object_arg_t *from = (cl_global_memory_object_arg_t *) patch;
+ arg_info->arg_index = from->index;
+ arg_info->offset = from->offset;
arg_info->type = OCLRT_ARG_TYPE_CONST;
- else if (item->token == PATCH_TOKEN_IMAGE_MEMORY_KERNEL_ARGUMENT)
+ }
+ else if (item->token == PATCH_TOKEN_IMAGE_MEMORY_KERNEL_ARGUMENT) {
+ cl_image_memory_object_arg_t *from = (cl_image_memory_object_arg_t *) patch;
+ arg_info->arg_index = from->index;
+ arg_info->offset = from->offset;
arg_info->type = OCLRT_ARG_TYPE_IMAGE;
- else
+  }
+  else
assert(0);
arg_info->sz = sizeof(cl_mem);
case DATA_PARAMETER_IMAGE_CHANNEL_ORDER:
case DATA_PARAMETER_NUM_HARDWARE_THREADS:
{
- curbe_key = cl_curbe_key(data->type, data->index, data->src_offset);
+#if USE_OLD_COMPILER == 0
+ if (data->type == DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_ARGUMENT_SIZES)
+ curbe_key = cl_curbe_key(data->type, data->index, 0);
+ else
+#endif
+ curbe_key = cl_curbe_key(data->type, data->index, data->src_offset);
curbe_info = cl_kernel_get_curbe_info_list(k, curbe_key);
if (curbe_info != NULL)
curbe_info->offsets[++curbe_info->last] = data->offset;
/* XXX Structures extracted from the WINDOWS CODE BASE */
/***************************************************************************/
+/* Some fields went from 1 to 4 bytes with the new compiler */
+#if USE_OLD_COMPILER
+typedef uint8_t cl_compiler_boolean_t;
+#else
+typedef uint32_t cl_compiler_boolean_t;
+#endif /* USE_OLD_COMPILER */
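
Because this typedef mirrors a binary layout fixed by the compiler that produced the program binary, the size assumption can be made explicit. A minimal guard using C11 _Static_assert; this is a suggestion, not part of the patch:

#if USE_OLD_COMPILER
_Static_assert(sizeof(cl_compiler_boolean_t) == 1,
               "old compiler serializes booleans as 1 byte");
#else
_Static_assert(sizeof(cl_compiler_boolean_t) == 4,
               "new compiler serializes booleans as 4 bytes");
#endif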
+
typedef struct cl_program_header {
uint32_t magic;
uint32_t version;
typedef struct cl_arg_info {
uint32_t arg_index;
uint32_t type;
- uint8_t is_null;
+ cl_compiler_boolean_t is_null;
uint32_t offset;
uint32_t sz;
void *obj;
- uint8_t is_patched;
+ cl_compiler_boolean_t is_patched;
struct cl_arg_info *next;
} cl_arg_info_t;
uint32_t arg_index;
uint32_t sz;
uint32_t src_offset;
- uint8_t is_patched;
- uint8_t is_local;
+ cl_compiler_boolean_t is_patched;
+ cl_compiler_boolean_t is_local;
struct cl_curbe_patch_info *next;
} cl_curbe_patch_info_t;
uint32_t patch_list_sz;
} cl_kernel_header_t;
-typedef struct cl_kernel_header7_5 {
+typedef struct cl_kernel_header75 {
cl_kernel_header_t header;
uint32_t kernel_heap_sz;
uint32_t general_state_heap_sz;
uint32_t dynamic_state_heap_sz;
uint32_t surface_state_heap_sz;
-} cl_kernel_header7_5_t;
+} cl_kernel_header75_t;
typedef struct cl_kernel_header7 {
cl_kernel_header_t header;
uint32_t offset;
} cl_global_memory_object_arg_t;
-typedef struct cl_patch_image_memory_object_arg {
+#if USE_OLD_COMPILER == 0
+typedef struct cl_image_memory_object_arg {
cl_patch_item_header_t header;
uint32_t index;
+ uint32_t image_type;
uint32_t offset;
-} cl_patch_image_memory_object_arg_t;
+} cl_image_memory_object_arg_t;
+#else
+/* The old compiler layout lacks image_type; keep a matching typedef so the
+ * image-argument handling above compiles in both configurations */
+typedef struct cl_image_memory_object_arg {
+  cl_patch_item_header_t header;
+  uint32_t index;
+  uint32_t offset;
+} cl_image_memory_object_arg_t;
+#endif
typedef struct cl_patch_constant_memory_object_arg {
uint32_t index;
#include <string.h>
#include <assert.h>
-static int icbe_ver = 1001L;
+#if USE_OLD_COMPILER
+static const int icbe_ver = 1001;
+#else
+static const int icbe_ver = 1002;
+#endif
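
With icbe_ver now constant per compiler flavor, the loader can reject binaries built against the other compiler using the version field of cl_program_header defined earlier. Whether the runtime performs exactly this check is not shown in the patch; a sketch:

/* Hypothetical guard: the binary's version must match the compiler
 * interface version this runtime was built against. */
static int check_icbe_version(const struct cl_program_header *header)
{
  return header->version == (uint32_t) icbe_ver;
}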
#define DECL_LOAD_HEADER(GEN) \
static const char* \
DECL_LOAD_HEADER(6)
DECL_LOAD_HEADER(7)
+DECL_LOAD_HEADER(75)
#undef DECL_LOAD_HEADER
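
The macro stamps out one loader per generation (cl_kernel_load_header6/7/75). Since its body is elided in this hunk, the shape below is inferred from the call sites further down and should be read as an assumption:

/* Inferred signature: parse one kernel header at `data`, fill in the
 * name/kernel sizes, and return the advanced read cursor. */
static const char *
cl_kernel_load_header75(cl_kernel ker, const char *data,
                        uint32_t *name_sz, uint32_t *ker_sz);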
/* Format changes from generation to generation */
TRY_ALLOC (p->ker[i], cl_kernel_new());
switch (header->device) {
+ case IGFX_GEN7_5_CORE:
+ ker = cl_kernel_load_header75(p->ker[i], ker, &name_sz, &ker_sz);
+ break;
case IGFX_GEN7_CORE:
ker = cl_kernel_load_header7(p->ker[i], ker, &name_sz, &ker_sz);
break;
#if EMULATE_GEN
driver->gen_ver = EMULATE_GEN;
- if (EMULATE_GEN == 7)
+ if (EMULATE_GEN == 75)
+ driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */
+ else if (EMULATE_GEN == 7)
driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */
else if (EMULATE_GEN == 6)
driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */
else
FATAL ("Unsupported Gen for emulation");
#else
- if (IS_GEN7(driver->device_id))
+ if (IS_GEN75(driver->device_id))
+ driver->gen_ver = 75;
+ else if (IS_GEN7(driver->device_id))
driver->gen_ver = 7;
else if (IS_GEN6(driver->device_id))
driver->gen_ver = 6;
intel_batchbuffer_alloc_space(state->batch,0);
memset(vfe, 0, sizeof(struct gen6_vfe_state_inline));
- vfe->vfe1.gpgpu_mode = state->drv->gen_ver >= 7 ? 1 : 0;
+ vfe->vfe1.gpgpu_mode =
+ (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) ? 1 : 0;
vfe->vfe1.bypass_gateway_ctl = 1;
vfe->vfe1.reset_gateway_timer = 1;
vfe->vfe1.max_threads = state->max_threads - 1;
vfe->vfe1.urb_entries = 64;
vfe->vfe3.curbe_size = 63;
vfe->vfe3.urbe_size = 13;
- vfe->vfe4.scoreboard_mask = state->drv->gen_ver >= 7 ? 0 : 0x80000000;
+ vfe->vfe4.scoreboard_mask =
+ (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) ? 0 : 0x80000000;
intel_batchbuffer_alloc_space(state->batch, sizeof(gen6_vfe_state_inline_t));
ADVANCE_BATCH(state->batch);
}
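
The gen_ver == 7 || gen_ver == 75 test now recurs across the VFE setup above and the L3 configuration and surface binding below; a small helper would keep the list of Gen7-class parts in one place. The helper name is hypothetical:

#include <stdint.h>

/* Hypothetical helper: HSW (75) behaves as a Gen7-class part for VFE,
 * L3 and surface-binding purposes. */
static inline int intel_is_gen7_class(uint32_t gen_ver)
{
  return gen_ver == 7 || gen_ver == 75;
}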
{
intel_batchbuffer_start_atomic(state->batch, 256);
gpgpu_pipe_control(state);
- if (state->drv->gen_ver >= 7)
+ if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
intel_gpgpu_set_L3(state, state->ker->use_barrier);
gpgpu_select_pipeline(state);
gpgpu_set_base_address(state);
assert(index < MAX_SURFACES);
if(state->drv->gen_ver == 6)
gpgpu_bind_buf_gen6(state, index, obj_bo, size, cchint);
- else if (state->drv->gen_ver == 7)
+ else if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint);
+ else
+ NOT_IMPLEMENTED;
}
LOCAL void
assert(index < MAX_SURFACES);
if(state->drv->gen_ver == 6)
gpgpu_bind_image2D_gen6(state, index, obj_bo, format, w, h, bpp, cchint);
- else if (state->drv->gen_ver == 7)
+ else if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
gpgpu_bind_image2D_gen7(state, index, obj_bo, format, w, h, bpp, cchint);
+ else
+ NOT_IMPLEMENTED;
}
static void
desc->desc4.curbe_read_offset = 0;
/* Barriers / SLM are automatically handled on Gen7+ */
- if (state->drv->gen_ver >= 7) {
+ if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) {
size_t slm_sz = kernel[i].slm_sz;
desc->desc5.group_threads_num = kernel[i].use_barrier ? kernel[i].thread_n : 0;
desc->desc5.barrier_enable = kernel[i].use_barrier;