Fixed this damn fucking relocation bug!
authorBenjamin Segovia <segovia.benjamin@gmail.com>
Thu, 10 May 2012 20:09:32 +0000 (20:09 +0000)
committerKeith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:17:20 +0000 (16:17 -0700)
kernels/compiler_byte_scatter.cl [new file with mode: 0644]
kernels/compiler_short_scatter.cl [new file with mode: 0644]
src/cl_command_queue_gen7.c
src/intel/intel_gpgpu.c
utests/CMakeLists.txt
utests/compiler_byte_scatter.cpp [new file with mode: 0644]
utests/compiler_short_scatter.cpp [new file with mode: 0644]

diff --git a/kernels/compiler_byte_scatter.cl b/kernels/compiler_byte_scatter.cl
new file mode 100644 (file)
index 0000000..ab56ba8
--- /dev/null
@@ -0,0 +1,7 @@
+__kernel void
+compiler_byte_scatter(__global char *dst)
+{
+  int id = (int) get_global_id(0);
+  dst[id] = (char) id;
+}
+
diff --git a/kernels/compiler_short_scatter.cl b/kernels/compiler_short_scatter.cl
new file mode 100644 (file)
index 0000000..7dad029
--- /dev/null
@@ -0,0 +1,7 @@
+__kernel void
+compiler_short_scatter(__global short *dst)
+{
+  int id = (int) get_global_id(0);
+  dst[id] = (short) id;
+}
+
index d5c1823..46480ec 100644 (file)
@@ -43,8 +43,8 @@ cl_kernel_compute_batch_sz(cl_kernel k)
 static cl_int
 cl_set_varying_payload(char *data,
                        const size_t *local_wk_sz,
-                       const size_t *id_offset,
-                       size_t ip_offset,
+                       const int32_t *id_offset,
+                       int32_t ip_offset,
                        size_t simd_sz,
                        size_t cst_sz,
                        size_t thread_n)
@@ -132,7 +132,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   cl_gpgpu_kernel kernel;
   const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
   size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz;
-  size_t thread_n = 0u, id_offset[3], ip_offset;
+  size_t thread_n = 0u;
+  int32_t id_offset[3], ip_offset;
   cl_int err = CL_SUCCESS;
 
   /* Setup kernel */
@@ -166,8 +167,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
     for (i = 0; i < thread_n; ++i)
       memcpy(final_curbe + cst_sz * i, curbe, cst_sz);
   id_offset[0] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0);
-  id_offset[1] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 1);
-  id_offset[2] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 2);
+  id_offset[1] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
+  id_offset[2] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
   ip_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
   assert(id_offset[0] >= 0 &&
          id_offset[1] >= 0 &&
index ed113fc..3a9e40b 100644 (file)
@@ -638,26 +638,28 @@ intel_gpgpu_build_idrt(intel_gpgpu_t *state, cl_gpgpu_kernel *kernel)
 static void
 intel_gpgpu_upload_constants(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
 {
-  unsigned char *constant_buffer = NULL;
+  unsigned char *curbe = NULL;
   cl_gpgpu_kernel *k = gpgpu->ker;
   uint32_t i, j;
 
   /* Upload the data first */
   dri_bo_map(gpgpu->curbe_b.bo, 1);
   assert(gpgpu->curbe_b.bo->virtual);
-  constant_buffer = (unsigned char *) gpgpu->curbe_b.bo->virtual;
-  memcpy(constant_buffer, data, size);
-  dri_bo_unmap(gpgpu->curbe_b.bo);
+  curbe = (unsigned char *) gpgpu->curbe_b.bo->virtual;
+  memcpy(curbe, data, size);
 
   /* Now put all the relocations for our flat address space */
   for (i = 0; i < k->thread_n; ++i)
-    for (j = 0; j < gpgpu->binded_n; ++j)
+    for (j = 0; j < gpgpu->binded_n; ++j) {
+      *(uint32_t*)(curbe + gpgpu->binded_offset[j]+i*k->cst_sz) = gpgpu->binded_buf[j]->offset;
       drm_intel_bo_emit_reloc(gpgpu->curbe_b.bo,
                               gpgpu->binded_offset[j]+i*k->cst_sz,
                               gpgpu->binded_buf[j],
                               0,
                               I915_GEM_DOMAIN_RENDER,
                               I915_GEM_DOMAIN_RENDER);
+    }
+  dri_bo_unmap(gpgpu->curbe_b.bo);
 }
 
 static void
index e408507..5c6bc0f 100644 (file)
@@ -11,7 +11,7 @@ ADD_LIBRARY(utests SHARED
             compiler_write_only.cpp
             compiler_copy_buffer.cpp
             compiler_copy_buffer_row.cpp
-            compiler_byte_scatter.cpp
+#compiler_byte_scatter.cpp
             compiler_short_scatter.cpp
             compiler_if_else.cpp
             compiler_unstructured_branch0.cpp
diff --git a/utests/compiler_byte_scatter.cpp b/utests/compiler_byte_scatter.cpp
new file mode 100644 (file)
index 0000000..115f5df
--- /dev/null
@@ -0,0 +1,43 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_byte_scatter(void)
+{
+  const size_t n = 128;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_byte_scatter");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int8_t), NULL);
+
+  // Run the kernel
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  globals[0] = n;
+  locals[0] = 16;
+  OCL_NDRANGE(1);
+
+  // Check result
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i)
+    OCL_ASSERT(((int8_t*)buf_data[0])[i] == (int8_t) i);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_byte_scatter);
+
diff --git a/utests/compiler_short_scatter.cpp b/utests/compiler_short_scatter.cpp
new file mode 100644 (file)
index 0000000..8f5f4c2
--- /dev/null
@@ -0,0 +1,44 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "utest_helper.hpp"
+
+static void compiler_short_scatter(void)
+{
+  const size_t n = 128;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_short_scatter");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int16_t), NULL);
+
+  // Run the kernel
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  globals[0] = n;
+  locals[0] = 16;
+  OCL_NDRANGE(1);
+
+  // Check result
+  OCL_MAP_BUFFER(0);
+  for (int32_t i = 0; i < (int32_t) n; ++i)
+    OCL_ASSERT(((int16_t*)buf_data[0])[i] == (int16_t) i);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_short_scatter);
+
+