Added a new copy buffer test

author Benjamin Segovia <segovia.benjamin@gmail.com>

Fri, 13 Apr 2012 18:43:46 +0000 (18:43 +0000)

committer Keith Packard <keithp@keithp.com>

Fri, 10 Aug 2012 23:16:23 +0000 (16:16 -0700)
author Benjamin Segovia <segovia.benjamin@gmail.com>
Fri, 13 Apr 2012 18:43:46 +0000 (18:43 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:16:23 +0000 (16:16 -0700)
diff --git a/kernels/stdlib.h b/kernels/stdlib.h

index 472655a..eaf4b17 100644 (file)
--- a/kernels/stdlib.h
+++ b/kernels/stdlib.h
@@ -43,7 +43,7 @@ DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)
  #undef DECL_PUBLIC_WORK_ITEM_FN
  
  inline unsigned int get_global_id(unsigned int dim) {
-  return get_local_id(dim) + get_local_size(dim) * get_num_groups(dim);
+  return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);
  }
  
  __attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c);
diff --git a/kernels/test_copy_buffer.cl b/kernels/test_copy_buffer.cl

new file mode 100644 (file)

index 0000000..50b79df
--- /dev/null
+++ b/kernels/test_copy_buffer.cl
@@ -0,0 +1,8 @@
+#include "stdlib.h"
+__kernel void
+test_copy_buffer(__global float* src, __global float* dst)
+{
+    int id = (int)get_global_id(0);
+    dst[id] = src[id];
+}
+
diff --git a/kernels/test_copy_buffer.cl.ll b/kernels/test_copy_buffer.cl.ll

new file mode 100644 (file)

index 0000000..2a9c818
--- /dev/null
+++ b/kernels/test_copy_buffer.cl.ll
@@ -0,0 +1,30 @@
+; ModuleID = 'test_copy_buffer.cl.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_copy_buffer(float addrspace(1)* nocapture %src, float addrspace(1)* nocapture %dst) nounwind noinline {
+get_global_id.exit:
+  %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
+  %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone
+  %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_group_id0() nounwind readnone
+  %mul.i = mul i32 %call.i10.i, %call.i3.i
+  %add.i = add i32 %mul.i, %call.i.i
+  %arrayidx = getelementptr inbounds float addrspace(1)* %src, i32 %add.i
+  %0 = load float addrspace(1)* %arrayidx, align 4, !tbaa !1
+  %arrayidx1 = getelementptr inbounds float addrspace(1)* %dst, i32 %add.i
+  store float %0, float addrspace(1)* %arrayidx1, align 4, !tbaa !1
+  ret void
+}
+
+declare ptx_device i32 @__gen_ocl_get_group_id0() nounwind readnone
+
+declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone
+
+declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (float addrspace(1)*, float addrspace(1)*)* @test_copy_buffer}
+!1 = metadata !{metadata !"float", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/kernels/test_write_only_2.cl.ll b/kernels/test_write_only_2.cl.ll

index 3d7e493..4df3092 100644 (file)
--- a/kernels/test_write_only_2.cl.ll
+++ b/kernels/test_write_only_2.cl.ll
@@ -6,7 +6,7 @@ define ptx_kernel void @test_write_only(float addrspace(1)* nocapture %dst) noun
  get_global_id.exit:
    %call.i.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
    %call.i3.i = tail call ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone
-  %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone
+  %call.i10.i = tail call ptx_device i32 @__gen_ocl_get_group_id0() nounwind readnone
    %mul.i = mul i32 %call.i10.i, %call.i3.i
    %add.i = add i32 %mul.i, %call.i.i
    %arrayidx = getelementptr inbounds float addrspace(1)* %dst, i32 %add.i
@@ -14,7 +14,7 @@ get_global_id.exit:
    ret void
  }
  
-declare ptx_device i32 @__gen_ocl_get_num_groups0() nounwind readnone
+declare ptx_device i32 @__gen_ocl_get_group_id0() nounwind readnone
  
  declare ptx_device i32 @__gen_ocl_get_local_size0() nounwind readnone
  
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

index 035e941..cb57d49 100644 (file)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -49,9 +49,11 @@ ADD_LIBRARY(cl_test STATIC
  TARGET_LINK_LIBRARIES(cl_test cl)
  
  ADD_EXECUTABLE(test_write_only tests/test_write_only.c)
+ADD_EXECUTABLE(test_copy_buffer tests/test_copy_buffer.c)
  ADD_EXECUTABLE(test_eot tests/test_eot.c)
  TARGET_LINK_LIBRARIES(test_eot cl_test m)
  TARGET_LINK_LIBRARIES(test_write_only cl_test m)
+TARGET_LINK_LIBRARIES(test_copy_buffer cl_test m)
  
  #ADD_EXECUTABLE(test_copy_buffer tests/test_copy_buffer.c)
  #ADD_EXECUTABLE(test_copy_image tests/test_copy_image.c)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c

index fa1a133..ae0bd67 100644 (file)
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -119,7 +119,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
    cl_buffer private_bo = NULL, scratch_bo = NULL;
    cl_gpgpu_kernel kernel;
    const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
-  size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = 0u, local_id_sz = 0u;
+  size_t i, batch_sz = 0u, local_sz = 0u, local_id_sz = 0u, cst_sz = ker->curbe_sz;
    size_t thread_n = 0u, id_offset = 0u;
    cl_int err = CL_SUCCESS;
  
@@ -130,9 +130,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
    kernel.barrierID = 0;
    kernel.use_barrier = 0;
    kernel.slm_sz = 0;
-  kernel.cst_sz = 0;
  
-  /* Fill the constant buffer */
+  /* Curbe step 1: fill the constant buffer data shared by all threads */
    curbe = alloca(ker->curbe_sz);
    cl_curbe_fill(ker, curbe, global_wk_off, global_wk_sz, local_wk_sz);
  
@@ -151,7 +150,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
    cl_command_queue_bind_surface(queue, ker, curbe, NULL, &private_bo, &scratch_bo, 0);
    cl_gpgpu_states_setup(gpgpu, &kernel);
  
-  /* CURBE step 2. Give the localID and upload it to video memory */
+  /* Curbe step 2. Give the localID and upload it to video memory */
    TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
    if (curbe)
      for (i = 0; i < thread_n; ++i)
diff --git a/src/cl_driver.cpp b/src/cl_driver.cpp

index dd44637..0b05fd7 100644 (file)
--- a/src/cl_driver.cpp
+++ b/src/cl_driver.cpp
@@ -24,12 +24,13 @@ extern "C" {
  #include <stdlib.h>
  #include <string.h>
  }
+
  namespace
  {
-  /* Just use c++ pre-main to initialize the call-backs */
-  struct CallBackInitializer
+  /*! Just use c++ pre-main to initialize the call-backs */
+  struct OCLDriverCallBackInitializer
    {
-    CallBackInitializer(void) {
+    OCLDriverCallBackInitializer(void) {
        const char *run_it = getenv("OCL_SIMULATOR");
        if (run_it != NULL && !strcmp(run_it, "2"))
          sim_setup_callbacks();
@@ -38,7 +39,7 @@ namespace
      }
    };
  
-  /* Set the call backs at pre-main time */
-  LOCAL CallBackInitializer cbInitializer;
-}
+  /*! Set the call backs at pre-main time */
+  static OCLDriverCallBackInitializer cbInitializer;
+} /* namespace */
  
diff --git a/src/cl_kernel.c b/src/cl_kernel.c

index 36064a4..0f8d103 100644 (file)
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -168,15 +168,6 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
  
    /* Create the curbe */
    k->curbe_sz = gbe_kernel_get_curbe_size(k->opaque);
-  TRY_ALLOC_NO_ERR(k->curbe, cl_malloc(k->curbe_sz));
-  return;
-
-error:
-  if (k->curbe) cl_free(k->curbe);
-  if (k->bo) cl_buffer_unreference(k->bo);
-  k->curbe = NULL;
-  k->bo = NULL;
-  return;
  }
  
  LOCAL cl_kernel
@@ -194,7 +185,9 @@ cl_kernel_dup(cl_kernel from)
    to->magic = CL_MAGIC_KERNEL_HEADER;
    to->program = from->program;
    to->arg_n = from->arg_n;
+  to->curbe_sz = from->curbe_sz;
    TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
+  if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
  
    /* Retain the bos */
    if (from->bo)       cl_buffer_reference(from->bo);
diff --git a/src/sim/sim_driver.c b/src/sim/sim_driver.c

index 3fa224c..243b02a 100644 (file)
--- a/src/sim/sim_driver.c
+++ b/src/sim/sim_driver.c
@@ -201,7 +201,7 @@ struct _sim_gpgpu
    uint32_t binded_offset[max_buf_n]; /* their offsets in the constant buffer */
    uint32_t memory_remap[max_buf_n];  /* offset of each buffer in the fake memory space */
    uint32_t max_threads;              /* HW threads running */
-  uint32_t cst_sz;                   /* size of the constant buffer */
+  uint32_t curbe_sz;                 /* size of curbe used per HW thread */
    uint32_t binded_n;                 /* number of buffers binded */
    uint32_t thread_n;                 /* number of threads to run per work group */
  };
@@ -244,13 +244,14 @@ static void
  sim_gpgpu_upload_constants(sim_gpgpu gpgpu, const void* data, uint32_t size)
  {
    uint32_t i, j;
-  assert(size == gpgpu->cst_sz * gpgpu->thread_n);
+  assert(size == gpgpu->curbe_sz * gpgpu->thread_n);
    if (gpgpu->curbe) cl_free(gpgpu->curbe);
    gpgpu->curbe = (char*) cl_malloc(size);
+  memcpy(gpgpu->curbe, data, size);
  
    /* Upload the buffer offsets per thread */
    for (i = 0; i < gpgpu->thread_n; ++i) {
-    const uint32_t start_offset = i * gpgpu->cst_sz;
+    const uint32_t start_offset = i * gpgpu->curbe_sz;
      for (j = 0; j < gpgpu->binded_n; ++j) {
        const uint32_t offset = start_offset + gpgpu->binded_offset[j];
        const uint32_t fake_address = gpgpu->memory_remap[j];
@@ -264,7 +265,7 @@ sim_gpgpu_state_init(sim_gpgpu gpgpu, uint32_t max_threads, uint32_t size_cs_ent
  {
    assert(gpgpu);
    memset(gpgpu, 0, sizeof(*gpgpu));
-  gpgpu->cst_sz = size_cs_entry * 32;
+  gpgpu->curbe_sz = size_cs_entry * 32;
    gpgpu->max_threads = max_threads;
  }
  
@@ -323,6 +324,7 @@ sim_gpgpu_walker(sim_gpgpu gpgpu,
    gbe_simulator sim = sim_simulator_new();
    sim->set_base_address(sim, gpgpu->fake_memory);
    sim->set_curbe_address(sim, gpgpu->curbe);
+  sim->set_curbe_size(sim, gpgpu->curbe_sz);
    for (z = 0; z < global_wk_dim[2]; ++z)
    for (y = 0; y < global_wk_dim[1]; ++y)
    for (x = 0; x < global_wk_dim[0]; ++x)
author	Benjamin Segovia <segovia.benjamin@gmail.com>
	Fri, 13 Apr 2012 18:43:46 +0000 (18:43 +0000)
committer	Keith Packard <keithp@keithp.com>
	Fri, 10 Aug 2012 23:16:23 +0000 (16:16 -0700)
kernels/stdlib.h		patch \| blob \| history
kernels/test_copy_buffer.cl	[new file with mode: 0644]	patch \| blob
kernels/test_copy_buffer.cl.ll	[new file with mode: 0644]	patch \| blob
kernels/test_write_only_2.cl.ll		patch \| blob \| history
src/CMakeLists.txt		patch \| blob \| history
src/cl_command_queue_gen7.c		patch \| blob \| history
src/cl_driver.cpp		patch \| blob \| history
src/cl_kernel.c		patch \| blob \| history
src/sim/sim_driver.c		patch \| blob \| history