static cl_int
cl_fulsim_dump_all_surfaces(cl_command_queue queue, cl_kernel k)
{
cl_int err = CL_SUCCESS;
-#if 0
cl_mem mem = NULL;
int i;
size_t j;
/* Dump each user-defined buffer */
- for (i = 0; i < k->arg_info_n; ++i) {
+ for (i = 0; i < k->arg_n; ++i) {
size_t chunk_n, chunk_remainder;
- if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER)
+ if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR)
continue;
- mem = (cl_mem) k->args[k->arg_info[i].arg_index];
+ mem = (cl_mem) k->args[i].mem;
CHECK_MEM(mem);
- chunk_n = mem->bo->size / chunk_sz;
- chunk_remainder = mem->bo->size % chunk_sz;
+ chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz;
+ chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz;
for (j = 0; j < chunk_n; ++j)
aub_exec_dump_raw_file(mem->bo, j * chunk_sz, chunk_sz);
if (chunk_remainder)
aub_exec_dump_raw_file(mem->bo, chunk_n * chunk_sz, chunk_remainder);
}
error:
-#endif
return err;
}
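
The dump loop above emits each global buffer in fixed-size pieces: size/chunk_sz full chunks followed by one partial chunk. A minimal standalone sketch of that arithmetic, not part of the patch; dump_in_chunks and emit are hypothetical names standing in for the aub_exec_dump_raw_file calls:

#include <stddef.h>

/* Emit a `size`-byte buffer as full chunk_sz pieces plus a remainder. */
static void
dump_in_chunks(size_t size, size_t chunk_sz,
               void (*emit)(size_t offset, size_t len))
{
  const size_t chunk_n = size / chunk_sz;
  const size_t chunk_remainder = size % chunk_sz;
  size_t j;
  for (j = 0; j < chunk_n; ++j)
    emit(j * chunk_sz, chunk_sz);              /* full chunks */
  if (chunk_remainder)
    emit(chunk_n * chunk_sz, chunk_remainder); /* trailing partial chunk */
}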
/* raw b, g, r data here, dword aligned per scan line */
};
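
Only the tail of the image header struct survives in this excerpt. A hedged reconstruction from the printf labels in the block removed below, assuming the standard 54-byte BMP header (the 2-byte "BM" magic is read separately so the remaining fields stay naturally aligned):

struct bmphdr {
  int filesize;      /* total file size incl header */
  short as0, as1;    /* app specific / reserved */
  int bmpoffset;     /* offset of bmp data */
  int headerbytes;   /* bytes in header from this point (40 actually) */
  int width;
  int height;
  short nplanes;     /* no of color planes */
  short bpp;         /* bits/pixel */
  int compression;   /* BI_RGB = 0 = no compression */
  int sizeraw;       /* size of raw bmp data, excluding header, incl padding */
  int hres, vres;    /* horizontal/vertical resolution, pixels/meter */
  int npalcolors;    /* no of colors in palette */
  int nimportant;    /* no of important colors */
  /* raw b, g, r data here, dword aligned per scan line */
};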
-#if 0
static int*
cl_read_bmp(const char *filename, int *width, int *height)
{
n = fread(&hdr, 1, sizeof(hdr), fp);
assert(n == sizeof(hdr));
-#if 0
- /* Dump stuff out */
- printf(" filesize = %d\n", hdr.filesize); /* total file size incl header */
- printf(" as0 = %d\n", hdr.as0);
- printf(" as1 = %d\n", hdr.as1);
- printf(" bmpoffset = %d\n", hdr.bmpoffset); /* ofset of bmp data */
- printf("headerbytes = %d\n", hdr.headerbytes); /* bytes in header from this point (40 actually) */
- printf(" width = %d\n", hdr.width);
- printf(" height = %d\n", hdr.height);
- printf(" nplanes = %d\n", hdr.nplanes); /* no of color planes */
- printf(" bpp = %d\n", hdr.bpp); /* bits/pixel */
- printf("compression = %d\n", hdr.compression); /* BI_RGB = 0 = no compression */
- printf(" sizeraw = %d\n", hdr.sizeraw); /* size of raw bmp file, excluding header, incl padding */
- printf(" hres = %d\n", hdr.hres); /* horz resolutions pixels/meter */
- printf(" vres = %d\n", hdr.vres);
- printf(" npalcolors = %d\n", hdr.npalcolors); /* No of colors in palette */
- printf(" nimportant = %d\n", hdr.nimportant); /* No of important colors */
-#endif
assert(hdr.width > 0 &&
hdr.height > 0 &&
hdr.nplanes == 1
*size = sz;
return dump;
}
-#endif
static cl_int
cl_fulsim_read_all_surfaces(cl_command_queue queue, cl_kernel k)
{
cl_int err = CL_SUCCESS;
-#if 0
cl_mem mem = NULL;
char *from = NULL, *to = NULL;
size_t size, j, chunk_n, chunk_remainder;
int i, curr = 0;
/* Read back each user-defined buffer */
- for (i = 0; i < k->arg_info_n; ++i) {
- if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER)
+ for (i = 0; i < k->arg_n; ++i) {
+ if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR)
continue;
- mem = (cl_mem) k->args[k->arg_info[i].arg_index];
+ mem = (cl_mem) k->args[i].mem;
CHECK_MEM(mem);
assert(mem->bo);
- chunk_n = mem->bo->size / chunk_sz;
- chunk_remainder = mem->bo->size % chunk_sz;
+ chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz;
+ chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz;
to = cl_mem_map(mem);
for (j = 0; j < chunk_n; ++j) {
char name[256];
cl_mem_unmap(mem);
}
error:
-#endif
return err;
-
}
-#endif /* USE_FULSIM */
+#endif
extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, const size_t *, const size_t *, const size_t *);
obj_bo);
}
+/* Use two 1GB surfaces to map the whole 2GB address space */
static void
-intel_gpgpu_map_address_space(intel_gpgpu_t *state,
- int32_t index,
- uint32_t size,
- uint32_t cchint)
+intel_gpgpu_map_address_space(intel_gpgpu_t *state)
{
surface_heap_t *heap = state->surface_heap_b.bo->virtual;
- gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
- const uint32_t size_ss = size - 1;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_BUFFER;
- ss->ss0.surface_format = I965_SURFACEFORMAT_RAW;
- ss->ss1.base_addr = 0;
- ss->ss2.width = size_ss & 0x7f; /* bits 6:0 of size_ss */
- ss->ss2.height = (size_ss & 0x1fff80) >> 7; /* bits 20:7 of size_ss */
- ss->ss3.depth = (size_ss & 0xffe00000) >> 20; /* bits 27:21 of size_ss */
- ss->ss5.cache_control = cc_llc_l3;
+ gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[0];
+ gen7_surface_state_t *ss1 = (gen7_surface_state_t *) heap->surface[1];
+ const uint32_t sz = (1<<30) - 1;
+ //const uint32_t sz = 1024*1024-1; /* smaller 1MB size, kept commented out for debugging */
+ memset(ss0, 0, sizeof(gen7_surface_state_t));
+ memset(ss1, 0, sizeof(gen7_surface_state_t));
+ ss1->ss0.surface_type = ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss1->ss0.surface_format = ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+ ss0->ss1.base_addr = 0;
+ ss1->ss1.base_addr = 1<<30;
+ ss1->ss2.width = ss0->ss2.width = sz & 127; /* bits 6:0 of sz */
+ ss1->ss2.height = ss0->ss2.height = (sz >> 7) & 16383; /* bits 20:7 of sz */
+ ss1->ss3.depth = ss0->ss3.depth = (sz >> 21) & 1023; /* bits 30:21 of sz */
+ ss1->ss5.cache_control = ss0->ss5.cache_control = cc_llc_l3;
+ heap->binding_table[0] = offsetof(surface_heap_t, surface)
+ + 0 * sizeof(gen7_surface_state_t);
+ heap->binding_table[1] = offsetof(surface_heap_t, surface)
+ + 1 * sizeof(gen7_surface_state_t);
}
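
The rewritten function pins two adjacent 1GB RAW buffer surfaces at base addresses 0 and 1<<30, and each surface stores its size minus one spread across the width, height and depth fields. A small self-check of that packing, not part of the patch (check_buffer_size_encoding is a hypothetical helper; the field split follows the shifts above):

#include <assert.h>
#include <stdint.h>

static void
check_buffer_size_encoding(void)
{
  const uint32_t sz     = (1u << 30) - 1;    /* 0x3fffffff */
  const uint32_t width  = sz & 127;          /* bits 6:0   -> 0x7f   */
  const uint32_t height = (sz >> 7) & 16383; /* bits 20:7  -> 0x3fff */
  const uint32_t depth  = (sz >> 21) & 1023; /* bits 30:21 -> 0x1ff  */
  /* Recombining the three fields must give back sz */
  assert((width | (height << 7) | (depth << 21)) == sz);
}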
static void
}
static void
-intel_gpgpu_upload_constants(intel_gpgpu_t *state, const void* data, uint32_t size)
+intel_gpgpu_upload_constants(intel_gpgpu_t *gpgpu, const void* data, uint32_t size)
{
unsigned char *constant_buffer = NULL;
+ cl_gpgpu_kernel *k = gpgpu->ker;
+ uint32_t i, j;
- dri_bo_map(state->curbe_b.bo, 1);
- assert(state->curbe_b.bo->virtual);
- constant_buffer = (unsigned char *) state->curbe_b.bo->virtual;
+ /* Upload the data first */
+ dri_bo_map(gpgpu->curbe_b.bo, 1);
+ assert(gpgpu->curbe_b.bo->virtual);
+ constant_buffer = (unsigned char *) gpgpu->curbe_b.bo->virtual;
memcpy(constant_buffer, data, size);
- dri_bo_unmap(state->curbe_b.bo);
+ dri_bo_unmap(gpgpu->curbe_b.bo);
+
+ /* Now put all the relocations for our flat address space */
+ for (i = 0; i < k->thread_n; ++i)
+ for (j = 0; j < gpgpu->binded_n; ++j)
+ drm_intel_bo_emit_reloc(gpgpu->curbe_b.bo,
+ gpgpu->binded_offset[j]+i*k->cst_sz,
+ gpgpu->binded_buf[j],
+ 0,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
}
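
The relocation loop exists because each of the thread_n hardware threads reads its own cst_sz-byte slice of the curbe, so every bound buffer pointer must be patched once per slice. A toy illustration of the byte offsets the double loop touches; all values here are hypothetical:

#include <stdio.h>

int main(void)
{
  const unsigned thread_n = 4;       /* threads in the work group */
  const unsigned cst_sz = 64;        /* curbe bytes per thread */
  const unsigned binded_offset = 8;  /* pointer slot inside one slice */
  unsigned i;
  for (i = 0; i < thread_n; ++i)
    printf("thread %u: reloc at curbe byte %u\n", i, binded_offset + i * cst_sz);
  return 0; /* prints offsets 8, 72, 136, 200 */
}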
static void
-intel_gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n)
+intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n)
{
if (n) {
const size_t sz = n * sizeof(gen6_sampler_state_t);
- memcpy(state->sampler_state_b.bo->virtual, data, sz);
+ memcpy(gpgpu->sampler_state_b.bo->virtual, data, sz);
}
}
static void
-intel_gpgpu_states_setup(intel_gpgpu_t *state, cl_gpgpu_kernel *kernel)
+intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
{
- state->ker = kernel;
- intel_gpgpu_build_idrt(state, kernel);
- dri_bo_unmap(state->surface_heap_b.bo);
- dri_bo_unmap(state->sampler_state_b.bo);
+ gpgpu->ker = kernel;
+ intel_gpgpu_build_idrt(gpgpu, kernel);
+ intel_gpgpu_map_address_space(gpgpu);
+ dri_bo_unmap(gpgpu->surface_heap_b.bo);
+ dri_bo_unmap(gpgpu->sampler_state_b.bo);
}
static void
-intel_gpgpu_set_perf_counters(intel_gpgpu_t *state, cl_buffer *perf)
+intel_gpgpu_set_perf_counters(intel_gpgpu_t *gpgpu, cl_buffer *perf)
{
- if (state->perf_b.bo)
- drm_intel_bo_unreference(state->perf_b.bo);
+ if (gpgpu->perf_b.bo)
+ drm_intel_bo_unreference(gpgpu->perf_b.bo);
drm_intel_bo_reference((drm_intel_bo*) perf);
- state->perf_b.bo = (drm_intel_bo*) perf;
+ gpgpu->perf_b.bo = (drm_intel_bo*) perf;
}
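
One caveat on the swap above: dropping the old reference before taking the new one is only safe when perf never aliases the bo already stored in perf_b. A defensive variant, shown as a sketch rather than a proposed change:

static void
intel_gpgpu_set_perf_counters_safe(intel_gpgpu_t *gpgpu, drm_intel_bo *bo)
{
  drm_intel_bo_reference(bo);          /* take the new reference first */
  if (gpgpu->perf_b.bo)
    drm_intel_bo_unreference(gpgpu->perf_b.bo);
  gpgpu->perf_b.bo = bo;
}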
static void
-intel_gpgpu_walker(intel_gpgpu_t *state,
+intel_gpgpu_walker(intel_gpgpu_t *gpgpu,
uint32_t simd_sz,
uint32_t thread_n,
const size_t global_wk_off[3],
global_wk_sz[2] / local_wk_sz[2]
};
assert(simd_sz == 8 || simd_sz == 16);
- BEGIN_BATCH(state->batch, 11);
- OUT_BATCH(state->batch, CMD_GPGPU_WALKER | 9);
- OUT_BATCH(state->batch, 0); /* kernel index == 0 */
+ BEGIN_BATCH(gpgpu->batch, 11);
+ OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 9);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
if (simd_sz == 16)
- OUT_BATCH(state->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
+ OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
else
- OUT_BATCH(state->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
- OUT_BATCH(state->batch, global_wk_off[0]);
- OUT_BATCH(state->batch, global_wk_dim[0]);
- OUT_BATCH(state->batch, global_wk_off[1]);
- OUT_BATCH(state->batch, global_wk_dim[1]);
- OUT_BATCH(state->batch, global_wk_off[2]);
- OUT_BATCH(state->batch, global_wk_dim[2]);
- OUT_BATCH(state->batch, ~0x0);
- OUT_BATCH(state->batch, ~0x0);
- ADVANCE_BATCH(state->batch);
-
- BEGIN_BATCH(state->batch, 2);
- OUT_BATCH(state->batch, CMD_MEDIA_STATE_FLUSH | 0);
- OUT_BATCH(state->batch, 0); /* kernel index == 0 */
- ADVANCE_BATCH(state->batch);
+ OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
+ OUT_BATCH(gpgpu->batch, global_wk_off[0]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
+ OUT_BATCH(gpgpu->batch, global_wk_off[1]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
+ OUT_BATCH(gpgpu->batch, global_wk_off[2]);
+ OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
+ OUT_BATCH(gpgpu->batch, ~0x0);
+ OUT_BATCH(gpgpu->batch, ~0x0);
+ ADVANCE_BATCH(gpgpu->batch);
+
+ BEGIN_BATCH(gpgpu->batch, 2);
+ OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
+ OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */
+ ADVANCE_BATCH(gpgpu->batch);
}
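
For reference, GPGPU_WALKER as emitted above is an 11-dword packet: header, interface descriptor index, SIMD width select plus maximum thread count, an (offset, count) dword pair per dimension, and two all-ones execution masks. A sketch assembling the same dwords into a plain array instead of the batch buffer (fill_walker_packet is a hypothetical helper; CMD_GPGPU_WALKER is the macro used above):

#include <stdint.h>

static void
fill_walker_packet(uint32_t pkt[11], uint32_t simd_sz, uint32_t thread_n,
                   const uint32_t wk_off[3], const uint32_t wk_dim[3])
{
  pkt[0]  = CMD_GPGPU_WALKER | 9;             /* header, body length 9 */
  pkt[1]  = 0;                                /* interface descriptor 0 */
  pkt[2]  = ((simd_sz == 16 ? 1u : 0u) << 30) /* bit 30: SIMD16 vs SIMD8 */
          | (thread_n - 1);                   /* thread maximum */
  pkt[3]  = wk_off[0]; pkt[4]  = wk_dim[0];   /* X offset / group count */
  pkt[5]  = wk_off[1]; pkt[6]  = wk_dim[1];   /* Y offset / group count */
  pkt[7]  = wk_off[2]; pkt[8]  = wk_dim[2];   /* Z offset / group count */
  pkt[9]  = pkt[10] = ~0u;                    /* right/bottom execution masks */
}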
LOCAL void