arg_type = gbe_kernel_get_arg_type(k->opaque, i);
if (arg_type != GBE_ARG_GLOBAL_PTR &&
arg_type != GBE_ARG_CONSTANT_PTR &&
- arg_type != GBE_ARG_IMAGE)
+ arg_type != GBE_ARG_IMAGE &&
+ arg_type != GBE_ARG_SAMPLER)
continue;
offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
if (arg_type == GBE_ARG_IMAGE) {
k->args[i].mem->intel_fmt, k->args[i].mem->w,
k->args[i].mem->h, k->args[i].mem->pitch,
k->args[i].mem->tiling);
+ } else if (arg_type == GBE_ARG_SAMPLER) {
+ uint32_t *curbe_index = (uint32_t*)(k->curbe + offset);
+ cl_gpgpu_insert_sampler(queue->gpgpu, curbe_index, k->args[i].sampler);
} else
cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
}
#define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2
#define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0
+#define GEN_MAPFILTER_NEAREST 0x0 /* written to ss0.min_filter / ss0.mag_filter for CL_FILTER_NEAREST */
+#define GEN_MAPFILTER_LINEAR 0x1 /* written to ss0.min_filter / ss0.mag_filter for CL_FILTER_LINEAR */
+#define GEN_MAPFILTER_ANISOTROPIC 0x2 /* not referenced by the sampler setup in this change */
+
+#define GEN_MIPFILTER_NONE 0 /* value the sampler setup stores in ss0.mip_filter */
+#define GEN_MIPFILTER_NEAREST 1 /* not referenced by the sampler setup in this change */
+#define GEN_MIPFILTER_LINEAR 3 /* not referenced by the sampler setup in this change */
+
+#define GEN_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 /* U/V/R *_MAG flags: set in ss3.address_round when mag_filter is not NEAREST */
+#define GEN_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 /* U/V/R *_MIN flags: set in ss3.address_round when min_filter is not NEAREST */
+#define GEN_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
+#define GEN_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
+#define GEN_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
+#define GEN_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
+
+#define GEN_TEXCOORDMODE_WRAP 0 /* returned by translate_wrap_mode(); stored in ss3.{r,s,t}_wrap_mode */
+#define GEN_TEXCOORDMODE_MIRROR 1 /* returned by translate_wrap_mode() */
+#define GEN_TEXCOORDMODE_CLAMP 2 /* returned by translate_wrap_mode() */
+#define GEN_TEXCOORDMODE_CUBE 3 /* not returned by translate_wrap_mode() */
+#define GEN_TEXCOORDMODE_CLAMP_BORDER 4 /* returned by translate_wrap_mode() */
+#define GEN_TEXCOORDMODE_MIRROR_ONCE 5 /* not returned by translate_wrap_mode() */
+
#endif /* __GENX_DEFINES_H__ */
#include "cl_alloc.h"
#include "cl_utils.h"
+#include "cl_sampler.h"
#define GEN_CMD_MEDIA_OBJECT (0x71000000)
#define MO_TS_BIT (1 << 24)
enum { max_img_n = 32 };
+enum {max_sampler_n = 16 }; /* sampler slot count: sampler_bitmap starts with every bit at or above this index already set */
+
/* Handle GPGPU state */
struct intel_gpgpu
{
unsigned int img_index_base; /* base index for image surface.*/
drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */
+ unsigned int sampler_bitmap; /* sampler usage bitmap. */
+
struct { drm_intel_bo *bo; } stack_b;
struct { drm_intel_bo *bo; } idrt_b;
struct { drm_intel_bo *bo; } surface_heap_b;
gpgpu->binded_n = 0;
gpgpu->img_bitmap = 0;
gpgpu->img_index_base = 2;
+ gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
/* URB */
gpgpu->urb.num_cs_entries = 64;
return slot + gpgpu->img_index_base;
}
+/*
+ * Reserve one sampler slot in gpgpu->sampler_bitmap and return its index.
+ *
+ * A set bit in sampler_bitmap marks a slot as taken, so the free slots are
+ * the set bits of its complement. The assert fires when no slot is left.
+ *
+ * NOTE(review): the "31 - pos" / "pos - max_sampler_n" arithmetic implies
+ * that __fls() reports a position counted from the most significant bit;
+ * confirm against the definition of __fls().
+ */
+static int
+intel_gpgpu_get_free_sampler_index(intel_gpgpu_t *gpgpu)
+{
+  const unsigned int free_slots = ~gpgpu->sampler_bitmap;
+  int pos;
+
+  assert(free_slots != 0);
+  pos = __fls(free_slots);
+  /* Mark the chosen slot as used before handing it out. */
+  gpgpu->sampler_bitmap |= (1 << (31 - pos));
+  return pos - max_sampler_n;
+}
+
static void
intel_gpgpu_bind_image2D_gen7(intel_gpgpu_t *gpgpu,
uint32_t *curbe_index,
}
}
+/*
+ * Translate an OpenCL sampler addressing mode into the GEN texture
+ * coordinate mode stored in the sampler state wrap fields.
+ *
+ * cl_address_mode: one of the CL_ADDRESS_* values.
+ * using_nearest:   kept so existing callers keep compiling; it no longer
+ *                  influences the result.
+ *
+ * Returns a GEN_TEXCOORDMODE_* value. Unknown addressing modes map to
+ * GEN_TEXCOORDMODE_WRAP.
+ */
+int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest)
+{
+  (void) using_nearest;
+
+  switch (cl_address_mode) {
+  case CL_ADDRESS_NONE:
+  case CL_ADDRESS_REPEAT:
+    return GEN_TEXCOORDMODE_WRAP;
+  case CL_ADDRESS_CLAMP:
+    /* OpenCL defines CL_ADDRESS_CLAMP as "out-of-range coordinates
+     * return the border color" for every filter mode. This is not
+     * OpenGL's GL_CLAMP, so nearest filtering must not be turned into
+     * clamp-to-edge here.
+     */
+    return GEN_TEXCOORDMODE_CLAMP_BORDER;
+  case CL_ADDRESS_CLAMP_TO_EDGE:
+    return GEN_TEXCOORDMODE_CLAMP;
+  case CL_ADDRESS_MIRRORED_REPEAT:
+    return GEN_TEXCOORDMODE_MIRROR;
+  default:
+    return GEN_TEXCOORDMODE_WRAP;
+  }
+}
+
+
+/*
+ * Program one sampler state entry from an OpenCL sampler object and report
+ * the chosen slot back to the kernel.
+ *
+ * gpgpu:       GPGPU state; a free slot is taken from its sampler bitmap and
+ *              the entry is written into the mapped sampler_state_b buffer.
+ * curbe_index: location that receives the slot index.
+ * cl_sampler:  source of normalized_coords, filter and address settings.
+ */
+static void
+intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t *curbe_index, cl_sampler cl_sampler)
+{
+  int index;
+  int using_nearest;
+  uint32_t wrap_mode;
+  uint32_t address_round = 0;
+  gen7_sampler_state_t *sampler;
+
+  index = intel_gpgpu_get_free_sampler_index(gpgpu);
+  sampler = ((gen7_sampler_state_t *) gpgpu->sampler_state_b.bo->virtual) + index;
+
+  sampler->ss3.non_normalized_coord = cl_sampler->normalized_coords ? 0 : 1;
+
+  switch (cl_sampler->filter) {
+  case CL_FILTER_LINEAR:
+    sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
+    sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+    sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
+    using_nearest = 0;
+    break;
+  case CL_FILTER_NEAREST:
+  default:
+    /* Unknown filter values fall back to nearest so the filter fields
+     * never keep whatever the slot held before.
+     */
+    sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
+    sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+    sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
+    using_nearest = 1;
+    break;
+  }
+
+  /* The same wrap mode is applied to all three coordinates. */
+  wrap_mode = translate_wrap_mode(cl_sampler->address, using_nearest);
+  sampler->ss3.r_wrap_mode = wrap_mode;
+  sampler->ss3.s_wrap_mode = wrap_mode;
+  sampler->ss3.t_wrap_mode = wrap_mode;
+
+  sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+  sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+  sampler->ss0.base_level = 0;
+
+  sampler->ss1.max_lod = 0;
+  sampler->ss1.min_lod = 0;
+
+  /* Build the rounding mask from scratch and assign it, so bits already
+   * present in the slot cannot leak into the new state.
+   */
+  if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST)
+    address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN |
+                     GEN_ADDRESS_ROUNDING_ENABLE_V_MIN |
+                     GEN_ADDRESS_ROUNDING_ENABLE_R_MIN;
+  if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST)
+    address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
+                     GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
+                     GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
+  sampler->ss3.address_round = address_round;
+
+  *curbe_index = index;
+}
+
static void
intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
{
cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker;
+ cl_gpgpu_insert_sampler = (cl_gpgpu_insert_sampler_cb *) intel_gpgpu_insert_sampler;
}