+ /* Give the object a graphics address in the AUB file. We
+ * don't just use the GEM object address because we do AUB
+ * dumping before execution -- we want to successfully log
+ * when the hardware might hang, and we might even want to aub
+ * capture for a driver trying to execute on a different
+ * generation of hardware by disabling the actual kernel exec
+ * call.
+ */
+ bo_gem->aub_offset = bufmgr_gem->aub_offset;
+ bufmgr_gem->aub_offset += bo->size;
+ /* XXX: Handle aperture overflow. */
+ assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
+}
+
+/**
+ * Write a single AUB trace block for part of a buffer object.
+ *
+ * Emits a CMD_AUB_TRACE_HEADER_BLOCK packet addressed at
+ * bo_gem->aub_offset + offset, then hands the same (offset, size) range
+ * to aub_write_bo_data().  When bufmgr_gem->gen >= 8 the header is one
+ * dword longer, and that extra trailing dword is written as zero.
+ */
+static void
+aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
+                      uint32_t offset, uint32_t size)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+    const int long_header = bufmgr_gem->gen >= 8;
+    const int header_dwords = long_header ? 6 : 5;
+
+    /* The length field in the first dword is the dword count minus 2. */
+    aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (header_dwords - 2));
+    aub_out(bufmgr_gem,
+            AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
+    aub_out(bufmgr_gem, subtype);
+    aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
+    aub_out(bufmgr_gem, size);
+    if (long_header)
+        aub_out(bufmgr_gem, 0);
+
+    aub_write_bo_data(bo, offset, size);
+}
+
+/**
+ * Split a large object into several trace-block writes.  Otherwise a
+ * 128kb VBO would overflow the 16 bits of size field in the packet
+ * header, and everything after that goes badly.
+ *
+ * Each individual write covers at most 8 * 4096 bytes; the final one
+ * carries whatever remains.
+ */
+static void
+aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
+                            uint32_t offset, uint32_t size)
+{
+    const uint32_t max_chunk = 8 * 4096;
+    uint32_t done = 0;
+
+    while (done < size) {
+        uint32_t chunk = size - done;
+
+        if (chunk > max_chunk)
+            chunk = max_chunk;
+
+        aub_write_trace_block(bo, type, subtype, offset + done, chunk);
+        done += chunk;
+    }
+}
+
+/**
+ * Dump the full contents of a buffer object into the AUB file.
+ *
+ * aub_bo_get_address() is called first.  The buffer is then written
+ * section by section: each annotation covers the bytes from the end of
+ * the previous section up to its ending_offset (clamped to bo->size),
+ * and is tagged with that annotation's type and subtype.  Annotations
+ * that would not advance the current position are skipped.  Whatever is
+ * left after the last annotation is written as AUB_TRACE_TYPE_NOTYPE.
+ */
+static void
+aub_write_bo(drm_intel_bo *bo)
+{
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+    uint32_t cursor = 0;
+    unsigned idx;
+
+    aub_bo_get_address(bo);
+
+    /* One write per annotated section. */
+    for (idx = 0; idx < bo_gem->aub_annotation_count; idx++) {
+        drm_intel_aub_annotation *note = &bo_gem->aub_annotations[idx];
+        uint32_t section_end = note->ending_offset;
+
+        /* Never run past the end of the buffer. */
+        if (section_end > bo->size)
+            section_end = bo->size;
+
+        /* Empty or backwards sections contribute nothing. */
+        if (section_end <= cursor)
+            continue;
+
+        aub_write_large_trace_block(bo, note->type, note->subtype,
+                                    cursor, section_end - cursor);
+        cursor = section_end;
+    }
+
+    /* Trailing bytes not covered by any annotation. */
+    if (cursor < bo->size) {
+        aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
+                                    cursor, bo->size - cursor);
+    }
+}
+
+/*
+ * Build a ring buffer on the fly and dump it.
+ *
+ * The ring holds a single AUB_MI_BATCH_BUFFER_START command pointing at
+ * batch_buffer: three dwords when bufmgr_gem->gen >= 8, two otherwise.
+ * ring_flag selects the trace ring type: I915_EXEC_BSD maps to PRB1,
+ * I915_EXEC_BLT to PRB2, and anything else to the default PRB0.
+ * The ring is written at bufmgr_gem->aub_offset, which is then advanced
+ * by 4096.
+ */
+static void
+aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
+                          uint32_t batch_buffer, int ring_flag)
+{
+    const int long_header = bufmgr_gem->gen >= 8;
+    uint32_t ring_cmds[4096];
+    int dwords = 0;
+    int ring;
+
+    switch (ring_flag) {
+    case I915_EXEC_BSD:
+        ring = AUB_TRACE_TYPE_RING_PRB1;
+        break;
+    case I915_EXEC_BLT:
+        ring = AUB_TRACE_TYPE_RING_PRB2;
+        break;
+    default:
+        ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
+        break;
+    }
+
+    /* Make a ring buffer to execute our batchbuffer. */
+    memset(ring_cmds, 0, sizeof(ring_cmds));
+    ring_cmds[dwords++] = long_header
+        ? (AUB_MI_BATCH_BUFFER_START | (3 - 2))
+        : AUB_MI_BATCH_BUFFER_START;
+    ring_cmds[dwords++] = batch_buffer;
+    if (long_header)
+        ring_cmds[dwords++] = 0;
+
+    /* Write out the ring. This appears to trigger execution of
+     * the ring in the simulator.
+     */
+    aub_out(bufmgr_gem,
+            CMD_AUB_TRACE_HEADER_BLOCK | ((long_header ? 6 : 5) - 2));
+    aub_out(bufmgr_gem,
+            AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
+    aub_out(bufmgr_gem, 0); /* general/surface subtype */
+    aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
+    aub_out(bufmgr_gem, dwords * 4);
+    if (long_header)
+        aub_out(bufmgr_gem, 0);
+
+    /* FIXME: Need some flush operations here? */
+    aub_out_data(bufmgr_gem, ring_cmds, dwords * 4);
+
+    /* Update offset pointer */
+    bufmgr_gem->aub_offset += 4096;
+}
+
+/**
+ * Emit a CMD_AUB_DUMP_BMP packet describing a rectangle of \p bo.
+ *
+ * \param bo      buffer object holding the image
+ * \param x1,y1   packed into one dword as (y1 << 16) | x1
+ * \param width,height  packed into one dword as (height << 16) | width
+ * \param format  pixel format; also determines the bytes-per-pixel value
+ *                written into the packet (1, 2 or 4)
+ * \param pitch   written into the packet divided by 4
+ * \param offset  added to the buffer's AUB offset to form the address
+ *
+ * An unrecognised format is reported with printf() and nothing is
+ * written.  Nothing is written either when no AUB file is open; note
+ * that the format is validated before that check.
+ */
+void
+drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
+                              int x1, int y1, int width, int height,
+                              enum aub_dump_bmp_format format,
+                              int pitch, int offset)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+    uint32_t cpp;
+    uint32_t tiling_bits = 0;
+
+    switch (format) {
+    case AUB_DUMP_BMP_FORMAT_8BIT:
+        cpp = 1;
+        break;
+    case AUB_DUMP_BMP_FORMAT_ARGB_4444:
+        cpp = 2;
+        break;
+    case AUB_DUMP_BMP_FORMAT_ARGB_0888:
+    case AUB_DUMP_BMP_FORMAT_ARGB_8888:
+        cpp = 4;
+        break;
+    default:
+        printf("Unknown AUB dump format %d\n", format);
+        return;
+    }
+
+    if (bufmgr_gem->aub_file == NULL)
+        return;
+
+    /* Bit 2 is set for any tiled layout, bit 3 additionally for Y tiling. */
+    if (bo_gem->tiling_mode != I915_TILING_NONE)
+        tiling_bits |= 1 << 2;
+    if (bo_gem->tiling_mode == I915_TILING_Y)
+        tiling_bits |= 1 << 3;
+
+    aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
+    aub_out(bufmgr_gem, (y1 << 16) | x1);
+    aub_out(bufmgr_gem, (format << 24) | (cpp << 19) | pitch / 4);
+    aub_out(bufmgr_gem, (height << 16) | width);
+    aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
+    aub_out(bufmgr_gem, tiling_bits);
+}
+
+/**
+ * Dump one batch submission into the AUB file.
+ *
+ * Does nothing when no AUB file is open.  Otherwise every buffer on the
+ * exec list is written with aub_write_bo(), a ring buffer pointing at
+ * the batch is written, and the file is flushed.  If the batch buffer
+ * carries no annotations, a temporary pair is installed for the duration
+ * of the dump: the first \p used bytes as AUB_TRACE_TYPE_BATCH and the
+ * rest as AUB_TRACE_TYPE_NOTYPE.
+ */
+static void
+aub_exec(drm_intel_bo *bo, int ring_flag, int used)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+    bool added_annotations = false;
+    int idx;
+
+    if (bufmgr_gem->aub_file == NULL)
+        return;
+
+    /* If batch buffer is not annotated, annotate it the best we
+     * can.
+     */
+    if (bo_gem->aub_annotation_count == 0) {
+        drm_intel_aub_annotation fallback[2] = {
+            { AUB_TRACE_TYPE_BATCH, 0, used },
+            { AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
+        };
+
+        drm_intel_bufmgr_gem_set_aub_annotations(bo, fallback, 2);
+        added_annotations = true;
+    }
+
+    /* Write out all buffers to AUB memory */
+    for (idx = 0; idx < bufmgr_gem->exec_count; idx++)
+        aub_write_bo(bufmgr_gem->exec_bos[idx]);
+
+    /* Remove any annotations we added */
+    if (added_annotations)
+        drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
+
+    /* Dump ring buffer; the batch's AUB offset is only read here, after
+     * the buffers above have been written.
+     */
+    aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
+
+    fflush(bufmgr_gem->aub_file);
+
+    /*
+     * One frame has been dumped. So reset the aub_offset for the next frame.
+     *
+     * FIXME: Can we do this?
+     */
+    bufmgr_gem->aub_offset = 0x10000;
+}
+
+/**
+ * Submit a batch buffer through DRM_IOCTL_I915_GEM_EXECBUFFER.
+ *
+ * \param bo             the batch buffer
+ * \param used           stored as the execbuffer's batch_len
+ * \param cliprects      passed through as cliprects_ptr
+ * \param num_cliprects  passed through as num_cliprects
+ * \param DR4            passed through as DR4
+ *
+ * \return 0 on success, -ENOMEM if the buffer is flagged has_error, or
+ *         -errno from the ioctl.
+ *
+ * The buffer manager lock is held from relocation processing until the
+ * validation list has been emptied, which happens whether or not the
+ * ioctl succeeded.
+ */
+static int
+drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
+                      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+    struct drm_i915_gem_execbuffer execbuf;
+    int ret, i;
+
+    if (bo_gem->has_error)
+        return -ENOMEM;
+
+    pthread_mutex_lock(&bufmgr_gem->lock);
+
+    /* Update indices and set up the validate list. */
+    drm_intel_gem_bo_process_reloc(bo);
+
+    /* Add the batch buffer to the validation list. There are no
+     * relocations pointing to it.
+     */
+    drm_intel_add_validate_buffer(bo);
+
+    VG_CLEAR(execbuf);
+    execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
+    execbuf.buffer_count = bufmgr_gem->exec_count;
+    execbuf.batch_start_offset = 0;
+    execbuf.batch_len = used;
+    execbuf.cliprects_ptr = (uintptr_t) cliprects;
+    execbuf.num_cliprects = num_cliprects;
+    execbuf.DR1 = 0;
+    execbuf.DR4 = DR4;
+
+    ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf);
+    if (ret != 0) {
+        /* Capture errno before anything else can overwrite it. */
+        ret = -errno;
+        if (errno == ENOSPC) {
+            DBG("Execbuffer fails to pin. "
+                "Estimate: %u. Actual: %u. Available: %u\n",
+                drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
+                                                   bufmgr_gem->exec_count),
+                drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
+                                                  bufmgr_gem->exec_count),
+                (unsigned int)bufmgr_gem->gtt_size);
+        }
+    }
+    drm_intel_update_buffer_offsets(bufmgr_gem);
+
+    if (bufmgr_gem->bufmgr.debug)
+        drm_intel_gem_dump_validation_list(bufmgr_gem);
+
+    /* Disconnect every buffer from the validate list. */
+    for (i = 0; i < bufmgr_gem->exec_count; i++) {
+        drm_intel_bo_gem *exec_bo_gem =
+            (drm_intel_bo_gem *) bufmgr_gem->exec_bos[i];
+
+        exec_bo_gem->validate_index = -1;
+        bufmgr_gem->exec_bos[i] = NULL;
+    }
+    bufmgr_gem->exec_count = 0;
+
+    pthread_mutex_unlock(&bufmgr_gem->lock);
+
+    return ret;
+}
+
+static int
+do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
+ drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
+ unsigned int flags)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ int ret = 0;
+ int i;
+
+ switch (flags & 0x7) {
+ default:
+ return -EINVAL;
+ case I915_EXEC_BLT:
+ if (!bufmgr_gem->has_blt)
+ return -EINVAL;
+ break;
+ case I915_EXEC_BSD:
+ if (!bufmgr_gem->has_bsd)
+ return -EINVAL;
+ break;
+ case I915_EXEC_VEBOX:
+ if (!bufmgr_gem->has_vebox)
+ return -EINVAL;