anv: Implement transform feedback queries
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 14 Sep 2018 20:10:28 +0000 (15:10 -0500)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 22 Jan 2019 16:42:57 +0000 (10:42 -0600)
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
src/intel/vulkan/anv_device.c
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan/genX_query.c

index 97a5ce3..758de3d 100644 (file)
@@ -1305,7 +1305,7 @@ void anv_GetPhysicalDeviceProperties2(
          props->maxTransformFeedbackStreamDataSize = 128 * 4;
          props->maxTransformFeedbackBufferDataSize = 128 * 4;
          props->maxTransformFeedbackBufferDataStride = 2048;
-         props->transformFeedbackQueries = VK_FALSE;
+         props->transformFeedbackQueries = VK_TRUE;
          props->transformFeedbackStreamsLinesTriangles = VK_FALSE;
          props->transformFeedbackRasterizationStreamSelect = VK_FALSE;
          props->transformFeedbackDraw = VK_TRUE;
index 899a96f..d2142ae 100644 (file)
@@ -1150,6 +1150,7 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline,
 #if GEN_GEN >= 8
       if (xfb_info) {
          so.SOFunctionEnable = true;
+         so.SOStatisticsEnable = true;
 
          const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info =
             vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT);
index c3ecd5b..794d92d 100644 (file)
@@ -72,6 +72,12 @@ VkResult genX(CreateQueryPool)(
       /* Statistics queries have a min and max for every statistic */
       uint64s_per_slot += 2 * util_bitcount(pipeline_statistics);
       break;
+   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+      /* Transform feedback queries are 4 values: begin/end snapshots of
+       * the primitives-written and primitive-storage-needed counters.
+       */
+      uint64s_per_slot += 4;
+      break;
    default:
       assert(!"Invalid query type");
    }
@@ -220,7 +226,8 @@ VkResult genX(GetQueryPoolResults)(
 
    assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
           pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
-          pool->type == VK_QUERY_TYPE_TIMESTAMP);
+          pool->type == VK_QUERY_TYPE_TIMESTAMP ||
+          pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT);
 
    if (anv_device_is_lost(device))
       return VK_ERROR_DEVICE_LOST;
@@ -284,6 +291,15 @@ VkResult genX(GetQueryPoolResults)(
          break;
       }
 
+      case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+         if (write_results)
+            cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]);
+         idx++;
+         if (write_results)
+            cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]);
+         idx++;
+         break;
+
       case VK_QUERY_TYPE_TIMESTAMP:
          if (write_results)
             cpu_write_query_result(pData, flags, idx, slot[1]);
@@ -411,6 +427,31 @@ emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, uint32_t stat,
    emit_srm64(&cmd_buffer->batch, addr, vk_pipeline_stat_to_reg[stat]);
 }
 
+/**
+ * Emit commands that snapshot the two streamout counters of one transform
+ * feedback stream into memory.
+ *
+ * Four MI_STORE_REGISTER_MEM commands are emitted.  The register pair at
+ * SO_NUM_PRIMS_WRITTEN0 + stream * 8 goes to addr + 0 and addr + 4; the pair
+ * at SO_PRIM_STORAGE_NEEDED0 + stream * 8 goes to addr + 16 and addr + 20.
+ * Bytes addr + 8 .. addr + 15 are not written by this call: the begin-query
+ * path passes query_addr + 8 and the end-query path passes query_addr + 16,
+ * so the end snapshot of the first counter fills the gap left by the begin
+ * snapshot, and likewise for the second counter.
+ *
+ * \param cmd_buffer  command buffer whose batch receives the commands
+ * \param stream      transform feedback stream index; asserted to be below
+ *                    MAX_XFB_STREAMS
+ * \param addr        base address of the snapshot
+ *
+ * NOTE(review): the batch-emit variable is called "lrm" although the command
+ * is a store (SRM).  emit_srm64(), which emit_pipeline_stat() calls, looks
+ * like it could replace each pair of stores here -- confirm its behaviour
+ * before deduplicating.
+ */
+static void
+emit_xfb_query(struct anv_cmd_buffer *cmd_buffer, uint32_t stream,
+               struct anv_address addr)
+{
+   assert(stream < MAX_XFB_STREAMS);
+
+   /* First counter: primitives written.  Copied with two stores, register
+    * offset +0/+4 to memory offset +0/+4.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 0 + stream * 8;
+      lrm.MemoryAddress    = anv_address_add(addr, 0);
+   }
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 4 + stream * 8;
+      lrm.MemoryAddress    = anv_address_add(addr, 4);
+   }
+
+   /* Second counter: primitive storage needed.  It starts 16 bytes in, not
+    * 8, because begin and end snapshots of the same counter sit next to
+    * each other in the query slot.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 0 + stream * 8;
+      lrm.MemoryAddress    = anv_address_add(addr, 16);
+   }
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 4 + stream * 8;
+      lrm.MemoryAddress    = anv_address_add(addr, 20);
+   }
+}
+
 void genX(CmdBeginQuery)(
     VkCommandBuffer                             commandBuffer,
     VkQueryPool                                 queryPool,
@@ -454,6 +495,14 @@ void genX(CmdBeginQueryIndexedEXT)(
       break;
    }
 
+   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.CommandStreamerStallEnable = true;
+         pc.StallAtPixelScoreboard = true;
+      }
+      emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 8));
+      break;
+
    default:
       unreachable("");
    }
@@ -503,6 +552,16 @@ void genX(CmdEndQueryIndexedEXT)(
       break;
    }
 
+   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.CommandStreamerStallEnable = true;
+         pc.StallAtPixelScoreboard = true;
+      }
+
+      emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 16));
+      emit_query_availability(cmd_buffer, query_addr);
+      break;
+
    default:
       unreachable("");
    }
@@ -797,6 +856,17 @@ void genX(CmdCopyQueryPoolResults)(
          break;
       }
 
+      case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+         compute_query_result(&cmd_buffer->batch, MI_ALU_REG2,
+                              anv_address_add(query_addr, 8));
+         gpu_write_query_result(&cmd_buffer->batch, dest_addr,
+                                flags, idx++, CS_GPR(2));
+         compute_query_result(&cmd_buffer->batch, MI_ALU_REG2,
+                              anv_address_add(query_addr, 24));
+         gpu_write_query_result(&cmd_buffer->batch, dest_addr,
+                                flags, idx++, CS_GPR(2));
+         break;
+
       case VK_QUERY_TYPE_TIMESTAMP:
          emit_load_alu_reg_u64(&cmd_buffer->batch,
                                CS_GPR(2), anv_address_add(query_addr, 8));