* \param old_dmesg_timestamp previous dmesg timestamp parsed at init time
 * \param out_addr address where the VM fault was detected
*/
-bool ac_vm_fault_occured(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
+bool ac_vm_fault_occurred(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
uint64_t *out_addr)
{
#ifdef _WIN32
const char *name, enum amd_gfx_level gfx_level, enum radeon_family family,
ac_debug_addr_callback addr_callback, void *addr_callback_data);
-bool ac_vm_fault_occured(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
+bool ac_vm_fault_occurred(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
uint64_t *out_addr);
unsigned ac_get_wave_info(enum amd_gfx_level gfx_level,
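For reference, a minimal caller sketch for the renamed helper, based only on the signature above; the GFX10 value and the local variables are illustrative, not part of this patch:

   uint64_t dmesg_timestamp = 0; /* parsed once at init, per the \param doc above */
   uint64_t fault_addr;
   /* true (and fault_addr filled) only if a new VM fault appeared in dmesg */
   bool faulted = ac_vm_fault_occurred(GFX10, &dmesg_timestamp, &fault_addr);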
#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6
#define CIK_SDMA_OPCODE_WRITE 0x2
#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0
-#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1
+#define SDMA_WRITE_SUB_OPCODE_TILED 0x1
#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
#define CIK_SDMA_PACKET_FENCE 0x5
#define CIK_SDMA_PACKET_TRAP 0x6
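For context, these opcode and sub-opcode values are packed into an SDMA packet header dword. A sketch of the usual layout, with an illustrative SDMA_PACKET macro (the tree's actual macro may be named differently):

   /* count in bits [31:16], sub-opcode in [15:8], opcode in [7:0] */
   #define SDMA_PACKET(op, sub_op, n) ((((unsigned)(n)) << 16) | ((sub_op) << 8) | (op))
   uint32_t header = SDMA_PACKET(CIK_SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_TILED, 0);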
void
get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
{
- std::vector<std::vector<Temp>> phi_ressources;
- std::unordered_map<unsigned, unsigned> temp_to_phi_ressources;
+ std::vector<std::vector<Temp>> phi_resources;
+ std::unordered_map<unsigned, unsigned> temp_to_phi_resources;
for (auto block_rit = ctx.program->blocks.rbegin(); block_rit != ctx.program->blocks.rend();
block_rit++) {
live.erase(def.tempId());
/* mark last-seen phi operand */
std::unordered_map<unsigned, unsigned>::iterator it =
- temp_to_phi_ressources.find(def.tempId());
- if (it != temp_to_phi_ressources.end() &&
- def.regClass() == phi_ressources[it->second][0].regClass()) {
- phi_ressources[it->second][0] = def.getTemp();
+ temp_to_phi_resources.find(def.tempId());
+ if (it != temp_to_phi_resources.end() &&
+ def.regClass() == phi_resources[it->second][0].regClass()) {
+ phi_resources[it->second][0] = def.getTemp();
/* try to coalesce phi affinities with parallelcopies */
Operand op = Operand();
switch (instr->opcode) {
}
if (op.isTemp() && op.isFirstKillBeforeDef() && def.regClass() == op.regClass()) {
- phi_ressources[it->second].emplace_back(op.getTemp());
- temp_to_phi_ressources[op.tempId()] = it->second;
+ phi_resources[it->second].emplace_back(op.getTemp());
+ temp_to_phi_resources[op.tempId()] = it->second;
}
}
}
assert(instr->definitions[0].isTemp());
std::unordered_map<unsigned, unsigned>::iterator it =
- temp_to_phi_ressources.find(instr->definitions[0].tempId());
- unsigned index = phi_ressources.size();
+ temp_to_phi_resources.find(instr->definitions[0].tempId());
+ unsigned index = phi_resources.size();
std::vector<Temp>* affinity_related;
- if (it != temp_to_phi_ressources.end()) {
+ if (it != temp_to_phi_resources.end()) {
index = it->second;
- phi_ressources[index][0] = instr->definitions[0].getTemp();
- affinity_related = &phi_ressources[index];
+ phi_resources[index][0] = instr->definitions[0].getTemp();
+ affinity_related = &phi_resources[index];
} else {
- phi_ressources.emplace_back(std::vector<Temp>{instr->definitions[0].getTemp()});
- affinity_related = &phi_ressources.back();
+ phi_resources.emplace_back(std::vector<Temp>{instr->definitions[0].getTemp()});
+ affinity_related = &phi_resources.back();
}
for (const Operand& op : instr->operands) {
affinity_related->emplace_back(op.getTemp());
if (block.kind & block_kind_loop_header)
continue;
- temp_to_phi_ressources[op.tempId()] = index;
+ temp_to_phi_resources[op.tempId()] = index;
}
}
}
continue;
/* create an (empty) merge-set for the phi-related variables */
- auto it = temp_to_phi_ressources.find(phi->definitions[0].tempId());
- unsigned index = phi_ressources.size();
- if (it == temp_to_phi_ressources.end()) {
- temp_to_phi_ressources[phi->definitions[0].tempId()] = index;
- phi_ressources.emplace_back(std::vector<Temp>{phi->definitions[0].getTemp()});
+ auto it = temp_to_phi_resources.find(phi->definitions[0].tempId());
+ unsigned index = phi_resources.size();
+ if (it == temp_to_phi_resources.end()) {
+ temp_to_phi_resources[phi->definitions[0].tempId()] = index;
+ phi_resources.emplace_back(std::vector<Temp>{phi->definitions[0].getTemp()});
} else {
index = it->second;
}
for (unsigned i = 1; i < phi->operands.size(); i++) {
const Operand& op = phi->operands[i];
if (op.isTemp() && op.isKill() && op.regClass() == phi->definitions[0].regClass()) {
- temp_to_phi_ressources[op.tempId()] = index;
+ temp_to_phi_resources[op.tempId()] = index;
}
}
}
}
}
/* create affinities */
- for (std::vector<Temp>& vec : phi_ressources) {
+ for (std::vector<Temp>& vec : phi_resources) {
for (unsigned i = 1; i < vec.size(); i++)
if (vec[i].id() != vec[0].id())
ctx.assignments[vec[i].id()].affinity = vec[0].id();
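Reading the loops above: each phi_resources vector chains a phi definition (slot 0) with killed operands of the same register class, and this final loop points every later member's affinity at the chain head. Presumably that is what lets the allocator bias the whole chain into one register and elide the parallelcopies mentioned in the earlier comment.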
return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
}
-void ac_reset_llvm_all_options_occurences()
+void ac_reset_llvm_all_options_occurrences()
{
cl::ResetAllOptionOccurrences();
}
#endif
};
- ac_reset_llvm_all_options_occurences();
+ ac_reset_llvm_all_options_occurrences();
LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
ac_llvm_run_atexit_for_destructors();
const char *ac_get_llvm_processor_name(enum radeon_family family);
void ac_llvm_run_atexit_for_destructors(void);
bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor);
-void ac_reset_llvm_all_options_occurences();
+void ac_reset_llvm_all_options_occurrences();
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes);
bool ac_is_sgpr_param(LLVMValueRef param);
void
load_bounds(VOID_REF ids, uint32_t iter, uint32_t task_index, uint32_t lds_base,
- uint32_t neigbourhood_overlap, uint32_t search_bound)
+ uint32_t neighbourhood_overlap, uint32_t search_bound)
{
- for (uint32_t i = task_index - 2 * neigbourhood_overlap; i < search_bound;
+ for (uint32_t i = task_index - 2 * neighbourhood_overlap; i < search_bound;
i += gl_WorkGroupSize.x) {
uint32_t id = load_id(ids, iter, i);
if (id == RADV_BVH_INVALID_NODE)
/* Find preferred partners and merge them */
PHASE (args.header) {
uint32_t base_index = task_index - gl_LocalInvocationID.x;
- uint32_t neigbourhood_overlap = min(PLOC_NEIGHBOURHOOD, base_index);
- uint32_t double_neigbourhood_overlap = min(2 * PLOC_NEIGHBOURHOOD, base_index);
+ uint32_t neighbourhood_overlap = min(PLOC_NEIGHBOURHOOD, base_index);
+ uint32_t double_neighbourhood_overlap = min(2 * PLOC_NEIGHBOURHOOD, base_index);
/* Upper bound of the range where valid nearest node indices are written. */
uint32_t write_bound =
min(current_task_count, base_index + gl_WorkGroupSize.x + PLOC_NEIGHBOURHOOD);
/* Upper bound of the range where valid nearest node indices are searched. */
uint32_t search_bound =
min(current_task_count, base_index + gl_WorkGroupSize.x + 2 * PLOC_NEIGHBOURHOOD);
- uint32_t lds_base = base_index - double_neigbourhood_overlap;
+ uint32_t lds_base = base_index - double_neighbourhood_overlap;
- load_bounds(src_ids, iter, task_index, lds_base, neigbourhood_overlap, search_bound);
+ load_bounds(src_ids, iter, task_index, lds_base, neighbourhood_overlap, search_bound);
for (uint32_t i = gl_LocalInvocationID.x; i < NUM_PLOC_LDS_ITEMS; i += gl_WorkGroupSize.x)
nearest_neighbour_indices[i] = 0xFFFFFFFF;
barrier();
- for (uint32_t i = task_index - double_neigbourhood_overlap; i < write_bound;
+ for (uint32_t i = task_index - double_neighbourhood_overlap; i < write_bound;
i += gl_WorkGroupSize.x) {
uint32_t right_bound = min(search_bound - 1 - i, PLOC_NEIGHBOURHOOD);
uint32_t fallback_pair = i == 0 ? (i + 1) : (i - 1);
uint32_t min_offset = encode_neighbour_offset(INFINITY, i, fallback_pair);
- for (uint32_t j = max(i + 1, base_index - neigbourhood_overlap); j <= i + right_bound;
+ for (uint32_t j = max(i + 1, base_index - neighbourhood_overlap); j <= i + right_bound;
++j) {
float sah = combined_node_cost(lds_base, i, j);
min_offset = min(min_offset, i_encoded_offset);
atomicMin(nearest_neighbour_indices[j - lds_base], j_encoded_offset);
}
- if (i >= base_index - neigbourhood_overlap)
+ if (i >= base_index - neighbourhood_overlap)
atomicMin(nearest_neighbour_indices[i - lds_base], min_offset);
}
shared_aggregate_sum = 0;
barrier();
- for (uint32_t i = task_index - neigbourhood_overlap; i < write_bound;
+ for (uint32_t i = task_index - neighbourhood_overlap; i < write_bound;
i += gl_WorkGroupSize.x) {
uint32_t left_bound = min(i, PLOC_NEIGHBOURHOOD);
uint32_t right_bound = min(search_bound - 1 - i, PLOC_NEIGHBOURHOOD);
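A note on the doubled overlap, as far as this hunk shows: the final loop handles nodes from base_index - neighbourhood_overlap onward, and each of those nodes compares against partners up to PLOC_NEIGHBOURHOOD further left, so the bounds and nearest-neighbour slots in LDS must cover 2 * PLOC_NEIGHBOURHOOD before base_index. That is what double_neighbourhood_overlap and lds_base = base_index - double_neighbourhood_overlap account for.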
if (!device->trace_id_ptr)
return false;
- ac_vm_fault_occured(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, NULL);
+ ac_vm_fault_occurred(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, NULL);
return true;
}
}
static bool
-radv_gpu_hang_occured(struct radv_queue *queue, enum amd_ip_type ring)
+radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
{
struct radeon_winsys *ws = queue->device->ws;
ring = radv_queue_ring(queue);
- bool hang_occurred = radv_gpu_hang_occured(queue, ring);
+ bool hang_occurred = radv_gpu_hang_occurred(queue, ring);
bool vm_fault_occurred = false;
if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
- vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.gfx_level,
+ vm_fault_occurred = ac_vm_fault_occurred(device->physical_device->rad_info.gfx_level,
&device->dmesg_timestamp, &addr);
if (!hang_occurred && !vm_fault_occurred)
return;
/* Get per-vertex LDS usage. */
bool uses_instanceid =
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
- bool uses_primtive_id =
+ bool uses_primitive_id =
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
bool streamout_enabled = nir->xfb_info && device->physical_device->use_ngg_streamout;
unsigned pervertex_lds_bytes =
false, /* user edge flag */
info->has_ngg_culling,
uses_instanceid,
- uses_primtive_id);
+ uses_primitive_id);
unsigned total_es_lds_bytes = pervertex_lds_bytes * max_vtx_in;
scratch_lds_base = ALIGN(total_es_lds_bytes, 8u);
uint64_t addr;
char cmd_line[4096];
- if (!ac_vm_fault_occured(sctx->gfx_level, &sctx->dmesg_timestamp, &addr))
+ if (!ac_vm_fault_occurred(sctx->gfx_level, &sctx->dmesg_timestamp, &addr))
return;
f = dd_get_debug_file(false);
* only new messages will be checked for VM faults.
*/
if (sctx->screen->debug_flags & DBG(CHECK_VM))
- ac_vm_fault_occured(sctx->gfx_level, &sctx->dmesg_timestamp, NULL);
+ ac_vm_fault_occurred(sctx->gfx_level, &sctx->dmesg_timestamp, NULL);
}
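Note the NULL out_addr here and in the earlier radv call: per the comment above, the call is made only to advance the saved dmesg timestamp so that later checks see just new fault messages.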