case gpir_op_mov:
case gpir_op_rcp_impl:
case gpir_op_rsqrt_impl:
+ case gpir_op_exp2_impl:
+ case gpir_op_log2_impl:
{
gpir_alu_node *alu = gpir_node_to_alu(node);
code->complex_src = gpir_get_alu_input(node, alu->children[0]);
case gpir_op_rsqrt_impl:
code->complex_op = gpir_codegen_complex_op_rsqrt;
break;
+ case gpir_op_exp2_impl:
+ code->complex_op = gpir_codegen_complex_op_exp2;
+ break;
+ case gpir_op_log2_impl:
+ code->complex_op = gpir_codegen_complex_op_log2;
+ break;
default:
assert(0);
}
return;
}
+ gpir_alu_node *alu = gpir_node_to_alu(node);
+ code->pass_src = gpir_get_alu_input(node, alu->children[0]);
+
switch (node->op) {
case gpir_op_mov:
- {
- gpir_alu_node *alu = gpir_node_to_alu(node);
- code->pass_src = gpir_get_alu_input(node, alu->children[0]);
code->pass_op = gpir_codegen_pass_op_pass;
break;
- }
+ case gpir_op_preexp2:
+ code->pass_op = gpir_codegen_pass_op_preexp2;
+ break;
+ case gpir_op_postlog2:
+ code->pass_op = gpir_codegen_pass_op_postlog2;
+ break;
default:
assert(0);
}
gpir_alu_node *alu = gpir_node_to_alu(node);
gpir_node *child = alu->children[0];
+ if (node->op == gpir_op_exp2) {
+ gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
+ if (unlikely(!preexp2))
+ return false;
+
+ preexp2->children[0] = child;
+ preexp2->num_child = 1;
+ gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
+ list_addtail(&preexp2->node.list, &node->list);
+
+ child = &preexp2->node;
+ }
+
gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
if (unlikely(!complex2))
return false;
case gpir_op_rsqrt:
impl_op = gpir_op_rsqrt_impl;
break;
+ case gpir_op_exp2:
+ impl_op = gpir_op_exp2_impl;
+ break;
+ case gpir_op_log2:
+ impl_op = gpir_op_log2_impl;
+ break;
default:
assert(0);
}
gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
list_addtail(&impl->node.list, &node->list);
- /* change node to complex1 node */
- node->op = gpir_op_complex1;
- alu->children[0] = &impl->node;
- alu->children[1] = &complex2->node;
- alu->children[2] = child;
- alu->num_child = 3;
- gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
- gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);
+ gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
+ complex1->children[0] = &impl->node;
+ complex1->children[1] = &complex2->node;
+ complex1->children[2] = child;
+ complex1->num_child = 3;
+ gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
+ gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
+ gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
+ list_addtail(&complex1->node.list, &node->list);
+
+ gpir_node *result = &complex1->node;
+
+ if (node->op == gpir_op_log2) {
+ gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
+ if (unlikely(!postlog2))
+ return false;
+
+ postlog2->children[0] = result;
+ postlog2->num_child = 1;
+ gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
+ list_addtail(&postlog2->node.list, &node->list);
+
+ result = &postlog2->node;
+ }
+
+ gpir_node_replace_succ(result, node);
+ gpir_node_delete(node);
return true;
}
[gpir_op_neg] = gpir_lower_neg,
[gpir_op_rcp] = gpir_lower_complex,
[gpir_op_rsqrt] = gpir_lower_complex,
+ [gpir_op_exp2] = gpir_lower_complex,
+ [gpir_op_log2] = gpir_lower_complex,
[gpir_op_eq] = gpir_lower_eq_ne,
[gpir_op_ne] = gpir_lower_eq_ne,
[gpir_op_abs] = gpir_lower_abs,
[nir_op_fmax] = gpir_op_max,
[nir_op_frcp] = gpir_op_rcp,
[nir_op_frsq] = gpir_op_rsqrt,
+ [nir_op_fexp2] = gpir_op_exp2,
+ [nir_op_flog2] = gpir_op_log2,
[nir_op_slt] = gpir_op_lt,
[nir_op_sge] = gpir_op_ge,
[nir_op_fcsel] = gpir_op_select,
},
[gpir_op_preexp2] = {
.name = "preexp2",
+ .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
+ .spillless = true,
+ .schedule_first = true,
},
[gpir_op_postlog2] = {
.name = "postlog2",
+ .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
},
[gpir_op_exp2_impl] = {
.name = "exp2_impl",
+ .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
+ .spillless = true,
+ .schedule_first = true,
},
[gpir_op_log2_impl] = {
.name = "log2_impl",
+ .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
+ .spillless = true,
+ .schedule_first = true,
},
[gpir_op_rcp_impl] = {
.name = "rcp_impl",
return true;
}
-static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+/* Create a new node with opcode "op" and "node" as its only child, replace
+ * all uses of "node" with this new node, and replace "node" with it in the
+ * ready list.
+ *
+ * The new node gets sched.instr = NULL and sched.pos = -1, and copies dist,
+ * max_node, next_max_node and complex_allowed from "node". "node" is then
+ * unlinked from its list, ctx->ready_list_slots is decremented, and the
+ * next_max_node, ready and inserted flags of "node" are cleared before the
+ * new node is handed to schedule_insert_ready_list().
+ *
+ * Returns the new node, or NULL if gpir_node_create() fails; in that case
+ * neither "node" nor "ctx" has been touched.
+ */
+static gpir_node *create_replacement(sched_ctx *ctx, gpir_node *node,
+ gpir_op op)
{
- gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
- if (unlikely(!move))
- return NULL;
- move->children[0] = node;
- move->num_child = 1;
+ gpir_alu_node *new_node = gpir_node_create(node->block, op);
+ if (unlikely(!new_node))
+ return NULL;
- move->node.sched.instr = NULL;
- move->node.sched.pos = -1;
- move->node.sched.dist = node->sched.dist;
- move->node.sched.max_node = node->sched.max_node;
- move->node.sched.next_max_node = node->sched.next_max_node;
- move->node.sched.complex_allowed = node->sched.complex_allowed;
+ new_node->children[0] = node;
+ new_node->num_child = 1;
- gpir_debug("create move %d for %d\n", move->node.index, node->index);
+ new_node->node.sched.instr = NULL;
+ new_node->node.sched.pos = -1;
+ new_node->node.sched.dist = node->sched.dist;
+ new_node->node.sched.max_node = node->sched.max_node;
+ new_node->node.sched.next_max_node = node->sched.next_max_node;
+ new_node->node.sched.complex_allowed = node->sched.complex_allowed;
ctx->ready_list_slots--;
list_del(&node->list);
node->sched.next_max_node = false;
node->sched.ready = false;
node->sched.inserted = false;
- gpir_node_replace_succ(&move->node, node);
- gpir_node_add_dep(&move->node, node, GPIR_DEP_INPUT);
- schedule_insert_ready_list(ctx, &move->node);
- return &move->node;
+ gpir_node_replace_succ(&new_node->node, node);
+ gpir_node_add_dep(&new_node->node, node, GPIR_DEP_INPUT);
+ schedule_insert_ready_list(ctx, &new_node->node);
+ return &new_node->node;
+}
+
+/* Insert a mov between "node" and all of its users by delegating to
+ * create_replacement() with gpir_op_mov.
+ *
+ * Returns the new mov node, or NULL if create_replacement() could not
+ * allocate it. The NULL case must be checked before the debug print, which
+ * reads move->index.
+ */
+static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+{
+   gpir_node *move = create_replacement(ctx, node, gpir_op_mov);
+   if (unlikely(!move))
+      return NULL;
+
+   gpir_debug("create move %d for %d\n", move->index, node->index);
+   return move;
}
+/* Insert a postlog2 between "node" and all of its users by delegating to
+ * create_replacement() with gpir_op_postlog2. "node" must be a complex1 node
+ * (asserted).
+ *
+ * Returns the new postlog2 node, or NULL if create_replacement() could not
+ * allocate it. The NULL case must be checked before the debug print, which
+ * reads postlog2->index.
+ */
+static gpir_node *create_postlog2(sched_ctx *ctx, gpir_node *node)
+{
+   assert(node->op == gpir_op_complex1);
+
+   gpir_node *postlog2 = create_replacement(ctx, node, gpir_op_postlog2);
+   if (unlikely(!postlog2))
+      return NULL;
+
+   gpir_debug("create postlog2 %d for %d\n", postlog2->index, node->index);
+   return postlog2;
+}
/* Once we schedule the successor, would the predecessor be fully ready? */
static bool pred_almost_ready(gpir_dep *dep)
return false;
}
+/* If "node" is a complex1 whose first input-dependency successor is a
+ * postlog2, return that postlog2 node. Return NULL in every other case: when
+ * "node" is not a complex1, when it has no input-dependency successor, or
+ * when the first such successor is some other op.
+ */
+static gpir_node *consuming_postlog2(gpir_node *node)
+{
+   if (node->op == gpir_op_complex1) {
+      gpir_node_foreach_succ(node, dep) {
+         /* Only the first GPIR_DEP_INPUT successor decides the answer. */
+         if (dep->type == GPIR_DEP_INPUT)
+            return dep->succ->op == gpir_op_postlog2 ? dep->succ : NULL;
+      }
+   }
+
+   return NULL;
+}
static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
{
if (available == 0)
return false;
+ /* Don't spill complex1 if it's used by a postlog2: turn the postlog2 into a
+ * move, replace the complex1 with a new postlog2 and spill that instead. The
+ * store needs a move anyway, so the postlog2 is usually free.
+ */
+ gpir_node *postlog2 = consuming_postlog2(node);
+ if (postlog2) {
+ postlog2->op = gpir_op_mov;
+ node = create_postlog2(ctx, node);
+ }
+
/* TODO: use a better heuristic for choosing an available register? */
int physreg = ffsll(available) - 1;
{
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (node->sched.max_node) {
- place_move(ctx, node);
+ /* For complex1 that is consumed by a postlog2, we cannot allow any
+ * moves in between. Convert the postlog2 to a move and insert a new
+ * postlog2, and try to schedule it again in try_node().
+ */
+ gpir_node *postlog2 = consuming_postlog2(node);
+ if (postlog2) {
+ postlog2->op = gpir_op_mov;
+ create_postlog2(ctx, node);
+ } else {
+ place_move(ctx, node);
+ }
return true;
}
}