lima/gp: Support exp2 and log2
author Connor Abbott <cwabbott0@gmail.com>
Sun, 21 Apr 2019 19:46:46 +0000 (21:46 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Tue, 30 Jul 2019 21:01:15 +0000 (23:01 +0200)
log2 is tricky because there cannot be a move between complex1 and
postlog2. We can't guarantee that scheduling complex1 will succeed when
we schedule postlog2, so we try to schedule complex1 and if it fails we
back out by rewriting the postlog2 as a move and introducing a new
postlog2 so that we can try again later.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Acked-by: Qiang Yu <yuq825@gmail.com>
src/gallium/drivers/lima/ir/gp/codegen.c
src/gallium/drivers/lima/ir/gp/lower.c
src/gallium/drivers/lima/ir/gp/nir.c
src/gallium/drivers/lima/ir/gp/node.c
src/gallium/drivers/lima/ir/gp/scheduler.c

index 9bc279e..19eb38c 100644 (file)
@@ -376,6 +376,8 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
    case gpir_op_mov:
    case gpir_op_rcp_impl:
    case gpir_op_rsqrt_impl:
+   case gpir_op_exp2_impl:
+   case gpir_op_log2_impl:
    {
       gpir_alu_node *alu = gpir_node_to_alu(node);
       code->complex_src = gpir_get_alu_input(node, alu->children[0]);
@@ -395,6 +397,12 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
    case gpir_op_rsqrt_impl:
       code->complex_op = gpir_codegen_complex_op_rsqrt;
       break;
+   case gpir_op_exp2_impl:
+      code->complex_op = gpir_codegen_complex_op_exp2;
+      break;
+   case gpir_op_log2_impl:
+      code->complex_op = gpir_codegen_complex_op_log2;
+      break;
    default:
       assert(0);
    }
@@ -410,14 +418,19 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr)
       return;
    }
 
+   gpir_alu_node *alu = gpir_node_to_alu(node);
+   code->pass_src = gpir_get_alu_input(node, alu->children[0]);
+
    switch (node->op) {
    case gpir_op_mov:
-   {
-      gpir_alu_node *alu = gpir_node_to_alu(node);
-      code->pass_src = gpir_get_alu_input(node, alu->children[0]);
       code->pass_op = gpir_codegen_pass_op_pass;
       break;
-   }
+   case gpir_op_preexp2:
+      code->pass_op = gpir_codegen_pass_op_preexp2;
+      break;
+   case gpir_op_postlog2:
+      code->pass_op = gpir_codegen_pass_op_postlog2;
+      break;
    default:
       assert(0);
    }
index 38c2485..6c5f2db 100644 (file)
@@ -177,6 +177,19 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
    gpir_alu_node *alu = gpir_node_to_alu(node);
    gpir_node *child = alu->children[0];
 
+   if (node->op == gpir_op_exp2) {
+      gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
+      if (unlikely(!preexp2))
+         return false;
+
+      preexp2->children[0] = child;
+      preexp2->num_child = 1;
+      gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
+      list_addtail(&preexp2->node.list, &node->list);
+
+      child = &preexp2->node;
+   }
+
    gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
    if (unlikely(!complex2))
       return false;
@@ -194,6 +207,12 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
    case gpir_op_rsqrt:
       impl_op = gpir_op_rsqrt_impl;
       break;
+   case gpir_op_exp2:
+      impl_op = gpir_op_exp2_impl;
+      break;
+   case gpir_op_log2:
+      impl_op = gpir_op_log2_impl;
+      break;
    default:
       assert(0);
    }
@@ -207,14 +226,33 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
    gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
    list_addtail(&impl->node.list, &node->list);
 
-   /* change node to complex1 node */
-   node->op = gpir_op_complex1;
-   alu->children[0] = &impl->node;
-   alu->children[1] = &complex2->node;
-   alu->children[2] = child;
-   alu->num_child = 3;
-   gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
-   gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);
+   gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
+   complex1->children[0] = &impl->node;
+   complex1->children[1] = &complex2->node;
+   complex1->children[2] = child;
+   complex1->num_child = 3;
+   gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
+   gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
+   gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
+   list_addtail(&complex1->node.list, &node->list);
+
+   gpir_node *result = &complex1->node;
+
+   if (node->op == gpir_op_log2) {
+      gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
+      if (unlikely(!postlog2))
+         return false;
+
+      postlog2->children[0] = result;
+      postlog2->num_child = 1;
+      gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
+      list_addtail(&postlog2->node.list, &node->list);
+
+      result = &postlog2->node;
+   }
+
+   gpir_node_replace_succ(result, node);
+   gpir_node_delete(node);
 
    return true;
 }
@@ -384,6 +422,8 @@ static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node
    [gpir_op_neg] = gpir_lower_neg,
    [gpir_op_rcp] = gpir_lower_complex,
    [gpir_op_rsqrt] = gpir_lower_complex,
+   [gpir_op_exp2] = gpir_lower_complex,
+   [gpir_op_log2] = gpir_lower_complex,
    [gpir_op_eq] = gpir_lower_eq_ne,
    [gpir_op_ne] = gpir_lower_eq_ne,
    [gpir_op_abs] = gpir_lower_abs,
index d1da7ed..18121b9 100644 (file)
@@ -118,6 +118,8 @@ static int nir_to_gpir_opcodes[nir_num_opcodes] = {
    [nir_op_fmax] = gpir_op_max,
    [nir_op_frcp] = gpir_op_rcp,
    [nir_op_frsq] = gpir_op_rsqrt,
+   [nir_op_fexp2] = gpir_op_exp2,
+   [nir_op_flog2] = gpir_op_log2,
    [nir_op_slt] = gpir_op_lt,
    [nir_op_sge] = gpir_op_ge,
    [nir_op_fcsel] = gpir_op_select,
index a870662..1bf9d80 100644 (file)
@@ -141,15 +141,25 @@ const gpir_op_info gpir_op_infos[] = {
    },
    [gpir_op_preexp2] = {
       .name = "preexp2",
+      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
+      .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_postlog2] = {
       .name = "postlog2",
+      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
    },
    [gpir_op_exp2_impl] = {
       .name = "exp2_impl",
+      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
+      .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_log2_impl] = {
       .name = "log2_impl",
+      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
+      .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_rcp_impl] = {
       .name = "rcp_impl",
index f06089b..1a727ae 100644 (file)
@@ -627,23 +627,26 @@ static bool schedule_try_place_node(sched_ctx *ctx, gpir_node *node,
    return true;
 }
 
-static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+/* Create a new node with "node" as the child, replace all uses of "node" with
+ * this new node, and replace "node" with it in the ready list.
+ */
+static gpir_node *create_replacement(sched_ctx *ctx, gpir_node *node,
+                                     gpir_op op)
 {
-   gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
-   if (unlikely(!move))
-      return NULL;
 
-   move->children[0] = node;
-   move->num_child = 1;
+   gpir_alu_node *new_node = gpir_node_create(node->block, op);
+   if (unlikely(!new_node))
+      return NULL;
 
-   move->node.sched.instr = NULL;
-   move->node.sched.pos = -1;
-   move->node.sched.dist = node->sched.dist;
-   move->node.sched.max_node = node->sched.max_node;
-   move->node.sched.next_max_node = node->sched.next_max_node;
-   move->node.sched.complex_allowed = node->sched.complex_allowed;
+   new_node->children[0] = node;
+   new_node->num_child = 1;
 
-   gpir_debug("create move %d for %d\n", move->node.index, node->index);
+   new_node->node.sched.instr = NULL;
+   new_node->node.sched.pos = -1;
+   new_node->node.sched.dist = node->sched.dist;
+   new_node->node.sched.max_node = node->sched.max_node;
+   new_node->node.sched.next_max_node = node->sched.next_max_node;
+   new_node->node.sched.complex_allowed = node->sched.complex_allowed;
 
    ctx->ready_list_slots--;
    list_del(&node->list);
@@ -651,12 +654,26 @@ static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
    node->sched.next_max_node = false;
    node->sched.ready = false;
    node->sched.inserted = false;
-   gpir_node_replace_succ(&move->node, node);
-   gpir_node_add_dep(&move->node, node, GPIR_DEP_INPUT);
-   schedule_insert_ready_list(ctx, &move->node);
-   return &move->node;
+   gpir_node_replace_succ(&new_node->node, node);
+   gpir_node_add_dep(&new_node->node, node, GPIR_DEP_INPUT);
+   schedule_insert_ready_list(ctx, &new_node->node);
+   return &new_node->node;
+}
+
+static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+{
+   gpir_node *move = create_replacement(ctx, node, gpir_op_mov);
+   gpir_debug("create move %d for %d\n", move->index, node->index);
+   return move;
 }
 
+static gpir_node *create_postlog2(sched_ctx *ctx, gpir_node *node)
+{
+   assert(node->op == gpir_op_complex1);
+   gpir_node *postlog2 = create_replacement(ctx, node, gpir_op_postlog2);
+   gpir_debug("create postlog2 %d for %d\n", postlog2->index, node->index);
+   return postlog2;
+}
 
 /* Once we schedule the successor, would the predecessor be fully ready? */
 static bool pred_almost_ready(gpir_dep *dep)
@@ -936,7 +953,22 @@ static bool used_by_store(gpir_node *node, gpir_instr *instr)
    return false;
 }
 
+static gpir_node *consuming_postlog2(gpir_node *node)
+{
+   if (node->op != gpir_op_complex1)
+      return NULL;
 
+   gpir_node_foreach_succ(node, dep) {
+      if (dep->type != GPIR_DEP_INPUT)
+         continue;
+      if (dep->succ->op == gpir_op_postlog2)
+         return dep->succ;
+      else
+         return NULL;
+   }
+
+   return NULL;
+}
 
 static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
 {
@@ -961,6 +993,16 @@ static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
       if (available == 0)
          return false;
 
+      /* Don't spill complex1 if it's used by postlog2, turn the postlog2 into a
+       * move, replace the complex1 with postlog2 and spill that instead. The
+       * store needs a move anyways so the postlog2 is usually free.
+       */
+      gpir_node *postlog2 = consuming_postlog2(node);
+      if (postlog2) {
+         postlog2->op = gpir_op_mov;
+         node = create_postlog2(ctx, node);
+      }
+
       /* TODO: use a better heuristic for choosing an available register? */
       int physreg = ffsll(available) - 1;
 
@@ -1305,7 +1347,17 @@ static bool sched_move(sched_ctx *ctx)
 {
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
       if (node->sched.max_node) {
-         place_move(ctx, node);
+         /* For complex1 that is consumed by a postlog2, we cannot allow any
+          * moves in between. Convert the postlog2 to a move and insert a new
+          * postlog2, and try to schedule it again in try_node().
+          */
+         gpir_node *postlog2 = consuming_postlog2(node);
+         if (postlog2) {
+            postlog2->op = gpir_op_mov;
+            create_postlog2(ctx, node);
+         } else {
+            place_move(ctx, node);
+         }
          return true;
       }
    }