aarch64: Implement flags2d
author: Marek Vasut <marex@denx.de>
Sat, 29 Aug 2020 23:23:05 +0000 (01:23 +0200)
committer: Marek Vasut <marex@denx.de>
Sat, 19 Sep 2020 01:08:46 +0000 (03:08 +0200)
Implement support for .flags 2d by adding code for handling the loop
counter. The implementation is very similar to aarch32.

Signed-off-by: Marek Vasut <marex@denx.de>
orc/orcprogram-neon.c

index ae81810..8d55411 100644 (file)
@@ -753,8 +753,20 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
   orc_neon_load_constants_outer (compiler);
 
   if (compiler->is_64bit) {
-    /** @todo not supported yet */
-    if (compiler->program->is_2d) return;
+    if (compiler->program->is_2d) {
+      if (compiler->program->constant_m > 0) {
+        orc_arm64_emit_mov_imm (compiler, 32, ORC_ARM64_IP1, compiler->program->constant_m);
+        orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+            (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]));
+      } else {
+        orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+            (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A1]));
+        orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+            (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]));
+      }
+
+      orc_arm_emit_label (compiler, LABEL_OUTER_LOOP);
+    }
 
     if (compiler->loop_shift > 0) {
       orc_neon64_loop_shift (compiler);
@@ -781,6 +793,17 @@ orc_compiler_neon_assemble (OrcCompiler *compiler)
 
       orc_neon64_loop_caches (compiler);
     }
+
+    if (compiler->program->is_2d) {
+      neon_add_strides (compiler);
+
+      orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+          (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]));
+      orc_arm64_emit_subs_imm (compiler, 32, ORC_ARM64_IP1, ORC_ARM64_IP1, 1);
+      orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+          (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]));
+      orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, LABEL_OUTER_LOOP);
+    }
   } else {
     if (compiler->program->is_2d) {
       if (compiler->program->constant_m > 0) {
@@ -1275,13 +1298,23 @@ neon_add_strides (OrcCompiler *compiler)
         break;
       case ORC_VAR_TYPE_SRC:
       case ORC_VAR_TYPE_DEST:
-        orc_arm_emit_load_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
-            (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
-        orc_arm_emit_load_reg (compiler, ORC_ARM_A2, compiler->exec_reg,
-            (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
-        orc_arm_emit_add (compiler, ORC_ARM_A3, ORC_ARM_A3, ORC_ARM_A2);
-        orc_arm_emit_store_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
-            (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+        if (compiler->is_64bit) {
+          orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+          orc_arm64_emit_load_reg (compiler, 32, ORC_ARM64_R18, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
+          orc_arm64_emit_add (compiler, 32, ORC_ARM64_IP1, ORC_ARM64_IP1, ORC_ARM64_R18);
+          orc_arm64_emit_store_reg (compiler, 32, ORC_ARM64_IP1, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[i]));
+        } else {
+          orc_arm_emit_load_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+          orc_arm_emit_load_reg (compiler, ORC_ARM_A2, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]));
+          orc_arm_emit_add (compiler, ORC_ARM_A3, ORC_ARM_A3, ORC_ARM_A2);
+          orc_arm_emit_store_reg (compiler, ORC_ARM_A3, compiler->exec_reg,
+              (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+        }
         break;
       case ORC_VAR_TYPE_ACCUMULATOR:
         break;