nir/opt_memcpy: lower copies to/from tightly packed types
author Jason Ekstrand <jason@jlekstrand.net>
Sat, 24 Oct 2020 21:48:16 +0000 (16:48 -0500)
committer Marge Bot <emma+marge@anholt.net>
Fri, 24 Jun 2022 19:21:26 +0000 (19:21 +0000)
v2: Add comment by Jason (Lionel)

Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com> (1.5 years later)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13166>

src/compiler/nir/nir_opt_memcpy.c

index be3f19d..d022e58 100644 (file)
@@ -168,6 +168,42 @@ try_lower_memcpy(nir_builder *b, nir_intrinsic_instr *cpy)
       return true;
    }
 
+   /* If one of the two types is tightly packed and its size happens to equal
+    * the memcpy size, then we can implement the memcpy by casting the other
+    * deref to that type and doing a deref copy.
+    *
+    * However, if we blindly apply this logic, we may end up with extra casts
+    * where we don't want them. The whole point of converting memcpy to
+    * copy_deref is the hope that nir_opt_copy_prop_vars or
+    * nir_lower_vars_to_ssa will get rid of the copy, and those passes don't
+    * handle casts well. Heuristically, only do this optimization if the
+    * tightly packed type is on a deref with nir_var_function_temp so we stick
+    * the cast on the other mode.
+    */
+   if (dst->modes == nir_var_function_temp &&
+       type_is_tightly_packed(dst->type, &type_size) &&
+       type_size == size) {
+      b->cursor = nir_instr_remove(&cpy->instr);
+      src = nir_build_deref_cast(b, &src->dest.ssa,
+                                 src->modes, dst->type, 0);
+      nir_copy_deref_with_access(b, dst, src,
+                                 nir_intrinsic_dst_access(cpy),
+                                 nir_intrinsic_src_access(cpy));
+      return true;
+   }
+
+   if (src->modes == nir_var_function_temp &&
+       type_is_tightly_packed(src->type, &type_size) &&
+       type_size == size) {
+      b->cursor = nir_instr_remove(&cpy->instr);
+      dst = nir_build_deref_cast(b, &dst->dest.ssa,
+                                 dst->modes, src->type, 0);
+      nir_copy_deref_with_access(b, dst, src,
+                                 nir_intrinsic_dst_access(cpy),
+                                 nir_intrinsic_src_access(cpy));
+      return true;
+   }
+
    return false;
 }