.macro COPY_4x16
lxvd2x vs0, o0, A0
- lxvd2x vs8, o0, A1
- lxvd2x vs24, o0, A3
- lxvd2x vs16, o0, A2
+ lxvd2x vs1, o0, A1
+ lxvd2x vs2, o0, A2
+ lxvd2x vs3, o0, A3
- lxvd2x vs1, o16, A0
- lxvd2x vs9, o16, A1
- lxvd2x vs17, o16, A2
- lxvd2x vs25, o16, A3
+ lxvd2x vs4, o16, A0
+ lxvd2x vs5, o16, A1
+ lxvd2x vs6, o16, A2
+ lxvd2x vs7, o16, A3
- lxvd2x vs2, o32, A0
- lxvd2x vs10, o32, A1
- lxvd2x vs18, o32, A2
- lxvd2x vs26, o32, A3
+ xxpermdi vs32, vs0, vs1, 0
+ xxpermdi vs33, vs2, vs3, 0
+ xxpermdi vs34, vs0, vs1, 3
+ xxpermdi vs35, vs2, vs3, 3
- lxvd2x vs3, o48, A0
- lxvd2x vs11, o48, A1
- lxvd2x vs19, o48, A2
- lxvd2x vs27, o48, A3
+ xxpermdi vs36, vs4, vs5, 0
+ xxpermdi vs37, vs6, vs7, 0
+ xxpermdi vs38, vs4, vs5, 3
+ xxpermdi vs39, vs6, vs7, 3
- lxvd2x vs4, o64, A0
- lxvd2x vs12, o64, A1
- lxvd2x vs20, o64, A2
- lxvd2x vs28, o64, A3
+ lxvd2x vs0, o32, A0
+ lxvd2x vs1, o32, A1
+ lxvd2x vs2, o32, A2
+ lxvd2x vs3, o32, A3
- lxvd2x vs5, o80, A0
- lxvd2x vs13, o80, A1
- lxvd2x vs21, o80, A2
- lxvd2x vs29, o80, A3
+ lxvd2x vs4, o48, A0
+ lxvd2x vs5, o48, A1
+ lxvd2x vs6, o48, A2
+ lxvd2x vs7, o48, A3
- lxvd2x vs6, o96, A0
- lxvd2x vs14, o96, A1
- lxvd2x vs22, o96, A2
- lxvd2x vs30, o96, A3
- lxvd2x vs7, o112, A0
- lxvd2x vs15, o112, A1
- lxvd2x vs23, o112, A2
- lxvd2x vs31, o112, A3
+ xxpermdi vs40, vs0, vs1, 0
+ xxpermdi vs41, vs2, vs3, 0
+ xxpermdi vs42, vs0, vs1, 3
+ xxpermdi vs43, vs2, vs3, 3
+ xxpermdi vs44, vs4, vs5, 0
+ xxpermdi vs45, vs6, vs7, 0
+ xxpermdi vs46, vs4, vs5, 3
+ xxpermdi vs47, vs6, vs7, 3
- xxpermdi vs32, vs0, vs8, 0
- xxpermdi vs33, vs16, vs24, 0
- xxpermdi vs34, vs0, vs8, 3
- xxpermdi vs35, vs16, vs24, 3
+ lxvd2x vs0, o64, A0
+ lxvd2x vs1, o64, A1
+ lxvd2x vs2, o64, A2
+ lxvd2x vs3, o64, A3
- xxpermdi vs36, vs1, vs9, 0
- xxpermdi vs37, vs17, vs25, 0
- xxpermdi vs38, vs1, vs9, 3
- xxpermdi vs39, vs17, vs25, 3
+ lxvd2x vs4, o80, A0
+ lxvd2x vs5, o80, A1
+ lxvd2x vs6, o80, A2
+ lxvd2x vs7, o80, A3
- xxpermdi vs40, vs2, vs10, 0
- xxpermdi vs41, vs18, vs26, 0
- xxpermdi vs42, vs2, vs10, 3
- xxpermdi vs43, vs18, vs26, 3
- xxpermdi vs44, vs3, vs11, 0
- xxpermdi vs45, vs19, vs27, 0
- xxpermdi vs46, vs3, vs11, 3
- xxpermdi vs47, vs19, vs27, 3
+ xxpermdi vs48, vs0, vs1, 0
+ xxpermdi vs49, vs2, vs3, 0
+ xxpermdi vs50, vs0, vs1, 3
+ xxpermdi vs51, vs2, vs3, 3
+
+ xxpermdi vs8, vs4, vs5, 0
+ xxpermdi vs9, vs6, vs7, 0
+ xxpermdi vs10, vs4, vs5, 3
+ xxpermdi vs11, vs6, vs7, 3
+
+ lxvd2x vs0, o96, A0
+ lxvd2x vs1, o96, A1
+ lxvd2x vs2, o96, A2
+ lxvd2x vs3, o96, A3
+
+
+ lxvd2x vs6, o112, A0
+ lxvd2x vs7, o112, A1
+ lxvd2x vs12, o112, A2
+ lxvd2x vs13, o112, A3
- xxpermdi vs48, vs4, vs12, 0
- xxpermdi vs49, vs20, vs28, 0
- xxpermdi vs50, vs4, vs12, 3
- xxpermdi vs51, vs20, vs28, 3
- xxpermdi vs52, vs5, vs13, 0
- xxpermdi vs53, vs21, vs29, 0
- xxpermdi vs54, vs5, vs13, 3
- xxpermdi vs55, vs21, vs29, 3
+ xxpermdi vs4, vs0, vs1, 0
+ xxpermdi vs5, vs2, vs3, 0
+ xxpermdi vs0, vs0, vs1, 3
+ xxpermdi vs2, vs2, vs3, 3
+
addi A0, A0, 128
addi A1, A1, 128
- xxpermdi vs56, vs6, vs14, 0
- xxpermdi vs57, vs22, vs30, 0
- xxpermdi vs58, vs6, vs14, 3
- xxpermdi vs59, vs22, vs30, 3
+ xxpermdi vs1, vs6, vs7, 0
+ xxpermdi vs3, vs12, vs13, 0
+ xxpermdi vs6, vs6, vs7, 3
+ xxpermdi vs12, vs12, vs13, 3
+
+ dcbt BO, PREB
addi A3, A3, 128
addi A2, A2, 128
- xxpermdi vs60, vs7, vs15, 0
- xxpermdi vs61, vs23, vs31, 0
- xxpermdi vs62, vs7, vs15, 3
- xxpermdi vs63, vs23, vs31, 3
-
- dcbt BO, PREB
-
stxvd2x vs32, o0, BO
stxvd2x vs33, o16, BO
stxvd2x vs34, o32, BO
stxvd2x vs49, o16, BO
stxvd2x vs50, o32, BO
stxvd2x vs51, o48, BO
- stxvd2x vs52, o64, BO
- stxvd2x vs53, o80, BO
- stxvd2x vs54, o96, BO
- stxvd2x vs55, o112, BO
+ stxvd2x vs8, o64, BO
+ stxvd2x vs9, o80, BO
+ stxvd2x vs10, o96, BO
+ stxvd2x vs11, o112, BO
addi BO, BO, 128
dcbt BO, PREB
- stxvd2x vs56, o0, BO
- stxvd2x vs57, o16, BO
- stxvd2x vs58, o32, BO
- stxvd2x vs59, o48, BO
- stxvd2x vs60, o64, BO
- stxvd2x vs61, o80, BO
- stxvd2x vs62, o96, BO
- stxvd2x vs63, o112, BO
+ stxvd2x vs4, o0, BO
+ stxvd2x vs5, o16, BO
+ stxvd2x vs0, o32, BO
+ stxvd2x vs2, o48, BO
+ stxvd2x vs1, o64, BO
+ stxvd2x vs3, o80, BO
+ stxvd2x vs6, o96, BO
+ stxvd2x vs12, o112, BO
addi BO, BO, 128
addi A1, A1, 64
- lxvd2x vs16, o0, A2
- lxvd2x vs17, o16, A2
- lxvd2x vs18, o32, A2
- lxvd2x vs19, o48, A2
+ lxvd2x vs4, o0, A2
+ lxvd2x vs5, o16, A2
+ lxvd2x vs6, o32, A2
+ lxvd2x vs7, o48, A2
addi A2, A2, 64
- lxvd2x vs24, o0, A3
- lxvd2x vs25, o16, A3
- lxvd2x vs26, o32, A3
- lxvd2x vs27, o48, A3
+ lxvd2x vs12, o0, A3
+ lxvd2x vs13, o16, A3
+ lxvd2x vs50, o32, A3
+ lxvd2x vs51, o48, A3
addi A3, A3, 64
xxpermdi vs32, vs0, vs8, 0
- xxpermdi vs33, vs16, vs24, 0
+ xxpermdi vs33, vs4, vs12, 0
xxpermdi vs34, vs0, vs8, 3
- xxpermdi vs35, vs16, vs24, 3
+ xxpermdi vs35, vs4, vs12, 3
xxpermdi vs36, vs1, vs9, 0
- xxpermdi vs37, vs17, vs25, 0
+ xxpermdi vs37, vs5, vs13, 0
xxpermdi vs38, vs1, vs9, 3
- xxpermdi vs39, vs17, vs25, 3
+ xxpermdi vs39, vs5, vs13, 3
xxpermdi vs40, vs2, vs10, 0
- xxpermdi vs41, vs18, vs26, 0
+ xxpermdi vs41, vs6, vs50, 0
xxpermdi vs42, vs2, vs10, 3
- xxpermdi vs43, vs18, vs26, 3
+ xxpermdi vs43, vs6, vs50, 3
xxpermdi vs44, vs3, vs11, 0
- xxpermdi vs45, vs19, vs27, 0
+ xxpermdi vs45, vs7, vs51, 0
xxpermdi vs46, vs3, vs11, 3
- xxpermdi vs47, vs19, vs27, 3
+ xxpermdi vs47, vs7, vs51, 3
stxvd2x vs32, o0, BO
addi A1, A1, 32
- lxvd2x vs16, o0, A2
- lxvd2x vs17, o16, A2
+ lxvd2x vs10, o0, A2
+ lxvd2x vs11, o16, A2
addi A2, A2, 32
- lxvd2x vs24, o0, A3
- lxvd2x vs25, o16, A3
+ lxvd2x vs12, o0, A3
+ lxvd2x vs13, o16, A3
addi A3, A3, 32
xxpermdi vs32, vs0, vs8, 0
- xxpermdi vs33, vs16, vs24, 0
+ xxpermdi vs33, vs10, vs12, 0
xxpermdi vs34, vs0, vs8, 3
- xxpermdi vs35, vs16, vs24, 3
+ xxpermdi vs35, vs10, vs12, 3
xxpermdi vs36, vs1, vs9, 0
- xxpermdi vs37, vs17, vs25, 0
+ xxpermdi vs37, vs11, vs13, 0
xxpermdi vs38, vs1, vs9, 3
- xxpermdi vs39, vs17, vs25, 3
+ xxpermdi vs39, vs11, vs13, 3
stxvd2x vs32, o0, BO
addi A1, A1, 16
- lxvd2x vs16, o0, A2
+ lxvd2x vs9, o0, A2
addi A2, A2, 16
- lxvd2x vs24, o0, A3
+ lxvd2x vs10, o0, A3
addi A3, A3, 16
xxpermdi vs32, vs0, vs8, 0
- xxpermdi vs33, vs16, vs24, 0
+ xxpermdi vs33, vs9, vs10, 0
xxpermdi vs34, vs0, vs8, 3
- xxpermdi vs35, vs16, vs24, 3
+ xxpermdi vs35, vs9, vs10, 3
stxvd2x vs32, o0, BO
addi A1, A1, 8
- lxsdx vs16, o0, A2
+ lxsdx vs9, o0, A2
addi A2, A2, 8
- lxsdx vs24, o0, A3
+ lxsdx vs10, o0, A3
addi A3, A3, 8
xxpermdi vs32, vs0, vs8, 0
- xxpermdi vs33, vs16, vs24, 0
+ xxpermdi vs33, vs9, vs10, 0
stxvd2x vs32, o0, BO
lxvd2x vs11, o48, A1
lxvd2x vs12, o64, A1
lxvd2x vs13, o80, A1
- lxvd2x vs14, o96, A1
- lxvd2x vs15, o112, A1
+ lxvd2x vs48, o96, A1
+ lxvd2x vs49, o112, A1
addi A1, A1, 128
xxpermdi vs42, vs5, vs13, 0
xxpermdi vs43, vs5, vs13, 3
- xxpermdi vs44, vs6, vs14, 0
- xxpermdi vs45, vs6, vs14, 3
+ xxpermdi vs44, vs6, vs48, 0
+ xxpermdi vs45, vs6, vs48, 3
- xxpermdi vs46, vs7, vs15, 0
- xxpermdi vs47, vs7, vs15, 3
+ xxpermdi vs46, vs7, vs49, 0
+ xxpermdi vs47, vs7, vs49, 3
stxvd2x vs32, o0, BO