"xscvdpspn %x3, %x3 \n\t"
"xxspltw %x3, %x3, 0 \n\t"
- "lxvw4x 32, 0, %2 \n\t"
- "lxvw4x 33, %4, %2 \n\t"
- "lxvw4x 34, %5, %2 \n\t"
- "lxvw4x 35, %6, %2 \n\t"
- "lxvw4x 36, %7, %2 \n\t"
- "lxvw4x 37, %8, %2 \n\t"
- "lxvw4x 38, %9, %2 \n\t"
- "lxvw4x 39, %10, %2 \n\t"
+ "lxvd2x 32, 0, %2 \n\t"
+ "lxvd2x 33, %4, %2 \n\t"
+ "lxvd2x 34, %5, %2 \n\t"
+ "lxvd2x 35, %6, %2 \n\t"
+ "lxvd2x 36, %7, %2 \n\t"
+ "lxvd2x 37, %8, %2 \n\t"
+ "lxvd2x 38, %9, %2 \n\t"
+ "lxvd2x 39, %10, %2 \n\t"
"addi %2, %2, 128 \n\t"
"xvmulsp 40, 32, %x3 \n\t"
"xvmulsp 41, 33, %x3 \n\t"
- "lxvw4x 32, 0, %2 \n\t"
- "lxvw4x 33, %4, %2 \n\t"
+ "lxvd2x 32, 0, %2 \n\t"
+ "lxvd2x 33, %4, %2 \n\t"
"xvmulsp 42, 34, %x3 \n\t"
"xvmulsp 43, 35, %x3 \n\t"
- "lxvw4x 34, %5, %2 \n\t"
- "lxvw4x 35, %6, %2 \n\t"
+ "lxvd2x 34, %5, %2 \n\t"
+ "lxvd2x 35, %6, %2 \n\t"
"xvmulsp 44, 36, %x3 \n\t"
"xvmulsp 45, 37, %x3 \n\t"
- "lxvw4x 36, %7, %2 \n\t"
- "lxvw4x 37, %8, %2 \n\t"
+ "lxvd2x 36, %7, %2 \n\t"
+ "lxvd2x 37, %8, %2 \n\t"
"xvmulsp 46, 38, %x3 \n\t"
"xvmulsp 47, 39, %x3 \n\t"
- "lxvw4x 38, %9, %2 \n\t"
- "lxvw4x 39, %10, %2 \n\t"
+ "lxvd2x 38, %9, %2 \n\t"
+ "lxvd2x 39, %10, %2 \n\t"
"addi %2, %2, -128 \n\t"
- "stxvw4x 40, 0, %2 \n\t"
- "stxvw4x 41, %4, %2 \n\t"
- "stxvw4x 42, %5, %2 \n\t"
- "stxvw4x 43, %6, %2 \n\t"
- "stxvw4x 44, %7, %2 \n\t"
- "stxvw4x 45, %8, %2 \n\t"
- "stxvw4x 46, %9, %2 \n\t"
- "stxvw4x 47, %10, %2 \n\t"
+ "stxvd2x 40, 0, %2 \n\t"
+ "stxvd2x 41, %4, %2 \n\t"
+ "stxvd2x 42, %5, %2 \n\t"
+ "stxvd2x 43, %6, %2 \n\t"
+ "stxvd2x 44, %7, %2 \n\t"
+ "stxvd2x 45, %8, %2 \n\t"
+ "stxvd2x 46, %9, %2 \n\t"
+ "stxvd2x 47, %10, %2 \n\t"
"addi %2, %2, 256 \n\t"
"xvmulsp 46, 38, %x3 \n\t"
"xvmulsp 47, 39, %x3 \n\t"
- "stxvw4x 40, 0, %2 \n\t"
- "stxvw4x 41, %4, %2 \n\t"
- "stxvw4x 42, %5, %2 \n\t"
- "stxvw4x 43, %6, %2 \n\t"
- "stxvw4x 44, %7, %2 \n\t"
- "stxvw4x 45, %8, %2 \n\t"
- "stxvw4x 46, %9, %2 \n\t"
- "stxvw4x 47, %10, %2 \n"
+ "stxvd2x 40, 0, %2 \n\t"
+ "stxvd2x 41, %4, %2 \n\t"
+ "stxvd2x 42, %5, %2 \n\t"
+ "stxvd2x 43, %6, %2 \n\t"
+ "stxvd2x 44, %7, %2 \n\t"
+ "stxvd2x 45, %8, %2 \n\t"
+ "stxvd2x 46, %9, %2 \n\t"
+ "stxvd2x 47, %10, %2 \n"
"#n=%1 alpha=%3 x=%0=%2 o16=%4 o32=%5 o48=%6 o64=%7 o80=%8 o96=%9 o112=%10"
:
".p2align 5 \n"
"1: \n\t"
- "stxvw4x %x3, 0, %2 \n\t"
- "stxvw4x %x3, %4, %2 \n\t"
- "stxvw4x %x3, %5, %2 \n\t"
- "stxvw4x %x3, %6, %2 \n\t"
- "stxvw4x %x3, %7, %2 \n\t"
- "stxvw4x %x3, %8, %2 \n\t"
- "stxvw4x %x3, %9, %2 \n\t"
- "stxvw4x %x3, %10, %2 \n\t"
+ "stxvd2x %x3, 0, %2 \n\t"
+ "stxvd2x %x3, %4, %2 \n\t"
+ "stxvd2x %x3, %5, %2 \n\t"
+ "stxvd2x %x3, %6, %2 \n\t"
+ "stxvd2x %x3, %7, %2 \n\t"
+ "stxvd2x %x3, %8, %2 \n\t"
+ "stxvd2x %x3, %9, %2 \n\t"
+ "stxvd2x %x3, %10, %2 \n\t"
"addi %2, %2, 128 \n\t"