"xscvdpspn 37, %x14 \n\t" // load s to all words
"xxspltw 37, 37, 0 \n\t"
- "lxvw4x 32, 0, %3 \n\t" // load x
- "lxvw4x 33, %15, %3 \n\t"
- "lxvw4x 34, %16, %3 \n\t"
- "lxvw4x 35, %17, %3 \n\t"
+ "lxvd2x 32, 0, %3 \n\t" // load x
+ "lxvd2x 33, %15, %3 \n\t"
+ "lxvd2x 34, %16, %3 \n\t"
+ "lxvd2x 35, %17, %3 \n\t"
- "lxvw4x 48, 0, %4 \n\t" // load y
- "lxvw4x 49, %15, %4 \n\t"
- "lxvw4x 50, %16, %4 \n\t"
- "lxvw4x 51, %17, %4 \n\t"
+ "lxvd2x 48, 0, %4 \n\t" // load y
+ "lxvd2x 49, %15, %4 \n\t"
+ "lxvd2x 50, %16, %4 \n\t"
+ "lxvd2x 51, %17, %4 \n\t"
"addi %3, %3, 64 \n\t"
"addi %4, %4, 64 \n\t"
"xvmulsp 44, 32, 37 \n\t" // s * x
"xvmulsp 45, 33, 37 \n\t"
- "lxvw4x 32, 0, %3 \n\t" // load x
- "lxvw4x 33, %15, %3 \n\t"
+ "lxvd2x 32, 0, %3 \n\t" // load x
+ "lxvd2x 33, %15, %3 \n\t"
"xvmulsp 46, 34, 37 \n\t"
"xvmulsp 47, 35, 37 \n\t"
- "lxvw4x 34, %16, %3 \n\t"
- "lxvw4x 35, %17, %3 \n\t"
+ "lxvd2x 34, %16, %3 \n\t"
+ "lxvd2x 35, %17, %3 \n\t"
"xvmulsp %x9, 48, 37 \n\t" // s * y
"xvmulsp %x10, 49, 37 \n\t"
- "lxvw4x 48, 0, %4 \n\t" // load y
- "lxvw4x 49, %15, %4 \n\t"
+ "lxvd2x 48, 0, %4 \n\t" // load y
+ "lxvd2x 49, %15, %4 \n\t"
"xvmulsp %x11, 50, 37 \n\t"
"xvmulsp %x12, 51, 37 \n\t"
- "lxvw4x 50, %16, %4 \n\t"
- "lxvw4x 51, %17, %4 \n\t"
+ "lxvd2x 50, %16, %4 \n\t"
+ "lxvd2x 51, %17, %4 \n\t"
"xvaddsp 40, 40, %x9 \n\t" // c * x + s * y
"xvaddsp 41, 41, %x10 \n\t" // c * x + s * y
"xvsubsp %x7, %x7, 46 \n\t" // c * y - s * x
"xvsubsp %x8, %x8, 47 \n\t" // c * y - s * x
- "stxvw4x 40, 0, %3 \n\t" // store x
- "stxvw4x 41, %15, %3 \n\t"
- "stxvw4x 42, %16, %3 \n\t"
- "stxvw4x 43, %17, %3 \n\t"
+ "stxvd2x 40, 0, %3 \n\t" // store x
+ "stxvd2x 41, %15, %3 \n\t"
+ "stxvd2x 42, %16, %3 \n\t"
+ "stxvd2x 43, %17, %3 \n\t"
- "stxvw4x %x5, 0, %4 \n\t" // store y
- "stxvw4x %x6, %15, %4 \n\t"
- "stxvw4x %x7, %16, %4 \n\t"
- "stxvw4x %x8, %17, %4 \n\t"
+ "stxvd2x %x5, 0, %4 \n\t" // store y
+ "stxvd2x %x6, %15, %4 \n\t"
+ "stxvd2x %x7, %16, %4 \n\t"
+ "stxvd2x %x8, %17, %4 \n\t"
"addi %3, %3, 128 \n\t"
"addi %4, %4, 128 \n\t"
"xvsubsp %x7, %x7, 46 \n\t" // c * y - s * x
"xvsubsp %x8, %x8, 47 \n\t" // c * y - s * x
- "stxvw4x 40, 0, %3 \n\t" // store x
- "stxvw4x 41, %15, %3 \n\t"
- "stxvw4x 42, %16, %3 \n\t"
- "stxvw4x 43, %17, %3 \n\t"
+ "stxvd2x 40, 0, %3 \n\t" // store x
+ "stxvd2x 41, %15, %3 \n\t"
+ "stxvd2x 42, %16, %3 \n\t"
+ "stxvd2x 43, %17, %3 \n\t"
- "stxvw4x %x5, 0, %4 \n\t" // store y
- "stxvw4x %x6, %15, %4 \n\t"
- "stxvw4x %x7, %16, %4 \n\t"
- "stxvw4x %x8, %17, %4 \n"
+ "stxvd2x %x5, 0, %4 \n\t" // store y
+ "stxvd2x %x6, %15, %4 \n\t"
+ "stxvd2x %x7, %16, %4 \n\t"
+ "stxvd2x %x8, %17, %4 \n"
"#n=%2 x=%0=%3 y=%1=%4 c=%13 s=%14 o16=%15 o32=%16 o48=%17\n"
"#t0=%x5 t1=%x6 t2=%x7 t3=%x8 t4=%x9 t5=%x10 t6=%x11 t7=%x12"