"vflpsb %%v0,%%v0 \n\t"
"vflpsb %%v16,%%v16 \n\t"
"vfasb %%v0,%%v0,%%v16 \n\t"
+ "vleib %%v1,0,0 \n\t" /* build in v1 the byte selector consumed by the vperm instructions in the loop: byte element k receives the index of the source byte to pick */
+ "vleib %%v1,1,1 \n\t"
+ "vleib %%v1,2,2 \n\t"
+ "vleib %%v1,3,3 \n\t" /* elements 0-3 <- source bytes 0..3 */
+ "vleib %%v1,8,4 \n\t"
+ "vleib %%v1,9,5 \n\t"
+ "vleib %%v1,10,6 \n\t"
+ "vleib %%v1,11,7 \n\t" /* elements 4-7 <- source bytes 8..11 */
+ "vleib %%v1,16,8 \n\t"
+ "vleib %%v1,17,9 \n\t"
+ "vleib %%v1,18,10 \n\t"
+ "vleib %%v1,19,11 \n\t" /* elements 8-11 <- source bytes 16..19 (indices >= 16 address the second vperm source) */
+ "vleib %%v1,24,12 \n\t"
+ "vleib %%v1,25,13 \n\t"
+ "vleib %%v1,26,14 \n\t"
+ "vleib %%v1,27,15 \n\t" /* elements 12-15 <- source bytes 24..27 */
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
- "vlef %%v16,0(%%r1,%2),0 \n\t"
- "vlef %%v17,4(%%r1,%2),0 \n\t"
- "vlef %%v16,8(%%r1,%2),1 \n\t"
- "vlef %%v17,12(%%r1,%2),1 \n\t"
- "vlef %%v16,16(%%r1,%2),2 \n\t"
- "vlef %%v17,20(%%r1,%2),2 \n\t"
- "vlef %%v16,24(%%r1,%2),3 \n\t"
- "vlef %%v17,28(%%r1,%2),3 \n\t"
-
- "vlef %%v18,32(%%r1,%2),0 \n\t"
- "vlef %%v19,36(%%r1,%2),0 \n\t"
- "vlef %%v18,40(%%r1,%2),1 \n\t"
- "vlef %%v19,44(%%r1,%2),1 \n\t"
- "vlef %%v18,48(%%r1,%2),2 \n\t"
- "vlef %%v19,52(%%r1,%2),2 \n\t"
- "vlef %%v18,56(%%r1,%2),3 \n\t"
- "vlef %%v19,30(%%r1,%2),3 \n\t"
-
- "vlef %%v20,64(%%r1,%2),0 \n\t"
- "vlef %%v21,68(%%r1,%2),0 \n\t"
- "vlef %%v20,72(%%r1,%2),1 \n\t"
- "vlef %%v21,76(%%r1,%2),1 \n\t"
- "vlef %%v20,80(%%r1,%2),2 \n\t"
- "vlef %%v21,84(%%r1,%2),2 \n\t"
- "vlef %%v20,88(%%r1,%2),3 \n\t"
- "vlef %%v21,92(%%r1,%2),3 \n\t"
-
- "vlef %%v22,96(%%r1,%2),0 \n\t"
- "vlef %%v23,100(%%r1,%2),0 \n\t"
- "vlef %%v22,104(%%r1,%2),1 \n\t"
- "vlef %%v23,108(%%r1,%2),1 \n\t"
- "vlef %%v22,112(%%r1,%2),2 \n\t"
- "vlef %%v23,116(%%r1,%2),2 \n\t"
- "vlef %%v22,120(%%r1,%2),3 \n\t"
- "vlef %%v23,124(%%r1,%2),3 \n\t"
-
- "vlef %%v24,128(%%r1,%2),0 \n\t"
- "vlef %%v25,132(%%r1,%2),0 \n\t"
- "vlef %%v24,136(%%r1,%2),1 \n\t"
- "vlef %%v25,140(%%r1,%2),1 \n\t"
- "vlef %%v24,144(%%r1,%2),2 \n\t"
- "vlef %%v25,148(%%r1,%2),2 \n\t"
- "vlef %%v24,152(%%r1,%2),3 \n\t"
- "vlef %%v25,156(%%r1,%2),3 \n\t"
-
- "vlef %%v26,160(%%r1,%2),0 \n\t"
- "vlef %%v27,164(%%r1,%2),0 \n\t"
- "vlef %%v26,168(%%r1,%2),1 \n\t"
- "vlef %%v27,172(%%r1,%2),1 \n\t"
- "vlef %%v26,176(%%r1,%2),2 \n\t"
- "vlef %%v27,180(%%r1,%2),2 \n\t"
- "vlef %%v26,184(%%r1,%2),3 \n\t"
- "vlef %%v27,188(%%r1,%2),3 \n\t"
-
- "vlef %%v28,192(%%r1,%2),0 \n\t"
- "vlef %%v29,196(%%r1,%2),0 \n\t"
- "vlef %%v28,200(%%r1,%2),1 \n\t"
- "vlef %%v29,204(%%r1,%2),1 \n\t"
- "vlef %%v28,208(%%r1,%2),2 \n\t"
- "vlef %%v29,212(%%r1,%2),2 \n\t"
- "vlef %%v28,216(%%r1,%2),3 \n\t"
- "vlef %%v29,220(%%r1,%2),3 \n\t"
-
- "vlef %%v30,224(%%r1,%2),0 \n\t"
- "vlef %%v31,228(%%r1,%2),0 \n\t"
- "vlef %%v30,232(%%r1,%2),1 \n\t"
- "vlef %%v31,236(%%r1,%2),1 \n\t"
- "vlef %%v30,240(%%r1,%2),2 \n\t"
- "vlef %%v31,244(%%r1,%2),2 \n\t"
- "vlef %%v30,248(%%r1,%2),3 \n\t"
- "vlef %%v31,252(%%r1,%2),3 \n\t"
+ "vl %%v16,0(%%r1,%2) \n\t" /* first 16 bytes of this 32-byte group */
+ "vl %%v2,16(%%r1,%2) \n\t" /* following 16 bytes, into scratch v2 */
+ "vpkg %%v17,%%v16,%%v2 \n\t" /* v17 = low word of every doubleword of v16:v2, i.e. the 32-bit words at byte offsets 4,12,20,28; must run before vperm overwrites v16 */
+ "vperm %%v16,%%v16,%%v2,%%v1 \n\t" /* v16 = bytes of v16:v2 picked by selector v1, i.e. the 32-bit words at byte offsets 0,8,16,24 */
+
+ "vl %%v18,32(%%r1,%2) \n\t" /* same load/pack/permute pattern for bytes 32..63 -> v19 (odd words), v18 (even words) */
+ "vl %%v2,48(%%r1,%2) \n\t"
+ "vpkg %%v19,%%v18,%%v2 \n\t"
+ "vperm %%v18,%%v18,%%v2,%%v1 \n\t"
+
+ "vl %%v20,64(%%r1,%2) \n\t" /* bytes 64..95 -> v21 / v20 */
+ "vl %%v2,80(%%r1,%2) \n\t"
+ "vpkg %%v21,%%v20,%%v2 \n\t"
+ "vperm %%v20,%%v20,%%v2,%%v1 \n\t"
+
+ "vl %%v22,96(%%r1,%2) \n\t" /* bytes 96..127 -> v23 / v22 */
+ "vl %%v2,112(%%r1,%2) \n\t"
+ "vpkg %%v23,%%v22,%%v2 \n\t"
+ "vperm %%v22,%%v22,%%v2,%%v1 \n\t"
+
+ "vl %%v24,128(%%r1,%2) \n\t" /* bytes 128..159 -> v25 / v24 */
+ "vl %%v2,144(%%r1,%2) \n\t"
+ "vpkg %%v25,%%v24,%%v2 \n\t"
+ "vperm %%v24,%%v24,%%v2,%%v1 \n\t"
+
+ "vl %%v26,160(%%r1,%2) \n\t" /* bytes 160..191 -> v27 / v26 */
+ "vl %%v2,176(%%r1,%2) \n\t"
+ "vpkg %%v27,%%v26,%%v2 \n\t"
+ "vperm %%v26,%%v26,%%v2,%%v1 \n\t"
+
+ "vl %%v28,192(%%r1,%2) \n\t" /* bytes 192..223 -> v29 / v28 */
+ "vl %%v2,208(%%r1,%2) \n\t"
+ "vpkg %%v29,%%v28,%%v2 \n\t"
+ "vperm %%v28,%%v28,%%v2,%%v1 \n\t"
+
+ "vl %%v30,224(%%r1,%2) \n\t" /* bytes 224..255 -> v31 / v30 */
+ "vl %%v2,240(%%r1,%2) \n\t"
+ "vpkg %%v31,%%v30,%%v2 \n\t"
+ "vperm %%v30,%%v30,%%v2,%%v1 \n\t"
"vflpsb %%v16,%%v16 \n\t"
"vflpsb %%v17,%%v17 \n\t"
"ler %0,%%f0 "
:"=f"(amax)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
- :"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27"
+ :"memory","cc","r0","r1","v0","v1","v2","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31" /* list names v1 (vperm selector), v2 (load scratch) and v28-v31, all of which the asm body writes */
);
return amax;
"vflpsb %%v0,%%v0 \n\t"
"vflpsb %%v16,%%v16 \n\t"
"vfasb %%v0,%%v0,%%v16 \n\t"
+ "vleib %%v1,0,0 \n\t" /* build in v1 the byte selector consumed by the vperm instructions in the loop: byte element k receives the index of the source byte to pick */
+ "vleib %%v1,1,1 \n\t"
+ "vleib %%v1,2,2 \n\t"
+ "vleib %%v1,3,3 \n\t" /* elements 0-3 <- source bytes 0..3 */
+ "vleib %%v1,8,4 \n\t"
+ "vleib %%v1,9,5 \n\t"
+ "vleib %%v1,10,6 \n\t"
+ "vleib %%v1,11,7 \n\t" /* elements 4-7 <- source bytes 8..11 */
+ "vleib %%v1,16,8 \n\t"
+ "vleib %%v1,17,9 \n\t"
+ "vleib %%v1,18,10 \n\t"
+ "vleib %%v1,19,11 \n\t" /* elements 8-11 <- source bytes 16..19 (indices >= 16 address the second vperm source) */
+ "vleib %%v1,24,12 \n\t"
+ "vleib %%v1,25,13 \n\t"
+ "vleib %%v1,26,14 \n\t"
+ "vleib %%v1,27,15 \n\t" /* elements 12-15 <- source bytes 24..27 */
"srlg %%r0,%1,5 \n\t"
"xgr %%r1,%%r1 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%2) \n\t"
- "vlef %%v16,0(%%r1,%2),0 \n\t"
- "vlef %%v17,4(%%r1,%2),0 \n\t"
- "vlef %%v16,8(%%r1,%2),1 \n\t"
- "vlef %%v17,12(%%r1,%2),1 \n\t"
- "vlef %%v16,16(%%r1,%2),2 \n\t"
- "vlef %%v17,20(%%r1,%2),2 \n\t"
- "vlef %%v16,24(%%r1,%2),3 \n\t"
- "vlef %%v17,28(%%r1,%2),3 \n\t"
-
- "vlef %%v18,32(%%r1,%2),0 \n\t"
- "vlef %%v19,36(%%r1,%2),0 \n\t"
- "vlef %%v18,40(%%r1,%2),1 \n\t"
- "vlef %%v19,44(%%r1,%2),1 \n\t"
- "vlef %%v18,48(%%r1,%2),2 \n\t"
- "vlef %%v19,52(%%r1,%2),2 \n\t"
- "vlef %%v18,56(%%r1,%2),3 \n\t"
- "vlef %%v19,30(%%r1,%2),3 \n\t"
-
- "vlef %%v20,64(%%r1,%2),0 \n\t"
- "vlef %%v21,68(%%r1,%2),0 \n\t"
- "vlef %%v20,72(%%r1,%2),1 \n\t"
- "vlef %%v21,76(%%r1,%2),1 \n\t"
- "vlef %%v20,80(%%r1,%2),2 \n\t"
- "vlef %%v21,84(%%r1,%2),2 \n\t"
- "vlef %%v20,88(%%r1,%2),3 \n\t"
- "vlef %%v21,92(%%r1,%2),3 \n\t"
-
- "vlef %%v22,96(%%r1,%2),0 \n\t"
- "vlef %%v23,100(%%r1,%2),0 \n\t"
- "vlef %%v22,104(%%r1,%2),1 \n\t"
- "vlef %%v23,108(%%r1,%2),1 \n\t"
- "vlef %%v22,112(%%r1,%2),2 \n\t"
- "vlef %%v23,116(%%r1,%2),2 \n\t"
- "vlef %%v22,120(%%r1,%2),3 \n\t"
- "vlef %%v23,124(%%r1,%2),3 \n\t"
-
- "vlef %%v24,128(%%r1,%2),0 \n\t"
- "vlef %%v25,132(%%r1,%2),0 \n\t"
- "vlef %%v24,136(%%r1,%2),1 \n\t"
- "vlef %%v25,140(%%r1,%2),1 \n\t"
- "vlef %%v24,144(%%r1,%2),2 \n\t"
- "vlef %%v25,148(%%r1,%2),2 \n\t"
- "vlef %%v24,152(%%r1,%2),3 \n\t"
- "vlef %%v25,156(%%r1,%2),3 \n\t"
-
- "vlef %%v26,160(%%r1,%2),0 \n\t"
- "vlef %%v27,164(%%r1,%2),0 \n\t"
- "vlef %%v26,168(%%r1,%2),1 \n\t"
- "vlef %%v27,172(%%r1,%2),1 \n\t"
- "vlef %%v26,176(%%r1,%2),2 \n\t"
- "vlef %%v27,180(%%r1,%2),2 \n\t"
- "vlef %%v26,184(%%r1,%2),3 \n\t"
- "vlef %%v27,188(%%r1,%2),3 \n\t"
-
- "vlef %%v28,192(%%r1,%2),0 \n\t"
- "vlef %%v29,196(%%r1,%2),0 \n\t"
- "vlef %%v28,200(%%r1,%2),1 \n\t"
- "vlef %%v29,204(%%r1,%2),1 \n\t"
- "vlef %%v28,208(%%r1,%2),2 \n\t"
- "vlef %%v29,212(%%r1,%2),2 \n\t"
- "vlef %%v28,216(%%r1,%2),3 \n\t"
- "vlef %%v29,220(%%r1,%2),3 \n\t"
-
- "vlef %%v30,224(%%r1,%2),0 \n\t"
- "vlef %%v31,228(%%r1,%2),0 \n\t"
- "vlef %%v30,232(%%r1,%2),1 \n\t"
- "vlef %%v31,236(%%r1,%2),1 \n\t"
- "vlef %%v30,240(%%r1,%2),2 \n\t"
- "vlef %%v31,244(%%r1,%2),2 \n\t"
- "vlef %%v30,248(%%r1,%2),3 \n\t"
- "vlef %%v31,252(%%r1,%2),3 \n\t"
+ "vl %%v16,0(%%r1,%2) \n\t" /* first 16 bytes of this 32-byte group */
+ "vl %%v2,16(%%r1,%2) \n\t" /* following 16 bytes, into scratch v2 */
+ "vpkg %%v17,%%v16,%%v2 \n\t" /* v17 = low word of every doubleword of v16:v2, i.e. the 32-bit words at byte offsets 4,12,20,28; must run before vperm overwrites v16 */
+ "vperm %%v16,%%v16,%%v2,%%v1 \n\t" /* v16 = bytes of v16:v2 picked by selector v1, i.e. the 32-bit words at byte offsets 0,8,16,24 */
+
+ "vl %%v18,32(%%r1,%2) \n\t" /* same load/pack/permute pattern for bytes 32..63 -> v19 (odd words), v18 (even words) */
+ "vl %%v2,48(%%r1,%2) \n\t"
+ "vpkg %%v19,%%v18,%%v2 \n\t"
+ "vperm %%v18,%%v18,%%v2,%%v1 \n\t"
+
+ "vl %%v20,64(%%r1,%2) \n\t" /* bytes 64..95 -> v21 / v20 */
+ "vl %%v2,80(%%r1,%2) \n\t"
+ "vpkg %%v21,%%v20,%%v2 \n\t"
+ "vperm %%v20,%%v20,%%v2,%%v1 \n\t"
+
+ "vl %%v22,96(%%r1,%2) \n\t" /* bytes 96..127 -> v23 / v22 */
+ "vl %%v2,112(%%r1,%2) \n\t"
+ "vpkg %%v23,%%v22,%%v2 \n\t"
+ "vperm %%v22,%%v22,%%v2,%%v1 \n\t"
+
+ "vl %%v24,128(%%r1,%2) \n\t" /* bytes 128..159 -> v25 / v24 */
+ "vl %%v2,144(%%r1,%2) \n\t"
+ "vpkg %%v25,%%v24,%%v2 \n\t"
+ "vperm %%v24,%%v24,%%v2,%%v1 \n\t"
+
+ "vl %%v26,160(%%r1,%2) \n\t" /* bytes 160..191 -> v27 / v26 */
+ "vl %%v2,176(%%r1,%2) \n\t"
+ "vpkg %%v27,%%v26,%%v2 \n\t"
+ "vperm %%v26,%%v26,%%v2,%%v1 \n\t"
+
+ "vl %%v28,192(%%r1,%2) \n\t" /* bytes 192..223 -> v29 / v28 */
+ "vl %%v2,208(%%r1,%2) \n\t"
+ "vpkg %%v29,%%v28,%%v2 \n\t"
+ "vperm %%v28,%%v28,%%v2,%%v1 \n\t"
+
+ "vl %%v30,224(%%r1,%2) \n\t" /* bytes 224..255 -> v31 / v30 */
+ "vl %%v2,240(%%r1,%2) \n\t"
+ "vpkg %%v31,%%v30,%%v2 \n\t"
+ "vperm %%v30,%%v30,%%v2,%%v1 \n\t"
"vflpsb %%v16,%%v16 \n\t"
"vflpsb %%v17,%%v17 \n\t"
"ler %0,%%f0 "
:"=f"(amin)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
- :"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27"
+ :"memory","cc","r0","r1","v0","v1","v2","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31" /* list names v1 (vperm selector), v2 (load scratch) and v28-v31, all of which the asm body writes */
);
return amin;
"vleig %%v2,3,1 \n\t"
"vrepig %%v3,16 \n\t"
"vzero %%v4 \n\t"
+ "vleib %%v9,0,0 \n\t" /* build in v9 the byte selector consumed by the vperm instructions in the loop: byte element k receives the index of the source byte to pick */
+ "vleib %%v9,1,1 \n\t"
+ "vleib %%v9,2,2 \n\t"
+ "vleib %%v9,3,3 \n\t" /* elements 0-3 <- source bytes 0..3 */
+ "vleib %%v9,8,4 \n\t"
+ "vleib %%v9,9,5 \n\t"
+ "vleib %%v9,10,6 \n\t"
+ "vleib %%v9,11,7 \n\t" /* elements 4-7 <- source bytes 8..11 */
+ "vleib %%v9,16,8 \n\t"
+ "vleib %%v9,17,9 \n\t"
+ "vleib %%v9,18,10 \n\t"
+ "vleib %%v9,19,11 \n\t" /* elements 8-11 <- source bytes 16..19 (indices >= 16 address the second vperm source) */
+ "vleib %%v9,24,12 \n\t"
+ "vleib %%v9,25,13 \n\t"
+ "vleib %%v9,26,14 \n\t"
+ "vleib %%v9,27,15 \n\t" /* elements 12-15 <- source bytes 24..27 */
"vleif %%v24,0,0 \n\t"
"vleif %%v24,1,1 \n\t"
"vleif %%v24,2,2 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
- "vlef %%v16,0(%%r1,%3),0 \n\t"
- "vlef %%v17,4(%%r1,%3),0 \n\t"
- "vlef %%v16,8(%%r1,%3),1 \n\t"
- "vlef %%v17,12(%%r1,%3),1 \n\t"
- "vlef %%v16,16(%%r1,%3),2 \n\t"
- "vlef %%v17,20(%%r1,%3),2 \n\t"
- "vlef %%v16,24(%%r1,%3),3 \n\t"
- "vlef %%v17,28(%%r1,%3),3 \n\t"
+ "vl %%v16,0(%%r1,%3) \n\t" /* first 16 bytes of this 32-byte group */
+ "vl %%v28,16(%%r1,%3) \n\t" /* following 16 bytes, into scratch v28 */
+ "vpkg %%v17,%%v16,%%v28 \n\t" /* v17 = low word of every doubleword of v16:v28, i.e. the 32-bit words at byte offsets 4,12,20,28; must run before vperm overwrites v16 */
+ "vperm %%v16,%%v16,%%v28,%%v9 \n\t" /* v16 = bytes of v16:v28 picked by selector v9, i.e. the 32-bit words at byte offsets 0,8,16,24 */
- "vlef %%v18,32(%%r1,%3),0 \n\t"
- "vlef %%v19,36(%%r1,%3),0 \n\t"
- "vlef %%v18,40(%%r1,%3),1 \n\t"
- "vlef %%v19,44(%%r1,%3),1 \n\t"
- "vlef %%v18,48(%%r1,%3),2 \n\t"
- "vlef %%v19,52(%%r1,%3),2 \n\t"
- "vlef %%v18,56(%%r1,%3),3 \n\t"
- "vlef %%v19,60(%%r1,%3),3 \n\t"
+ "vl %%v18,32(%%r1,%3) \n\t"
+ "vl %%v29,48(%%r1,%3) \n\t"
+ "vpkg %%v19,%%v18,%%v29 \n\t"
+ "vperm %%v18,%%v18,%%v29,%%v9 \n\t"
- "vlef %%v20,64(%%r1,%3),0 \n\t"
- "vlef %%v21,68(%%r1,%3),0 \n\t"
- "vlef %%v20,72(%%r1,%3),1 \n\t"
- "vlef %%v21,76(%%r1,%3),1 \n\t"
- "vlef %%v20,80(%%r1,%3),2 \n\t"
- "vlef %%v21,84(%%r1,%3),2 \n\t"
- "vlef %%v20,88(%%r1,%3),3 \n\t"
- "vlef %%v21,92(%%r1,%3),3 \n\t"
+ "vl %%v20,64(%%r1,%3) \n\t"
+ "vl %%v30,80(%%r1,%3) \n\t"
+ "vpkg %%v21,%%v20,%%v30 \n\t"
+ "vperm %%v20,%%v20,%%v30,%%v9 \n\t"
- "vlef %%v22,96(%%r1,%3),0 \n\t"
- "vlef %%v23,100(%%r1,%3),0 \n\t"
- "vlef %%v22,104(%%r1,%3),1 \n\t"
- "vlef %%v23,108(%%r1,%3),1 \n\t"
- "vlef %%v22,112(%%r1,%3),2 \n\t"
- "vlef %%v23,116(%%r1,%3),2 \n\t"
- "vlef %%v22,120(%%r1,%3),3 \n\t"
- "vlef %%v23,124(%%r1,%3),3 \n\t"
+ "vl %%v22,96(%%r1,%3) \n\t"
+ "vl %%v31,112(%%r1,%3) \n\t"
+ "vpkg %%v23,%%v22,%%v31 \n\t"
+ "vperm %%v22,%%v22,%%v31,%%v9 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
- "vlef %%v16,128(%%r1,%3),0 \n\t"
- "vlef %%v17,132(%%r1,%3),0 \n\t"
- "vlef %%v16,136(%%r1,%3),1 \n\t"
- "vlef %%v17,140(%%r1,%3),1 \n\t"
- "vlef %%v16,144(%%r1,%3),2 \n\t"
- "vlef %%v17,148(%%r1,%3),2 \n\t"
- "vlef %%v16,152(%%r1,%3),3 \n\t"
- "vlef %%v17,156(%%r1,%3),3 \n\t"
+ "vl %%v16,128(%%r1,%3) \n\t"
+ "vl %%v28,144(%%r1,%3) \n\t"
+ "vpkg %%v17,%%v16,%%v28 \n\t"
+ "vperm %%v16,%%v16,%%v28,%%v9 \n\t"
- "vlef %%v18,160(%%r1,%3),0 \n\t"
- "vlef %%v19,164(%%r1,%3),0 \n\t"
- "vlef %%v18,168(%%r1,%3),1 \n\t"
- "vlef %%v19,172(%%r1,%3),1 \n\t"
- "vlef %%v18,176(%%r1,%3),2 \n\t"
- "vlef %%v19,180(%%r1,%3),2 \n\t"
- "vlef %%v18,184(%%r1,%3),3 \n\t"
- "vlef %%v19,188(%%r1,%3),3 \n\t"
+ "vl %%v18,160(%%r1,%3) \n\t"
+ "vl %%v29,176(%%r1,%3) \n\t"
+ "vpkg %%v19,%%v18,%%v29 \n\t"
+ "vperm %%v18,%%v18,%%v29,%%v9 \n\t"
- "vlef %%v20,192(%%r1,%3),0 \n\t"
- "vlef %%v21,196(%%r1,%3),0 \n\t"
- "vlef %%v20,200(%%r1,%3),1 \n\t"
- "vlef %%v21,204(%%r1,%3),1 \n\t"
- "vlef %%v20,208(%%r1,%3),2 \n\t"
- "vlef %%v21,212(%%r1,%3),2 \n\t"
- "vlef %%v20,216(%%r1,%3),3 \n\t"
- "vlef %%v21,220(%%r1,%3),3 \n\t"
+ "vl %%v20,192(%%r1,%3) \n\t"
+ "vl %%v30,208(%%r1,%3) \n\t"
+ "vpkg %%v21,%%v20,%%v30 \n\t"
+ "vperm %%v20,%%v20,%%v30,%%v9 \n\t"
- "vlef %%v22,224(%%r1,%3),0 \n\t"
- "vlef %%v23,228(%%r1,%3),0 \n\t"
- "vlef %%v22,232(%%r1,%3),1 \n\t"
- "vlef %%v23,236(%%r1,%3),1 \n\t"
- "vlef %%v22,240(%%r1,%3),2 \n\t"
- "vlef %%v23,244(%%r1,%3),2 \n\t"
- "vlef %%v22,248(%%r1,%3),3 \n\t"
- "vlef %%v23,252(%%r1,%3),3 \n\t"
+ "vl %%v22,224(%%r1,%3) \n\t"
+ "vl %%v31,240(%%r1,%3) \n\t"
+ "vpkg %%v23,%%v22,%%v31 \n\t"
+ "vperm %%v22,%%v22,%%v31,%%v9 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"nop "
:"=r"(iamax),"=m"(*amax)
:"r"(n),"ZR"((const FLOAT (*)[n * 2])x)
- :"memory","cc","r0","r1","v0","v1","v2","v3","v4","v5","v6","v7","v8","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27"
+ :"memory","cc","r0","r1","v0","v1","v2","v3","v4","v5","v6","v7","v8","v9","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31" /* list names v9 (vperm selector) and v28-v31 (load scratch), all of which the asm body writes */
);
return iamax;
"vleig %%v2,3,1 \n\t"
"vrepig %%v3,16 \n\t"
"vzero %%v4 \n\t"
+ "vleib %%v9,0,0 \n\t" /* build in v9 the byte selector consumed by the vperm instructions in the loop: byte element k receives the index of the source byte to pick */
+ "vleib %%v9,1,1 \n\t"
+ "vleib %%v9,2,2 \n\t"
+ "vleib %%v9,3,3 \n\t" /* elements 0-3 <- source bytes 0..3 */
+ "vleib %%v9,8,4 \n\t"
+ "vleib %%v9,9,5 \n\t"
+ "vleib %%v9,10,6 \n\t"
+ "vleib %%v9,11,7 \n\t" /* elements 4-7 <- source bytes 8..11 */
+ "vleib %%v9,16,8 \n\t"
+ "vleib %%v9,17,9 \n\t"
+ "vleib %%v9,18,10 \n\t"
+ "vleib %%v9,19,11 \n\t" /* elements 8-11 <- source bytes 16..19 (indices >= 16 address the second vperm source) */
+ "vleib %%v9,24,12 \n\t"
+ "vleib %%v9,25,13 \n\t"
+ "vleib %%v9,26,14 \n\t"
+ "vleib %%v9,27,15 \n\t" /* elements 12-15 <- source bytes 24..27 */
"vleif %%v24,0,0 \n\t"
"vleif %%v24,1,1 \n\t"
"vleif %%v24,2,2 \n\t"
"0: \n\t"
"pfd 1, 1024(%%r1,%3) \n\t"
- "vlef %%v16,0(%%r1,%3),0 \n\t"
- "vlef %%v17,4(%%r1,%3),0 \n\t"
- "vlef %%v16,8(%%r1,%3),1 \n\t"
- "vlef %%v17,12(%%r1,%3),1 \n\t"
- "vlef %%v16,16(%%r1,%3),2 \n\t"
- "vlef %%v17,20(%%r1,%3),2 \n\t"
- "vlef %%v16,24(%%r1,%3),3 \n\t"
- "vlef %%v17,28(%%r1,%3),3 \n\t"
+ "vl %%v16,0(%%r1,%3) \n\t" /* first 16 bytes of this 32-byte group */
+ "vl %%v28,16(%%r1,%3) \n\t" /* following 16 bytes, into scratch v28 */
+ "vpkg %%v17,%%v16,%%v28 \n\t" /* v17 = low word of every doubleword of v16:v28, i.e. the 32-bit words at byte offsets 4,12,20,28; must run before vperm overwrites v16 */
+ "vperm %%v16,%%v16,%%v28,%%v9 \n\t" /* v16 = bytes of v16:v28 picked by selector v9, i.e. the 32-bit words at byte offsets 0,8,16,24 */
- "vlef %%v18,32(%%r1,%3),0 \n\t"
- "vlef %%v19,36(%%r1,%3),0 \n\t"
- "vlef %%v18,40(%%r1,%3),1 \n\t"
- "vlef %%v19,44(%%r1,%3),1 \n\t"
- "vlef %%v18,48(%%r1,%3),2 \n\t"
- "vlef %%v19,52(%%r1,%3),2 \n\t"
- "vlef %%v18,56(%%r1,%3),3 \n\t"
- "vlef %%v19,60(%%r1,%3),3 \n\t"
+ "vl %%v18,32(%%r1,%3) \n\t"
+ "vl %%v29,48(%%r1,%3) \n\t"
+ "vpkg %%v19,%%v18,%%v29 \n\t"
+ "vperm %%v18,%%v18,%%v29,%%v9 \n\t"
- "vlef %%v20,64(%%r1,%3),0 \n\t"
- "vlef %%v21,68(%%r1,%3),0 \n\t"
- "vlef %%v20,72(%%r1,%3),1 \n\t"
- "vlef %%v21,76(%%r1,%3),1 \n\t"
- "vlef %%v20,80(%%r1,%3),2 \n\t"
- "vlef %%v21,84(%%r1,%3),2 \n\t"
- "vlef %%v20,88(%%r1,%3),3 \n\t"
- "vlef %%v21,92(%%r1,%3),3 \n\t"
+ "vl %%v20,64(%%r1,%3) \n\t"
+ "vl %%v30,80(%%r1,%3) \n\t"
+ "vpkg %%v21,%%v20,%%v30 \n\t"
+ "vperm %%v20,%%v20,%%v30,%%v9 \n\t"
- "vlef %%v22,96(%%r1,%3),0 \n\t"
- "vlef %%v23,100(%%r1,%3),0 \n\t"
- "vlef %%v22,104(%%r1,%3),1 \n\t"
- "vlef %%v23,108(%%r1,%3),1 \n\t"
- "vlef %%v22,112(%%r1,%3),2 \n\t"
- "vlef %%v23,116(%%r1,%3),2 \n\t"
- "vlef %%v22,120(%%r1,%3),3 \n\t"
- "vlef %%v23,124(%%r1,%3),3 \n\t"
+ "vl %%v22,96(%%r1,%3) \n\t"
+ "vl %%v31,112(%%r1,%3) \n\t"
+ "vpkg %%v23,%%v22,%%v31 \n\t"
+ "vperm %%v22,%%v22,%%v31,%%v9 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"vsel %%v2,%%v2,%%v6,%%v8 \n\t"
"vag %%v4,%%v4,%%v3 \n\t"
- "vlef %%v16,128(%%r1,%3),0 \n\t"
- "vlef %%v17,132(%%r1,%3),0 \n\t"
- "vlef %%v16,136(%%r1,%3),1 \n\t"
- "vlef %%v17,140(%%r1,%3),1 \n\t"
- "vlef %%v16,144(%%r1,%3),2 \n\t"
- "vlef %%v17,148(%%r1,%3),2 \n\t"
- "vlef %%v16,152(%%r1,%3),3 \n\t"
- "vlef %%v17,156(%%r1,%3),3 \n\t"
+ "vl %%v16,128(%%r1,%3) \n\t"
+ "vl %%v28,144(%%r1,%3) \n\t"
+ "vpkg %%v17,%%v16,%%v28 \n\t"
+ "vperm %%v16,%%v16,%%v28,%%v9 \n\t"
- "vlef %%v18,160(%%r1,%3),0 \n\t"
- "vlef %%v19,164(%%r1,%3),0 \n\t"
- "vlef %%v18,168(%%r1,%3),1 \n\t"
- "vlef %%v19,172(%%r1,%3),1 \n\t"
- "vlef %%v18,176(%%r1,%3),2 \n\t"
- "vlef %%v19,180(%%r1,%3),2 \n\t"
- "vlef %%v18,184(%%r1,%3),3 \n\t"
- "vlef %%v19,188(%%r1,%3),3 \n\t"
+ "vl %%v18,160(%%r1,%3) \n\t"
+ "vl %%v29,176(%%r1,%3) \n\t"
+ "vpkg %%v19,%%v18,%%v29 \n\t"
+ "vperm %%v18,%%v18,%%v29,%%v9 \n\t"
- "vlef %%v20,192(%%r1,%3),0 \n\t"
- "vlef %%v21,196(%%r1,%3),0 \n\t"
- "vlef %%v20,200(%%r1,%3),1 \n\t"
- "vlef %%v21,204(%%r1,%3),1 \n\t"
- "vlef %%v20,208(%%r1,%3),2 \n\t"
- "vlef %%v21,212(%%r1,%3),2 \n\t"
- "vlef %%v20,216(%%r1,%3),3 \n\t"
- "vlef %%v21,220(%%r1,%3),3 \n\t"
+ "vl %%v20,192(%%r1,%3) \n\t"
+ "vl %%v30,208(%%r1,%3) \n\t"
+ "vpkg %%v21,%%v20,%%v30 \n\t"
+ "vperm %%v20,%%v20,%%v30,%%v9 \n\t"
- "vlef %%v22,224(%%r1,%3),0 \n\t"
- "vlef %%v23,228(%%r1,%3),0 \n\t"
- "vlef %%v22,232(%%r1,%3),1 \n\t"
- "vlef %%v23,236(%%r1,%3),1 \n\t"
- "vlef %%v22,240(%%r1,%3),2 \n\t"
- "vlef %%v23,244(%%r1,%3),2 \n\t"
- "vlef %%v22,248(%%r1,%3),3 \n\t"
- "vlef %%v23,252(%%r1,%3),3 \n\t"
+ "vl %%v22,224(%%r1,%3) \n\t"
+ "vl %%v31,240(%%r1,%3) \n\t"
+ "vpkg %%v23,%%v22,%%v31 \n\t"
+ "vperm %%v22,%%v22,%%v31,%%v9 \n\t"
"vflpsb %%v16, %%v16 \n\t"
"vflpsb %%v17, %%v17 \n\t"
"nop "
:"=r"(iamin),"=m"(*amin)
:"r"(n),"ZR"((const FLOAT (*)[n * 2])x)
- :"memory","cc","r0","r1","v0","v1","v2","v3","v4","v5","v6","v7","v8","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27"
+ :"memory","cc","r0","r1","v0","v1","v2","v3","v4","v5","v6","v7","v8","v9","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31" /* list names v9 (vperm selector) and v28-v31 (load scratch), all of which the asm body writes */
);
return iamin;
"ldr %0,%%f0 "
:"=f"(amax)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
- :"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27"
+ :"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
);
return amax;
"ldr %0,%%f0 "
:"=f"(amin)
:"r"(n),"ZR"((const FLOAT (*)[n])x)
- :"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27"
+ :"memory","cc","r0","r1","v0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
);
return amin;