* Add gcc7-generated assembly files for POWER8/9 isa/ica-min/max and POWER9 caxpy
To work around internal compiler errors encountered when compiling the original C source with gcc 4 and 5, and wrong code generated by gcc 8.3.0
* Use gcc-generated assembly instead of original C sources
to work around internal compiler errors encountered with gcc 4.8/5.4 and wrong code generation by gcc 8.3
* Use gcc-generated assembly instead of the original C source
to work around internal compiler errors encountered with gcc 4.8 and 5.4, and wrong code generation by gcc 8.3
* Add gcc7-generated assembler version of caxpy for power8
to work around wrong code generated by gcc 8.3
* Handle CONJ define for caxpyc
* Handle CONJ define for caxpyc
* Add gcc7-generated assembly cdot for POWER9
* Use prebuilt assembly for POWER9 cdot
created with gcc 7.3.1 to work around ICE in older gcc versions
* Exclude POWER9 from DYNAMIC_ARCH when the gcc version is lower than 6
* Update Makefile.system
* Use PROLOGUE macro to ensure correct function name for DYNAMIC_ARCH
* Disable POWER9 with old gcc versions
endif
ifeq ($(C_COMPILER), GCC)
-#Test for supporting MS_ABI
+#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
+GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1)
-# GCC Majar version > 4
+# GCC Major version > 4
# It is compatible with MSVC ABI.
CCOMMON_OPT += -DMS_ABI
endif
ifeq ($(ARCH), power)
+# POWER6 and POWER8 are always part of the DYNAMIC_CORE list.
DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8
+# Non-GCC compilers always get POWER9.
+ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9
endif
+# GCC gets POWER9 only when its major version is greater than 5 (GCCVERSIONGT5).
+ifeq ($(C_COMPILER), GCC)
+ifeq ($(GCCVERSIONGT5), 1)
+DYNAMIC_CORE += POWER9
+else
+# The function name must be followed by whitespace, not a comma: make expands
+# "$(info, ...)" as a reference to an undefined variable and prints nothing.
+$(info OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
+endif
+endif
+endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
ifndef DYNAMIC_CORE
extern gotoblas_t gotoblas_POWER6;
extern gotoblas_t gotoblas_POWER8;
+#if (!defined C_GCC) || (GCC_VERSION >= 60000)
extern gotoblas_t gotoblas_POWER9;
+#endif
extern void openblas_warning(int verbose, const char *msg);
char *gotoblas_corename(void) { /* map the active kernel table (gotoblas) to its entry in corename[] */
if (gotoblas == &gotoblas_POWER6) return corename[1];
if (gotoblas == &gotoblas_POWER8) return corename[2];
+#if (!defined C_GCC) || (GCC_VERSION >= 60000) /* POWER9 table is referenced only when C_GCC is undefined or GCC_VERSION >= 60000 */
if (gotoblas == &gotoblas_POWER9) return corename[3];
+#endif
return corename[0]; /* none of the known tables matched */
}
return &gotoblas_POWER6;
if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8;
+#if (!defined C_GCC) || (GCC_VERSION >= 60000)
if (__builtin_cpu_is("power9"))
return &gotoblas_POWER9;
+#endif
return NULL;
}
{
case 1: return (&gotoblas_POWER6);
case 2: return (&gotoblas_POWER8);
+#if (!defined C_GCC) || (GCC_VERSION >= 60000)
case 3: return (&gotoblas_POWER9);
+#endif
default: return NULL;
}
snprintf(message, 128, "Core not found: %s\n", coretype);
#SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c
#
-ISAMAXKERNEL = isamax.c
+ISAMAXKERNEL = isamax_power8.S
IDAMAXKERNEL = idamax.c
-ICAMAXKERNEL = icamax.c
+ICAMAXKERNEL = icamax_power8.S
IZAMAXKERNEL = izamax.c
#
-ISAMINKERNEL = isamin.c
+ISAMINKERNEL = isamin_power8.S
IDAMINKERNEL = idamin.c
-ICAMINKERNEL = icamin.c
+ICAMINKERNEL = icamin_power8.S
IZAMINKERNEL = izamin.c
#
#ISMAXKERNEL = ../arm/imax.c
#
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
-CAXPYKERNEL = caxpy.c
+CAXPYKERNEL = caxpy_power8.S
ZAXPYKERNEL = zaxpy.c
#
SCOPYKERNEL = scopy.c
#SMINKERNEL = ../arm/min.c\r
#DMINKERNEL = ../arm/min.c\r
#\r
-ISAMAXKERNEL = isamax.c\r
+ISAMAXKERNEL = isamax_power9.S\r
IDAMAXKERNEL = idamax.c\r
-ICAMAXKERNEL = icamax.c\r
+ICAMAXKERNEL = icamax_power9.S\r
IZAMAXKERNEL = izamax.c\r
#\r
-ISAMINKERNEL = isamin.c\r
+ISAMINKERNEL = isamin_power9.S\r
IDAMINKERNEL = idamin.c\r
-ICAMINKERNEL = icamin.c\r
+ICAMINKERNEL = icamin_power9.S\r
IZAMINKERNEL = izamin.c\r
#\r
#ISMAXKERNEL = ../arm/imax.c\r
#\r
SAXPYKERNEL = saxpy.c\r
DAXPYKERNEL = daxpy.c\r
-CAXPYKERNEL = caxpy.c\r
+CAXPYKERNEL = caxpy_power9.S\r
ZAXPYKERNEL = zaxpy.c\r
#\r
SCOPYKERNEL = scopy.c\r
SDOTKERNEL = sdot.c\r
DDOTKERNEL = ddot.c\r
DSDOTKERNEL = sdot.c\r
-CDOTKERNEL = cdot.c\r
+CDOTKERNEL = cdot_power9.S\r
ZDOTKERNEL = zdot.c\r
#\r
SNRM2KERNEL = ../arm/nrm2.c\r
--- /dev/null
+#define ASSEMBLER
+#include "common.h"
+/*
+ .file "caxpy.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl caxpy_k
+ .type caxpy_k, @function
+*/
+
+ PROLOGUE /* macro presumably supplied via common.h; the commit notes say it yields the DYNAMIC_ARCH-correct function name */
+
+caxpy_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: rebuild the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry caxpy_k,.-caxpy_k
+ mr. 7,3 /* r7 = r3 (element count); the record form also sets cr0 */
+ ble 0,.L33 /* count <= 0: nothing to do, .L33 returns 0 */
+ cmpdi 7,9,1 /* r9 is the stride applied to the r8 pointer (presumably inc_x) */
+ beq 7,.L41 /* stride 1: .L41 checks the other stride before taking the contiguous path */
+.L3:
+ mtctr 7 /* general case: one .L14 iteration per element */
+ ld 7,96(1) /* stride for the r10 pointer, read from the caller's frame (presumably inc_y) */
+ sldi 9,9,3 /* both strides become byte steps of 8 per unit (two 4-byte floats) */
+ sldi 7,7,3
+ .p2align 4,,15
+/*
+ * .L14 - scalar loop for the general (strided) case, one complex element
+ * per iteration.  On entry CTR holds the element count, r8/r10 point at
+ * the source/destination elements, r9/r7 are their byte strides and
+ * f1/f2 hold the two halves of the scalar (presumably da_r/da_i).
+ *
+ * With (xr, xi) the floats at 0(r8)/4(r8) and (yr, yi) at 0(r10)/4(r10):
+ *   !CONJ: yr += f1*xr - f2*xi;  yi += f1*xi + f2*xr
+ *    CONJ: yr += f1*xr + f2*xi;  yi -= f1*xi - f2*xr
+ * The real part therefore needs fmadds under CONJ and fmsubs otherwise,
+ * which is also the selection used by the .L10 and .L13 loops of this file.
+ */
+.L14:
+ lfs 10,4(8)
+ lfs 11,0(8)
+ lfs 12,0(10)
+ lfs 0,4(10)
+ fmuls 10,2,10 /* f10 = f2*xi */
+#ifdef CONJ
+ fmadds 11,11,1,10 /* f11 = xr*f1 + f2*xi */
+#else
+ fmsubs 11,11,1,10 /* f11 = xr*f1 - f2*xi */
+#endif
+ fadds 12,12,11
+ stfs 12,0(10)
+ lfs 11,0(8) /* the source element is re-read after the store above */
+ lfs 12,4(8)
+ add 8,8,9
+ fmuls 11,2,11 /* f11 = f2*xr */
+#ifdef CONJ
+ fmsubs 12,12,1,11 /* f12 = xi*f1 - f2*xr, subtracted from yi */
+ fsubs 0,0,12
+#else
+ fmadds 12,12,1,11 /* f12 = xi*f1 + f2*xr, added to yi */
+ fadds 0,0,12
+#endif
+ stfs 0,4(10)
+ add 10,10,7
+ bdnz .L14
+.L33:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L41:
+ ld 6,96(1)
+ cmpdi 7,6,1
+ bne 7,.L3
+ rldicr. 4,7,0,59
+ std 31,-8(1)
+ li 11,0
+ bne 0,.L42
+.L4:
+ addi 6,11,8
+ subf 0,4,7
+ sldi 6,6,2
+ addi 9,6,-32
+ add 5,10,6
+ add 3,8,9
+ add 6,8,6
+ subfc 5,5,3
+ add 9,10,9
+ subfe 5,5,5
+ subfc 6,6,9
+ subfe 31,31,31
+ addi 6,5,1
+ addi 5,31,1
+ or 6,6,5
+ rlwinm 6,6,0,0xff
+ cmpwi 7,6,0
+ beq 7,.L7
+ sradi 6,4,63
+ srdi 5,7,63
+ subfc 31,7,4
+ adde 6,5,6
+ subfic 31,0,3
+ subfe 31,31,31
+ xori 6,6,0x1
+ neg 31,31
+ and 6,6,31
+ rlwinm 6,6,0,0xff
+ cmpwi 7,6,0
+ beq 7,.L7
+ cmpd 7,4,7
+ li 6,1
+ blt 7,.L43
+.L9:
+ addi 0,7,-1
+ subf 0,4,0
+ subfic 0,0,3
+ subfe 31,31,31
+ addi 0,31,1
+ rlwinm 0,0,0,0xff
+ cmpwi 7,0,0
+ bne 7,.L10
+ sradi 0,4,63
+ subfc 31,7,4
+ adde 5,5,0
+ rlwinm 5,5,0,0xff
+ cmpwi 7,5,0
+ bne 7,.L10
+ addi 0,6,-1
+ addis 31,2,.LC3@toc@ha
+ std 30,-16(1)
+ xscvdpspn 12,1
+ xscvdpspn 11,2
+ srdi. 30,0,2
+ addis 6,2,.LC2@toc@ha
+ addi 6,6,.LC2@toc@l
+ mtctr 30
+ addi 31,31,.LC3@toc@l
+ lxvd2x 42,0,6
+ li 5,16
+ li 6,0
+ lxvd2x 41,0,31
+ xxspltw 12,12,0
+ xxspltw 11,11,0
+ xxpermdi 42,42,42,2
+ xxpermdi 41,41,41,2
+ beq 0,.L44
+ .p2align 4,,15
+.L11:
+#ifdef CONJ
+ lxvd2x 44,3,6
+ lxvd2x 45,3,5
+ lxvd2x 33,9,6
+ lxvd2x 0,9,5
+ xxpermdi 44,44,44,2
+ xxpermdi 45,45,45,2
+ xxpermdi 32,33,33,2
+ xxpermdi 33,0,0,2
+ vperm 11,13,12,10
+ vperm 13,13,12,9
+ vperm 12,1,0,10
+ vperm 1,1,0,9
+ xvmulsp 0,11,43
+ xvmulsp 32,11,45
+ xvmsubmsp 45,12,0
+ xvmaddasp 32,12,43
+ xvaddsp 44,32,44
+ xvsubsp 32,33,45
+ vmrglw 1,0,12
+ vmrghw 0,0,12
+#else
+ lxvd2x 45,3,6
+ lxvd2x 33,3,5
+ lxvd2x 43,9,6
+ lxvd2x 0,9,5
+ xxpermdi 45,45,45,2
+ xxpermdi 33,33,33,2
+ xxpermdi 32,43,43,2
+ xxpermdi 43,0,0,2
+ vperm 12,1,13,10
+ vperm 1,1,13,9
+ vperm 13,11,0,10
+ vperm 11,11,0,9
+ xvmulsp 0,11,44
+ xvmulsp 32,11,33
+ xvmaddmsp 33,12,0
+ xvmsubasp 32,12,44
+ xvaddsp 45,32,45
+ xvaddsp 32,33,43
+ vmrglw 1,0,13
+ vmrghw 0,0,13
+#endif
+ xxpermdi 0,33,33,2
+ xxpermdi 32,32,32,2
+ stxvd2x 0,9,6
+ addi 6,6,32
+ stxvd2x 32,9,5
+ addi 5,5,32
+ bdnz .L11
+ rldicr 0,0,0,61
+ ld 30,-16(1)
+ sldi 9,0,1
+ add 4,4,0
+ add 11,11,9
+/*
+ * .L10 - unrolled scalar tail of the contiguous path; handles up to four
+ * remaining elements.  r11 is the float index of the next element (scaled
+ * by 4 into a byte offset), r4 the matching element index and r7 the
+ * element count.  After each of the first three elements the code leaves
+ * through .L39 (which reloads r31) when r7 <= index of the next element;
+ * the fourth element reloads r31 itself and returns through .L33.
+ *
+ * Every element uses the same arithmetic, with (xr, xi) read via r8 and
+ * (yr, yi) updated via r10:
+ *   !CONJ: yr += f1*xr - f2*xi;  yi += f1*xi + f2*xr
+ *    CONJ: yr += f1*xr + f2*xi;  yi -= f1*xi - f2*xr
+ */
+.L10:
+ sldi 6,11,2
+ addi 9,4,1
+ addi 5,6,4
+ cmpd 7,7,9
+ lfsx 12,8,6
+ lfsx 0,10,6
+ addi 9,11,2
+ lfsx 11,8,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,12,1,11
+#else
+ fmsubs 12,12,1,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,6
+ lfsx 11,8,6
+ lfsx 12,8,5
+ lfsx 0,10,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmsubs 12,12,1,11
+ fsubs 0,0,12
+#else
+ fmadds 12,12,1,11
+ fadds 0,0,12
+#endif
+ stfsx 0,10,5
+ ble 7,.L39
+ sldi 9,9,2
+ addi 6,4,2
+ addi 5,9,4
+ cmpd 7,7,6
+ lfsx 12,8,9
+ lfsx 0,10,9
+ addi 6,11,4
+ lfsx 11,8,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,1,12,11
+#else
+ fmsubs 12,1,12,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,9
+ lfsx 11,8,9
+ lfsx 12,8,5
+ lfsx 0,10,5
+ fmuls 11,2,11
+/* imaginary part of the second tail element: needs the CONJ / non-CONJ
+ * split like the other three elements (it used the CONJ form for both) */
+#ifdef CONJ
+ fmsubs 12,1,12,11
+ fsubs 0,0,12
+#else
+ fmadds 12,1,12,11
+ fadds 0,0,12
+#endif
+ stfsx 0,10,5
+ ble 7,.L39
+ sldi 6,6,2
+ addi 4,4,3
+ addi 5,6,4
+ cmpd 7,7,4
+ lfsx 12,8,6
+ lfsx 0,10,6
+ addi 9,11,6
+ lfsx 11,8,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,1,12,11
+#else
+ fmsubs 12,1,12,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,6
+ lfsx 11,8,6
+ lfsx 12,8,5
+ lfsx 0,10,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmsubs 12,1,12,11
+ fsubs 0,0,12
+#else
+ fmadds 12,1,12,11
+ fadds 0,0,12
+#endif
+ stfsx 0,10,5
+ ble 7,.L39
+ sldi 9,9,2
+ ld 31,-8(1) /* last element: restore r31 here, the exit below bypasses .L39 */
+ addi 7,9,4
+ lfsx 12,8,9
+ lfsx 0,10,9
+ lfsx 11,8,7
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,1,12,11
+#else
+ fmsubs 12,1,12,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,9
+ lfsx 11,8,9
+ lfsx 12,8,7
+ lfsx 0,10,7
+ fmuls 2,2,11 /* f1/f2 may be overwritten now: no further element follows */
+#ifdef CONJ
+ fmsubs 1,1,12,2
+ fsubs 1,0,1
+#else
+ fmadds 1,1,12,2
+ fadds 1,0,1
+#endif
+ stfsx 1,10,7
+ b .L33
+.L43:
+ mr 6,0
+ b .L9
+.L7: /* scalar fallback for the contiguous case; r3/r9 are the source/destination cursors computed in .L4 */
+ addi 10,4,1
+ cmpd 7,10,7
+ subf 10,4,7 /* r10 = r7 - r4 = elements still to process */
+ mtctr 10
+ bgt 7,.L26 /* r4 + 1 > r7: .L26 forces a single iteration */
+ li 10,-1
+ rldicr 10,10,0,0 /* r10 = 0x8000000000000000 (only the top bit is kept) */
+ cmpd 7,7,10
+ beq 7,.L26
+ .p2align 4,,15
+.L13: /* one complex element per CTR iteration */
+ lfs 10,4(3)
+ lfs 11,0(3)
+ addi 9,9,8 /* cursors advance first, so the accesses below use -8/-4 offsets */
+ addi 3,3,8
+ lfs 12,-8(9)
+ lfs 0,-4(9)
+ fmuls 10,2,10 /* f10 = f2 * second source float */
+#ifdef CONJ
+ fmadds 11,1,11,10
+#else
+ fmsubs 11,1,11,10
+#endif
+ fadds 12,12,11
+ stfs 12,-8(9)
+ lfs 11,-8(3)
+ lfs 12,-4(3)
+ fmuls 11,2,11 /* f11 = f2 * first source float */
+#ifdef CONJ
+ fmsubs 12,1,12,11
+ fsubs 0,0,12
+#else
+ fmadds 12,1,12,11
+ fadds 0,0,12
+#endif
+ stfs 0,-4(9)
+ bdnz .L13
+.L39:
+ ld 31,-8(1) /* restore r31 (saved at -8(r1) in .L41), then return through .L33 */
+ b .L33
+.L42:
+#ifdef CONJ
+ fneg 0,1
+ xxpermdi 32,1,1,0
+ addis 9,2,.LANCHOR0@toc@ha
+ std 28,-32(1)
+ sradi. 28,4,1
+ addi 9,9,.LANCHOR0@toc@l
+ xscvdpspn 5,2
+ xvcvdpsp 32,32
+ lxvd2x 12,0,9
+ xxpermdi 39,0,0,0
+ xxspltw 5,5,0
+ xvcvdpsp 39,39
+#else
+ fneg 0,2
+ xxpermdi 39,2,2,0
+ addis 9,2,.LANCHOR0@toc@ha
+ std 28,-32(1)
+ sradi. 28,4,1
+ addi 9,9,.LANCHOR0@toc@l
+ xscvdpspn 5,1
+ xvcvdpsp 39,39
+ lxvd2x 12,0,9
+ xxpermdi 32,0,0,0
+ xxspltw 5,5,0
+ xvcvdpsp 32,32
+#endif
+ xxpermdi 12,12,12,2
+ vmrgew 7,7,0
+ beq 0,.L5
+ xxlnor 38,12,12
+ std 29,-24(1)
+ std 30,-16(1)
+ mr 6,8
+ mr 9,10
+ li 29,0
+ li 30,16
+ li 31,32
+ li 12,48
+ li 0,64
+ li 11,80
+ li 3,96
+ li 5,112
+ .p2align 4,,15
+.L6:
+ lxvd2x 6,0,9
+ lxvd2x 40,0,6
+ addi 29,29,8
+ lxvd2x 41,6,30
+ lxvd2x 42,6,31
+ cmpd 7,28,29
+ lxvd2x 43,6,12
+ lxvd2x 44,6,0
+ lxvd2x 45,6,11
+ lxvd2x 33,6,3
+ lxvd2x 32,6,5
+ lxvd2x 7,9,30
+ addi 6,6,128
+ lxvd2x 8,9,31
+ lxvd2x 9,9,12
+ xxpermdi 40,40,40,2
+ xxpermdi 6,6,6,2
+ lxvd2x 10,9,0
+ lxvd2x 11,9,11
+ xxpermdi 41,41,41,2
+ xxpermdi 42,42,42,2
+ lxvd2x 12,9,3
+ lxvd2x 0,9,5
+ xxpermdi 43,43,43,2
+ xxpermdi 44,44,44,2
+ xxpermdi 45,45,45,2
+ xxpermdi 33,33,33,2
+ xxpermdi 32,32,32,2
+ xxpermdi 7,7,7,2
+ xxpermdi 8,8,8,2
+ xxpermdi 9,9,9,2
+ xxpermdi 10,10,10,2
+ xxpermdi 11,11,11,2
+ xxpermdi 12,12,12,2
+ xxpermdi 0,0,0,2
+#ifndef CONJ
+ xvmaddasp 6,5,40
+ xvmaddasp 7,5,41
+ xvmaddasp 8,5,42
+ xvmaddasp 9,5,43
+ xvmaddasp 10,5,44
+ xvmaddasp 11,5,45
+ xvmaddasp 12,5,33
+ xvmaddasp 0,5,32
+ vperm 8,8,8,6
+ vperm 9,9,9,6
+ vperm 10,10,10,6
+ vperm 11,11,11,6
+ vperm 12,12,12,6
+ vperm 13,13,13,6
+ vperm 1,1,1,6
+ vperm 0,0,0,6
+#endif
+ xvmaddasp 6,39,40
+ xvmaddasp 7,39,41
+ xvmaddasp 8,39,42
+ xvmaddasp 9,39,43
+ xvmaddasp 10,39,44
+ xvmaddasp 11,39,45
+ xvmaddasp 12,39,33
+ xvmaddasp 0,39,32
+#ifdef CONJ
+ vperm 8,8,8,6
+ vperm 9,9,9,6
+ vperm 10,10,10,6
+ vperm 11,11,11,6
+ vperm 12,12,12,6
+ vperm 13,13,13,6
+ vperm 1,1,1,6
+ vperm 0,0,0,6
+ xvmaddasp 6,5,40
+ xvmaddasp 7,5,41
+ xvmaddasp 8,5,42
+ xvmaddasp 9,5,43
+ xvmaddasp 10,5,44
+ xvmaddasp 11,5,45
+ xvmaddasp 12,5,33
+ xvmaddasp 0,5,32
+#endif
+ xxpermdi 6,6,6,2
+ xxpermdi 7,7,7,2
+ xxpermdi 8,8,8,2
+ xxpermdi 9,9,9,2
+ stxvd2x 6,0,9
+ xxpermdi 10,10,10,2
+ stxvd2x 7,9,30
+ xxpermdi 11,11,11,2
+ stxvd2x 8,9,31
+ xxpermdi 12,12,12,2
+ stxvd2x 9,9,12
+ xxpermdi 0,0,0,2
+ stxvd2x 10,9,0
+ stxvd2x 11,9,11
+ stxvd2x 12,9,3
+ stxvd2x 0,9,5
+ addi 9,9,128
+ bgt 7,.L6
+ ld 29,-24(1)
+ ld 30,-16(1)
+.L5:
+ cmpd 7,7,4
+ ble 7,.L36
+ sldi 11,4,1
+ ld 28,-32(1)
+ b .L4
+.L36:
+ ld 28,-32(1)
+ ld 31,-8(1)
+ b .L33
+.L44:
+ li 31,1
+ mtctr 31
+ b .L11
+.L26:
+ li 10,1
+ mtctr 10
+ b .L13
+ .long 0
+ .byte 0,0,0,0,0,4,0,0
+ .size caxpy_k,.-caxpy_k
+ .section .rodata
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type swap_mask_arr, @object
+ .size swap_mask_arr, 16
+swap_mask_arr:
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+.LC2:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 7
+ .byte 6
+ .byte 5
+ .byte 4
+.LC3:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .gnu_attribute 4, 1
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+#define ASSEMBLER
+#include "common.h"
+
+/*
+ .file "caxpy.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl caxpy_k
+ .type caxpy_k, @function
+*/
+
+ PROLOGUE
+
+caxpy_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry caxpy_k,.-caxpy_k
+ mr. 7,3
+ ble 0,.L33
+ cmpdi 7,9,1
+ beq 7,.L37
+.L3:
+ mtctr 7
+ ld 7,96(1)
+ sldi 9,9,3
+ sldi 7,7,3
+ .p2align 4,,15
+.L14: /* strided scalar loop: one complex element per CTR iteration, r8/r10 stepped by r9/r7 bytes */
+ lfs 10,4(8)
+ lfs 11,0(8)
+ lfs 12,0(10)
+ lfs 0,4(10)
+ fmuls 10,2,10 /* f10 = f2 * second source float */
+#ifdef CONJ
+ fmadds 11,11,1,10 /* CONJ: first*f1 + f10 */
+#else
+ fmsubs 11,11,1,10 /* otherwise: first*f1 - f10 */
+#endif
+ fadds 12,12,11
+ stfs 12,0(10)
+ lfs 11,0(8) /* the source element is re-read after the store above */
+ lfs 12,4(8)
+ add 8,8,9
+ fmuls 11,2,11 /* f11 = f2 * first source float */
+#ifdef CONJ
+ fmsubs 12,12,1,11 /* CONJ: (second*f1 - f11) is subtracted from the destination */
+ fsubs 0,0,12
+#else
+ fmadds 12,12,1,11 /* otherwise: (second*f1 + f11) is added to the destination */
+ fadds 0,0,12
+#endif
+ stfs 0,4(10)
+ add 10,10,7
+ bdnz .L14
+.L33:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L37:
+ ld 6,96(1)
+ cmpdi 7,6,1
+ bne 7,.L3
+ rldicr. 4,7,0,59
+ li 11,0
+ bne 0,.L38
+.L4:
+ addi 6,11,8
+ subf 0,4,7
+ sldi 6,6,2
+ addi 9,6,-32
+ add 5,10,6
+ add 6,8,6
+ add 3,8,9
+ add 9,10,9
+ subfc 5,5,3
+ subfe 5,5,5
+ subfc 6,6,9
+ subfe 12,12,12
+ addi 6,5,1
+ addi 5,12,1
+ or 6,6,5
+ rlwinm 6,6,0,0xff
+ cmpwi 7,6,0
+ beq 7,.L7
+ sradi 6,4,63
+ srdi 5,7,63
+ subfc 12,7,4
+ adde 6,5,6
+ subfic 12,0,4
+ subfe 12,12,12
+ xori 6,6,0x1
+ neg 12,12
+ and 6,6,12
+ rlwinm 6,6,0,0xff
+ cmpwi 7,6,0
+ beq 7,.L7
+ cmpd 7,4,7
+ li 6,1
+ blt 7,.L39
+.L9:
+ addi 0,7,-1
+ subf 0,4,0
+ subfic 0,0,3
+ subfe 12,12,12
+ addi 0,12,1
+ rlwinm 0,0,0,0xff
+ cmpwi 7,0,0
+ bne 7,.L10
+ sradi 0,4,63
+ subfc 12,7,4
+ adde 5,5,0
+ rlwinm 5,5,0,0xff
+ cmpwi 7,5,0
+ bne 7,.L10
+ xscvdpspn 0,1
+ xscvdpspn 12,2
+ addi 0,6,-1
+ std 31,-8(1)
+ addis 12,2,.LC2@toc@ha
+ addis 6,2,.LC3@toc@ha
+ li 5,16
+ srdi. 31,0,2
+ addi 6,6,.LC3@toc@l
+ addi 12,12,.LC2@toc@l
+ mtctr 31
+ lxv 41,0(6)
+ lxv 42,0(12)
+ li 6,0
+ xxspltw 0,0,0
+ xxspltw 12,12,0
+ beq 0,.L40
+ .p2align 4,,15
+.L11:
+#ifdef CONJ
+ lxvx 33,3,5
+ lxvx 44,3,6
+ lxvx 43,9,6
+ lxvx 32,9,5
+ vperm 13,1,12,10
+ vperm 12,1,12,9
+ vperm 8,0,11,10
+ vperm 0,0,11,9
+ xvmulsp 33,12,44
+ xvmulsp 11,12,45
+ xvmaddasp 33,0,45
+ xvmsubmsp 44,0,11
+ xvaddsp 33,33,40
+ xvsubsp 32,32,44
+#else
+ lxvx 33,3,6
+ lxvx 32,3,5
+ lxvx 43,9,6
+ lxvx 44,9,5
+ vperm 13,0,1,10
+ vperm 0,0,1,9
+ vperm 8,12,11,10
+ vperm 12,12,11,9
+ xvmulsp 33,12,32
+ xvmulsp 11,12,45
+ xvmsubasp 33,0,45
+ xvmaddmsp 32,0,11
+ xvaddsp 33,33,40
+ xvaddsp 32,32,44
+#endif
+ vmrglw 13,0,1
+ vmrghw 0,0,1
+ stxvx 45,9,6
+ stxvx 32,9,5
+ addi 6,6,32
+ addi 5,5,32
+ bdnz .L11
+ rldicr 0,0,0,61
+ ld 31,-8(1)
+ sldi 9,0,1
+ add 4,4,0
+ add 11,11,9
+.L10:
+ sldi 5,11,2
+ addi 6,4,1
+ addi 9,11,2
+ addi 3,5,4
+ lfsx 12,8,5
+ cmpd 7,7,6
+ lfsx 0,10,5
+ lfsx 11,8,3
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,12,1,11
+#else
+ fmsubs 12,12,1,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,5
+ lfsx 11,8,5
+ lfsx 12,8,3
+ lfsx 0,10,3
+ fmuls 11,2,11
+#ifdef CONJ
+ fmsubs 12,12,1,11
+ fsubs 0,0,12
+#else
+ fmadds 12,12,1,11
+ fadds 0,0,12
+#endif
+ stfsx 0,10,3
+ ble 7,.L33
+ sldi 9,9,2
+ addi 5,4,2
+ addi 6,11,4
+ addi 3,9,4
+ lfsx 12,8,9
+ cmpd 7,7,5
+ lfsx 0,10,9
+ lfsx 11,8,3
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,1,12,11
+#else
+ fmsubs 12,1,12,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,9
+ lfsx 11,8,9
+ lfsx 12,8,3
+ lfsx 0,10,3
+ fmuls 11,2,11
+#ifdef CONJ
+ fmsubs 12,1,12,11
+ fsubs 0,0,12
+#else
+ fmadds 12,1,12,11
+ fadds 0,0,12
+#endif
+ stfsx 0,10,3
+ ble 7,.L33
+ sldi 6,6,2
+ addi 4,4,3
+ addi 9,11,6
+ addi 5,6,4
+ lfsx 12,8,6
+ cmpd 7,7,4
+ lfsx 0,10,6
+ lfsx 11,8,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,1,12,11
+#else
+ fmsubs 12,1,12,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,6
+ lfsx 11,8,6
+ lfsx 12,8,5
+ lfsx 0,10,5
+ fmuls 11,2,11
+#ifdef CONJ
+ fmsubs 12,1,12,11
+ fsubs 0,0,12
+#else
+ fmadds 12,1,12,11
+ fadds 0,0,12
+#endif
+ stfsx 0,10,5
+ ble 7,.L33
+ sldi 9,9,2
+ addi 7,9,4
+ lfsx 12,8,9
+ lfsx 0,10,9
+ lfsx 11,8,7
+ fmuls 11,2,11
+#ifdef CONJ
+ fmadds 12,1,12,11
+#else
+ fmsubs 12,1,12,11
+#endif
+ fadds 0,0,12
+ stfsx 0,10,9
+ lfsx 11,8,9
+ lfsx 12,8,7
+ lfsx 0,10,7
+ fmuls 2,2,11
+#ifdef CONJ
+ fmsubs 1,1,12,2
+ fsubs 1,0,1
+#else
+ fmadds 1,1,12,2
+ fadds 1,0,1
+#endif
+ stfsx 1,10,7
+ b .L33
+.L39:
+ mr 6,0
+ b .L9
+.L38:
+#ifdef CONJ
+ fneg 0,1
+ xxpermdi 45,1,1,0
+ xscvdpspn 12,2
+ addis 9,2,.LANCHOR0@toc@ha
+ sradi. 3,4,1
+ xxpermdi 44,0,0,0
+ addi 9,9,.LANCHOR0@toc@l
+ xvcvdpsp 45,45
+ lxv 33,0(9)
+ xvcvdpsp 32,44
+ xxspltw 12,12,0
+#else
+ fneg 12,2
+ xxpermdi 32,2,2,0
+ xscvdpspn 0,1
+ addis 9,2,.LANCHOR0@toc@ha
+ sradi. 3,4,1
+ xxpermdi 45,12,12,0
+ addi 9,9,.LANCHOR0@toc@l
+ xvcvdpsp 32,32
+ lxv 33,0(9)
+ xvcvdpsp 45,45
+ xxspltw 0,0,0
+#endif
+ vmrgew 0,0,13
+ beq 0,.L5
+ mr 6,8
+ mr 9,10
+ li 5,0
+ .p2align 4,,15
+.L6:
+ lxv 38,16(6)
+ lxv 11,16(9)
+ addi 5,5,8
+ addi 6,6,128
+ addi 9,9,128
+ lxv 39,-96(6)
+ lxv 40,-80(6)
+ lxv 41,-64(6)
+ lxv 42,-48(6)
+ cmpd 7,3,5
+ lxv 43,-32(6)
+ lxv 45,-128(6)
+ lxv 44,-16(6)
+#ifdef CONJ
+ lxv 0,-128(9)
+ vpermr 17,6,6,1
+ xvmaddmsp 38,32,11
+ lxv 11,-96(9)
+ vpermr 18,7,7,1
+ vpermr 19,8,8,1
+ vpermr 2,9,9,1
+ vpermr 3,10,10,1
+ vpermr 4,11,11,1
+ xvmaddasp 0,32,45
+ vpermr 5,12,12,1
+ xvmaddmsp 39,32,11
+ lxv 11,-80(9)
+ vpermr 13,13,13,1
+ xvmaddasp 38,12,49
+ xvmaddmsp 40,32,11
+ lxv 11,-64(9)
+ xvmaddmsp 45,12,0
+ xvmaddasp 39,12,50
+ stxv 38,-112(9)
+ xvmaddmsp 41,32,11
+ lxv 11,-48(9)
+ xvmaddasp 40,12,51
+ stxv 45,-128(9)
+ stxv 39,-96(9)
+ xvmaddmsp 42,32,11
+ lxv 11,-32(9)
+ xvmaddasp 41,12,34
+ stxv 40,-80(9)
+ xvmaddmsp 43,32,11
+ lxv 11,-16(9)
+ xvmaddasp 42,12,35
+ stxv 41,-64(9)
+ xvmaddmsp 44,32,11
+ xvmaddasp 43,12,36
+ stxv 42,-48(9)
+ xvmaddasp 44,12,37
+#else
+ lxv 12,-128(9)
+ vpermr 17,6,6,1
+ xvmaddmsp 38,0,11
+ lxv 11,-96(9)
+ vpermr 18,7,7,1
+ vpermr 19,8,8,1
+ vpermr 2,9,9,1
+ vpermr 3,10,10,1
+ vpermr 4,11,11,1
+ xvmaddasp 12,0,45
+ vpermr 5,12,12,1
+ xvmaddmsp 39,0,11
+ lxv 11,-80(9)
+ vpermr 13,13,13,1
+ xvmaddasp 38,32,49
+ xvmaddmsp 40,0,11
+ lxv 11,-64(9)
+ xvmaddmsp 45,32,12
+ xvmaddasp 39,32,50
+ stxv 38,-112(9)
+ xvmaddmsp 41,0,11
+ lxv 11,-48(9)
+ xvmaddasp 40,32,51
+ stxv 45,-128(9)
+ stxv 39,-96(9)
+ xvmaddmsp 42,0,11
+ lxv 11,-32(9)
+ xvmaddasp 41,32,34
+ stxv 40,-80(9)
+ xvmaddmsp 43,0,11
+ lxv 11,-16(9)
+ xvmaddasp 42,32,35
+ stxv 41,-64(9)
+ xvmaddmsp 44,0,11
+ xvmaddasp 43,32,36
+ stxv 42,-48(9)
+ xvmaddasp 44,32,37
+#endif
+ stxv 43,-32(9)
+ stxv 44,-16(9)
+ bgt 7,.L6
+.L5:
+ cmpd 7,7,4
+ ble 7,.L33
+ sldi 11,4,1
+ b .L4
+.L7:
+ addi 10,4,1
+ subf 8,4,7
+ cmpd 7,10,7
+ mtctr 8
+ bgt 7,.L26
+ li 10,-1
+ rldicr 10,10,0,0
+ cmpd 7,7,10
+ beq 7,.L26
+ .p2align 4,,15
+.L13:
+ lfs 10,4(3)
+ lfs 11,0(3)
+ lfs 12,0(9)
+ lfs 0,4(9)
+ addi 3,3,8
+ addi 9,9,8
+ fmuls 10,2,10
+#ifdef CONJ
+ fmadds 11,1,11,10
+#else
+ fmsubs 11,1,11,10
+#endif
+ fadds 12,12,11
+ stfs 12,-8(9)
+ lfs 11,-8(3)
+ lfs 12,-4(3)
+ fmuls 11,2,11
+#ifdef CONJ
+ fmsubs 12,1,12,11
+ fsubs 0,0,12
+#else
+ fmadds 12,1,12,11
+ fadds 0,0,12
+#endif
+ stfs 0,-4(9)
+ bdnz .L13
+ b .L33
+.L40:
+ li 31,1
+ mtctr 31
+ b .L11
+.L26:
+ li 10,1
+ mtctr 10
+ b .L13
+ .long 0
+ .byte 0,0,0,0,0,1,0,0
+ .size caxpy_k,.-caxpy_k
+ .section .rodata
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type swap_mask_arr, @object
+ .size swap_mask_arr, 16
+swap_mask_arr:
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+.LC2:
+ .byte 31
+ .byte 30
+ .byte 29
+ .byte 28
+ .byte 23
+ .byte 22
+ .byte 21
+ .byte 20
+ .byte 15
+ .byte 14
+ .byte 13
+ .byte 12
+ .byte 7
+ .byte 6
+ .byte 5
+ .byte 4
+.LC3:
+ .byte 27
+ .byte 26
+ .byte 25
+ .byte 24
+ .byte 19
+ .byte 18
+ .byte 17
+ .byte 16
+ .byte 11
+ .byte 10
+ .byte 9
+ .byte 8
+ .byte 3
+ .byte 2
+ .byte 1
+ .byte 0
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .gnu_attribute 4, 1
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+ .file "cdot.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl cdot_k
+ .type cdot_k, @function
+cdot_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: rebuild the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry cdot_k,.-cdot_k
+ mr. 9,3 /* r9 = r3 (element count); the record form also sets cr0 */
+ ble 0,.L10 /* count <= 0: .L10 returns zero in f1 and f2 */
+ cmpdi 7,5,1 /* r5 is the stride applied to the r4 pointer */
+ beq 7,.L18 /* stride 1: .L18 checks the r7 stride as well */
+.L3: /* general strided case */
+ mtctr 9
+ xxlxor 2,2,2 /* zero f2; the other accumulators are copied from it below */
+ sldi 5,5,3 /* strides become byte steps of 8 per unit (two 4-byte floats) */
+ sldi 7,7,3
+#ifdef CONJ
+ fmr 12,2
+#endif
+ fmr 8,2
+#ifndef CONJ
+ fmr 9,2
+#endif
+ fmr 1,2
+ .p2align 4,,15
+.L9: /* with a = floats at 0/4(r4) and b = floats at 0/4(r6), four running sums are kept */
+#ifdef CONJ
+ lfs 9,0(4)
+ lfs 11,0(6)
+ lfs 10,4(6)
+ lfs 0,4(4)
+ add 6,6,7
+ add 4,4,5
+ fmadds 1,9,11,1 /* f1 += a0*b0 */
+ fmadds 12,9,10,12 /* f12 += a0*b1 */
+ fmadds 8,0,10,8 /* f8 += a1*b1 */
+ fmadds 2,11,0,2 /* f2 += b0*a1 */
+#else
+ lfs 10,0(4)
+ lfs 12,0(6)
+ lfs 11,4(6)
+ lfs 0,4(4)
+ add 6,6,7
+ add 4,4,5
+ fmadds 1,10,12,1 /* f1 += a0*b0 */
+ fmadds 8,10,11,8 /* f8 += a0*b1 */
+ fmadds 9,0,11,9 /* f9 += a1*b1 */
+ fmadds 2,12,0,2 /* f2 += b0*a1 */
+#endif
+ bdnz .L9
+.L7: /* combine the partial sums; the result is left in f1 and f2 */
+#ifdef CONJ
+ fsubs 2,12,2 /* f2 = sum(a0*b1) - sum(a1*b0) */
+ fadds 1,1,8 /* f1 = sum(a0*b0) + sum(a1*b1) */
+#else
+ fadds 2,2,8 /* f2 = sum(a1*b0) + sum(a0*b1) */
+ fsubs 1,1,9 /* f1 = sum(a0*b0) - sum(a1*b1) */
+#endif
+ blr
+ .p2align 4,,15
+.L18: /* reached when the r5 stride is 1 */
+ cmpdi 7,7,1
+ bne 7,.L3 /* r7 stride differs from 1: use the strided loop */
+ rldicr. 10,9,0,60 /* r10 = count with its low three bits cleared */
+ bne 0,.L19 /* non-zero: .L19 runs the vector part first */
+ xxlxor 2,2,2 /* no vector part: start the scalar accumulators at zero */
+ li 8,0
+#ifdef CONJ
+ fmr 12,2
+#endif
+ fmr 8,2
+#ifndef CONJ
+ fmr 9,2
+#endif
+ fmr 1,2
+.L4: /* scalar remainder: r10 = elements already handled, r8 = float index of the next one */
+ addi 7,10,1
+ sldi 8,8,2 /* float index -> byte offset */
+ subf 10,10,9 /* r10 = r9 - r10 = remaining elements */
+ cmpd 7,7,9
+ mtctr 10
+ add 4,4,8
+ add 6,6,8
+ bgt 7,.L16 /* handled + 1 > count: .L16 forces a single iteration */
+ li 10,-1
+ rldicr 10,10,0,0 /* r10 = 0x8000000000000000 (only the top bit is kept) */
+ cmpd 7,9,10
+ beq 7,.L16
+ .p2align 4,,15
+.L8: /* contiguous scalar loop; a = floats at 0/4(r4), b = floats at 0/4(r6), 8-byte steps */
+#ifdef CONJ
+ lfs 9,0(4)
+ lfs 11,0(6)
+ lfs 10,4(6)
+ lfs 0,4(4)
+ addi 6,6,8
+ addi 4,4,8
+ fmadds 1,9,11,1 /* f1 += a0*b0 */
+ fmadds 12,9,10,12 /* f12 += a0*b1 */
+ fmadds 8,0,10,8 /* f8 += a1*b1 */
+ fmadds 2,11,0,2 /* f2 += b0*a1 */
+#else
+ lfs 10,0(4)
+ lfs 12,0(6)
+ lfs 11,4(6)
+ lfs 0,4(4)
+ addi 6,6,8
+ addi 4,4,8
+ fmadds 1,10,12,1 /* f1 += a0*b0 */
+ fmadds 8,10,11,8 /* f8 += a0*b1 */
+ fmadds 9,0,11,9 /* f9 += a1*b1 */
+ fmadds 2,12,0,2 /* f2 += b0*a1 */
+#endif
+ bdnz .L8
+ b .L7 /* .L7 combines the partial sums and returns */
+ .p2align 4,,15
+.L10: /* count <= 0: return zero in both f1 and f2 */
+ xxlxor 1,1,1
+ fmr 2,1
+ blr
+.L19:
+ addis 8,2,.LANCHOR0@toc@ha
+ sradi. 3,10,1
+ xxspltib 42,0
+ addi 8,8,.LANCHOR0@toc@l
+ lxv 32,0(8)
+ beq 0,.L12
+ xxlor 6,42,42
+ xxlor 4,42,42
+ xxlor 0,42,42
+ xxlor 7,42,42
+ xxlor 5,42,42
+ xxlor 3,42,42
+ xxlor 12,42,42
+ mr 7,4
+ mr 8,6
+ li 5,0
+ .p2align 4,,15
+.L6:
+ lxv 43,0(8)
+ lxv 44,16(8)
+ addi 5,5,4
+ addi 8,8,64
+ addi 7,7,64
+ lxv 45,-32(8)
+ lxv 33,-16(8)
+ lxv 8,-64(7)
+ lxv 9,-48(7)
+ cmpd 7,3,5
+ lxv 10,-32(7)
+ lxv 11,-16(7)
+ vpermr 6,11,11,0
+ vpermr 7,12,12,0
+ vpermr 8,13,13,0
+ vpermr 9,1,1,0
+ xvmaddasp 12,43,8
+ xvmaddasp 3,44,9
+ xvmaddasp 0,8,38
+ xvmaddasp 4,9,39
+ xvmaddasp 6,10,40
+ xvmaddasp 5,45,10
+ xvmaddasp 42,11,41
+ xvmaddasp 7,33,11
+ bgt 7,.L6
+ xvaddsp 12,12,3
+ xvaddsp 0,0,4
+ xvaddsp 12,12,5
+ xvaddsp 0,0,6
+ xvaddsp 12,12,7
+ xvaddsp 42,0,42
+.L5:
+#ifdef CONJ
+ xxpermdi 8,12,12,2
+ xxpermdi 0,42,42,2
+ cmpd 7,9,10
+ sldi 8,10,1
+ xvaddsp 8,8,12
+ xvaddsp 0,0,42
+ xxsldwi 1,8,8,3
+ xxsldwi 12,0,0,3
+ xxsldwi 8,8,8,2
+ xxsldwi 0,0,0,2
+ xscvspdp 1,1
+ xscvspdp 12,12
+ xscvspdp 8,8
+#else
+ xxpermdi 9,12,12,2
+ xxpermdi 0,42,42,2
+ cmpd 7,9,10
+ sldi 8,10,1
+ xvaddsp 9,9,12
+ xvaddsp 0,0,42
+ xxsldwi 1,9,9,3
+ xxsldwi 2,0,0,3
+ xxsldwi 9,9,9,2
+ xxsldwi 0,0,0,2
+ xscvspdp 8,2
+ xscvspdp 1,1
+ xscvspdp 9,9
+#endif
+ xscvspdp 2,0
+ bgt 7,.L4
+ b .L7
+.L12:
+ xxlor 12,42,42
+ b .L5
+.L16:
+ li 9,1
+ mtctr 9
+ b .L8
+ .long 0
+ .byte 0,0,0,0,0,0,0,0
+ .size cdot_k,.-cdot_k
+ .section .rodata
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type swap_mask_arr, @object
+ .size swap_mask_arr, 16
+swap_mask_arr:
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+/* .file "icamax.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl icamax_k
+ .type icamax_k, @function
+*/
+#define ASSEMBLER
+#include "common.h"
+
+ PROLOGUE
+
+icamax_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: rebuild the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry icamax_k,.-icamax_k
+ mr. 9,3 /* r9 = r3 (element count); the record form also sets cr0 */
+ ble 0,.L25 /* count <= 0: return 0 */
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7 /* r5 (stride) <= 0: return 0 */
+ cmpdi 7,5,1
+ beq 7,.L54 /* stride 1: dedicated contiguous path */
+ lfs 11,0(4)
+ lfs 0,4(4)
+ cmpdi 7,9,1
+ fabs 11,11
+ fabs 0,0
+ fadds 11,11,0 /* f11 = |first float| + |second float| of element 0: the running maximum */
+ beq 7,.L29 /* single element: .L29 returns 1 */
+ addi 9,9,-1
+ sldi 5,5,3 /* stride becomes a byte step of 8 per unit (two 4-byte floats) */
+ mtctr 9 /* count - 1 iterations: element 0 is already accounted for */
+ add 4,4,5
+ li 3,0 /* r3 = 0-based index of the current maximum */
+ li 9,1 /* r9 = index of the element examined next */
+ .p2align 4,,15
+.L24:
+ lfs 0,4(4)
+ lfs 12,0(4)
+ add 4,4,5
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12 /* f0 = sum of the two absolute values of this element */
+ fcmpu 7,0,11
+ bng 7,.L23 /* keep the current maximum unless f0 compares greater */
+ fmr 11,0
+ mr 3,9
+.L23:
+ addi 9,9,1
+ bdnz .L24
+.L52:
+ addi 3,3,1 /* return the index plus one */
+ blr
+ .p2align 4,,15
+.L25:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L54:
+ rldicr. 8,9,0,58
+ bne 0,.L55
+ addi 7,8,1
+ li 10,0
+ xxlxor 11,11,11
+ cmpd 7,7,9
+ sldi 10,10,2
+ add 4,4,10
+ subf 10,8,9
+ mtctr 10
+ li 3,0
+ bgt 7,.L43
+ li 10,-1
+ rldicr 10,10,0,0
+ cmpd 7,9,10
+ beq 7,.L43
+ .p2align 4,,15
+.L44:
+ lfs 0,4(4)
+ lfs 12,0(4)
+ addi 4,4,8
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,0,11
+ bng 7,.L46
+ fmr 11,0
+ mr 3,8
+.L46:
+ addi 8,8,1
+ bdnz .L44
+ b .L52
+ .p2align 4,,15
+.L55:
+ li 0,-144
+ std 31,-8(1)
+ addis 5,2,.LC2@toc@ha
+ vspltisw 18,0
+ vspltisw 19,0
+ addis 6,2,.LC3@toc@ha
+ addi 5,5,.LC2@toc@l
+ stvx 24,1,0
+ li 0,-128
+ addi 6,6,.LC3@toc@l
+ xxlor 49,50,50
+ addis 7,2,.LC4@toc@ha
+ lxvd2x 44,0,5
+ addis 10,2,.LC5@toc@ha
+ stvx 25,1,0
+ li 0,-112
+ addi 7,7,.LC4@toc@l
+ lxvd2x 45,0,6
+ addis 5,2,.LC6@toc@ha
+ addis 6,2,.LC7@toc@ha
+ stvx 26,1,0
+ li 0,-96
+ addi 10,10,.LC5@toc@l
+ addi 6,6,.LC7@toc@l
+ addi 5,5,.LC6@toc@l
+ stvx 27,1,0
+ li 0,-80
+ lxvd2x 46,0,10
+ xxpermdi 44,44,44,2
+ mr 10,4
+ lxvd2x 48,0,6
+ lxvd2x 47,0,5
+ xxpermdi 45,45,45,2
+ li 6,0
+ stvx 28,1,0
+ li 0,-64
+ xxlnand 44,44,44
+ xxlnand 45,45,45
+ stvx 29,1,0
+ li 0,-48
+ vspltisw 29,8
+ vadduwm 29,29,29
+ xxpermdi 46,46,46,2
+ stvx 30,1,0
+ li 0,-32
+ xxpermdi 47,47,47,2
+ xxpermdi 48,48,48,2
+ stvx 31,1,0
+ lxvd2x 63,0,7
+ addis 7,2,.LC8@toc@ha
+ addi 7,7,.LC8@toc@l
+ lxvd2x 62,0,7
+ xxpermdi 63,63,63,2
+ .p2align 4,,15
+.L5:
+ addi 3,10,16
+ addi 5,10,32
+ lxvd2x 34,0,10
+ addi 7,10,64
+ addi 31,10,48
+ addi 12,10,80
+ addi 11,10,96
+ lxvd2x 36,0,3
+ lxvd2x 37,0,5
+ addi 3,10,112
+ addi 5,10,128
+ lxvd2x 38,0,7
+ lxvd2x 7,0,31
+ addi 7,10,160
+ addi 31,10,144
+ lxvd2x 33,0,12
+ lxvd2x 39,0,11
+ addi 12,10,176
+ addi 11,10,192
+ lxvd2x 8,0,3
+ lxvd2x 40,0,5
+ xxpermdi 34,34,34,2
+ addi 3,10,208
+ addi 5,10,224
+ lxvd2x 41,0,7
+ lxvd2x 9,0,31
+ addi 7,10,240
+ lxvd2x 10,0,12
+ lxvd2x 42,0,11
+ xxpermdi 37,37,37,2
+ xxpermdi 36,36,36,2
+ addi 6,6,32
+ lxvd2x 32,0,3
+ lxvd2x 43,0,5
+ xxpermdi 7,7,7,2
+ xxpermdi 38,38,38,2
+ cmpd 7,8,6
+ addi 10,10,256
+ lxvd2x 11,0,7
+ xxpermdi 39,39,39,2
+ xxpermdi 33,33,33,2
+ xxpermdi 40,40,40,2
+ xxpermdi 8,8,8,2
+ xxpermdi 41,41,41,2
+ xxpermdi 9,9,9,2
+ xxpermdi 10,10,10,2
+ xxpermdi 42,42,42,2
+ xxpermdi 43,43,43,2
+ xxpermdi 32,32,32,2
+ xxpermdi 11,11,11,2
+ xvabssp 57,37
+ xvabssp 58,39
+ xvabssp 35,40
+ xvabssp 59,41
+ xvabssp 34,34
+ xvabssp 33,33
+ xvabssp 32,32
+ xvabssp 60,43
+ xvabssp 36,36
+ xvabssp 37,7
+ xvabssp 38,38
+ xvabssp 39,8
+ xvabssp 40,9
+ xvabssp 41,10
+ xvabssp 42,42
+ xvabssp 43,11
+ vperm 24,4,2,12
+ vperm 4,4,2,13
+ vperm 2,5,25,12
+ vperm 5,5,25,13
+ vperm 25,1,6,12
+ vperm 6,1,6,13
+ vperm 1,7,26,12
+ vperm 7,7,26,13
+ vperm 26,8,3,12
+ vperm 8,8,3,13
+ vperm 3,9,27,12
+ vperm 9,9,27,13
+ vperm 27,0,10,12
+ vperm 10,0,10,13
+ vperm 0,11,28,12
+ vperm 11,11,28,13
+ xvaddsp 12,33,39
+ xvaddsp 38,57,38
+ xvaddsp 0,32,43
+ xvaddsp 42,59,42
+ xvaddsp 36,56,36
+ xvaddsp 37,34,37
+ xvaddsp 40,58,40
+ xvaddsp 41,35,41
+ xvcmpgtsp 32,12,38
+ xvcmpgtsp 33,0,42
+ xvcmpgtsp 43,37,36
+ xvcmpgtsp 39,41,40
+ xxsel 12,38,12,32
+ xxsel 38,47,48,32
+ xxsel 0,42,0,33
+ xxsel 42,47,48,33
+ xxsel 37,36,37,43
+ xxsel 43,63,46,43
+ xxsel 41,40,41,39
+ xxsel 39,63,46,39
+ xvcmpgtsp 32,12,37
+ xvcmpgtsp 33,0,41
+ xxsel 12,37,12,32
+ xxsel 43,43,38,32
+ xxsel 0,41,0,33
+ xxsel 33,39,42,33
+ xvcmpgtsp 32,0,12
+ vadduwm 1,1,29
+ xxsel 0,12,0,32
+ xxsel 32,43,33,32
+ xvcmpgtsp 33,0,51
+ vadduwm 0,17,0
+ vadduwm 17,17,30
+ xxsel 50,50,32,33
+ xxsel 51,51,0,33
+ bgt 7,.L5
+ xxsldwi 11,51,51,3
+ xxsldwi 12,51,51,2
+ vspltw 0,18,3
+ xxsldwi 0,51,51,1
+ xscvspdp 11,11
+ xscvspdp 12,12
+ mfvsrwz 6,32
+ vspltw 0,18,2
+ xscvspdp 0,0
+ mfvsrwz 7,50
+ mfvsrwz 5,32
+ vspltw 0,18,0
+ xscvspdp 51,51
+ mfvsrwz 10,32
+ fcmpu 7,11,12
+ rldicl 3,6,0,32
+ fmr 10,0
+ rldicl 11,7,0,32
+ rldicl 31,5,0,32
+ rldicl 0,10,0,32
+ beq 7,.L56
+ bnl 7,.L8
+ fmr 11,12
+ mr 3,31
+.L8:
+ xscmpudp 7,0,51
+ bne 7,.L11
+ cmplw 7,7,10
+ ble 7,.L12
+ mr 7,10
+.L12:
+ rldicl 11,7,0,32
+.L13:
+ fcmpu 7,11,10
+ beq 7,.L57
+ blt 7,.L58
+.L17:
+ cmpd 7,9,8
+ ble 7,.L19
+ addi 7,8,1
+ sldi 10,8,1
+ cmpd 7,7,9
+ sldi 10,10,2
+ add 4,4,10
+ subf 10,8,9
+ mtctr 10
+ bgt 7,.L37
+ li 10,-1
+ rldicr 10,10,0,0
+ cmpd 7,9,10
+ beq 7,.L37
+ .p2align 4,,15
+.L21:
+ lfs 0,4(4)
+ lfs 12,0(4)
+ addi 4,4,8
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,0,11
+ bng 7,.L20
+ fmr 11,0
+ mr 3,8
+.L20:
+ addi 8,8,1
+ bdnz .L21
+.L19:
+ li 0,-144
+ ld 31,-8(1)
+ addi 3,3,1
+ lvx 24,1,0
+ li 0,-128
+ lvx 25,1,0
+ li 0,-112
+ lvx 26,1,0
+ li 0,-96
+ lvx 27,1,0
+ li 0,-80
+ lvx 28,1,0
+ li 0,-64
+ lvx 29,1,0
+ li 0,-48
+ lvx 30,1,0
+ li 0,-32
+ lvx 31,1,0
+ blr
+ .p2align 4,,15
+.L56:
+ cmplw 7,6,5
+ ble 7,.L7
+ mr 6,5
+.L7:
+ rldicl 3,6,0,32
+ b .L8
+ .p2align 4,,15
+.L29:
+ li 3,1
+ blr
+ .p2align 4,,15
+.L11:
+ bnl 7,.L13
+ xscpsgndp 10,51,51
+ mr 11,0
+ b .L13
+ .p2align 4,,15
+.L57:
+ cmpd 7,3,11
+ ble 7,.L17
+ mr 3,11
+ b .L17
+ .p2align 4,,15
+.L58:
+ fmr 11,10
+ mr 3,11
+ b .L17
+.L43:
+ li 9,1
+ mtctr 9
+ b .L44
+.L37:
+ li 9,1
+ mtctr 9
+ b .L21
+ .long 0
+ .byte 0,0,0,0,0,1,0,0
+ .size icamax_k,.-icamax_k
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+.LC2:
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 16
+ .byte 17
+ .byte 18
+ .byte 19
+ .byte 24
+ .byte 25
+ .byte 26
+ .byte 27
+.LC3:
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 20
+ .byte 21
+ .byte 22
+ .byte 23
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+.LC4:
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC5:
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC6:
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC7:
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+.LC8:
+ .long 32
+ .long 32
+ .long 32
+ .long 32
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+ .file "icamax.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl icamax_k
+ .type icamax_k, @function
+icamax_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry icamax_k,.-icamax_k
+ mr. 9,3
+ ble 0,.L25
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7
+ cmpdi 7,5,1
+ beq 7,.L53
+ lfs 11,0(4)
+ lfs 0,4(4)
+ cmpdi 7,9,1
+ fabs 11,11
+ fabs 0,0
+ fadds 11,11,0
+ beq 7,.L29
+ addi 9,9,-1
+ sldi 5,5,3
+ li 3,0
+ mtctr 9
+ add 4,4,5
+ li 9,1
+ .p2align 4,,15
+.L24:
+ lfs 0,4(4)
+ lfs 12,0(4)
+ add 4,4,5
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,0,11
+ bng 7,.L23
+ fmr 11,0
+ mr 3,9
+.L23:
+ addi 9,9,1
+ bdnz .L24
+.L51:
+ addi 3,3,1
+ blr
+ .p2align 4,,15
+.L25:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L53: /* contiguous case (stride == 1) */
+ rldicr. 8,9,0,58 /* r8 = count with the low 5 bits cleared (multiple of 32) */
+ bne 0,.L54 /* r8 != 0: take the vector path */
+ addi 7,8,1
+ li 10,0
+ subf 6,8,9 /* r6 = count - r8 = trip count of the scalar loop */
+ li 3,0 /* r3 = 0-based index of the current maximum */
+ xxlxor 11,11,11 /* running maximum starts at 0.0 */
+ cmpd 7,7,9
+ sldi 10,10,2
+ mtctr 6
+ add 4,4,10
+ bgt 7,.L43 /* r8 + 1 > count: .L43 forces a trip count of 1 */
+ li 10,-1
+ rldicr 10,10,0,0 /* r10 = 0x8000000000000000 */
+ cmpd 7,9,10
+ beq 7,.L43 /* count equal to that value: also forced to a single trip */
+ .p2align 4,,15
+.L44:
+ lfs 0,4(4)
+ lfs 12,0(4)
+ addi 4,4,8 /* advance one 8-byte element */
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12 /* f0 = |a|+|b| of this element */
+ fcmpu 7,0,11
+ bng 7,.L46 /* update only when strictly greater than the running maximum */
+ fmr 11,0
+ mr 3,8
+.L46:
+ addi 8,8,1 /* r8 = index of the next element */
+ bdnz .L44
+ b .L51 /* common exit returns r3 + 1 */
+ .p2align 4,,15
+.L54: /* vector path setup; r8 = number of elements the vector loop will consume */
+ addis 11,2,.LC2@toc@ha /* r11,r3,r5,r6,r7,r10 receive TOC-relative addresses of the .LC2-.LC7 constants */
+ addis 3,2,.LC3@toc@ha
+ addis 5,2,.LC6@toc@ha
+ addis 6,2,.LC7@toc@ha
+ xxspltib 47,0 /* vs47 (v15) = 0: per-lane index of the running maximum */
+ addis 7,2,.LC4@toc@ha
+ addis 10,2,.LC5@toc@ha
+ stxv 58,-96(1) /* vs56-vs63 are spilled below r1 in this block and reloaded at .L19 */
+ stxv 59,-80(1)
+ addi 11,11,.LC2@toc@l
+ addi 3,3,.LC3@toc@l
+ addi 5,5,.LC6@toc@l
+ addi 6,6,.LC7@toc@l
+ stxv 62,-32(1)
+ stxv 63,-16(1)
+ xxspltib 58,16
+ addi 7,7,.LC4@toc@l
+ addi 10,10,.LC5@toc@l
+ xxspltib 59,32
+ lxv 44,0(11) /* vs44 (v12) = .LC2 permute mask */
+ lxv 45,0(3) /* vs45 (v13) = .LC3 permute mask */
+ xxspltib 48,0 /* vs48 = 0.0: per-lane running maximum */
+ lxv 62,0(5)
+ xxlor 46,47,47 /* vs46 (v14) = 0: index base of the current block */
+ lxv 63,0(6)
+ stxv 60,-64(1)
+ stxv 61,-48(1)
+ lxv 60,0(7) /* vs60-vs63 now hold the index constants .LC4-.LC7 */
+ lxv 61,0(10)
+ li 7,0 /* r7 = elements consumed so far */
+ mr 10,4 /* r10 = read pointer */
+ vextsb2w 26,26 /* v26 = 16 in every word */
+ vextsb2w 27,27 /* v27 = 32 in every word */
+ stxv 56,-128(1)
+ stxv 57,-112(1)
+ .p2align 4,,15
+.L5: /* main loop: one trip consumes 256 bytes = 32 elements */
+ lxv 0,0(10)
+ addi 7,7,32
+ addi 10,10,256 /* pointer is bumped early; the remaining loads use negative offsets */
+ cmpd 7,8,7 /* cr7 is consumed by the bgt that closes the loop */
+ xvabssp 34,0
+ lxv 0,-240(10)
+ xvabssp 42,0
+ lxv 0,-224(10)
+ xvabssp 49,0
+ lxv 0,-208(10)
+ vpermr 25,10,2,12 /* vpermr with v12/v13 regroups two |x| vectors using the .LC2/.LC3 byte masks */
+ vpermr 2,10,2,13
+ xvabssp 35,0
+ lxv 0,-192(10)
+ xvaddsp 34,57,34 /* sum of the two regrouped vectors = |a|+|b| per element */
+ xvabssp 36,0
+ lxv 0,-176(10)
+ vpermr 10,3,17,12
+ vpermr 3,3,17,13
+ xvabssp 33,0
+ lxv 0,-160(10)
+ xvaddsp 10,42,35
+ xvabssp 50,0
+ lxv 0,-144(10)
+ vpermr 17,1,4,12
+ vpermr 4,1,4,13
+ xvabssp 37,0
+ lxv 0,-128(10)
+ xvaddsp 36,49,36
+ xvabssp 38,0
+ lxv 0,-112(10)
+ vpermr 1,5,18,12
+ vpermr 5,5,18,13
+ xvabssp 43,0
+ lxv 0,-96(10)
+ xvaddsp 12,33,37
+ xvabssp 51,0
+ lxv 0,-80(10)
+ vpermr 18,11,6,12
+ vpermr 6,11,6,13
+ xvabssp 39,0
+ lxv 0,-64(10)
+ xvaddsp 38,50,38
+ xvabssp 40,0
+ lxv 0,-48(10)
+ vpermr 11,7,19,12
+ vpermr 7,7,19,13
+ xvabssp 32,0
+ lxv 0,-32(10)
+ xvaddsp 11,43,39
+ xvcmpgtsp 39,10,34 /* each compare/select group keeps the larger sum and the matching index constant */
+ xvcmpgtsp 43,12,36
+ xvabssp 56,0
+ lxv 0,-16(10)
+ vpermr 19,0,8,12
+ vpermr 8,0,8,13
+ xxsel 10,34,10,39
+ xxsel 12,36,12,43
+ xxsel 39,60,61,39
+ xxsel 43,62,63,43
+ xvabssp 41,0
+ xvaddsp 40,51,40
+ vpermr 0,9,24,12
+ vpermr 9,9,24,13
+ xvaddsp 0,32,41
+ xvcmpgtsp 41,11,38
+ xvcmpgtsp 32,12,10
+ xvcmpgtsp 42,0,40
+ xxsel 11,38,11,41
+ xxsel 12,10,12,32
+ xxsel 43,39,43,32
+ xxsel 41,60,61,41
+ xxsel 0,40,0,42
+ xxsel 42,62,63,42
+ xvcmpgtsp 33,0,11
+ xxsel 0,11,0,33
+ xxsel 33,41,42,33
+ xvcmpgtsp 32,0,12
+ vadduwm 1,1,26 /* add 16 to the indices held in v1 */
+ xxsel 0,12,0,32
+ xxsel 32,43,33,32
+ xvcmpgtsp 33,0,48 /* lanes where this block's winner exceeds the running maximum */
+ vadduwm 0,14,0 /* add the block base to the winning indices */
+ vadduwm 14,14,27 /* base += 32 for the next block */
+ xxsel 47,47,32,33 /* update the running indices */
+ xxsel 48,48,0,33 /* update the running maxima */
+ bgt 7,.L5 /* loop while r8 > r7 */
+ xxsldwi 11,48,48,3 /* horizontal reduction: rotated copies of vs48 so each lane can be converted below */
+ xxsldwi 12,48,48,2
+ li 10,0
+ li 3,12
+ xxsldwi 0,48,48,1
+ xscvspdp 48,48 /* lane values are converted to double for the scalar compares */
+ vextuwrx 6,10,15 /* r6,r5,r7,r10 = index words of v15 at byte offsets 0,4,8,12 */
+ li 10,4
+ xscvspdp 11,11
+ xscvspdp 12,12
+ xscvspdp 0,0
+ vextuwrx 5,10,15
+ li 10,8
+ vextuwrx 7,10,15
+ vextuwrx 10,3,15
+ rldicl 12,5,0,32 /* zero-extend the 32-bit indices */
+ rldicl 3,6,0,32
+ rldicl 11,7,0,32
+ rldicl 0,10,0,32
+ fcmpu 7,11,12
+ fmr 10,0
+ beq 7,.L55 /* equal values: .L55 keeps the smaller index */
+ bnl 7,.L8
+ mr 3,12
+ fmr 11,12
+.L8:
+ xscmpudp 7,0,48
+ bne 7,.L11
+ cmplw 7,7,10 /* equal values: keep the smaller index */
+ ble 7,.L12
+ mr 7,10
+.L12:
+ rldicl 11,7,0,32
+.L13:
+ fcmpu 7,11,10 /* final compare of the two pair winners */
+ beq 7,.L56
+ bnl 7,.L17
+ mr 3,11
+ fmr 11,10
+.L17: /* f11 / r3 = maximum found by the vector part and its 0-based index */
+ cmpd 7,9,8
+ ble 7,.L19 /* count <= r8: no scalar tail */
+ addi 7,8,1
+ sldi 10,8,1
+ subf 6,8,9 /* r6 = number of remaining elements */
+ cmpd 7,7,9
+ sldi 10,10,2 /* r10 = r8 * 8 = byte offset of the first remaining element */
+ mtctr 6
+ add 4,4,10
+ bgt 7,.L37 /* r8 + 1 > count: .L37 forces a trip count of 1 */
+ li 10,-1
+ rldicr 10,10,0,0
+ cmpd 7,9,10
+ beq 7,.L37
+ .p2align 4,,15
+.L21: /* scalar tail loop, indices continue from r8 */
+ lfs 0,4(4)
+ lfs 12,0(4)
+ addi 4,4,8
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,0,11
+ bng 7,.L20 /* update only when strictly greater */
+ fmr 11,0
+ mr 3,8
+.L20:
+ addi 8,8,1
+ bdnz .L21
+.L19: /* epilogue of the vector path */
+ lxv 56,-128(1) /* reload vs56-vs63 from the slots written at .L54 */
+ lxv 57,-112(1)
+ addi 3,3,1 /* 1-based result */
+ lxv 58,-96(1)
+ lxv 59,-80(1)
+ lxv 60,-64(1)
+ lxv 61,-48(1)
+ lxv 62,-32(1)
+ lxv 63,-16(1)
+ blr
+ .p2align 4,,15
+.L55: /* tie between the first lane pair: r3 = smaller of r6 and r5 (unsigned 32-bit compare) */
+ cmplw 7,6,5
+ ble 7,.L7
+ mr 6,5
+.L7:
+ rldicl 3,6,0,32
+ b .L8
+ .p2align 4,,15
+.L29: /* strided path with count == 1 */
+ li 3,1
+ blr
+ .p2align 4,,15
+.L11: /* second lane pair differs: keep f10/r11 unless f0 < vs48 */
+ bnl 7,.L13
+ mr 11,0
+ xscpsgndp 10,48,48 /* f10 = vs48 (register copy) */
+ b .L13
+ .p2align 4,,15
+.L56: /* tie between the pair winners: keep the smaller index */
+ cmpd 7,3,11
+ ble 7,.L17
+ mr 3,11
+ b .L17
+.L37: /* force a single trip of the tail loop */
+ li 9,1
+ mtctr 9
+ b .L21
+.L43: /* force a single trip of the short contiguous loop */
+ li 9,1
+ mtctr 9
+ b .L44
+ .long 0 /* NOTE(review): this word and the following bytes look like a compiler-emitted trailer table - confirm */
+ .byte 0,0,0,0,0,0,0,0
+ .size icamax_k,.-icamax_k
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by the vector path */
+ .align 4
+.LC2: /* permute control: byte numbers 0-3, 8-11, 16-19, 24-27 (every other 4-byte word) */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 16
+ .byte 17
+ .byte 18
+ .byte 19
+ .byte 24
+ .byte 25
+ .byte 26
+ .byte 27
+.LC3: /* permute control: byte numbers 4-7, 12-15, 20-23, 28-31 (the complementary words) */
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 20
+ .byte 21
+ .byte 22
+ .byte 23
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+.LC4: /* .LC4-.LC7: element index constants 0..15, four per vector */
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC5:
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC6:
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC7:
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+/* .file "icamin.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl icamin_k
+ .type icamin_k, @function
+ The directives above are disabled. The PROLOGUE macro used below presumably
+ emits the equivalent section/global/type setup (confirm against common.h).
+ The code that follows is compiler-generated (see the .ident line at the end).
+*/
+#define ASSEMBLER
+#include "common.h"
+
+ PROLOGUE
+
+icamin_k: /* NOTE(review): this label, .localentry and .size still name icamin_k; confirm they agree with the symbol PROLOGUE defines */
+.LCF0: /* in: r3 = element count, r4 = data pointer, r5 = stride in elements; out: r3 = 1-based index of the smallest |a|+|b|, 0 if count <= 0 or stride <= 0 */
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: compute the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry icamin_k,.-icamin_k
+ mr. 9,3 /* r9 = count; cr0 = count compared with 0 */
+ ble 0,.L25 /* count <= 0: return 0 */
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7 /* stride <= 0: return with r3 = 0 */
+ lfs 11,0(4)
+ lfs 0,4(4)
+ cmpdi 7,5,1
+ fabs 11,11
+ fabs 0,0
+ fadds 11,11,0 /* f11 = running minimum, seeded with |a|+|b| of element 0 */
+ beq 7,.L54 /* stride == 1: contiguous path */
+ cmpdi 7,9,1
+ beq 7,.L29 /* count == 1: return 1 */
+ addi 9,9,-1
+ sldi 5,5,3 /* stride in bytes (8 bytes per element) */
+ mtctr 9 /* count - 1 elements remain */
+ add 4,4,5
+ li 3,0 /* r3 = 0-based index of the current minimum */
+ li 9,1 /* r9 = index of the element being examined */
+ .p2align 4,,15
+.L24: /* strided scalar loop */
+ lfs 0,4(4)
+ lfs 12,0(4)
+ add 4,4,5
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,0,11
+ bnl 7,.L23 /* update only when strictly smaller, so the earliest index wins ties */
+ fmr 11,0
+ mr 3,9
+.L23:
+ addi 9,9,1
+ bdnz .L24
+.L52:
+ addi 3,3,1 /* convert the 0-based index to the 1-based result */
+ blr
+ .p2align 4,,15
+.L25:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L54: /* contiguous case (stride == 1); f11 already holds the value of element 0 */
+ rldicr. 8,9,0,58 /* r8 = count with the low 5 bits cleared (multiple of 32) */
+ bne 0,.L55 /* r8 != 0: take the vector path */
+ addi 7,8,1
+ li 10,0
+ cmpd 7,7,9
+ sldi 10,10,2
+ add 4,4,10
+ subf 10,8,9 /* count - r8 = trip count of the scalar loop */
+ mtctr 10
+ li 3,0 /* r3 = 0-based index of the current minimum */
+ bgt 7,.L43 /* r8 + 1 > count: .L43 forces a trip count of 1 */
+ li 10,-1
+ rldicr 10,10,0,0 /* r10 = 0x8000000000000000 */
+ cmpd 7,9,10
+ beq 7,.L43 /* count equal to that value: also forced to a single trip */
+ .p2align 4,,15
+.L44:
+ lfs 0,0(4)
+ lfs 12,4(4)
+ addi 4,4,8 /* advance one 8-byte element */
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12 /* f0 = |a|+|b| of this element */
+ fcmpu 7,11,0
+ bng 7,.L46 /* update only when the running minimum is strictly greater */
+ fmr 11,0
+ mr 3,8
+.L46:
+ addi 8,8,1 /* r8 = index of the next element */
+ bdnz .L44
+ b .L52 /* common exit returns r3 + 1 */
+ .p2align 4,,15
+.L55: /* vector path setup; r8 = number of elements the vector loop will consume */
+ li 0,-128
+ std 31,-8(1) /* r31 is saved below r1 and restored at .L19 */
+ addis 5,2,.LC2@toc@ha
+ xscvdpspn 11,11 /* running minimum converted to single-precision lane format */
+ vspltisw 19,0 /* v19 (vs51) = 0: per-lane index of the running minimum */
+ addis 6,2,.LC3@toc@ha
+ addi 5,5,.LC2@toc@l
+ stvx 25,1,0 /* v25-v31 are stored at r1-128 .. r1-32 in this block and reloaded at .L19 */
+ li 0,-112
+ addi 6,6,.LC3@toc@l
+ xxlor 50,51,51 /* v18 (vs50) = 0: index base of the current block */
+ addis 7,2,.LC4@toc@ha
+ lxvd2x 44,0,5 /* vs44 (v12) = .LC2 permute mask */
+ addis 10,2,.LC5@toc@ha
+ stvx 26,1,0
+ li 0,-96
+ addi 7,7,.LC4@toc@l
+ lxvd2x 45,0,6 /* vs45 (v13) = .LC3 permute mask */
+ addis 5,2,.LC6@toc@ha
+ addis 6,2,.LC7@toc@ha
+ stvx 27,1,0
+ li 0,-80
+ addi 10,10,.LC5@toc@l
+ xxspltw 5,11,0 /* vs5 = running minimum replicated into all four lanes */
+ addi 6,6,.LC7@toc@l
+ addi 5,5,.LC6@toc@l
+ stvx 28,1,0
+ li 0,-64
+ lxvd2x 47,0,10 /* vs47 = .LC5 */
+ xxpermdi 44,44,44,2 /* each lxvd2x constant is followed by a doubleword swap */
+ mr 10,4 /* r10 = read pointer */
+ lxvd2x 49,0,6 /* vs49 = .LC7 */
+ lxvd2x 48,0,5 /* vs48 = .LC6 */
+ xxpermdi 45,45,45,2
+ li 6,0 /* r6 = elements consumed so far */
+ stvx 29,1,0
+ li 0,-48
+ xxlnand 44,44,44 /* both permute masks are bitwise complemented before use */
+ xxlnand 45,45,45
+ stvx 30,1,0
+ lxvd2x 62,0,7 /* vs62 = .LC4 */
+ addis 7,2,.LC8@toc@ha
+ li 0,-32
+ addi 7,7,.LC8@toc@l
+ xxpermdi 47,47,47,2
+ stvx 31,1,0
+ vspltisw 31,8
+ xxpermdi 48,48,48,2
+ lxvd2x 46,0,7 /* v14 (vs46) = .LC8 = 32 in every word */
+ vadduwm 31,31,31 /* v31 = 8 + 8 = 16 in every word */
+ xxpermdi 49,49,49,2
+ xxpermdi 62,62,62,2
+ .p2align 4,,15
+.L5: /* main loop: one trip consumes 256 bytes = 32 elements */
+ addi 3,10,16
+ addi 5,10,32
+ lxvd2x 34,0,10
+ addi 7,10,64
+ addi 31,10,48
+ addi 12,10,80
+ addi 11,10,96
+ lxvd2x 36,0,3
+ lxvd2x 37,0,5
+ addi 3,10,112
+ addi 5,10,128
+ lxvd2x 38,0,7
+ lxvd2x 6,0,31
+ addi 7,10,160
+ addi 31,10,144
+ lxvd2x 33,0,12
+ lxvd2x 39,0,11
+ addi 12,10,176
+ addi 11,10,192
+ lxvd2x 7,0,3
+ lxvd2x 40,0,5
+ xxpermdi 34,34,34,2 /* every loaded data vector also gets the doubleword swap */
+ addi 3,10,208
+ addi 5,10,224
+ lxvd2x 41,0,7
+ lxvd2x 8,0,31
+ addi 7,10,240
+ lxvd2x 9,0,12
+ lxvd2x 42,0,11
+ xxpermdi 37,37,37,2
+ xxpermdi 36,36,36,2
+ addi 6,6,32
+ lxvd2x 32,0,3
+ lxvd2x 43,0,5
+ xxpermdi 6,6,6,2
+ xxpermdi 38,38,38,2
+ cmpd 7,8,6 /* cr7 is consumed by the bgt that closes the loop */
+ addi 10,10,256
+ lxvd2x 10,0,7
+ xxpermdi 39,39,39,2
+ xxpermdi 33,33,33,2
+ xxpermdi 40,40,40,2
+ xxpermdi 7,7,7,2
+ xxpermdi 41,41,41,2
+ xxpermdi 8,8,8,2
+ xxpermdi 9,9,9,2
+ xxpermdi 42,42,42,2
+ xxpermdi 43,43,43,2
+ xxpermdi 32,32,32,2
+ xxpermdi 10,10,10,2
+ xvabssp 58,37 /* absolute values of all 16 data vectors */
+ xvabssp 59,39
+ xvabssp 35,40
+ xvabssp 60,41
+ xvabssp 34,34
+ xvabssp 33,33
+ xvabssp 32,32
+ xvabssp 61,43
+ xvabssp 36,36
+ xvabssp 37,6
+ xvabssp 38,38
+ xvabssp 39,7
+ xvabssp 40,8
+ xvabssp 41,9
+ xvabssp 42,42
+ xvabssp 43,10
+ vperm 25,4,2,12 /* vperm with v12/v13 regroups two |x| vectors using the .LC2/.LC3-derived masks */
+ vperm 4,4,2,13
+ vperm 2,5,26,12
+ vperm 5,5,26,13
+ vperm 26,1,6,12
+ vperm 6,1,6,13
+ vperm 1,7,27,12
+ vperm 7,7,27,13
+ vperm 27,8,3,12
+ vperm 8,8,3,13
+ vperm 3,9,28,12
+ vperm 9,9,28,13
+ vperm 28,0,10,12
+ vperm 10,0,10,13
+ vperm 0,11,29,12
+ vperm 11,11,29,13
+ xvaddsp 12,33,39 /* sums of the regrouped vectors = |a|+|b| per element */
+ xvaddsp 38,58,38
+ xvaddsp 0,32,43
+ xvaddsp 42,60,42
+ xvaddsp 36,57,36
+ xvaddsp 37,34,37
+ xvaddsp 40,59,40
+ xvaddsp 41,35,41
+ xvcmpgtsp 32,38,12 /* each compare/select group keeps the smaller sum and the matching index constant */
+ xvcmpgtsp 33,42,0
+ xvcmpgtsp 43,36,37
+ xvcmpgtsp 39,40,41
+ xxsel 12,38,12,32
+ xxsel 38,48,49,32
+ xxsel 0,42,0,33
+ xxsel 42,48,49,33
+ xxsel 37,36,37,43
+ xxsel 43,62,47,43
+ xxsel 41,40,41,39
+ xxsel 39,62,47,39
+ xvcmpgtsp 32,37,12
+ xvcmpgtsp 33,41,0
+ xxsel 12,37,12,32
+ xxsel 43,43,38,32
+ xxsel 0,41,0,33
+ xxsel 33,39,42,33
+ xvcmpgtsp 32,12,0
+ vadduwm 1,1,31 /* add 16 to the indices held in v1 */
+ xxsel 0,12,0,32
+ xxsel 32,43,33,32
+ xvcmpgtsp 33,5,0 /* lanes where the running minimum exceeds this block's winner */
+ vadduwm 0,0,18 /* add the block base to the winning indices */
+ vadduwm 18,18,14 /* base += 32 for the next block */
+ xxsel 51,51,32,33 /* update the running indices */
+ xxsel 5,5,0,33 /* update the running minima */
+ bgt 7,.L5 /* loop while r8 > r6 */
+ xxsldwi 11,5,5,3 /* horizontal reduction: rotated copies of vs5 so each lane can be converted below */
+ xxsldwi 12,5,5,2
+ vspltw 0,19,3 /* vspltw + mfvsrwz move the index words of v19 (vs51) into GPRs */
+ xxsldwi 0,5,5,1
+ xscvspdp 11,11 /* lane values are converted to double for the scalar compares */
+ xscvspdp 12,12
+ mfvsrwz 6,32
+ vspltw 0,19,2
+ xscvspdp 0,0
+ mfvsrwz 7,51
+ mfvsrwz 5,32
+ vspltw 0,19,0
+ xscvspdp 5,5
+ mfvsrwz 10,32
+ fcmpu 7,11,12
+ rldicl 3,6,0,32 /* zero-extend the 32-bit indices */
+ fmr 10,0
+ rldicl 11,7,0,32
+ rldicl 31,5,0,32
+ rldicl 0,10,0,32
+ beq 7,.L56 /* equal values: .L56 keeps the smaller index */
+ bng 7,.L8
+ fmr 11,12
+ mr 3,31
+.L8:
+ fcmpu 7,0,5
+ bne 7,.L11
+ cmplw 7,7,10 /* equal values: keep the smaller index */
+ ble 7,.L12
+ mr 7,10
+.L12:
+ rldicl 11,7,0,32
+.L13:
+ fcmpu 7,11,10 /* final compare of the two pair winners */
+ beq 7,.L57
+ bgt 7,.L58
+.L17: /* f11 / r3 = minimum found by the vector part and its 0-based index */
+ cmpd 7,9,8
+ ble 7,.L19 /* count <= r8: no scalar tail */
+ addi 7,8,1
+ sldi 10,8,1
+ cmpd 7,7,9
+ sldi 10,10,2 /* r10 = r8 * 8 = byte offset of the first remaining element */
+ add 4,4,10
+ subf 10,8,9 /* number of remaining elements */
+ mtctr 10
+ bgt 7,.L37 /* r8 + 1 > count: .L37 forces a trip count of 1 */
+ li 10,-1
+ rldicr 10,10,0,0
+ cmpd 7,9,10
+ beq 7,.L37
+ .p2align 4,,15
+.L21: /* scalar tail loop, indices continue from r8 */
+ lfs 0,0(4)
+ lfs 12,4(4)
+ addi 4,4,8
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,11,0
+ bng 7,.L20 /* update only when the running minimum is strictly greater */
+ fmr 11,0
+ mr 3,8
+.L20:
+ addi 8,8,1
+ bdnz .L21
+.L19: /* epilogue of the vector path */
+ li 0,-128
+ ld 31,-8(1) /* restore r31 */
+ addi 3,3,1 /* 1-based result */
+ lvx 25,1,0 /* reload v25-v31 from the slots written at .L55 */
+ li 0,-112
+ lvx 26,1,0
+ li 0,-96
+ lvx 27,1,0
+ li 0,-80
+ lvx 28,1,0
+ li 0,-64
+ lvx 29,1,0
+ li 0,-48
+ lvx 30,1,0
+ li 0,-32
+ lvx 31,1,0
+ blr
+ .p2align 4,,15
+.L56: /* tie between the first lane pair: r3 = smaller of r6 and r5 (unsigned 32-bit compare) */
+ cmplw 7,6,5
+ ble 7,.L7
+ mr 6,5
+.L7:
+ rldicl 3,6,0,32
+ b .L8
+ .p2align 4,,15
+.L29: /* strided path with count == 1 */
+ li 3,1
+ blr
+ .p2align 4,,15
+.L11: /* second lane pair differs: keep f10/r11 unless f0 > f5 */
+ bng 7,.L13
+ fmr 10,5
+ mr 11,0
+ b .L13
+ .p2align 4,,15
+.L57: /* tie between the pair winners: keep the smaller index */
+ cmpd 7,3,11
+ ble 7,.L17
+ mr 3,11
+ b .L17
+ .p2align 4,,15
+.L58: /* f11 > f10: the second pair winner becomes the result */
+ fmr 11,10
+ mr 3,11
+ b .L17
+.L43: /* force a single trip of the short contiguous loop */
+ li 9,1
+ mtctr 9
+ b .L44
+.L37: /* force a single trip of the tail loop */
+ li 9,1
+ mtctr 9
+ b .L21
+ .long 0 /* NOTE(review): this word and the following bytes look like a compiler-emitted trailer table - confirm */
+ .byte 0,0,0,0,0,1,0,0
+ .size icamin_k,.-icamin_k
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by the vector path */
+ .align 4
+.LC2: /* permute control: byte numbers 0-3, 8-11, 16-19, 24-27 (every other 4-byte word) */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 16
+ .byte 17
+ .byte 18
+ .byte 19
+ .byte 24
+ .byte 25
+ .byte 26
+ .byte 27
+.LC3: /* permute control: byte numbers 4-7, 12-15, 20-23, 28-31 (the complementary words) */
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 20
+ .byte 21
+ .byte 22
+ .byte 23
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+.LC4: /* .LC4-.LC7: element index constants 0..15, four per vector */
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC5:
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC6:
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC7:
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+.LC8: /* per-iteration index step: 32 in every word */
+ .long 32
+ .long 32
+ .long 32
+ .long 32
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+ .file "icamin.c" /* compiler-generated listing; see the .ident line at the end of this file */
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl icamin_k /* NOTE(review): symbol is declared directly here; other variants in this change use the PROLOGUE macro - confirm this is intended for DYNAMIC_ARCH builds */
+ .type icamin_k, @function
+icamin_k: /* returns in r3 the 1-based index of the element whose |a|+|b| (its two 4-byte floats) is smallest */
+.LCF0: /* in: r3 = element count, r4 = data pointer, r5 = stride in elements; result is 0 if count <= 0 or stride <= 0 */
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: compute the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry icamin_k,.-icamin_k
+ mr. 9,3 /* r9 = count; cr0 = count compared with 0 */
+ ble 0,.L25 /* count <= 0: return 0 */
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7 /* stride <= 0: return with r3 = 0 */
+ lfs 11,0(4)
+ lfs 0,4(4)
+ cmpdi 7,5,1
+ fabs 11,11
+ fabs 0,0
+ fadds 11,11,0 /* f11 = running minimum, seeded with |a|+|b| of element 0 */
+ beq 7,.L53 /* stride == 1: contiguous path */
+ cmpdi 7,9,1
+ beq 7,.L29 /* count == 1: return 1 */
+ addi 9,9,-1
+ sldi 5,5,3 /* stride in bytes (8 bytes per element) */
+ li 3,0 /* r3 = 0-based index of the current minimum */
+ mtctr 9 /* count - 1 elements remain */
+ add 4,4,5
+ li 9,1 /* r9 = index of the element being examined */
+ .p2align 4,,15
+.L24: /* strided scalar loop */
+ lfs 0,4(4)
+ lfs 12,0(4)
+ add 4,4,5
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,0,11
+ bnl 7,.L23 /* update only when strictly smaller, so the earliest index wins ties */
+ fmr 11,0
+ mr 3,9
+.L23:
+ addi 9,9,1
+ bdnz .L24
+.L51:
+ addi 3,3,1 /* convert the 0-based index to the 1-based result */
+ blr
+ .p2align 4,,15
+.L25:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L53: /* contiguous case (stride == 1); f11 already holds the value of element 0 */
+ rldicr. 8,9,0,58 /* r8 = count with the low 5 bits cleared (multiple of 32) */
+ bne 0,.L54 /* r8 != 0: take the vector path */
+ addi 7,8,1
+ li 10,0
+ subf 6,8,9 /* r6 = count - r8 = trip count of the scalar loop */
+ li 3,0 /* r3 = 0-based index of the current minimum */
+ cmpd 7,7,9
+ sldi 10,10,2
+ mtctr 6
+ add 4,4,10
+ bgt 7,.L43 /* r8 + 1 > count: .L43 forces a trip count of 1 */
+ li 10,-1
+ rldicr 10,10,0,0 /* r10 = 0x8000000000000000 */
+ cmpd 7,9,10
+ beq 7,.L43 /* count equal to that value: also forced to a single trip */
+ .p2align 4,,15
+.L44:
+ lfs 0,0(4)
+ lfs 12,4(4)
+ addi 4,4,8 /* advance one 8-byte element */
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12 /* f0 = |a|+|b| of this element */
+ fcmpu 7,11,0
+ bng 7,.L46 /* update only when the running minimum is strictly greater */
+ fmr 11,0
+ mr 3,8
+.L46:
+ addi 8,8,1 /* r8 = index of the next element */
+ bdnz .L44
+ b .L51 /* common exit returns r3 + 1 */
+ .p2align 4,,15
+.L54: /* vector path setup; r8 = number of elements the vector loop will consume */
+ xscvdpspn 9,11 /* running minimum converted to single-precision lane format */
+ addis 11,2,.LC2@toc@ha /* r11,r3,r5,r6,r7,r10 receive TOC-relative addresses of the .LC2-.LC7 constants */
+ addis 3,2,.LC3@toc@ha
+ addis 5,2,.LC6@toc@ha
+ addis 6,2,.LC7@toc@ha
+ addis 7,2,.LC4@toc@ha
+ addis 10,2,.LC5@toc@ha
+ xxspltib 48,0 /* vs48 (v16) = 0: per-lane index of the running minimum */
+ addi 11,11,.LC2@toc@l
+ addi 3,3,.LC3@toc@l
+ addi 5,5,.LC6@toc@l
+ stxv 59,-80(1) /* vs57-vs63 are spilled below r1 in this block and reloaded at .L19 */
+ addi 6,6,.LC7@toc@l
+ stxv 60,-64(1)
+ stxv 63,-16(1)
+ addi 7,7,.LC4@toc@l
+ xxspltib 59,16
+ lxv 44,0(11) /* vs44 (v12) = .LC2 permute mask */
+ xxspltib 60,32
+ lxv 45,0(3) /* vs45 (v13) = .LC3 permute mask */
+ lxv 63,0(5) /* vs63 = .LC6 */
+ xxlor 47,48,48 /* vs47 (v15) = 0: index base of the current block */
+ lxv 46,0(6) /* vs46 = .LC7 */
+ addi 10,10,.LC5@toc@l
+ stxv 61,-48(1)
+ stxv 62,-32(1)
+ xxspltw 9,9,0 /* vs9 = running minimum replicated into all four lanes */
+ lxv 61,0(7) /* vs61 = .LC4 */
+ lxv 62,0(10) /* vs62 = .LC5 */
+ li 7,0 /* r7 = elements consumed so far */
+ mr 10,4 /* r10 = read pointer */
+ vextsb2w 27,27 /* v27 = 16 in every word */
+ vextsb2w 28,28 /* v28 = 32 in every word */
+ stxv 57,-112(1)
+ stxv 58,-96(1)
+ .p2align 4,,15
+.L5: /* main loop: one trip consumes 256 bytes = 32 elements */
+ lxv 0,0(10)
+ addi 7,7,32
+ addi 10,10,256 /* pointer is bumped early; the remaining loads use negative offsets */
+ cmpd 7,8,7 /* cr7 is consumed by the bgt that closes the loop */
+ xvabssp 34,0
+ lxv 0,-240(10)
+ xvabssp 42,0
+ lxv 0,-224(10)
+ xvabssp 49,0
+ lxv 0,-208(10)
+ vpermr 26,10,2,12 /* vpermr with v12/v13 regroups two |x| vectors using the .LC2/.LC3 byte masks */
+ vpermr 2,10,2,13
+ xvabssp 35,0
+ lxv 0,-192(10)
+ xvaddsp 34,58,34 /* sum of the two regrouped vectors = |a|+|b| per element */
+ xvabssp 36,0
+ lxv 0,-176(10)
+ vpermr 10,3,17,12
+ vpermr 3,3,17,13
+ xvabssp 33,0
+ lxv 0,-160(10)
+ xvaddsp 10,42,35
+ xvabssp 50,0
+ lxv 0,-144(10)
+ vpermr 17,1,4,12
+ vpermr 4,1,4,13
+ xvabssp 37,0
+ lxv 0,-128(10)
+ xvaddsp 36,49,36
+ xvabssp 38,0
+ lxv 0,-112(10)
+ vpermr 1,5,18,12
+ vpermr 5,5,18,13
+ xvabssp 43,0
+ lxv 0,-96(10)
+ xvaddsp 12,33,37
+ xvabssp 51,0
+ lxv 0,-80(10)
+ vpermr 18,11,6,12
+ vpermr 6,11,6,13
+ xvabssp 39,0
+ lxv 0,-64(10)
+ xvaddsp 38,50,38
+ xvabssp 40,0
+ lxv 0,-48(10)
+ vpermr 11,7,19,12
+ vpermr 7,7,19,13
+ xvabssp 32,0
+ lxv 0,-32(10)
+ xvaddsp 11,43,39
+ xvcmpgtsp 39,34,10 /* each compare/select group keeps the smaller sum and the matching index constant */
+ xvcmpgtsp 43,36,12
+ xvabssp 57,0
+ lxv 0,-16(10)
+ vpermr 19,0,8,12
+ vpermr 8,0,8,13
+ xxsel 10,34,10,39
+ xxsel 12,36,12,43
+ xxsel 39,61,62,39
+ xxsel 43,63,46,43
+ xvabssp 41,0
+ xvaddsp 40,51,40
+ vpermr 0,9,25,12
+ vpermr 9,9,25,13
+ xvaddsp 0,32,41
+ xvcmpgtsp 41,38,11
+ xvcmpgtsp 32,10,12
+ xvcmpgtsp 42,40,0
+ xxsel 11,38,11,41
+ xxsel 12,10,12,32
+ xxsel 43,39,43,32
+ xxsel 41,61,62,41
+ xxsel 0,40,0,42
+ xxsel 42,63,46,42
+ xvcmpgtsp 33,11,0
+ xxsel 0,11,0,33
+ xxsel 33,41,42,33
+ xvcmpgtsp 32,12,0
+ vadduwm 1,1,27 /* add 16 to the indices held in v1 */
+ xxsel 0,12,0,32
+ xxsel 32,43,33,32
+ xvcmpgtsp 33,9,0 /* lanes where the running minimum exceeds this block's winner */
+ vadduwm 0,0,15 /* add the block base to the winning indices */
+ vadduwm 15,15,28 /* base += 32 for the next block */
+ xxsel 48,48,32,33 /* update the running indices */
+ xxsel 9,9,0,33 /* update the running minima */
+ bgt 7,.L5 /* loop while r8 > r7 */
+ xxsldwi 11,9,9,3 /* horizontal reduction: rotated copies of vs9 so each lane can be converted below */
+ xxsldwi 12,9,9,2
+ li 10,0
+ li 3,12
+ xxsldwi 0,9,9,1
+ xscvspdp 9,9 /* lane values are converted to double for the scalar compares */
+ vextuwrx 6,10,16 /* r6,r5,r7,r10 = index words of v16 at byte offsets 0,4,8,12 */
+ li 10,4
+ xscvspdp 11,11
+ xscvspdp 12,12
+ xscvspdp 0,0
+ vextuwrx 5,10,16
+ li 10,8
+ vextuwrx 7,10,16
+ vextuwrx 10,3,16
+ rldicl 12,5,0,32 /* zero-extend the 32-bit indices */
+ rldicl 3,6,0,32
+ rldicl 11,7,0,32
+ rldicl 0,10,0,32
+ fcmpu 7,11,12
+ fmr 10,0
+ beq 7,.L55 /* equal values: .L55 keeps the smaller index */
+ bng 7,.L8
+ mr 3,12
+ fmr 11,12
+.L8:
+ fcmpu 7,0,9
+ bne 7,.L11
+ cmplw 7,7,10 /* equal values: keep the smaller index */
+ ble 7,.L12
+ mr 7,10
+.L12:
+ rldicl 11,7,0,32
+.L13:
+ fcmpu 7,11,10 /* final compare of the two pair winners */
+ beq 7,.L56
+ bng 7,.L17
+ mr 3,11
+ fmr 11,10
+.L17: /* f11 / r3 = minimum found by the vector part and its 0-based index */
+ cmpd 7,9,8
+ ble 7,.L19 /* count <= r8: no scalar tail */
+ addi 7,8,1
+ sldi 10,8,1
+ subf 6,8,9 /* r6 = number of remaining elements */
+ cmpd 7,7,9
+ sldi 10,10,2 /* r10 = r8 * 8 = byte offset of the first remaining element */
+ mtctr 6
+ add 4,4,10
+ bgt 7,.L37 /* r8 + 1 > count: .L37 forces a trip count of 1 */
+ li 10,-1
+ rldicr 10,10,0,0
+ cmpd 7,9,10
+ beq 7,.L37
+ .p2align 4,,15
+.L21: /* scalar tail loop, indices continue from r8 */
+ lfs 0,0(4)
+ lfs 12,4(4)
+ addi 4,4,8
+ fabs 0,0
+ fabs 12,12
+ fadds 0,0,12
+ fcmpu 7,11,0
+ bng 7,.L20 /* update only when the running minimum is strictly greater */
+ fmr 11,0
+ mr 3,8
+.L20:
+ addi 8,8,1
+ bdnz .L21
+.L19: /* epilogue of the vector path */
+ lxv 57,-112(1) /* reload vs57-vs63 from the slots written at .L54 */
+ lxv 58,-96(1)
+ addi 3,3,1 /* 1-based result */
+ lxv 59,-80(1)
+ lxv 60,-64(1)
+ lxv 61,-48(1)
+ lxv 62,-32(1)
+ lxv 63,-16(1)
+ blr
+ .p2align 4,,15
+.L55: /* tie between the first lane pair: r3 = smaller of r6 and r5 (unsigned 32-bit compare) */
+ cmplw 7,6,5
+ ble 7,.L7
+ mr 6,5
+.L7:
+ rldicl 3,6,0,32
+ b .L8
+ .p2align 4,,15
+.L29: /* strided path with count == 1 */
+ li 3,1
+ blr
+ .p2align 4,,15
+.L11: /* second lane pair differs: keep f10/r11 unless f0 > f9 */
+ bng 7,.L13
+ mr 11,0
+ fmr 10,9
+ b .L13
+ .p2align 4,,15
+.L56: /* tie between the pair winners: keep the smaller index */
+ cmpd 7,3,11
+ ble 7,.L17
+ mr 3,11
+ b .L17
+.L37: /* force a single trip of the tail loop */
+ li 9,1
+ mtctr 9
+ b .L21
+.L43: /* force a single trip of the short contiguous loop */
+ li 9,1
+ mtctr 9
+ b .L44
+ .long 0 /* NOTE(review): this word and the following bytes look like a compiler-emitted trailer table - confirm */
+ .byte 0,0,0,0,0,0,0,0
+ .size icamin_k,.-icamin_k
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by the vector path */
+ .align 4
+.LC2: /* permute control: byte numbers 0-3, 8-11, 16-19, 24-27 (every other 4-byte word) */
+ .byte 0
+ .byte 1
+ .byte 2
+ .byte 3
+ .byte 8
+ .byte 9
+ .byte 10
+ .byte 11
+ .byte 16
+ .byte 17
+ .byte 18
+ .byte 19
+ .byte 24
+ .byte 25
+ .byte 26
+ .byte 27
+.LC3: /* permute control: byte numbers 4-7, 12-15, 20-23, 28-31 (the complementary words) */
+ .byte 4
+ .byte 5
+ .byte 6
+ .byte 7
+ .byte 12
+ .byte 13
+ .byte 14
+ .byte 15
+ .byte 20
+ .byte 21
+ .byte 22
+ .byte 23
+ .byte 28
+ .byte 29
+ .byte 30
+ .byte 31
+.LC4: /* .LC4-.LC7: element index constants 0..15, four per vector */
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC5:
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC6:
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC7:
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+/* .file "isamax.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl isamax_k
+ .type isamax_k, @function
+ The directives above are disabled. The PROLOGUE macro used below presumably
+ emits the equivalent section/global/type setup (confirm against common.h).
+ The code that follows is compiler-generated (see the .ident line at the end).
+*/
+
+#define ASSEMBLER
+#include "common.h"
+
+ PROLOGUE
+
+isamax_k: /* NOTE(review): this label, .localentry and .size still name isamax_k; confirm they agree with the symbol PROLOGUE defines */
+.LCF0: /* in: r3 = element count, r4 = pointer to 4-byte floats, r5 = stride in elements; out: r3 = 1-based index of the largest |x|, 0 if count <= 0 or stride <= 0 */
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: compute the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry isamax_k,.-isamax_k
+ mr. 11,3 /* r11 = count; cr0 = count compared with 0 */
+ ble 0,.L36 /* count <= 0: return 0 */
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7 /* stride <= 0: return with r3 = 0 */
+ cmpdi 7,5,1
+ beq 7,.L69 /* stride == 1: contiguous path */
+ rldicr. 7,11,0,61 /* r7 = count with the low 2 bits cleared (multiple of 4) */
+ beq 0,.L40 /* fewer than 4 elements: only the one-at-a-time loop runs */
+ sldi 3,5,1
+ xxlxor 0,0,0 /* f0 = running maximum, starts at 0.0 */
+ sldi 6,5,2 /* r6 = one stride in bytes (4 bytes per element) */
+ add 3,3,5
+ sldi 0,5,4 /* r0 = four strides in bytes */
+ sldi 3,3,2 /* r3 = three strides in bytes */
+ sldi 5,5,3 /* r5 = two strides in bytes */
+ mr 9,4 /* r9 = read pointer */
+ li 8,0 /* r8 = 0-based index of the current maximum */
+ li 10,0 /* r10 = index of the first element of the current group of four */
+ .p2align 4,,15
+.L31: /* strided loop unrolled by four; each step updates only on strictly greater */
+ lfs 12,0(9)
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L23
+ fmr 0,12
+ mr 8,10
+.L23:
+ lfsx 12,9,6
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L25
+ fmr 0,12
+ addi 8,10,1
+.L25:
+ lfsx 12,9,5
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L27
+ fmr 0,12
+ addi 8,10,2
+.L27:
+ lfsx 12,9,3
+ add 9,9,0
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L29
+ fmr 0,12
+ addi 8,10,3
+.L29:
+ addi 10,10,4
+ cmpd 7,7,10
+ bgt 7,.L31 /* loop while r7 > r10 */
+ addi 7,7,-1
+ srdi 7,7,2
+ addi 7,7,1 /* r7 = number of groups of four that were processed */
+ sldi 9,7,2 /* r9 = number of elements already processed */
+ mulld 7,6,7
+ cmpd 7,11,9
+ ble 7,.L67 /* nothing left: return */
+.L22: /* remaining elements, one per trip, starting at index r9 */
+ addi 10,9,1
+ sldi 7,7,2 /* r7 = byte offset of the first remaining element */
+ cmpd 7,10,11
+ subf 10,9,11 /* remaining element count */
+ mtctr 10
+ add 4,4,7
+ bgt 7,.L54 /* r9 + 1 > count: .L54 forces a trip count of 1 */
+ li 3,-1
+ rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
+ cmpd 7,11,3
+ beq 7,.L54
+ .p2align 4,,15
+.L35:
+ lfs 12,0(4)
+ add 4,4,6
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L33
+ fmr 0,12
+ mr 8,9
+.L33:
+ addi 9,9,1
+ bdnz .L35
+.L67:
+ addi 3,8,1 /* convert the 0-based index to the 1-based result */
+ blr
+ .p2align 4,,15
+.L36:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L69: /* contiguous case (stride == 1) */
+ rldicr. 10,11,0,57 /* r10 = count with the low 6 bits cleared (multiple of 64) */
+ bne 0,.L70 /* r10 != 0: take the vector path */
+ addi 7,10,1
+ sldi 9,10,2
+ xxlxor 12,12,12 /* f12 = running maximum, starts at 0.0 */
+ cmpd 7,7,11
+ add 4,4,9
+ subf 9,10,11 /* count - r10 = trip count of the scalar loop */
+ li 8,0 /* r8 = 0-based index of the current maximum */
+ mtctr 9
+ bgt 7,.L60 /* r10 + 1 > count: .L60 forces a trip count of 1 */
+ li 3,-1
+ rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
+ cmpd 7,11,3
+ beq 7,.L60 /* count equal to that value: also forced to a single trip */
+ .p2align 4,,15
+.L61:
+ lfs 0,0(4)
+ addi 4,4,4 /* advance one 4-byte element */
+ fabs 0,0
+ fcmpu 7,0,12
+ bng 7,.L63 /* update only when strictly greater than the running maximum */
+ fmr 12,0
+ mr 8,10
+.L63:
+ addi 10,10,1 /* r10 = index of the next element */
+ bdnz .L61
+ b .L67 /* common exit returns r8 + 1 */
+ .p2align 4,,15
+.L70: /* vector path setup; r10 = number of elements the vector loop will consume */
+ li 0,-64
+ std 31,-8(1) /* r31 is saved below r1 and restored at .L16 */
+ addis 3,2,.LC2@toc@ha
+ vspltisw 18,0 /* v18 (vs50) = 0: per-lane index of the running maximum */
+ vspltisw 12,0 /* v12 (vs44) = 0: per-lane running maximum */
+ addis 5,2,.LC3@toc@ha
+ addis 6,2,.LC6@toc@ha
+ stvx 29,1,0 /* v29-v31 are stored at r1-64 .. r1-32 in this block and reloaded at .L16 */
+ li 0,-48
+ addis 8,2,.LC7@toc@ha
+ xxlor 35,50,50 /* v3 (vs35) = 0: index base of the current block */
+ addi 3,3,.LC2@toc@l
+ addi 5,5,.LC3@toc@l
+ stvx 30,1,0
+ addi 6,6,.LC6@toc@l
+ li 0,-32
+ addi 8,8,.LC7@toc@l
+ lxvd2x 51,0,3 /* vs51 = .LC2 */
+ lxvd2x 34,0,5 /* vs34 = .LC3 */
+ addis 7,2,.LC4@toc@ha
+ stvx 31,1,0
+ lxvd2x 47,0,6 /* v15 (vs47) = .LC6 = 32 in every word */
+ addis 9,2,.LC5@toc@ha
+ addi 7,7,.LC4@toc@l
+ lxvd2x 48,0,8 /* v16 (vs48) = .LC7 = 64 in every word */
+ addi 9,9,.LC5@toc@l
+ vspltisw 17,8
+ vadduwm 17,17,17 /* v17 = 8 + 8 = 16 in every word */
+ lxvd2x 36,0,7 /* vs36 = .LC4 */
+ li 7,0 /* r7 = elements consumed so far */
+ lxvd2x 37,0,9 /* vs37 = .LC5 */
+ mr 9,4 /* r9 = read pointer */
+ .p2align 4,,15
+.L5: /* main loop: one trip consumes 256 bytes = 64 floats */
+ addi 5,9,16
+ addi 6,9,32
+ lxvd2x 41,0,9
+ vadduwm 31,3,15 /* v31 = block base + 32 */
+ addi 8,9,64
+ addi 31,9,48
+ addi 12,9,80
+ addi 3,9,96
+ lxvd2x 5,0,5
+ lxvd2x 43,0,6
+ addi 5,9,112
+ addi 6,9,128
+ lxvd2x 1,0,8
+ lxvd2x 9,0,31
+ addi 8,9,160
+ addi 31,9,144
+ lxvd2x 6,0,12
+ lxvd2x 13,0,3
+ addi 12,9,176
+ addi 3,9,192
+ lxvd2x 11,0,5
+ lxvd2x 2,0,6
+ xvabssp 41,41 /* absolute values of all 16 data vectors follow */
+ addi 5,9,208
+ addi 6,9,224
+ lxvd2x 3,0,8
+ lxvd2x 7,0,31
+ addi 8,9,240
+ lxvd2x 10,0,12
+ lxvd2x 4,0,3
+ xvabssp 43,43
+ xvabssp 5,5
+ addi 7,7,64
+ lxvd2x 8,0,5
+ lxvd2x 0,0,6
+ xvabssp 9,9
+ xvabssp 1,1
+ cmpd 7,10,7 /* cr7 is consumed by the bgt that closes the loop */
+ addi 9,9,256
+ lxvd2x 12,0,8
+ xvabssp 6,6
+ xvabssp 13,13
+ xvabssp 11,11
+ xvabssp 2,2
+ xvabssp 7,7
+ xvabssp 3,3
+ xvabssp 10,10
+ xvabssp 4,4
+ xvabssp 8,8
+ xvabssp 0,0
+ xvabssp 12,12
+ xvcmpgtsp 32,5,41 /* each compare/select group keeps the larger value and the matching index constant */
+ xvcmpgtsp 61,9,43
+ xvcmpgtsp 45,6,1
+ xvcmpgtsp 62,11,13
+ xvcmpgtsp 38,7,2
+ xvcmpgtsp 46,10,3
+ xvcmpgtsp 40,8,4
+ xvcmpgtsp 39,12,0
+ xxsel 5,41,5,32
+ xxsel 32,51,34,32
+ xxsel 9,43,9,61
+ xxsel 6,1,6,45
+ xxsel 11,13,11,62
+ xxsel 43,51,34,45
+ xxsel 7,2,7,38
+ xvcmpgtsp 41,9,5
+ xxsel 10,3,10,46
+ xvcmpgtsp 45,11,6
+ xxsel 8,4,8,40
+ xxsel 62,36,37,62
+ xxsel 0,0,12,39
+ xvcmpgtsp 42,10,7
+ xxsel 61,36,37,61
+ xxsel 40,51,34,40
+ xvcmpgtsp 33,0,8
+ xxsel 39,36,37,39
+ xxsel 38,51,34,38
+ xxsel 46,36,37,46
+ xxsel 9,5,9,41
+ xxsel 41,32,61,41
+ xxsel 12,6,11,45
+ xxsel 45,43,62,45
+ xxsel 11,7,10,42
+ xvcmpgtsp 32,12,9
+ vadduwm 13,13,17 /* add 16 to the indices held in v13 */
+ xxsel 42,38,46,42
+ xxsel 0,8,0,33
+ xxsel 33,40,39,33
+ xvcmpgtsp 43,0,11
+ vadduwm 1,1,17 /* add 16 to the indices held in v1 */
+ xxsel 12,9,12,32
+ xxsel 32,41,45,32
+ vadduwm 0,3,0 /* add the block base to the indices in v0 */
+ vadduwm 3,3,16 /* base += 64 for the next block */
+ xxsel 0,11,0,43
+ xxsel 33,42,33,43
+ xvcmpgtsp 45,0,12
+ vadduwm 1,31,1 /* add base + 32 to the indices in v1 */
+ xxsel 0,12,0,45
+ xxsel 32,32,33,45
+ xvcmpgtsp 33,0,44 /* lanes where this block's winner exceeds the running maximum */
+ xxsel 50,50,32,33 /* update the running indices */
+ xxsel 44,44,0,33 /* update the running maxima */
+ bgt 7,.L5 /* loop while r10 > r7 */
+ xxsldwi 12,44,44,1 /* horizontal reduction over the four lanes of vs44 (values) and vs50 (indices) */
+ xscvspdp 10,44 /* lane values are converted to double for the scalar compares */
+ vspltw 0,18,0 /* vspltw + mfvsrwz move the index words of v18 (vs50) into GPRs */
+ xxsldwi 0,44,44,3
+ xscvspdp 12,12
+ mfvsrwz 3,50
+ mfvsrwz 6,32
+ vspltw 0,18,3
+ xscvspdp 0,0
+ xxsldwi 44,44,44,2
+ mfvsrwz 7,32
+ vspltw 0,18,2
+ xscvspdp 44,44
+ mfvsrwz 9,32
+ fcmpu 7,12,10
+ rldicl 8,3,0,32 /* zero-extend the 32-bit indices */
+ rldicl 31,6,0,32
+ fmr 11,0
+ rldicl 0,7,0,32
+ rldicl 5,9,0,32
+ beq 7,.L71 /* equal values: .L71 keeps the smaller index */
+ bnl 7,.L8
+ fmr 12,10
+ mr 8,31
+.L8:
+ xscmpudp 7,0,44
+ bne 7,.L11
+ cmplw 7,7,9 /* equal values: keep the smaller index */
+ ble 7,.L12
+ mr 7,9
+.L12:
+ rldicl 5,7,0,32
+.L13:
+ fcmpu 7,12,11 /* final compare of the two pair winners */
+ beq 7,.L72
+ bnl 7,.L17
+ fmr 12,11
+ mr 8,5
+.L17: /* f12 / r8 = maximum found by the vector part and its 0-based index */
+ cmpd 7,11,10
+ ble 7,.L16 /* count <= r10: no scalar tail */
+ addi 7,10,1
+ sldi 9,10,2 /* r9 = r10 * 4 = byte offset of the first remaining element */
+ cmpd 7,7,11
+ add 4,4,9
+ subf 9,10,11 /* number of remaining elements */
+ mtctr 9
+ bgt 7,.L53 /* r10 + 1 > count: .L53 forces a trip count of 1 */
+ li 3,-1
+ rldicr 3,3,0,0
+ cmpd 7,11,3
+ beq 7,.L53
+ .p2align 4,,15
+.L21: /* scalar tail loop, indices continue from r10 */
+ lfs 0,0(4)
+ addi 4,4,4
+ fabs 0,0
+ fcmpu 7,0,12
+ bng 7,.L19 /* update only when strictly greater */
+ fmr 12,0
+ mr 8,10
+.L19:
+ addi 10,10,1
+ bdnz .L21
+.L16: /* epilogue of the vector path */
+ li 0,-64
+ ld 31,-8(1) /* restore r31 */
+ addi 3,8,1 /* 1-based result */
+ lvx 29,1,0 /* reload v29-v31 from the slots written at .L70 */
+ li 0,-48
+ lvx 30,1,0
+ li 0,-32
+ lvx 31,1,0
+ blr
+ .p2align 4,,15
+.L71: /* tie between the first lane pair: r8 = smaller of r3 and r6 (unsigned 32-bit compare) */
+ cmplw 7,3,6
+ ble 7,.L7
+ mr 3,6
+.L7:
+ rldicl 8,3,0,32
+ b .L8
+ .p2align 4,,15
+.L40: /* strided path with fewer than 4 elements: set up state and join the one-at-a-time loop */
+ xxlxor 0,0,0
+ sldi 6,5,2
+ li 8,0
+ li 9,0
+ b .L22
+ .p2align 4,,15
+.L11: /* second lane pair differs: pick the larger value and its index */
+ blt 7,.L39
+ mr 5,0
+ b .L13
+ .p2align 4,,15
+.L72: /* tie between the pair winners: keep the smaller index */
+ cmpd 7,8,5
+ ble 7,.L17
+ mr 8,5
+ b .L17
+ .p2align 4,,15
+.L39:
+ xscpsgndp 11,44,44 /* f11 = vs44 (register copy) */
+ b .L13
+.L53: /* force a single trip of the tail loop */
+ li 9,1
+ mtctr 9
+ b .L21
+.L54: /* force a single trip of the strided remainder loop */
+ li 10,1
+ mtctr 10
+ b .L35
+.L60: /* force a single trip of the short contiguous loop */
+ li 9,1
+ mtctr 9
+ b .L61
+ .long 0 /* NOTE(review): this word and the following bytes look like a compiler-emitted trailer table - confirm */
+ .byte 0,0,0,0,0,1,0,0
+ .size isamax_k,.-isamax_k
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by the vector path */
+ .align 4
+.LC2: /* .LC2-.LC5: element index constants 0..15, four per vector */
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC3:
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC4:
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC5:
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+.LC6: /* index offset: 32 in every word */
+ .long 32
+ .long 32
+ .long 32
+ .long 32
+.LC7: /* per-iteration index step: 64 in every word */
+ .long 64
+ .long 64
+ .long 64
+ .long 64
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+ .file "isamax.c" /* compiler-generated listing; see the .ident line at the end of this file */
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl isamax_k /* NOTE(review): symbol is declared directly here; other variants in this change use the PROLOGUE macro - confirm this is intended for DYNAMIC_ARCH builds */
+ .type isamax_k, @function
+isamax_k: /* returns in r3 the 1-based index of the 4-byte float with the largest absolute value */
+.LCF0: /* in: r3 = element count, r4 = data pointer, r5 = stride in elements; result is 0 if count <= 0 or stride <= 0 */
+0: addis 2,12,.TOC.-.LCF0@ha /* global entry: compute the TOC pointer r2 from r12 */
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry isamax_k,.-isamax_k
+ mr. 11,3 /* r11 = count; cr0 = count compared with 0 */
+ ble 0,.L36 /* count <= 0: return 0 */
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7 /* stride <= 0: return with r3 = 0 */
+ cmpdi 7,5,1
+ beq 7,.L69 /* stride == 1: contiguous path */
+ rldicr. 7,11,0,61 /* r7 = count with the low 2 bits cleared (multiple of 4) */
+ beq 0,.L40 /* fewer than 4 elements: only the one-at-a-time loop runs */
+ sldi 10,5,1
+ sldi 6,5,2 /* r6 = one stride in bytes (4 bytes per element) */
+ sldi 0,5,4 /* r0 = four strides in bytes */
+ sldi 3,5,3 /* r3 = two strides in bytes */
+ mr 9,4 /* r9 = read pointer */
+ xxlxor 0,0,0 /* f0 = running maximum, starts at 0.0 */
+ li 8,0 /* r8 = 0-based index of the current maximum */
+ add 5,10,5
+ li 10,0 /* r10 = index of the first element of the current group of four */
+ sldi 5,5,2 /* r5 = three strides in bytes */
+ .p2align 4,,15
+.L31: /* strided loop unrolled by four; each step updates only on strictly greater */
+ lfs 12,0(9)
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L23
+ fmr 0,12
+ mr 8,10
+.L23:
+ lfsx 12,9,6
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L25
+ fmr 0,12
+ addi 8,10,1
+.L25:
+ lfsx 12,9,3
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L27
+ fmr 0,12
+ addi 8,10,2
+.L27:
+ lfsx 12,9,5
+ add 9,9,0
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L29
+ fmr 0,12
+ addi 8,10,3
+.L29:
+ addi 10,10,4
+ cmpd 7,7,10
+ bgt 7,.L31 /* loop while r7 > r10 */
+ addi 7,7,-1
+ srdi 7,7,2
+ addi 7,7,1 /* r7 = number of groups of four that were processed */
+ sldi 9,7,2 /* r9 = number of elements already processed */
+ mulld 7,6,7
+ cmpd 7,11,9
+ ble 7,.L67 /* nothing left: return */
+.L22: /* remaining elements, one per trip, starting at index r9 */
+ addi 10,9,1
+ sldi 7,7,2 /* r7 = byte offset of the first remaining element */
+ subf 5,9,11 /* remaining element count */
+ cmpd 7,10,11
+ mtctr 5
+ add 4,4,7
+ bgt 7,.L54 /* r9 + 1 > count: .L54 forces a trip count of 1 */
+ li 3,-1
+ rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
+ cmpd 7,11,3
+ beq 7,.L54
+ .p2align 4,,15
+.L35:
+ lfs 12,0(4)
+ add 4,4,6
+ fabs 12,12
+ fcmpu 7,12,0
+ bng 7,.L33
+ fmr 0,12
+ mr 8,9
+.L33:
+ addi 9,9,1
+ bdnz .L35
+.L67:
+ addi 3,8,1 /* convert the 0-based index to the 1-based result */
+ blr
+ .p2align 4,,15
+.L36:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L69: /* contiguous case (stride == 1) */
+ rldicr. 10,11,0,57 /* r10 = count with the low 6 bits cleared (multiple of 64) */
+ bne 0,.L70 /* r10 != 0: take the vector path */
+ addi 7,10,1
+ sldi 9,10,2
+ subf 6,10,11 /* r6 = count - r10 = trip count of the scalar loop */
+ li 8,0 /* r8 = 0-based index of the current maximum */
+ xxlxor 12,12,12 /* f12 = running maximum, starts at 0.0 */
+ cmpd 7,7,11
+ mtctr 6
+ add 4,4,9
+ bgt 7,.L60 /* r10 + 1 > count: .L60 forces a trip count of 1 */
+ li 3,-1
+ rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
+ cmpd 7,11,3
+ beq 7,.L60 /* count equal to that value: also forced to a single trip */
+ .p2align 4,,15
+.L61:
+ lfs 0,0(4)
+ addi 4,4,4 /* advance one 4-byte element */
+ fabs 0,0
+ fcmpu 7,0,12
+ bng 7,.L63 /* update only when strictly greater than the running maximum */
+ fmr 12,0
+ mr 8,10
+.L63:
+ addi 10,10,1 /* r10 = index of the next element */
+ bdnz .L61
+ b .L67 /* common exit returns r8 + 1 */
+ .p2align 4,,15
+.L70: /* vector path setup; r10 = number of elements the vector loop will consume */
+ addis 6,2,.LC2@toc@ha /* r6-r9 receive TOC-relative addresses of the .LC2-.LC5 index constants */
+ addis 7,2,.LC3@toc@ha
+ addis 8,2,.LC4@toc@ha
+ addis 9,2,.LC5@toc@ha
+ xxspltib 46,0 /* vs46 (v14) = 0: per-lane index of the running maximum */
+ stxv 61,-48(1) /* vs59-vs63 are spilled below r1 in this block and reloaded at .L16 */
+ stxv 62,-32(1)
+ addi 6,6,.LC2@toc@l
+ addi 7,7,.LC3@toc@l
+ stxv 63,-16(1)
+ xxspltib 61,32
+ xxspltib 63,16
+ xxspltib 62,64
+ addi 8,8,.LC4@toc@l
+ addi 9,9,.LC5@toc@l
+ lxv 47,0(6) /* vs47-vs50 = .LC2-.LC5 */
+ xxspltib 34,0 /* vs34 = 0.0: per-lane running maximum */
+ lxv 48,0(7)
+ xxlor 51,46,46 /* vs51 (v19) = 0: index base of the current block */
+ lxv 49,0(8)
+ lxv 50,0(9)
+ li 8,0 /* r8 = elements consumed so far */
+ mr 9,4 /* r9 = read pointer */
+ vextsb2w 29,29 /* v29 = 32 in every word */
+ vextsb2w 31,31 /* v31 = 16 in every word */
+ vextsb2w 30,30 /* v30 = 64 in every word */
+ stxv 59,-80(1)
+ stxv 60,-64(1)
+ .p2align 4,,15
+.L5: /* main loop: one trip consumes 256 bytes = 64 floats */
+ lxv 0,0(9)
+ vadduwm 27,19,29 /* v27 = block base + 32 */
+ lxv 12,240(9)
+ addi 8,8,64
+ addi 9,9,256 /* pointer is bumped early; the remaining loads use negative offsets */
+ cmpd 7,10,8 /* cr7 is consumed by the bgt that closes the loop */
+ xvabssp 44,0
+ lxv 0,-240(9)
+ xvabssp 12,12
+ xvabssp 5,0
+ lxv 0,-224(9)
+ xvabssp 32,0
+ lxv 0,-208(9)
+ xvcmpgtsp 35,5,44 /* each compare/select group keeps the larger value and the matching index constant */
+ xvabssp 9,0
+ lxv 0,-192(9)
+ xxsel 5,44,5,35
+ xxsel 35,47,48,35
+ xvabssp 1,0
+ lxv 0,-176(9)
+ xvcmpgtsp 60,9,32
+ xvabssp 6,0
+ lxv 0,-160(9)
+ xxsel 9,32,9,60
+ xxsel 60,49,50,60
+ xvabssp 13,0
+ lxv 0,-144(9)
+ xvcmpgtsp 42,9,5
+ xvcmpgtsp 37,6,1
+ xvabssp 11,0
+ lxv 0,-128(9)
+ xxsel 9,5,9,42
+ xxsel 42,35,60,42
+ xxsel 6,1,6,37
+ xxsel 37,47,48,37
+ xvabssp 2,0
+ lxv 0,-112(9)
+ xvcmpgtsp 36,11,13
+ xvabssp 7,0
+ lxv 0,-96(9)
+ xxsel 11,13,11,36
+ xxsel 36,49,50,36
+ xvabssp 3,0
+ lxv 0,-80(9)
+ xvcmpgtsp 45,11,6
+ xvcmpgtsp 39,7,2
+ xvabssp 10,0
+ lxv 0,-64(9)
+ xxsel 7,2,7,39
+ xxsel 39,47,48,39
+ xvabssp 4,0
+ lxv 0,-48(9)
+ xvcmpgtsp 38,10,3
+ xvabssp 8,0
+ lxv 0,-32(9)
+ xxsel 10,3,10,38
+ xxsel 38,49,50,38
+ xvabssp 0,0
+ xvcmpgtsp 43,10,7
+ xvcmpgtsp 41,8,4
+ xvcmpgtsp 40,12,0
+ xxsel 8,4,8,41
+ xxsel 41,47,48,41
+ xxsel 0,0,12,40
+ xxsel 12,6,11,45
+ xxsel 11,7,10,43
+ xxsel 45,37,36,45
+ xvcmpgtsp 33,0,8
+ xvcmpgtsp 32,12,9
+ vadduwm 13,13,31 /* add 16 to the indices held in v13 */
+ xxsel 40,49,50,40
+ xxsel 43,39,38,43
+ xxsel 0,8,0,33
+ xxsel 12,9,12,32
+ xxsel 33,41,40,33
+ xxsel 32,42,45,32
+ xvcmpgtsp 44,0,11
+ vadduwm 1,1,31 /* add 16 to the indices held in v1 */
+ vadduwm 0,19,0 /* add the block base to the indices in v0 */
+ vadduwm 19,19,30 /* base += 64 for the next block */
+ xxsel 0,11,0,44
+ xxsel 33,43,33,44
+ xvcmpgtsp 45,0,12
+ vadduwm 1,27,1 /* add base + 32 to the indices in v1 */
+ xxsel 0,12,0,45
+ xxsel 32,32,33,45
+ xvcmpgtsp 33,0,34 /* lanes where this block's winner exceeds the running maximum */
+ xxsel 46,46,32,33 /* update the running indices */
+ xxsel 34,34,0,33 /* update the running maxima */
+ bgt 7,.L5 /* loop while r10 > r8 */
+ xxsldwi 12,34,34,3 /* horizontal reduction: rotated copies of vs34 so each lane can be converted below */
+ xxsldwi 11,34,34,2
+ li 9,0
+ li 8,12
+ xxsldwi 0,34,34,1
+ xscvspdp 34,34 /* lane values are converted to double for the scalar compares */
+ vextuwrx 3,9,14 /* r3,r6,r7,r9 = index words of v14 at byte offsets 0,4,8,12 */
+ li 9,4
+ xscvspdp 12,12
+ xscvspdp 11,11
+ xscvspdp 0,0
+ vextuwrx 6,9,14
+ li 9,8
+ vextuwrx 7,9,14
+ vextuwrx 9,8,14
+ rldicl 12,6,0,32 /* zero-extend the 32-bit indices */
+ rldicl 8,3,0,32
+ rldicl 0,7,0,32
+ rldicl 5,9,0,32
+ fcmpu 7,12,11
+ fmr 10,0
+ beq 7,.L71 /* equal values: .L71 keeps the smaller index */
+ bnl 7,.L8
+ mr 8,12
+ fmr 12,11
+.L8:
+ xscmpudp 7,0,34
+ bne 7,.L11
+ cmplw 7,7,9 /* equal values: keep the smaller index */
+ ble 7,.L12
+ mr 7,9
+.L12:
+ rldicl 5,7,0,32
+.L13:
+ fcmpu 7,12,10 /* final compare of the two pair winners */
+ beq 7,.L72
+ bnl 7,.L17
+ mr 8,5
+ fmr 12,10
+.L17: /* f12 / r8 = maximum found by the vector part and its 0-based index */
+ cmpd 7,11,10
+ ble 7,.L16 /* count <= r10: no scalar tail */
+ addi 7,10,1
+ sldi 9,10,2 /* r9 = r10 * 4 = byte offset of the first remaining element */
+ subf 6,10,11 /* r6 = number of remaining elements */
+ cmpd 7,7,11
+ mtctr 6
+ add 4,4,9
+ bgt 7,.L53 /* r10 + 1 > count: .L53 forces a trip count of 1 */
+ li 3,-1
+ rldicr 3,3,0,0
+ cmpd 7,11,3
+ beq 7,.L53
+ .p2align 4,,15
+.L21: /* scalar tail loop, indices continue from r10 */
+ lfs 0,0(4)
+ addi 4,4,4
+ fabs 0,0
+ fcmpu 7,0,12
+ bng 7,.L19 /* update only when strictly greater */
+ fmr 12,0
+ mr 8,10
+.L19:
+ addi 10,10,1
+ bdnz .L21
+.L16: /* epilogue of the vector path */
+ lxv 59,-80(1) /* reload vs59-vs63 from the slots written at .L70 */
+ lxv 60,-64(1)
+ addi 3,8,1 /* 1-based result */
+ lxv 61,-48(1)
+ lxv 62,-32(1)
+ lxv 63,-16(1)
+ blr
+ .p2align 4,,15
+.L71: /* tie between the first lane pair: r8 = smaller of r3 and r6 (unsigned 32-bit compare) */
+ cmplw 7,3,6
+ ble 7,.L7
+ mr 3,6
+.L7:
+ rldicl 8,3,0,32
+ b .L8
+ .p2align 4,,15
+.L40: /* strided path with fewer than 4 elements: set up state and join the one-at-a-time loop */
+ sldi 6,5,2
+ li 8,0
+ li 9,0
+ xxlxor 0,0,0
+ b .L22
+ .p2align 4,,15
+.L11: /* second lane pair differs: pick the larger value and its index */
+ blt 7,.L39
+ mr 5,0
+ b .L13
+ .p2align 4,,15
+.L72: /* tie between the pair winners: keep the smaller index */
+ cmpd 7,8,5
+ ble 7,.L17
+ mr 8,5
+ b .L17
+ .p2align 4,,15
+.L39:
+ xscpsgndp 10,34,34 /* f10 = vs34 (register copy) */
+ b .L13
+.L53: /* force a single trip of the tail loop */
+ li 9,1
+ mtctr 9
+ b .L21
+.L54: /* force a single trip of the strided remainder loop */
+ li 10,1
+ mtctr 10
+ b .L35
+.L60: /* force a single trip of the short contiguous loop */
+ li 9,1
+ mtctr 9
+ b .L61
+ .long 0 /* NOTE(review): this word and the following bytes look like a compiler-emitted trailer table - confirm */
+ .byte 0,0,0,0,0,0,0,0
+ .size isamax_k,.-isamax_k
+ .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by the vector path */
+ .align 4
+.LC2: /* .LC2-.LC5: element index constants 0..15, four per vector */
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC3:
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC4:
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC5:
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+/* .file "isamin.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl isamin_k
+ .type isamin_k, @function
+*/
+#define ASSEMBLER
+#include "common.h"
+
+ PROLOGUE
+
+isamin_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry isamin_k,.-isamin_k
+ mr. 11,3
+ ble 0,.L36
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7
+ lfs 0,0(4)
+ li 0,-48
+ cmpdi 7,5,1
+ stvx 30,1,0
+ li 0,-32
+ stvx 31,1,0
+ fabs 0,0
+ beq 7,.L62
+ rldicr. 6,11,0,61
+ beq 0,.L40
+ sldi 0,5,1
+ sldi 12,5,2
+ std 31,-8(1)
+ add 0,0,5
+ neg 31,5
+ sldi 3,5,4
+ sldi 0,0,2
+ add 7,4,12
+ sldi 31,31,2
+ sldi 5,5,3
+ li 9,0
+ li 10,0
+ b .L24
+ .p2align 4,,15
+.L41:
+ mr 10,9
+.L25:
+ fmr 0,12
+ add 7,7,3
+.L24:
+ lfs 12,0(7)
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L26
+ fmr 0,12
+ addi 10,9,1
+.L26:
+ add 8,31,7
+ lfsx 12,8,5
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L28
+ fmr 0,12
+ addi 10,9,2
+.L28:
+ lfsx 12,8,0
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L30
+ fmr 0,12
+ addi 10,9,3
+.L30:
+ addi 9,9,4
+ cmpd 7,6,9
+ ble 7,.L63
+ lfsx 12,8,3
+ fabs 12,12
+ fcmpu 7,12,0
+ blt 7,.L41
+ fmr 12,0
+ b .L25
+ .p2align 4,,15
+.L36:
+ li 3,0
+ blr
+ .p2align 4,,15
+.L63:
+ addi 6,6,-1
+ ld 31,-8(1)
+ srdi 6,6,2
+ addi 6,6,1
+ sldi 9,6,2
+ mulld 6,12,6
+ cmpd 7,11,9
+ ble 7,.L33
+.L23:
+ addi 8,9,1
+ sldi 6,6,2
+ cmpd 7,8,11
+ subf 8,9,11
+ mtctr 8
+ add 4,4,6
+ bgt 7,.L52
+ li 3,-1
+ rldicr 3,3,0,0
+ cmpd 7,11,3
+ beq 7,.L52
+ .p2align 4,,15
+.L35:
+ lfs 12,0(4)
+ add 4,4,12
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L34
+ fmr 0,12
+ mr 10,9
+.L34:
+ addi 9,9,1
+ bdnz .L35
+.L33:
+ li 0,-48
+ addi 3,10,1
+ lvx 30,1,0
+ li 0,-32
+ lvx 31,1,0
+ blr
+ .p2align 4,,15
+.L62:
+ rldicr. 8,11,0,57
+ li 10,0
+ bne 0,.L64
+.L4:
+ addi 7,8,1
+ sldi 9,8,2
+ cmpd 7,7,11
+ add 4,4,9
+ subf 9,8,11
+ mtctr 9
+ bgt 7,.L51
+ li 3,-1
+ rldicr 3,3,0,0
+ cmpd 7,11,3
+ beq 7,.L51
+ .p2align 4,,15
+.L22:
+ lfs 12,0(4)
+ addi 4,4,4
+ fabs 12,12
+ fcmpu 7,0,12
+ bng 7,.L21
+ fmr 0,12
+ mr 10,8
+.L21:
+ addi 8,8,1
+ bdnz .L22
+ li 0,-48
+ addi 3,10,1
+ lvx 30,1,0
+ li 0,-32
+ lvx 31,1,0
+ blr
+ .p2align 4,,15
+.L64:
+ lxvd2x 4,0,4
+ addis 10,2,.LC2@toc@ha
+ addis 5,2,.LC3@toc@ha
+ std 31,-8(1)
+ vspltisw 2,0
+ addi 10,10,.LC2@toc@l
+ addis 7,2,.LC4@toc@ha
+ addis 9,2,.LC5@toc@ha
+ addis 6,2,.LC6@toc@ha
+ lxvd2x 51,0,10
+ addis 10,2,.LC7@toc@ha
+ addi 7,7,.LC4@toc@l
+ addi 9,9,.LC5@toc@l
+ addi 5,5,.LC3@toc@l
+ xvabssp 4,4
+ addi 6,6,.LC6@toc@l
+ addi 10,10,.LC7@toc@l
+ lxvd2x 36,0,7
+ vspltisw 18,8
+ lxvd2x 37,0,9
+ lxvd2x 35,0,5
+ mr 9,4
+ li 7,0
+ lxvd2x 48,0,6
+ lxvd2x 49,0,10
+ vadduwm 18,18,18
+ xxlor 38,51,51
+ xxlor 40,4,4
+ b .L6
+ .p2align 4,,15
+.L65:
+ lxvd2x 5,0,9
+ xvabssp 40,5
+.L6:
+ addi 5,9,16
+ addi 6,9,32
+ vadduwm 14,2,16
+ addi 10,9,64
+ addi 12,9,48
+ addi 31,9,80
+ addi 3,9,96
+ lxvd2x 5,0,5
+ lxvd2x 42,0,6
+ addi 5,9,112
+ addi 6,9,128
+ lxvd2x 44,0,10
+ lxvd2x 9,0,12
+ addi 10,9,160
+ addi 12,9,144
+ lxvd2x 6,0,31
+ lxvd2x 1,0,3
+ addi 31,9,176
+ addi 3,9,192
+ lxvd2x 11,0,5
+ lxvd2x 13,0,6
+ addi 5,9,208
+ addi 6,9,224
+ lxvd2x 2,0,10
+ lxvd2x 7,0,12
+ addi 10,9,240
+ lxvd2x 10,0,31
+ lxvd2x 3,0,3
+ xvabssp 42,42
+ xvabssp 5,5
+ addi 7,7,64
+ lxvd2x 8,0,5
+ lxvd2x 0,0,6
+ xvabssp 44,44
+ xvabssp 9,9
+ cmpd 7,8,7
+ addi 9,9,256
+ lxvd2x 12,0,10
+ xvabssp 6,6
+ xvabssp 1,1
+ xvabssp 11,11
+ xvabssp 13,13
+ xvabssp 7,7
+ xvabssp 2,2
+ xvabssp 10,10
+ xvabssp 3,3
+ xvabssp 8,8
+ xvabssp 0,0
+ xvabssp 12,12
+ xvcmpgtsp 32,40,5
+ xvcmpgtsp 62,42,9
+ xvcmpgtsp 45,44,6
+ xvcmpgtsp 63,1,11
+ xvcmpgtsp 39,13,7
+ xvcmpgtsp 47,2,10
+ xvcmpgtsp 41,3,8
+ xvcmpgtsp 33,0,12
+ xxsel 5,40,5,32
+ xxsel 32,38,35,32
+ xxsel 9,42,9,62
+ xxsel 6,44,6,45
+ xxsel 11,1,11,63
+ xxsel 44,38,35,45
+ xxsel 7,13,7,39
+ xvcmpgtsp 42,5,9
+ xxsel 10,2,10,47
+ xvcmpgtsp 45,6,11
+ xxsel 8,3,8,41
+ xxsel 63,36,37,63
+ xxsel 0,0,12,33
+ xvcmpgtsp 43,7,10
+ xxsel 40,36,37,33
+ xxsel 62,36,37,62
+ xvcmpgtsp 33,8,0
+ xxsel 41,38,35,41
+ xxsel 39,38,35,39
+ xxsel 47,36,37,47
+ xxsel 9,5,9,42
+ xxsel 42,32,62,42
+ xxsel 12,6,11,45
+ xxsel 45,44,63,45
+ xxsel 11,7,10,43
+ xvcmpgtsp 32,9,12
+ vadduwm 13,13,18
+ xxsel 43,39,47,43
+ xxsel 0,8,0,33
+ xxsel 33,41,40,33
+ xvcmpgtsp 44,11,0
+ vadduwm 1,1,18
+ xxsel 12,9,12,32
+ xxsel 32,42,45,32
+ vadduwm 0,2,0
+ vadduwm 2,2,17
+ xxsel 0,11,0,44
+ xxsel 33,43,33,44
+ xvcmpgtsp 45,12,0
+ vadduwm 1,14,1
+ xxsel 0,12,0,45
+ xxsel 32,32,33,45
+ xvcmpgtsp 33,4,0
+ xxsel 51,51,32,33
+ xxsel 4,4,0,33
+ bgt 7,.L65
+ xxsldwi 0,4,4,1
+ xscvspdp 10,4
+ vspltw 0,19,0
+ xxsldwi 12,4,4,3
+ xscvspdp 0,0
+ mfvsrwz 3,51
+ mfvsrwz 6,32
+ vspltw 0,19,3
+ xscvspdp 12,12
+ xxsldwi 4,4,4,2
+ mfvsrwz 7,32
+ vspltw 0,19,2
+ xscvspdp 4,4
+ mfvsrwz 9,32
+ fcmpu 7,0,10
+ rldicl 10,3,0,32
+ rldicl 31,6,0,32
+ fmr 11,12
+ rldicl 5,7,0,32
+ rldicl 0,9,0,32
+ beq 7,.L66
+ bng 7,.L9
+ fmr 0,10
+ mr 10,31
+.L9:
+ fcmpu 7,12,4
+ bne 7,.L12
+ cmplw 7,7,9
+ ble 7,.L13
+ mr 7,9
+.L13:
+ rldicl 5,7,0,32
+.L14:
+ fcmpu 7,0,11
+ beq 7,.L67
+ bng 7,.L19
+ fmr 0,11
+ mr 10,5
+.L19:
+ cmpd 7,11,8
+ ld 31,-8(1)
+ bgt 7,.L4
+ b .L33
+ .p2align 4,,15
+.L66:
+ cmplw 7,3,6
+ ble 7,.L8
+ mr 3,6
+.L8:
+ rldicl 10,3,0,32
+ b .L9
+ .p2align 4,,15
+.L40:
+ sldi 12,5,2
+ li 10,0
+ li 9,0
+ b .L23
+ .p2align 4,,15
+.L12:
+ bng 7,.L14
+ fmr 11,4
+ mr 5,0
+ b .L14
+ .p2align 4,,15
+.L67:
+ cmpd 7,10,5
+ ble 7,.L19
+ mr 10,5
+ b .L19
+.L51:
+ li 9,1
+ mtctr 9
+ b .L22
+.L52:
+ li 8,1
+ mtctr 8
+ b .L35
+ .long 0
+ .byte 0,0,0,0,0,1,0,0
+ .size isamin_k,.-isamin_k
+ .section .rodata.cst16,"aM",@progbits,16 # 16-byte constants; addressed through r2 (@toc) and loaded with lxvd2x in the .L64 setup
+ .align 4
+.LC2: # loaded into vs51 and copied to vs38 in the .L64 setup
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC3: # loaded into vs35
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC4: # loaded into vs36
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC5: # loaded into vs37
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+.LC6: # 32 in every word; loaded into vs48 (v16) and added to v2 at the top of the .L6 loop
+ .long 32
+ .long 32
+ .long 32
+ .long 32
+.LC7: # 64 in every word; loaded into vs49 (v17) and added to v2 once per .L6 iteration
+ .long 64
+ .long 64
+ .long 64
+ .long 64
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+ .file "isamin.c" # isamin_k: returns index + 1 of the element with the smallest absolute value
+ .abiversion 2 # ELFv2
+ .section ".text"
+ .align 2 # In:  r3 = element count, r4 = pointer to single-precision data, r5 = stride in elements
+ .p2align 4,,15 # Out: r3 = 0 when the count or the stride is <= 0, otherwise index + 1
+ .globl isamin_k
+ .type isamin_k, @function
+isamin_k:
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha # global entry: compute the TOC pointer r2 from r12
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry isamin_k,.-isamin_k
+ mr. 11,3 # r11 = count for the rest of the function; cr0 reflects its sign
+ ble 0,.L36 # count <= 0: return 0
+ cmpdi 7,5,0
+ li 3,0
+ blelr 7 # stride <= 0: return 0
+ lfs 0,0(4) # f0 = element 0
+ cmpdi 7,5,1 # stride == 1 is tested by the beq below
+ stxv 61,-64(1) # vs61-vs63 are stored below r1 without moving r1
+ stxv 62,-48(1) # they are reloaded before every return that follows
+ stxv 63,-32(1)
+ fabs 0,0 # f0 = running minimum of the absolute values
+ beq 7,.L62 # unit stride: contiguous path
+ rldicr. 6,11,0,61 # r6 = count with the low two bits cleared
+ beq 0,.L40 # fewer than 4 elements: only the one-at-a-time loop runs
+ sldi 8,5,1 # r8 = 2 * stride
+ sldi 0,5,2 # r0 = stride in bytes
+ neg 12,5
+ std 31,-8(1) # r31 is used as scratch below; keep its old value below r1
+ sldi 3,5,4 # r3 = byte distance of 4 strides
+ sldi 31,5,3 # r31 = byte distance of 2 strides
+ li 9,0 # r9 = index of the first element of the current group of 4
+ li 10,0 # r10 = index of the current minimum
+ add 5,8,5 # r5 = 3 * stride
+ add 7,4,0 # r7 = address of element 1
+ sldi 12,12,2 # r12 = minus one stride in bytes
+ sldi 5,5,2 # r5 = byte distance of 3 strides
+ b .L24
+ .p2align 4,,15
+.L41:
+ mr 10,9 # the first element of the new group is the new minimum
+.L25:
+ add 7,7,3 # advance r7 by 4 strides
+ fmr 0,12 # f12 holds either the new minimum or a copy of the old one
+.L24: # strided loop, 4 elements per pass; element r9 was already compared
+ lfs 12,0(7) # element r9+1
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L26 # replace the minimum only when strictly smaller
+ fmr 0,12
+ addi 10,9,1
+.L26:
+ add 8,7,12 # r8 = address of element r9
+ lfsx 12,8,31 # element r9+2
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L28
+ fmr 0,12
+ addi 10,9,2
+.L28:
+ lfsx 12,8,5 # element r9+3
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L30
+ fmr 0,12
+ addi 10,9,3
+.L30:
+ addi 9,9,4
+ cmpd 7,6,9
+ ble 7,.L63 # all full groups of 4 are done
+ lfsx 12,8,3 # first element of the next group (now index r9)
+ fabs 12,12
+ fcmpu 7,12,0
+ blt 7,.L41
+ fmr 12,0 # not smaller: copy f0 into f12 so the fmr at .L25 keeps the minimum
+ b .L25
+ .p2align 4,,15
+.L36:
+ li 3,0 # return 0; reached only before any register was saved
+ blr
+ .p2align 4,,15
+.L63: # strided path: full groups finished
+ addi 6,6,-1
+ ld 31,-8(1) # restore r31
+ srdi 6,6,2
+ addi 6,6,1 # r6 = number of groups of 4 processed
+ sldi 9,6,2 # r9 = number of elements processed
+ mulld 6,0,6 # r6 = groups * stride in bytes
+ cmpd 7,11,9
+ ble 7,.L33 # no elements left
+.L23: # strided remainder; also entered from .L40 with r9 = r6 = 0
+ addi 8,9,1
+ sldi 6,6,2 # r6 = byte offset of element r9
+ subf 7,9,11 # r7 = count - r9
+ cmpd 7,8,11
+ mtctr 7 # loop counter = remaining elements
+ add 4,4,6 # r4 = address of element r9
+ bgt 7,.L52 # r9+1 > count: force a single iteration
+ li 3,-1
+ rldicr 3,3,0,0 # r3 = 0x8000000000000000
+ cmpd 7,11,3
+ beq 7,.L52 # count equal to that value: force a single iteration
+ .p2align 4,,15
+.L35:
+ lfs 12,0(4)
+ add 4,4,0 # step r4 by one stride (r0 bytes)
+ fabs 12,12
+ fcmpu 7,12,0
+ bnl 7,.L34 # replace the minimum only when strictly smaller
+ fmr 0,12
+ mr 10,9
+.L34:
+ addi 9,9,1
+ bdnz .L35
+.L33: # common exit
+ lxv 61,-64(1) # reload vs61-vs63
+ lxv 62,-48(1)
+ addi 3,10,1 # return value = index of the minimum + 1
+ lxv 63,-32(1)
+ blr
+ .p2align 4,,15
+.L62: # unit-stride path
+ rldicr. 8,11,0,57 # r8 = count with the low six bits cleared
+ li 10,0 # r10 = index of the current minimum
+ bne 0,.L64 # at least 64 elements: run the vector loop first
+.L4: # scalar loop over elements r8 .. count-1
+ addi 7,8,1
+ sldi 9,8,2 # r9 = byte offset of element r8
+ subf 6,8,11 # r6 = count - r8
+ cmpd 7,7,11
+ mtctr 6 # loop counter = remaining elements
+ add 4,4,9 # r4 = address of element r8
+ bgt 7,.L51 # r8+1 > count: force a single iteration
+ li 3,-1
+ rldicr 3,3,0,0 # r3 = 0x8000000000000000
+ cmpd 7,11,3
+ beq 7,.L51 # count equal to that value: force a single iteration
+ .p2align 4,,15
+.L22:
+ lfs 12,0(4)
+ addi 4,4,4 # next float
+ fabs 12,12
+ fcmpu 7,0,12
+ bng 7,.L21 # replace the minimum only when it is strictly greater
+ fmr 0,12
+ mr 10,8
+.L21:
+ addi 8,8,1
+ bdnz .L22
+ lxv 61,-64(1) # reload vs61-vs63
+ lxv 62,-48(1)
+ addi 3,10,1 # return value = index of the minimum + 1
+ lxv 63,-32(1)
+ blr
+ .p2align 4,,15
+.L64: # vector loop setup; the loop consumes r8 elements, 64 per iteration
+ lxv 0,0(4) # first 4 floats
+ xxspltib 47,16 # v15: 16 in every word after the vextsb2w below
+ addis 6,2,.LC2@toc@ha
+ addis 7,2,.LC3@toc@ha
+ addis 10,2,.LC4@toc@ha
+ addis 9,2,.LC5@toc@ha
+ xxspltib 63,32 # v31: 32 in every word after the vextsb2w below
+ xxspltib 46,64 # v14: 64 in every word after the vextsb2w below
+ addi 6,6,.LC2@toc@l
+ addi 10,10,.LC4@toc@l
+ addi 7,7,.LC3@toc@l
+ std 31,-8(1) # r31 is used as scratch after the loop; keep its old value below r1
+ addi 9,9,.LC5@toc@l
+ xxspltib 50,0 # v18 = 0: index base of the current 64-element block
+ vextsb2w 15,15
+ lxv 48,0(6) # vs48 (v16) = .LC2; becomes the per-lane index of the running minimum
+ lxv 51,0(10) # vs51 = .LC4
+ vextsb2w 31,31
+ vextsb2w 14,14
+ xvabssp 4,0 # vs4 = per-lane running minimum, seeded with the first 4 absolute values
+ lxv 34,0(9) # vs34 = .LC5
+ lxv 49,0(7) # vs49 = .LC3
+ mr 9,4 # r9 = read cursor
+ li 10,0 # r10 = number of elements consumed
+ xxlor 35,48,48 # vs35 = copy of .LC2, because vs48 is overwritten in the loop
+ xxlor 40,4,4 # vs40 = absolute values of the first vector for the first pass
+ b .L6
+ .p2align 4,,15
+.L65:
+ lxv 0,0(9)
+ xvabssp 40,0 # vs40 = absolute values of the first vector of this block
+.L6: # each xvcmpgtsp/xxsel pair keeps the per-lane smaller value; the same mask picks the matching index vector
+ lxv 0,16(9)
+ vadduwm 29,18,31 # v29 = block base + 32
+ lxv 12,240(9)
+ addi 10,10,64
+ addi 9,9,256 # cursor moves on by 64 floats; the loads below use negative offsets
+ cmpd 7,8,10 # consumed by the bgt at the bottom of the loop
+ xvabssp 5,0
+ lxv 0,-224(9)
+ xvabssp 12,12
+ xvabssp 32,0
+ lxv 0,-208(9)
+ xvcmpgtsp 42,40,5
+ xvabssp 9,0
+ lxv 0,-192(9)
+ xxsel 5,40,5,42
+ xvabssp 44,0
+ lxv 0,-176(9)
+ xvcmpgtsp 62,32,9
+ xvabssp 6,0
+ lxv 0,-160(9)
+ xxsel 9,32,9,62
+ xxsel 32,35,49,42 # index candidates: .LC2 or .LC3 words
+ xvabssp 1,0
+ lxv 0,-144(9)
+ xxsel 62,51,34,62 # index candidates: .LC4 or .LC5 words
+ xvcmpgtsp 42,5,9
+ xvcmpgtsp 37,44,6
+ xvabssp 11,0
+ lxv 0,-128(9)
+ xxsel 9,5,9,42 # vs9 = minimum of vectors 0-3, vs42 = its index within the 16
+ xxsel 42,32,62,42
+ xxsel 6,44,6,37
+ xxsel 37,35,49,37
+ xvabssp 13,0
+ lxv 0,-112(9)
+ xvcmpgtsp 36,1,11
+ xvabssp 7,0
+ lxv 0,-96(9)
+ xxsel 11,1,11,36
+ xxsel 36,51,34,36
+ xvabssp 2,0
+ lxv 0,-80(9)
+ xvcmpgtsp 45,6,11
+ xvcmpgtsp 39,13,7
+ xvabssp 10,0
+ lxv 0,-64(9)
+ xxsel 7,13,7,39
+ xxsel 39,35,49,39
+ xvabssp 3,0
+ lxv 0,-48(9)
+ xvcmpgtsp 38,2,10
+ xvabssp 8,0
+ lxv 0,-32(9)
+ xxsel 10,2,10,38
+ xxsel 38,51,34,38
+ xvabssp 0,0
+ xvcmpgtsp 43,7,10
+ xvcmpgtsp 41,3,8
+ xvcmpgtsp 33,0,12
+ xxsel 8,3,8,41
+ xxsel 41,35,49,41
+ xxsel 0,0,12,33
+ xxsel 40,51,34,33
+ xxsel 12,6,11,45 # vs12 = minimum of vectors 4-7
+ xxsel 11,7,10,43 # vs11 = minimum of vectors 8-11
+ xvcmpgtsp 33,8,0
+ xxsel 45,37,36,45
+ xvcmpgtsp 32,9,12
+ xxsel 43,39,38,43
+ vadduwm 13,13,15 # indices from vectors 4-7 are offset by 16
+ xxsel 0,8,0,33 # vs0 = minimum of vectors 12-15
+ xxsel 33,41,40,33
+ xxsel 12,9,12,32 # vs12 = minimum of vectors 0-7
+ xxsel 32,42,45,32
+ xvcmpgtsp 44,11,0
+ vadduwm 1,1,15 # indices from vectors 12-15 are offset by 16
+ vadduwm 0,18,0 # first half: add the block base to the index
+ vadduwm 18,18,14 # block base += 64 for the next iteration
+ xxsel 0,11,0,44 # vs0 = minimum of vectors 8-15
+ xxsel 33,43,33,44
+ xvcmpgtsp 45,12,0
+ vadduwm 1,29,1 # second half: add block base + 32 to the index
+ xxsel 0,12,0,45 # vs0 = per-lane minimum of the whole block, vs32 = its index
+ xxsel 32,32,33,45
+ xvcmpgtsp 33,4,0 # running minimum strictly greater than the block minimum?
+ xxsel 48,48,32,33 # then take the block index
+ xxsel 4,4,0,33 # and the block value
+ bgt 7,.L65 # loop while r8 > r10
+ xxsldwi 0,4,4,3 # reduce the 4 lanes: shifted copies of vs4, converted to scalars below
+ xxsldwi 11,4,4,2
+ li 9,0
+ li 10,12
+ xxsldwi 12,4,4,1
+ xscvspdp 4,4
+ vextuwrx 3,9,16 # r3 = index word of v16 (vs48) at byte offset 0
+ li 9,4
+ xscvspdp 0,0
+ xscvspdp 11,11
+ xscvspdp 12,12
+ vextuwrx 6,9,16 # r6 = index word at byte offset 4
+ li 9,8
+ vextuwrx 7,9,16 # r7 = index word at byte offset 8
+ vextuwrx 9,10,16 # r9 = index word at byte offset 12
+ rldicl 31,6,0,32 # zero-extended copies of the four index words
+ rldicl 10,3,0,32
+ rldicl 5,7,0,32
+ rldicl 0,9,0,32
+ fcmpu 7,0,11 # first pair: value f0 with index r10 against value f11 with index r31
+ fmr 10,12
+ beq 7,.L66 # equal values: keep the smaller index
+ bng 7,.L9
+ mr 10,31
+ fmr 0,11
+.L9:
+ fcmpu 7,12,4 # second pair: value f12 with index r5 against value f4 with index r0
+ bne 7,.L12
+ cmplw 7,7,9 # equal values: keep the smaller index
+ ble 7,.L13
+ mr 7,9
+.L13:
+ rldicl 5,7,0,32
+.L14:
+ fcmpu 7,0,10 # final: winner of the first pair against winner of the second
+ beq 7,.L67
+ bng 7,.L19
+ mr 10,5
+ fmr 0,10
+.L19: # f0 = minimum so far, r10 = its index
+ cmpd 7,11,8
+ ld 31,-8(1) # restore r31
+ bgt 7,.L4 # count > r8: finish the remaining elements in the scalar loop
+ b .L33
+ .p2align 4,,15
+.L66:
+ cmplw 7,3,6
+ ble 7,.L8
+ mr 3,6
+.L8:
+ rldicl 10,3,0,32
+ b .L9
+ .p2align 4,,15
+.L40: # strided path with fewer than 4 elements
+ sldi 0,5,2 # r0 = stride in bytes
+ li 10,0
+ li 9,0
+ b .L23
+ .p2align 4,,15
+.L12:
+ bng 7,.L14 # f12 not greater than f4: keep f10 and r5
+ mr 5,0
+ fmr 10,4
+ b .L14
+ .p2align 4,,15
+.L67:
+ cmpd 7,10,5 # equal values: keep the smaller index
+ ble 7,.L19
+ mr 10,5
+ b .L19
+.L51:
+ li 9,1
+ mtctr 9 # loop counter = 1
+ b .L22
+.L52:
+ li 8,1
+ mtctr 8 # loop counter = 1
+ b .L35
+ .long 0 # NOTE(review): these two data lines look like the gcc-emitted traceback table - confirm
+ .byte 0,0,0,0,0,1,0,0
+ .size isamin_k,.-isamin_k
+ .section .rodata.cst16,"aM",@progbits,16 # 16-byte constants; addressed through r2 (@toc) and loaded with lxv in the .L64 setup
+ .align 4
+.LC2: # loaded into vs48 and copied to vs35 in the .L64 setup
+ .long 0
+ .long 1
+ .long 2
+ .long 3
+.LC3: # loaded into vs49
+ .long 4
+ .long 5
+ .long 6
+ .long 7
+.LC4: # loaded into vs51
+ .long 8
+ .long 9
+ .long 10
+ .long 11
+.LC5: # loaded into vs34
+ .long 12
+ .long 13
+ .long 14
+ .long 15
+ .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
+ .section .note.GNU-stack,"",@progbits