x86_64: Fix svml_s_atanf16_core_avx512.S code formatting
author Sunil K Pandey <skpgkp2@gmail.com>
Mon, 7 Mar 2022 18:47:09 +0000 (10:47 -0800)
committer Sunil K Pandey <skpgkp2@gmail.com>
Tue, 8 Mar 2022 05:14:09 +0000 (21:14 -0800)
This commit contains the following formatting changes:

1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
   between them and the first operand.
3. Instructions greater than 7 characters in length have a
   space between them and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S

index 4285a4b..62d96d1 100644 (file)
 
 /* Offsets for data table __svml_satan_data_internal_avx512
  */
-#define AbsMask                        0
-#define Shifter                        64
-#define MaxThreshold                   128
-#define MOne                           192
-#define One                            256
-#define LargeX                         320
-#define Zero                           384
-#define Tbl_H                          448
-#define Pi2                            576
-#define coeff_1                        640
-#define coeff_2                        704
-#define coeff_3                        768
+#define AbsMask                                0
+#define Shifter                                64
+#define MaxThreshold                   128
+#define MOne                           192
+#define One                            256
+#define LargeX                         320
+#define Zero                           384
+#define Tbl_H                          448
+#define Pi2                            576
+#define coeff_1                                640
+#define coeff_2                                704
+#define coeff_3                                768
 
 #include <sysdep.h>
 
-        .text
-       .section .text.exex512,"ax",@progbits
+       .section .text.exex512, "ax", @progbits
 ENTRY(_ZGVeN16v_atanf_skx)
-        vandps    __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7
-        vmovups   MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3
-        vmovups   One+__svml_satan_data_internal_avx512(%rip), %zmm8
-
-/* round to 2 bits after binary point */
-        vreduceps $40, {sae}, %zmm7, %zmm5
-
-/* saturate X range */
-        vmovups   LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6
-        vmovups   Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2
-        vcmpps    $29, {sae}, %zmm3, %zmm7, %k1
-
-/* table lookup sequence */
-        vmovups   Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3
-        vsubps    {rn-sae}, %zmm5, %zmm7, %zmm4
-        vaddps    {rn-sae}, %zmm2, %zmm7, %zmm1
-        vxorps    %zmm0, %zmm7, %zmm0
-        vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8
-        vmovups   coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4
-
-/* if|X|>=MaxThreshold, set DiffX=-1 */
-        vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}
-        vmovups   coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5
-
-/* if|X|>=MaxThreshold, set Y=X */
-        vminps    {sae}, %zmm7, %zmm6, %zmm8{%k1}
-
-/* R+Rl = DiffX/Y */
-        vgetmantps $0, {sae}, %zmm9, %zmm12
-        vgetexpps {sae}, %zmm9, %zmm10
-        vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3
-        vgetmantps $0, {sae}, %zmm8, %zmm15
-        vgetexpps {sae}, %zmm8, %zmm11
-        vmovups   coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1
-
-/* set table value to Pi/2 for large X */
-        vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}
-        vrcp14ps  %zmm15, %zmm13
-        vsubps    {rn-sae}, %zmm11, %zmm10, %zmm2
-        vmulps    {rn-sae}, %zmm13, %zmm12, %zmm14
-        vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15
-        vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15
-        vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7
-
-/* polynomial evaluation */
-        vmulps    {rn-sae}, %zmm7, %zmm7, %zmm8
-        vmulps    {rn-sae}, %zmm7, %zmm8, %zmm6
-        vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4
-        vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8
-        vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8
-        vaddps    {rn-sae}, %zmm9, %zmm8, %zmm10
-        vxorps    %zmm0, %zmm10, %zmm0
-        ret
+       vandps  __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7
+       vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3
+       vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8
+
+       /* round to 2 bits after binary point */
+       vreduceps $40, {sae}, %zmm7, %zmm5
+
+       /* saturate X range */
+       vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6
+       vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2
+       vcmpps  $29, {sae}, %zmm3, %zmm7, %k1
+
+       /* table lookup sequence */
+       vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3
+       vsubps  {rn-sae}, %zmm5, %zmm7, %zmm4
+       vaddps  {rn-sae}, %zmm2, %zmm7, %zmm1
+       vxorps  %zmm0, %zmm7, %zmm0
+       vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8
+       vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4
+
+       /* if|X|>=MaxThreshold, set DiffX=-1 */
+       vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}
+       vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5
+
+       /* if|X|>=MaxThreshold, set Y=X */
+       vminps  {sae}, %zmm7, %zmm6, %zmm8{%k1}
+
+       /* R+Rl = DiffX/Y */
+       vgetmantps $0, {sae}, %zmm9, %zmm12
+       vgetexpps {sae}, %zmm9, %zmm10
+       vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3
+       vgetmantps $0, {sae}, %zmm8, %zmm15
+       vgetexpps {sae}, %zmm8, %zmm11
+       vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1
+
+       /* set table value to Pi/2 for large X */
+       vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}
+       vrcp14ps %zmm15, %zmm13
+       vsubps  {rn-sae}, %zmm11, %zmm10, %zmm2
+       vmulps  {rn-sae}, %zmm13, %zmm12, %zmm14
+       vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15
+       vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15
+       vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7
+
+       /* polynomial evaluation */
+       vmulps  {rn-sae}, %zmm7, %zmm7, %zmm8
+       vmulps  {rn-sae}, %zmm7, %zmm8, %zmm6
+       vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4
+       vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8
+       vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8
+       vaddps  {rn-sae}, %zmm9, %zmm8, %zmm10
+       vxorps  %zmm0, %zmm10, %zmm0
+       ret
 
 END(_ZGVeN16v_atanf_skx)
 
-        .section .rodata, "a"
-        .align 64
+       .section .rodata, "a"
+       .align  64
 
 #ifdef __svml_satan_data_internal_avx512_typedef
 typedef unsigned int VUINT32;
 typedef struct {
-        __declspec(align(64)) VUINT32 AbsMask[16][1];
-        __declspec(align(64)) VUINT32 Shifter[16][1];
-        __declspec(align(64)) VUINT32 MaxThreshold[16][1];
-        __declspec(align(64)) VUINT32 MOne[16][1];
-        __declspec(align(64)) VUINT32 One[16][1];
-        __declspec(align(64)) VUINT32 LargeX[16][1];
-        __declspec(align(64)) VUINT32 Zero[16][1];
-        __declspec(align(64)) VUINT32 Tbl_H[32][1];
-        __declspec(align(64)) VUINT32 Pi2[16][1];
-        __declspec(align(64)) VUINT32 coeff[3][16][1];
-    } __svml_satan_data_internal_avx512;
+       __declspec(align(64)) VUINT32 AbsMask[16][1];
+       __declspec(align(64)) VUINT32 Shifter[16][1];
+       __declspec(align(64)) VUINT32 MaxThreshold[16][1];
+       __declspec(align(64)) VUINT32 MOne[16][1];
+       __declspec(align(64)) VUINT32 One[16][1];
+       __declspec(align(64)) VUINT32 LargeX[16][1];
+       __declspec(align(64)) VUINT32 Zero[16][1];
+       __declspec(align(64)) VUINT32 Tbl_H[32][1];
+       __declspec(align(64)) VUINT32 Pi2[16][1];
+       __declspec(align(64)) VUINT32 coeff[3][16][1];
+} __svml_satan_data_internal_avx512;
 #endif
 __svml_satan_data_internal_avx512:
-        /*== AbsMask ==*/
-        .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
-        /*== Shifter ==*/
-        .align 64
-        .long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
-        /*== MaxThreshold ==*/
-        .align 64
-        .long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
-        /*== MOne ==*/
-        .align 64
-        .long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
-        /*== One ==*/
-        .align 64
-        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
-        /*== LargeX ==*/
-        .align 64
-        .long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
-        /*== Zero ==*/
-        .align 64
-        .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
-        /*== Tbl_H ==*/
-        .align 64
-        .long 0x00000000, 0x3e7adbb0
-        .long 0x3eed6338, 0x3f24bc7d
-        .long 0x3f490fdb, 0x3f6563e3
-        .long 0x3f7b985f, 0x3f869c79
-        .long 0x3f8db70d, 0x3f93877b
-        .long 0x3f985b6c, 0x3f9c6b53
-        .long 0x3f9fe0bb, 0x3fa2daa4
-        .long 0x3fa57088, 0x3fa7b46f
-        .long 0x3fa9b465, 0x3fab7b7a
-        .long 0x3fad1283, 0x3fae809e
-        .long 0x3fafcb99, 0x3fb0f836
-        .long 0x3fb20a6a, 0x3fb30581
-        .long 0x3fb3ec43, 0x3fb4c10a
-        .long 0x3fb585d7, 0x3fb63c64
-        .long 0x3fb6e62c, 0x3fb78478
-        .long 0x3fb81868, 0x3fb8a2f5
-        /*== Pi2 ==*/
-        .align 64
-        .long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
-        /*== coeff3 ==*/
-        .align 64
-        .long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
-        .long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
-        .long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
-        .align 64
-        .type  __svml_satan_data_internal_avx512,@object
-        .size  __svml_satan_data_internal_avx512,.-__svml_satan_data_internal_avx512
+       /* AbsMask */
+       .long   0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
+       /* Shifter */
+       .align  64
+       .long   0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
+       /* MaxThreshold */
+       .align  64
+       .long   0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
+       /* MOne */
+       .align  64
+       .long   0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
+       /* One */
+       .align  64
+       .long   0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+       /* LargeX */
+       .align  64
+       .long   0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
+       /* Zero */
+       .align  64
+       .long   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+       /* Tbl_H */
+       .align  64
+       .long   0x00000000, 0x3e7adbb0
+       .long   0x3eed6338, 0x3f24bc7d
+       .long   0x3f490fdb, 0x3f6563e3
+       .long   0x3f7b985f, 0x3f869c79
+       .long   0x3f8db70d, 0x3f93877b
+       .long   0x3f985b6c, 0x3f9c6b53
+       .long   0x3f9fe0bb, 0x3fa2daa4
+       .long   0x3fa57088, 0x3fa7b46f
+       .long   0x3fa9b465, 0x3fab7b7a
+       .long   0x3fad1283, 0x3fae809e
+       .long   0x3fafcb99, 0x3fb0f836
+       .long   0x3fb20a6a, 0x3fb30581
+       .long   0x3fb3ec43, 0x3fb4c10a
+       .long   0x3fb585d7, 0x3fb63c64
+       .long   0x3fb6e62c, 0x3fb78478
+       .long   0x3fb81868, 0x3fb8a2f5
+       /* Pi2 */
+       .align  64
+       .long   0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
+       /* coeff3 */
+       .align  64
+       .long   0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
+       .long   0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
+       .long   0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
+       .align  64
+       .type   __svml_satan_data_internal_avx512, @object
+       .size   __svml_satan_data_internal_avx512, .-__svml_satan_data_internal_avx512