From: Ben Crocker Date: Fri, 2 Jun 2017 23:37:55 +0000 (-0400) Subject: mapi: Enable assembly language API acceleration for PPC64LE (V2) X-Git-Tag: upstream/18.1.0~8265 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c26be2b2e920b9e79067c9ffa51cc6f122a08743;p=platform%2Fupstream%2Fmesa.git mapi: Enable assembly language API acceleration for PPC64LE (V2) Implement assembly language API acceleration for PPC64LE, analogous to long-standing implementations for X86 and X86-64. See also similar implementation in libglvnd. Tested with Piglit. Signed-off-by: Ben Crocker Acked-by: Emil Velikov Reviewed-by: Bill Schmidt --- diff --git a/configure.ac b/configure.ac index c9dc51b..7fade23 100644 --- a/configure.ac +++ b/configure.ac @@ -766,6 +766,13 @@ if test "x$enable_asm" = xyes; then ;; esac ;; + powerpc64le) + case "$host_os" in + linux*) + asm_arch=ppc64le + ;; + esac + ;; esac case "$asm_arch" in @@ -781,6 +788,10 @@ if test "x$enable_asm" = xyes; then DEFINES="$DEFINES -DUSE_SPARC_ASM" AC_MSG_RESULT([yes, sparc]) ;; + ppc64le) + DEFINES="$DEFINES -DUSE_PPC64LE_ASM" + AC_MSG_RESULT([yes, ppc64le]) + ;; *) AC_MSG_RESULT([no, platform not supported]) ;; @@ -2663,6 +2674,7 @@ AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" = xyes -o \ AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64) AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64) AM_CONDITIONAL(HAVE_SPARC_ASM, test "x$asm_arch" = xsparc) +AM_CONDITIONAL(HAVE_PPC64LE_ASM, test "x$asm_arch" = xppc64le) AC_SUBST([NINE_MAJOR], 1) AC_SUBST([NINE_MINOR], 0) diff --git a/src/mapi/Makefile.sources b/src/mapi/Makefile.sources index 37d6ef3..5647158 100644 --- a/src/mapi/Makefile.sources +++ b/src/mapi/Makefile.sources @@ -26,6 +26,8 @@ MAPI_BRIDGE_FILES = \ entry_x86-64_tls.h \ entry_x86_tls.h \ entry_x86_tsd.h \ + entry_ppc64le_tls.h \ + entry_ppc64le_tsd.h \ mapi_tmp.h MAPI_FILES = \ diff --git a/src/mapi/entry.c b/src/mapi/entry.c index 167386d..1e25012 100644 
--- a/src/mapi/entry.c +++ b/src/mapi/entry.c @@ -25,8 +25,12 @@ * Chia-I Wu */ +#include <stdlib.h> +#include <stdint.h> + #include "entry.h" #include "u_current.h" +#include "util/u_endian.h" #define _U_STRINGIFY(x) #x #define U_STRINGIFY(x) _U_STRINGIFY(x) @@ -49,10 +53,14 @@ # endif #elif defined(USE_X86_64_ASM) && defined(__GNUC__) && defined(GLX_USE_TLS) # include "entry_x86-64_tls.h" +#elif defined(USE_PPC64LE_ASM) && defined(__GNUC__) && defined(PIPE_ARCH_LITTLE_ENDIAN) +# ifdef GLX_USE_TLS +# include "entry_ppc64le_tls.h" +# else +# include "entry_ppc64le_tsd.h" +# endif #else -#include <stdlib.h> - static inline const struct _glapi_table * entry_current_get(void) { diff --git a/src/mapi/entry_ppc64le_tls.h b/src/mapi/entry_ppc64le_tls.h new file mode 100644 index 0000000..e09a117 --- /dev/null +++ b/src/mapi/entry_ppc64le_tls.h @@ -0,0 +1,152 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2017 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Ben Crocker + */ + +#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY +#define HIDDEN __attribute__((visibility("hidden"))) +#else +#define HIDDEN +#endif + +// NOTE: These must be powers of two: +#define PPC64LE_ENTRY_SIZE 64 +#define PPC64LE_PAGE_ALIGN 65536 +#if ((PPC64LE_ENTRY_SIZE & (PPC64LE_ENTRY_SIZE - 1)) != 0) +#error PPC64LE_ENTRY_SIZE must be a power of two! +#endif +#if ((PPC64LE_PAGE_ALIGN & (PPC64LE_PAGE_ALIGN - 1)) != 0) +#error PPC64LE_PAGE_ALIGN must be a power of two! +#endif + +__asm__(".text\n" + ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n" + "ppc64le_entry_start:"); + +#define STUB_ASM_ENTRY(func) \ + ".globl " func "\n" \ + ".type " func ", @function\n" \ + ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n" \ + func ":\n\t" \ + " addis 2, 12, .TOC.-" func "@ha\n\t" \ + " addi 2, 2, .TOC.-" func "@l\n\t" \ + " .localentry " func ", .-" func "\n\t" + +#define STUB_ASM_CODE(slot) \ + " addis 11, 2, " ENTRY_CURRENT_TABLE "@got@tprel@ha\n\t" \ + " ld 11, " ENTRY_CURRENT_TABLE "@got@tprel@l(11)\n\t" \ + " add 11, 11," ENTRY_CURRENT_TABLE "@tls\n\t" \ + " ld 11, 0(11)\n\t" \ + " ld 12, " slot "*8(11)\n\t" \ + " mtctr 12\n\t" \ + " bctr\n" \ + +#define MAPI_TMP_STUB_ASM_GCC +#include "mapi_tmp.h" + +#ifndef MAPI_MODE_BRIDGE + +#include <string.h> +#include "u_execmem.h" + +void +entry_patch_public(void) +{ +} + +extern char +ppc64le_entry_start[] HIDDEN; + +mapi_func +entry_get_public(int slot) +{ + return (mapi_func) (ppc64le_entry_start + slot * PPC64LE_ENTRY_SIZE); +} + +__asm__(".text\n"); + +__asm__("ppc64le_dispatch_tls:\n\t" + " addis 3, 2, " ENTRY_CURRENT_TABLE "@got@tprel@ha\n\t" + " ld 3, " ENTRY_CURRENT_TABLE "@got@tprel@l(3)\n\t" + " blr\n" + ); + +extern uint64_t ppc64le_dispatch_tls(); + +static const uint32_t code_templ[] = { + // This should be functionally the same code as would be generated from + // the STUB_ASM_CODE macro, but defined as a buffer. + // This is used to generate new dispatch stubs. 
Mesa will copy this + // data to the dispatch stub, and then it will patch the slot number and + // any addresses that it needs to. + // NOTE!!! NOTE!!! NOTE!!! + // This representation is correct for both little- and big-endian systems. + // However, more work needs to be done for big-endian Linux because it + // adheres to an older, AIX-compatible ABI that uses function descriptors. + // 1000: + 0x7C0802A6, // <ENTRY+00>: mflr 0 + 0xF8010010, // <ENTRY+04>: std 0, 16(1) + 0xE96C0028, // <ENTRY+08>: ld 11, 9000f-1000b+0(12) + 0x7D6B6A14, // <ENTRY+12>: add 11, 11, 13 + 0xE96B0000, // <ENTRY+16>: ld 11, 0(11) + 0xE80C0030, // <ENTRY+20>: ld 0, 9000f-1000b+8(12) + 0x7D8B002A, // <ENTRY+24>: ldx 12, 11, 0 + 0x7D8903A6, // <ENTRY+28>: mtctr 12 + 0x4E800420, // <ENTRY+32>: bctr + 0x60000000, // <ENTRY+36>: nop + // 9000: + 0, 0, // <ENTRY+40>: .quad _glapi_tls_Dispatch + 0, 0 // <ENTRY+48>: .quad <slot>*8 +}; +static const uint64_t TEMPLATE_OFFSET_TLS_ADDR = sizeof(code_templ) - 2*8; +static const uint64_t TEMPLATE_OFFSET_SLOT = sizeof(code_templ) - 1*8; + +void +entry_patch(mapi_func entry, int slot) +{ + char *code = (char *) entry; + *((uint64_t *) (code + TEMPLATE_OFFSET_TLS_ADDR)) = ppc64le_dispatch_tls(); + *((uint64_t *) (code + TEMPLATE_OFFSET_SLOT)) = slot * sizeof(mapi_func); +} + +mapi_func +entry_generate(int slot) +{ + char *code; + mapi_func entry; + + code = u_execmem_alloc(sizeof(code_templ)); + if (!code) + return NULL; + + memcpy(code, code_templ, sizeof(code_templ)); + + entry = (mapi_func) code; + entry_patch(entry, slot); + + return entry; +} + +#endif /* MAPI_MODE_BRIDGE */ diff --git a/src/mapi/entry_ppc64le_tsd.h b/src/mapi/entry_ppc64le_tsd.h new file mode 100644 index 0000000..a583b93 --- /dev/null +++ b/src/mapi/entry_ppc64le_tsd.h @@ -0,0 +1,210 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2017 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights 
to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Ben Crocker + */ + +#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY +#define HIDDEN __attribute__((visibility("hidden"))) +#else +#define HIDDEN +#endif + +// NOTE: These must be powers of two: +#define PPC64LE_ENTRY_SIZE 256 +#define PPC64LE_PAGE_ALIGN 65536 +#if ((PPC64LE_ENTRY_SIZE & (PPC64LE_ENTRY_SIZE - 1)) != 0) +#error PPC64LE_ENTRY_SIZE must be a power of two! +#endif +#if ((PPC64LE_PAGE_ALIGN & (PPC64LE_PAGE_ALIGN - 1)) != 0) +#error PPC64LE_PAGE_ALIGN must be a power of two! 
+#endif + +__asm__(".text\n" + ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n" + "ppc64le_entry_start:"); + +#define STUB_ASM_ENTRY(func) \ + ".globl " func "\n" \ + ".type " func ", @function\n" \ + ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n" \ + func ":\n\t" \ + " addis 2, 12, .TOC.-" func "@ha\n\t" \ + " addi 2, 2, .TOC.-" func "@l\n\t" \ + " .localentry " func ", .-" func "\n\t" + +#define STUB_ASM_CODE(slot) \ + " addis 11, 2, " ENTRY_CURRENT_TABLE "@got@ha\n\t" \ + " ld 11, " ENTRY_CURRENT_TABLE "@got@l(11)\n\t" \ + " ld 11, 0(11)\n\t" \ + " cmpldi 11, 0\n\t" \ + " beq 2000f\n" \ + "1050:\n\t" \ + " ld 12, " slot "*8(11)\n\t" \ + " mtctr 12\n\t" \ + " bctr\n" \ + "2000:\n\t" \ + " mflr 0\n\t" \ + " std 0, 16(1)\n\t" \ + " std 2, 40(1)\n\t" \ + " stdu 1, -144(1)\n\t" \ + " std 3, 56(1)\n\t" \ + " std 4, 64(1)\n\t" \ + " std 5, 72(1)\n\t" \ + " std 6, 80(1)\n\t" \ + " std 7, 88(1)\n\t" \ + " std 8, 96(1)\n\t" \ + " std 9, 104(1)\n\t" \ + " std 10, 112(1)\n\t" \ + " std 12, 128(1)\n\t" \ + " addis 12, 2, " ENTRY_CURRENT_TABLE_GET "@got@ha\n\t" \ + " ld 12, " ENTRY_CURRENT_TABLE_GET "@got@l(12)\n\t" \ + " mtctr 12\n\t" \ + " bctrl\n\t" \ + " ld 2, 144+40(1)\n\t" \ + " mr 11, 3\n\t" \ + " ld 3, 56(1)\n\t" \ + " ld 4, 64(1)\n\t" \ + " ld 5, 72(1)\n\t" \ + " ld 6, 80(1)\n\t" \ + " ld 7, 88(1)\n\t" \ + " ld 8, 96(1)\n\t" \ + " ld 9, 104(1)\n\t" \ + " ld 10, 112(1)\n\t" \ + " ld 12, 128(1)\n\t" \ + " addi 1, 1, 144\n\t" \ + " ld 0, 16(1)\n\t" \ + " mtlr 0\n\t" \ + " b 1050b\n" + +#define MAPI_TMP_STUB_ASM_GCC +#include "mapi_tmp.h" + +#ifndef MAPI_MODE_BRIDGE + +#include <string.h> +#include "u_execmem.h" + +void +entry_patch_public(void) +{ +} + +extern char +ppc64le_entry_start[] HIDDEN; + +mapi_func +entry_get_public(int slot) +{ + return (mapi_func) (ppc64le_entry_start + slot * PPC64LE_ENTRY_SIZE); +} + +static const uint32_t code_templ[] = { + // This should be functionally the same code as would be generated from + // the STUB_ASM_CODE macro, but defined as a buffer. 
+ // This is used to generate new dispatch stubs. Mesa will copy this + // data to the dispatch stub, and then it will patch the slot number and + // any addresses that it needs to. + // NOTE!!! NOTE!!! NOTE!!! + // This representation is correct for both little- and big-endian systems. + // However, more work needs to be done for big-endian Linux because it + // adheres to an older, AIX-compatible ABI that uses function descriptors. + // 1000: + 0x7C0802A6, // <ENTRY+000>: mflr 0 + 0xF8010010, // <ENTRY+004>: std 0, 16(1) + 0xE96C0098, // <ENTRY+008>: ld 11, 9000f-1000b+0(12) + 0xE96B0000, // <ENTRY+012>: ld 11, 0(11) + 0x282B0000, // <ENTRY+016>: cmpldi 11, 0 + 0x41820014, // <ENTRY+020>: beq 2000f + // 1050: + 0xE80C00A8, // <ENTRY+024>: ld 0, 9000f-1000b+16(12) + 0x7D8B002A, // <ENTRY+028>: ldx 12, 11, 0 + 0x7D8903A6, // <ENTRY+032>: mtctr 12 + 0x4E800420, // <ENTRY+036>: bctr + // 2000: + 0xF8410028, // <ENTRY+040>: std 2, 40(1) + 0xF821FF71, // <ENTRY+044>: stdu 1, -144(1) + 0xF8610038, // <ENTRY+048>: std 3, 56(1) + 0xF8810040, // <ENTRY+052>: std 4, 64(1) + 0xF8A10048, // <ENTRY+056>: std 5, 72(1) + 0xF8C10050, // <ENTRY+060>: std 6, 80(1) + 0xF8E10058, // <ENTRY+064>: std 7, 88(1) + 0xF9010060, // <ENTRY+068>: std 8, 96(1) + 0xF9210068, // <ENTRY+072>: std 9, 104(1) + 0xF9410070, // <ENTRY+076>: std 10, 112(1) + 0xF9810080, // <ENTRY+080>: std 12, 128(1) + 0xE98C00A0, // <ENTRY+084>: ld 12, 9000f-1000b+8(12) + 0x7D8903A6, // <ENTRY+088>: mtctr 12 + 0x4E800421, // <ENTRY+092>: bctrl + 0x7C6B1B78, // <ENTRY+096>: mr 11, 3 + 0xE8610038, // <ENTRY+100>: ld 3, 56(1) + 0xE8810040, // <ENTRY+104>: ld 4, 64(1) + 0xE8A10048, // <ENTRY+108>: ld 5, 72(1) + 0xE8C10050, // <ENTRY+112>: ld 6, 80(1) + 0xE8E10058, // <ENTRY+116>: ld 7, 88(1) + 0xE9010060, // <ENTRY+120>: ld 8, 96(1) + 0xE9210068, // <ENTRY+124>: ld 9, 104(1) + 0xE9410070, // <ENTRY+128>: ld 10, 112(1) + 0xE9810080, // <ENTRY+132>: ld 12, 128(1) + 0x38210090, // <ENTRY+136>: addi 1, 1, 144 + 0xE8010010, // <ENTRY+140>: ld 0, 16(1) + 0x7C0803A6, // <ENTRY+144>: mtlr 0 + 0x4BFFFF84, // <ENTRY+148>: b 1050b + // 9000: + 0, 0, // <ENTRY+152>: .quad ENTRY_CURRENT_TABLE + 0, 0, // <ENTRY+160>: .quad ENTRY_CURRENT_TABLE_GET + 0, 0 // <ENTRY+168>: .quad <slot>*8 +}; +static const uint64_t TEMPLATE_OFFSET_CURRENT_TABLE = sizeof(code_templ) - 3*8; +static const uint64_t TEMPLATE_OFFSET_CURRENT_TABLE_GET = sizeof(code_templ) - 2*8; +static const uint64_t 
TEMPLATE_OFFSET_SLOT = sizeof(code_templ) - 1*8; + +void +entry_patch(mapi_func entry, int slot) +{ + char *code = (char *) entry; + *((uint64_t *) (code + TEMPLATE_OFFSET_CURRENT_TABLE)) = (uint64_t) ENTRY_CURRENT_TABLE; + *((uint64_t *) (code + TEMPLATE_OFFSET_CURRENT_TABLE_GET)) = (uint64_t) ENTRY_CURRENT_TABLE_GET; + *((uint64_t *) (code + TEMPLATE_OFFSET_SLOT)) = slot * sizeof(mapi_func); +} + +mapi_func +entry_generate(int slot) +{ + char *code; + mapi_func entry; + + code = u_execmem_alloc(sizeof(code_templ)); + if (!code) + return NULL; + + memcpy(code, code_templ, sizeof(code_templ)); + + entry = (mapi_func) code; + entry_patch(entry, slot); + + return entry; +} + +#endif /* MAPI_MODE_BRIDGE */