/* bitmask of debug flags */
enum debug_t {
- PRINT_RAW = 0x1, /* dump raw hexdump */
- PRINT_VERBOSE = 0x2,
- PRINT_STATS = 0x4,
- EXPAND_REPEAT = 0x8,
+ PRINT_RAW = 0x1, /* dump raw hexdump; every flag is a distinct bit, so flags can be combined with | */
+ PRINT_VERBOSE = 0x2, /* presumably enables more detailed output - confirm in the disassembler */
+ PRINT_STATS = 0x4, /* presumably enables statistics output - confirm in the disassembler */
+ EXPAND_REPEAT = 0x8, /* presumably expands repeated instructions - confirm in the disassembler */
};
struct shader_stats {
- /* instructions counts rpnN, and instlen does not */
- int instructions, instlen;
- int nops;
- int ss, sy;
- int constlen;
- int halfreg;
- int fullreg;
- uint16_t sstall;
- uint16_t mov_count;
- uint16_t cov_count;
- uint16_t last_baryf;
- uint16_t instrs_per_cat[8];
+ /* `instructions` counts rpnN, whereas `instlen` does not.
+ * NOTE(review): rpnN presumably refers to instruction repeats - confirm
+ * against the disassembler that fills this struct.
+ */
+ int instructions, instlen;
+ int nops;
+ int ss, sy;
+ int constlen;
+ int halfreg;
+ int fullreg;
+ uint16_t sstall;
+ uint16_t mov_count;
+ uint16_t cov_count;
+ uint16_t last_baryf;
+ uint16_t instrs_per_cat[8]; /* presumably one counter per instruction category - confirm */
};
-int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type);
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+int disasm_a2xx(uint32_t *dwords, int sizedwords, int level,
+ gl_shader_stage type);
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,
+ unsigned gpu_id);
int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
- unsigned gpu_id, struct shader_stats *stats);
-int try_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+ unsigned gpu_id, struct shader_stats *stats); /* same parameters as disasm_a3xx plus a shader_stats pointer */
+int try_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,
+ unsigned gpu_id); /* same parameter list as disasm_a3xx */
void disasm_a2xx_set_debug(enum debug_t debug);
void disasm_a3xx_set_debug(enum debug_t debug);
- /*
- * Copyright © 2020 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
+/*
+ * Copyright © 2020 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
#include "freedreno_dev_info.h"
#include "util/macros.h"
static inline unsigned
max_bitfield_val(unsigned high, unsigned low, unsigned shift)
{
- return BITFIELD_MASK(high - low) << shift;
+ return BITFIELD_MASK(high - low) << shift; /* NOTE(review): the mask width passed is high - low, one less than the bit count of an inclusive [low, high] field - confirm which is intended */
}
void
freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
{
- if (gpu_id >= 600) {
- info->gmem_align_w = 16;
- info->gmem_align_h = 4;
- info->tile_align_w = gpu_id == 650 ? 96 : 32;
- info->tile_align_h = 32;
- /* based on GRAS_BIN_CONTROL: */
- info->tile_max_w = 1024; /* max_bitfield_val(5, 0, 5) */
- info->tile_max_h = max_bitfield_val(14, 8, 4);
- info->num_vsc_pipes = 32;
+ if (gpu_id >= 600) { /* gpu_id 600 and up: also fills the a6xx-specific fields below */
+ info->gmem_align_w = 16;
+ info->gmem_align_h = 4;
+ info->tile_align_w = gpu_id == 650 ? 96 : 32;
+ info->tile_align_h = 32;
+ /* based on GRAS_BIN_CONTROL: */
+ info->tile_max_w = 1024; /* max_bitfield_val(5, 0, 5) */
+ info->tile_max_h = max_bitfield_val(14, 8, 4);
+ info->num_vsc_pipes = 32;
- switch (gpu_id) {
- case 615:
- case 618:
- info->num_sp_cores = 1;
- info->fibers_per_sp = 128 * 16;
- info->a6xx.ccu_offset_gmem = 0x7c000;
- info->a6xx.ccu_offset_bypass = 0x10000;
- info->a6xx.ccu_cntl_gmem_unk2 = true;
- info->a6xx.supports_multiview_mask = false;
- info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
- info->a6xx.magic.PC_UNKNOWN_9805 = 0;
- info->a6xx.magic.SP_UNKNOWN_A0F8 = 0;
- break;
- case 630:
- info->num_sp_cores = 2;
- info->fibers_per_sp = 128 * 16;
- info->a6xx.ccu_offset_gmem = 0xf8000;
- info->a6xx.ccu_offset_bypass = 0x20000;
- info->a6xx.ccu_cntl_gmem_unk2 = true;
- info->a6xx.supports_multiview_mask = false;
- info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000;
- info->a6xx.magic.PC_UNKNOWN_9805 = 1;
- info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
- break;
- case 640:
- info->num_sp_cores = 2;
- /* The wavefront ID returned by the getwid instruction has a
- * maximum of 3 * 10 - 1, or so it seems. However the swizzled
- * index used in the mem offset calcuation is
- * "(wid / 3) | ((wid % 3) << 4)", so that the actual max is
- * around 3 * 16. Furthermore, with the per-fiber layout, the HW
- * swizzles the wavefront index and fiber index itself, and it
- * pads the number of wavefronts to 4 * 16 to make the swizzling
- * simpler, so we have to bump the number of wavefronts to 4 * 16
- * for the per-fiber layout. We could theoretically reduce it for
- * the per-wave layout though.
- */
- info->fibers_per_sp = 128 * 4 * 16;
- info->a6xx.ccu_offset_gmem = 0xf8000;
- info->a6xx.ccu_offset_bypass = 0x20000;
- info->a6xx.supports_multiview_mask = true;
- info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
- info->a6xx.magic.PC_UNKNOWN_9805 = 1;
- info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
- info->a6xx.has_z24uint_s8uint = true;
- break;
- case 650:
- info->num_sp_cores = 3;
- info->fibers_per_sp = 128 * 2 * 16;
- info->a6xx.ccu_offset_gmem = 0x114000;
- info->a6xx.ccu_offset_bypass = 0x30000;
- info->a6xx.supports_multiview_mask = true;
- info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000;
- info->a6xx.magic.PC_UNKNOWN_9805 = 2;
- info->a6xx.magic.SP_UNKNOWN_A0F8 = 2;
- info->a6xx.has_z24uint_s8uint = true;
- break;
- default:
- /* Drivers should be doing their own version filtering, so we
- * should never get here.
- */
- unreachable("missing a6xx config");
- }
- } else if (gpu_id >= 500) {
- info->gmem_align_w = info->tile_align_w = 64;
- info->gmem_align_h = info->tile_align_h = 32;
- /* based on VSC_BIN_SIZE: */
- info->tile_max_w = 1024; /* max_bitfield_val(7, 0, 5) */
- info->tile_max_h = max_bitfield_val(16, 9, 5);
- info->num_vsc_pipes = 16;
- } else if (gpu_id >= 400) {
- info->gmem_align_w = info->tile_align_w = 32;
- info->gmem_align_h = info->tile_align_h = 32;
- /* based on VSC_BIN_SIZE: */
- info->tile_max_w = 1024; /* max_bitfield_val(4, 0, 5) */
- info->tile_max_h = max_bitfield_val(9, 5, 5);
- info->num_vsc_pipes = 8;
- } else if (gpu_id >= 300) {
- info->gmem_align_w = info->tile_align_w = 32;
- info->gmem_align_h = info->tile_align_h = 32;
- /* based on VSC_BIN_SIZE: */
- info->tile_max_w = 992; /* max_bitfield_val(4, 0, 5) */
- info->tile_max_h = max_bitfield_val(9, 5, 5);
- info->num_vsc_pipes = 8;
- } else {
- info->gmem_align_w = info->tile_align_w = 32;
- info->gmem_align_h = info->tile_align_h = 32;
- info->tile_max_w = 512;
- info->tile_max_h = ~0; /* TODO */
- info->num_vsc_pipes = 8;
- }
+ switch (gpu_id) { /* per-model a6xx settings */
+ case 615:
+ case 618:
+ info->num_sp_cores = 1;
+ info->fibers_per_sp = 128 * 16;
+ info->a6xx.ccu_offset_gmem = 0x7c000;
+ info->a6xx.ccu_offset_bypass = 0x10000;
+ info->a6xx.ccu_cntl_gmem_unk2 = true;
+ info->a6xx.supports_multiview_mask = false;
+ info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
+ info->a6xx.magic.PC_UNKNOWN_9805 = 0;
+ info->a6xx.magic.SP_UNKNOWN_A0F8 = 0;
+ break;
+ case 630:
+ info->num_sp_cores = 2;
+ info->fibers_per_sp = 128 * 16;
+ info->a6xx.ccu_offset_gmem = 0xf8000;
+ info->a6xx.ccu_offset_bypass = 0x20000;
+ info->a6xx.ccu_cntl_gmem_unk2 = true;
+ info->a6xx.supports_multiview_mask = false;
+ info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000;
+ info->a6xx.magic.PC_UNKNOWN_9805 = 1;
+ info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
+ break;
+ case 640: /* NOTE(review): ccu_cntl_gmem_unk2 is not assigned for 640 or 650, so it keeps whatever value *info held on entry - presumably callers zero the struct first; confirm */
+ info->num_sp_cores = 2;
+ /* The wavefront ID returned by the getwid instruction has a
+ * maximum of 3 * 10 - 1, or so it seems. However the swizzled
+ * index used in the mem offset calculation is
+ * "(wid / 3) | ((wid % 3) << 4)", so that the actual max is
+ * around 3 * 16. Furthermore, with the per-fiber layout, the HW
+ * swizzles the wavefront index and fiber index itself, and it
+ * pads the number of wavefronts to 4 * 16 to make the swizzling
+ * simpler, so we have to bump the number of wavefronts to 4 * 16
+ * for the per-fiber layout. We could theoretically reduce it for
+ * the per-wave layout though.
+ */
+ info->fibers_per_sp = 128 * 4 * 16;
+ info->a6xx.ccu_offset_gmem = 0xf8000;
+ info->a6xx.ccu_offset_bypass = 0x20000;
+ info->a6xx.supports_multiview_mask = true;
+ info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
+ info->a6xx.magic.PC_UNKNOWN_9805 = 1;
+ info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
+ info->a6xx.has_z24uint_s8uint = true;
+ break;
+ case 650:
+ info->num_sp_cores = 3;
+ info->fibers_per_sp = 128 * 2 * 16;
+ info->a6xx.ccu_offset_gmem = 0x114000;
+ info->a6xx.ccu_offset_bypass = 0x30000;
+ info->a6xx.supports_multiview_mask = true;
+ info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000;
+ info->a6xx.magic.PC_UNKNOWN_9805 = 2;
+ info->a6xx.magic.SP_UNKNOWN_A0F8 = 2;
+ info->a6xx.has_z24uint_s8uint = true;
+ break;
+ default:
+ /* Drivers should be doing their own version filtering, so we
+ * should never get here.
+ */
+ unreachable("missing a6xx config");
+ }
+ } else if (gpu_id >= 500) { /* 500..599; num_sp_cores, fibers_per_sp and the a6xx fields are not assigned from here down */
+ info->gmem_align_w = info->tile_align_w = 64;
+ info->gmem_align_h = info->tile_align_h = 32;
+ /* based on VSC_BIN_SIZE: */
+ info->tile_max_w = 1024; /* max_bitfield_val(7, 0, 5) */
+ info->tile_max_h = max_bitfield_val(16, 9, 5);
+ info->num_vsc_pipes = 16;
+ } else if (gpu_id >= 400) { /* 400..499 */
+ info->gmem_align_w = info->tile_align_w = 32;
+ info->gmem_align_h = info->tile_align_h = 32;
+ /* based on VSC_BIN_SIZE: */
+ info->tile_max_w = 1024; /* max_bitfield_val(4, 0, 5) */
+ info->tile_max_h = max_bitfield_val(9, 5, 5);
+ info->num_vsc_pipes = 8;
+ } else if (gpu_id >= 300) { /* 300..399 */
+ info->gmem_align_w = info->tile_align_w = 32;
+ info->gmem_align_h = info->tile_align_h = 32;
+ /* based on VSC_BIN_SIZE: */
+ info->tile_max_w = 992; /* max_bitfield_val(4, 0, 5) */
+ info->tile_max_h = max_bitfield_val(9, 5, 5);
+ info->num_vsc_pipes = 8;
+ } else { /* any gpu_id below 300 */
+ info->gmem_align_w = info->tile_align_w = 32;
+ info->gmem_align_h = info->tile_align_h = 32;
+ info->tile_max_w = 512;
+ info->tile_max_h = ~0; /* TODO */
+ info->num_vsc_pipes = 8;
+ }
}
- /*
- * Copyright © 2020 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
+/*
+ * Copyright © 2020 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
#ifndef FREEDRENO_DEVICE_INFO_H
#define FREEDRENO_DEVICE_INFO_H
-#include <stdint.h>
#include <stdbool.h>
+#include <stdint.h>
#ifdef __cplusplus
extern "C" {
*/
struct freedreno_dev_info {
- /* alignment for size of tiles */
- uint32_t tile_align_w, tile_align_h;
- /* gmem load/store granularity */
- uint32_t gmem_align_w, gmem_align_h;
- /* max tile size */
- uint32_t tile_max_w, tile_max_h;
-
- uint32_t num_vsc_pipes;
+ /* alignment for size of tiles */
+ uint32_t tile_align_w, tile_align_h;
+ /* gmem load/store granularity */
+ uint32_t gmem_align_w, gmem_align_h;
+ /* max tile size */
+ uint32_t tile_max_w, tile_max_h;
- /* Information for private memory calculations */
- uint32_t num_sp_cores, fibers_per_sp;
+ uint32_t num_vsc_pipes;
- union {
- struct {
- /* Whether the PC_MULTIVIEW_MASK register exists. */
- bool supports_multiview_mask;
+ /* Information for private memory calculations */
+ uint32_t num_sp_cores, fibers_per_sp;
- /* info for setting RB_CCU_CNTL */
- uint32_t ccu_offset_gmem;
- uint32_t ccu_offset_bypass;
- bool ccu_cntl_gmem_unk2;
- bool has_z24uint_s8uint;
+ union {
+ struct {
+ /* Whether the PC_MULTIVIEW_MASK register exists. */
+ bool supports_multiview_mask;
- struct {
- uint32_t RB_UNKNOWN_8E04_blit;
- uint32_t PC_UNKNOWN_9805;
- uint32_t SP_UNKNOWN_A0F8;
- } magic;
- } a6xx;
- };
+ /* info for setting RB_CCU_CNTL */
+ uint32_t ccu_offset_gmem;
+ uint32_t ccu_offset_bypass;
+ bool ccu_cntl_gmem_unk2;
+ bool has_z24uint_s8uint;
+
+ struct {
+ uint32_t RB_UNKNOWN_8E04_blit;
+ uint32_t PC_UNKNOWN_9805;
+ uint32_t SP_UNKNOWN_A0F8;
+ } magic; /* values named after registers marked UNKNOWN - presumably written to them verbatim; confirm at the use sites */
+ } a6xx; /* assigned by freedreno_dev_info_init only when gpu_id >= 600 */
+ }; /* anonymous union; a6xx is currently its only member */
};
void freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id);
#endif
#endif /* FREEDRENO_DEVICE_INFO_H */
-
#ifndef __FREEDRENO_GUARDBAND_H__
#define __FREEDRENO_GUARDBAND_H__
+#include <assert.h>
#include <math.h>
#include <stdbool.h>
-#include <assert.h>
static inline unsigned
fd_calc_guardband(float offset, float scale, bool is_a3xx)
{
- /* On a3xx, the viewport max is 4k and the docs say the max guardband
- * width is 8k. That is, GRAS cannot handle triangle coordinates more than
- * 8k, positive or negative. On a4xx+ the viewport width was bumped to
- * 16k, and so the guardband width was necessarily also bumped. Note that
- * the numbers here should correspond to
- * VkPhysicalDeviceLimits::viewportBoundsRange in Vulkan.
- */
- const float gb_min = is_a3xx ? -8192. : -32768.;
- const float gb_max = is_a3xx ? 8191. : 32767.;
+ /* Computes the guardband adjustment for a viewport transform given by
+ * offset and scale, and returns it packed as described before the final
+ * return statement. Returns 0x1ff when the adjustment is infinite or its
+ * exponent is larger than 8.
+ *
+ * On a3xx, the viewport max is 4k and the docs say the max guardband
+ * width is 8k. That is, GRAS cannot handle triangle coordinates more than
+ * 8k, positive or negative. On a4xx+ the viewport width was bumped to
+ * 16k, and so the guardband width was necessarily also bumped. Note that
+ * the numbers here should correspond to
+ * VkPhysicalDeviceLimits::viewportBoundsRange in Vulkan.
+ */
+ const float gb_min = is_a3xx ? -8192. : -32768.;
+ const float gb_max = is_a3xx ? 8191. : 32767.;
- /* Clipping happens in normalized device coordinates, so we have to
- * transform gb_min and gb_max to ndc using the inverse of the viewport
- * transform. Avoid flipping min and max by using the absolute value of
- * the scale.
- */
- const float gb_min_ndc = (gb_min - offset) / fabsf(scale);
- const float gb_max_ndc = (gb_max - offset) / fabsf(scale);
+ /* Clipping happens in normalized device coordinates, so we have to
+ * transform gb_min and gb_max to ndc using the inverse of the viewport
+ * transform. Avoid flipping min and max by using the absolute value of
+ * the scale.
+ */
+ const float gb_min_ndc = (gb_min - offset) / fabsf(scale);
+ const float gb_max_ndc = (gb_max - offset) / fabsf(scale);
- /* There's only one GB_ADJ field, so presumably the guardband is
- * [-GB_ADJ, GB_ADJ] like on Radeon. It's always safe to make the
- * guardband smaller, so we have to take the min to get the largest range
- * contained in [gb_min_ndc, gb_max_ndc].
- */
- const float gb_adj = fminf(-gb_min_ndc, gb_max_ndc);
+ /* There's only one GB_ADJ field, so presumably the guardband is
+ * [-GB_ADJ, GB_ADJ] like on Radeon. It's always safe to make the
+ * guardband smaller, so we have to take the min to get the largest range
+ * contained in [gb_min_ndc, gb_max_ndc].
+ */
+ const float gb_adj = fminf(-gb_min_ndc, gb_max_ndc);
- /* The viewport should always be contained in the guardband. */
- assert(gb_adj >= 1.0);
+ /* The viewport should always be contained in the guardband. */
+ assert(gb_adj >= 1.0);
- /* frexp returns an unspecified value if given an infinite value, which
- * can happen if scale == 0.
- */
- if (isinf(gb_adj))
- return 0x1ff;
+ /* frexp returns an unspecified value if given an infinite value, which
+ * can happen if scale == 0.
+ */
+ if (isinf(gb_adj))
+ return 0x1ff;
- /* Convert gb_adj to 3.6 floating point, rounding down since it's always
- * safe to make the guard band smaller (but not the other way around!).
- *
- * Note: After converting back to a float, the value the blob returns here
- * is sometimes a little smaller than the value we return. This seems to
- * happen around the boundary between two different rounded values. For
- * example, using the a6xx blob:
- *
- * min | width | unrounded gb_adj | blob result | mesa result
- * ------------------------------------------------------------
- * 0 | 510 | 127.498 | 127. | 127.
- * 0 | 511 | 127.247 | 126. | 127.
- * 0 | 512 | 126.996 | 126. | 126.
- *
- * The guardband must be 32767 wide, since that's what the blob reports
- * for viewportBoundsRange, so I'm guessing that they're rounding slightly
- * more conservatively somehow.
- */
- int gb_adj_exp;
- float gb_adj_mantissa = frexpf(gb_adj, &gb_adj_exp);
- assert(gb_adj_exp > 0);
+ /* Convert gb_adj to 3.6 floating point, rounding down since it's always
+ * safe to make the guard band smaller (but not the other way around!).
+ *
+ * Note: After converting back to a float, the value the blob returns here
+ * is sometimes a little smaller than the value we return. This seems to
+ * happen around the boundary between two different rounded values. For
+ * example, using the a6xx blob:
+ *
+ * min | width | unrounded gb_adj | blob result | mesa result
+ * ------------------------------------------------------------
+ * 0 | 510 | 127.498 | 127. | 127.
+ * 0 | 511 | 127.247 | 126. | 127.
+ * 0 | 512 | 126.996 | 126. | 126.
+ *
+ * The guardband must be 32767 wide, since that's what the blob reports
+ * for viewportBoundsRange, so I'm guessing that they're rounding slightly
+ * more conservatively somehow.
+ */
+ int gb_adj_exp;
+ float gb_adj_mantissa = frexpf(gb_adj, &gb_adj_exp);
+ assert(gb_adj_exp > 0);
- /* Round non-representable numbers down to the largest possible number. */
- if (gb_adj_exp > 8)
- return 0x1ff;
+ /* Round non-representable numbers down to the largest possible number. */
+ if (gb_adj_exp > 8)
+ return 0x1ff;
- return ((gb_adj_exp - 1) << 6) |
- ((unsigned) truncf(gb_adj_mantissa * (1 << 7)) - (1 << 6));
+ return ((gb_adj_exp - 1) << 6) |
+ ((unsigned)truncf(gb_adj_mantissa * (1 << 7)) - (1 << 6)); /* exponent - 1 above bit 6; low 6 bits are the truncated mantissa scaled by 128, minus 64 */
}
#endif /* __FREEDRENO_GUARDBAND_H__ */
#include <stdio.h>
#include <string.h>
-#include "git_sha1.h"
#include "util/mesa-sha1.h"
+#include "git_sha1.h"
/* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */
#define UUID_SIZE 16
void
fd_get_driver_uuid(void *uuid)
{
- const char *driver_id = PACKAGE_VERSION MESA_GIT_SHA1;
+ const char *driver_id = PACKAGE_VERSION MESA_GIT_SHA1;
- /* The driver UUID is used for determining sharability of images and memory
- * between two Vulkan instances in separate processes, but also to
- * determining memory objects and sharability between Vulkan and OpenGL
- * driver. People who want to share memory need to also check the device
- * UUID.
- */
- struct mesa_sha1 sha1_ctx;
- _mesa_sha1_init(&sha1_ctx);
+ /* The driver UUID is used for determining sharability of images and memory
+ * between two Vulkan instances in separate processes, and also for
+ * determining sharability of memory objects between the Vulkan and OpenGL
+ * drivers. People who want to share memory need to also check the device
+ * UUID.
+ *
+ * It is derived here by hashing the driver_id string with SHA1 and
+ * copying the first UUID_SIZE bytes of the digest into uuid.
+ */
+ struct mesa_sha1 sha1_ctx;
+ _mesa_sha1_init(&sha1_ctx);
- _mesa_sha1_update(&sha1_ctx, driver_id, strlen(driver_id));
+ _mesa_sha1_update(&sha1_ctx, driver_id, strlen(driver_id));
- uint8_t sha1[SHA1_DIGEST_LENGTH];
- _mesa_sha1_final(&sha1_ctx, sha1);
+ uint8_t sha1[SHA1_DIGEST_LENGTH];
+ _mesa_sha1_final(&sha1_ctx, sha1);
- assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
- memcpy(uuid, sha1, UUID_SIZE);
+ assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
+ memcpy(uuid, sha1, UUID_SIZE); /* keep only the first UUID_SIZE bytes of the digest */
}
void
fd_get_device_uuid(void *uuid, unsigned gpu_id)
{
- struct mesa_sha1 sha1_ctx;
- _mesa_sha1_init(&sha1_ctx);
+ struct mesa_sha1 sha1_ctx;
+ _mesa_sha1_init(&sha1_ctx);
- /* The device UUID uniquely identifies the given device within the machine.
- * Since we never have more than one device, this doesn't need to be a real
- * UUID, so we use SHA1("freedreno" + gpu_id).
- *
- * @TODO: Using the GPU id could be too restrictive on the off-chance that
- * someone would like to use this UUID to cache pre-tiled images or something
- * of the like, and use them across devices. In the future, we could allow
- * that by:
- * * Being a bit loose about GPU id and hash only the generation's
- * 'major' number (e.g, '6' instead of '630').
- *
- * * Include HW specific constants that are relevant for layout resolving,
- * like minimum width to enable UBWC, tile_align_w, etc.
- *
- * This would allow cached device memory to be safely used from HW in
- * (slightly) different revisions of the same generation.
- */
+ /* The device UUID uniquely identifies the given device within the machine.
+ * Since we never have more than one device, this doesn't need to be a real
+ * UUID, so we use SHA1("freedreno" + gpu_id).
+ *
+ * @TODO: Using the GPU id could be too restrictive on the off-chance that
+ * someone would like to use this UUID to cache pre-tiled images or something
+ * of the like, and use them across devices. In the future, we could allow
+ * that by:
+ * * Being a bit loose about GPU id and hash only the generation's
+ * 'major' number (e.g, '6' instead of '630').
+ *
+ * * Include HW specific constants that are relevant for layout resolving,
+ * like minimum width to enable UBWC, tile_align_w, etc.
+ *
+ * This would allow cached device memory to be safely used from HW in
+ * (slightly) different revisions of the same generation.
+ */
- static const char *device_name = "freedreno";
- _mesa_sha1_update(&sha1_ctx, device_name, strlen(device_name));
+ static const char *device_name = "freedreno";
+ _mesa_sha1_update(&sha1_ctx, device_name, strlen(device_name));
- _mesa_sha1_update(&sha1_ctx, &gpu_id, sizeof(gpu_id));
+ _mesa_sha1_update(&sha1_ctx, &gpu_id, sizeof(gpu_id)); /* hashes the in-memory bytes of gpu_id, not a decimal string */
- uint8_t sha1[SHA1_DIGEST_LENGTH];
- _mesa_sha1_final(&sha1_ctx, sha1);
+ uint8_t sha1[SHA1_DIGEST_LENGTH];
+ _mesa_sha1_final(&sha1_ctx, sha1);
- assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
- memcpy(uuid, sha1, UUID_SIZE);
+ assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
+ memcpy(uuid, sha1, UUID_SIZE); /* keep only the first UUID_SIZE bytes of the digest */
}